MediaWiki master
DiffHistoryBlob.php
Go to the documentation of this file.
1<?php
30class DiffHistoryBlob implements HistoryBlob {
32 public $mItems = [];
33
35 public $mSize = 0;
36
45 public $mDiffs;
46
48 public $mDiffMap;
49
53
56
58 public $mFrozen = false;
59
64 public $mMaxSize = 10_000_000;
65
67 public $mMaxCount = 100;
68
70 private const XDL_BDOP_INS = 1;
71 private const XDL_BDOP_CPY = 2;
72 private const XDL_BDOP_INSB = 3;
73
74 public function __construct() {
75 if ( !function_exists( 'gzdeflate' ) ) {
76 throw new RuntimeException( "Need zlib support to read or write DiffHistoryBlob\n" );
77 }
78 }
79
84 public function addItem( $text ) {
85 if ( $this->mFrozen ) {
86 throw new BadMethodCallException( __METHOD__ . ": Cannot add more items after sleep/wakeup" );
87 }
88
89 $this->mItems[] = $text;
90 $this->mSize += strlen( $text );
91 $this->mDiffs = null; // later
92 return (string)( count( $this->mItems ) - 1 );
93 }
94
99 public function getItem( $key ) {
100 return $this->mItems[(int)$key];
101 }
102
106 public function setText( $text ) {
107 $this->mDefaultKey = $this->addItem( $text );
108 }
109
113 public function getText() {
114 return $this->getItem( $this->mDefaultKey );
115 }
116
117 private function compress() {
118 if ( !function_exists( 'xdiff_string_rabdiff' ) ) {
119 throw new RuntimeException( "Need xdiff support to write DiffHistoryBlob\n" );
120 }
121 if ( isset( $this->mDiffs ) ) {
122 // Already compressed
123 return;
124 }
125 if ( $this->mItems === [] ) {
126 return;
127 }
128
129 // Create two diff sequences: one for main text and one for small text
130 $sequences = [
131 'small' => [
132 'tail' => '',
133 'diffs' => [],
134 'map' => [],
135 ],
136 'main' => [
137 'tail' => '',
138 'diffs' => [],
139 'map' => [],
140 ],
141 ];
142 $smallFactor = 0.5;
143
144 $mItemsCount = count( $this->mItems );
145 for ( $i = 0; $i < $mItemsCount; $i++ ) {
146 $text = $this->mItems[$i];
147 if ( $i == 0 ) {
148 $seqName = 'main';
149 } else {
150 $mainTail = $sequences['main']['tail'];
151 if ( strlen( $text ) < strlen( $mainTail ) * $smallFactor ) {
152 $seqName = 'small';
153 } else {
154 $seqName = 'main';
155 }
156 }
157
158 $tail = $sequences[$seqName]['tail'];
159 $diff = $this->diff( $tail, $text );
160 $sequences[$seqName]['diffs'][] = $diff;
161 $sequences[$seqName]['map'][] = $i;
162 $sequences[$seqName]['tail'] = $text;
163 }
164
165 // Knit the sequences together
166 $tail = '';
167 $this->mDiffs = [];
168 $this->mDiffMap = [];
169 foreach ( $sequences as $seq ) {
170 if ( $seq['diffs'] === [] ) {
171 continue;
172 }
173 if ( $tail === '' ) {
174 $this->mDiffs[] = $seq['diffs'][0];
175 } else {
176 $head = $this->patch( '', $seq['diffs'][0] );
177 $this->mDiffs[] = $this->diff( $tail, $head );
178 }
179 $this->mDiffMap[] = $seq['map'][0];
180 $diffsCount = count( $seq['diffs'] );
181 for ( $i = 1; $i < $diffsCount; $i++ ) {
182 $this->mDiffs[] = $seq['diffs'][$i];
183 $this->mDiffMap[] = $seq['map'][$i];
184 }
185 $tail = $seq['tail'];
186 }
187 }
188
194 private function diff( $t1, $t2 ) {
195 return xdiff_string_rabdiff( $t1, $t2 );
196 }
197
203 private function patch( $base, $diff ) {
204 if ( function_exists( 'xdiff_string_bpatch' ) ) {
205 return xdiff_string_bpatch( $base, $diff );
206 }
207
208 # Pure PHP implementation
209
210 $header = unpack( 'Vofp/Vcsize', substr( $diff, 0, 8 ) );
211
212 # Check the checksum
213 $ofp = $this->xdiffAdler32( $base );
214 if ( $ofp !== substr( $diff, 0, 4 ) ) {
215 wfDebug( __METHOD__ . ": incorrect base checksum" );
216 return false;
217 }
218 if ( $header['csize'] != strlen( $base ) ) {
219 wfDebug( __METHOD__ . ": incorrect base length" );
220 return false;
221 }
222
223 $p = 8;
224 $out = '';
225 while ( $p < strlen( $diff ) ) {
226 $x = unpack( 'Cop', substr( $diff, $p, 1 ) );
227 $op = $x['op'];
228 ++$p;
229 switch ( $op ) {
230 case self::XDL_BDOP_INS:
231 $x = unpack( 'Csize', substr( $diff, $p, 1 ) );
232 $p++;
233 $out .= substr( $diff, $p, $x['size'] );
234 $p += $x['size'];
235 break;
236 case self::XDL_BDOP_INSB:
237 $x = unpack( 'Vcsize', substr( $diff, $p, 4 ) );
238 $p += 4;
239 $out .= substr( $diff, $p, $x['csize'] );
240 $p += $x['csize'];
241 break;
242 case self::XDL_BDOP_CPY:
243 $x = unpack( 'Voff/Vcsize', substr( $diff, $p, 8 ) );
244 $p += 8;
245 $out .= substr( $base, $x['off'], $x['csize'] );
246 break;
247 default:
248 wfDebug( __METHOD__ . ": invalid op" );
249 return false;
250 }
251 }
252 return $out;
253 }
254
262 public function xdiffAdler32( $s ) {
263 static $init = null;
264 $init ??= str_repeat( "\xf0", 205 ) . "\xee" . str_repeat( "\xf0", 67 ) . "\x02";
265
266 // The real Adler-32 checksum of $init is zero, so it initialises the
267 // state to zero, as it is at the start of LibXDiff's checksum
268 // algorithm. Appending the subject string then simulates LibXDiff.
269 return strrev( hash( 'adler32', $init . $s, true ) );
270 }
271
272 private function uncompress() {
273 if ( !$this->mDiffs ) {
274 return;
275 }
276 $tail = '';
277 $mDiffsCount = count( $this->mDiffs );
278 for ( $diffKey = 0; $diffKey < $mDiffsCount; $diffKey++ ) {
279 $textKey = $this->mDiffMap[$diffKey];
280 $text = $this->patch( $tail, $this->mDiffs[$diffKey] );
281 $this->mItems[$textKey] = $text;
282 $tail = $text;
283 }
284 }
285
289 public function __sleep() {
290 $this->compress();
291 if ( $this->mItems === [] ) {
292 $info = false;
293 } else {
294 // Take forward differences to improve the compression ratio for sequences
295 $map = '';
296 $prev = 0;
297 foreach ( $this->mDiffMap as $i ) {
298 if ( $map !== '' ) {
299 $map .= ',';
300 }
301 $map .= $i - $prev;
302 $prev = $i;
303 }
304 $info = [
305 'diffs' => $this->mDiffs,
306 'map' => $map
307 ];
308 }
309 if ( isset( $this->mDefaultKey ) ) {
310 $info['default'] = $this->mDefaultKey;
311 }
312 $this->mCompressed = gzdeflate( serialize( $info ) );
313 return [ 'mCompressed' ];
314 }
315
316 public function __wakeup() {
317 // addItem() doesn't work if mItems is partially filled from mDiffs
318 $this->mFrozen = true;
319 $info = HistoryBlobUtils::unserializeArray( gzinflate( $this->mCompressed ) );
320 $this->mCompressed = null;
321
322 if ( !$info ) {
323 // Empty object
324 return;
325 }
326
327 if ( isset( $info['default'] ) ) {
328 $this->mDefaultKey = $info['default'];
329 }
330 $this->mDiffs = $info['diffs'];
331 if ( isset( $info['base'] ) ) {
332 // Old format
333 $this->mDiffMap = range( 0, count( $this->mDiffs ) - 1 );
334 array_unshift( $this->mDiffs,
335 pack( 'VVCV', 0, 0, self::XDL_BDOP_INSB, strlen( $info['base'] ) ) .
336 $info['base'] );
337 } else {
338 // New format
339 $map = explode( ',', $info['map'] );
340 $cur = 0;
341 $this->mDiffMap = [];
342 foreach ( $map as $i ) {
343 $cur += $i;
344 $this->mDiffMap[] = $cur;
345 }
346 }
347 $this->uncompress();
348 }
349
356 public function isHappy() {
357 return $this->mSize < $this->mMaxSize
358 && count( $this->mItems ) < $this->mMaxCount;
359 }
360
361}
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Diff-based history compression Requires xdiff and zlib.
string[] $mItems
Uncompressed item cache.
string null $mCompressed
Compressed storage.
array $mDiffMap
The diff map, see above.
bool $mFrozen
True if the object is locked against further writes.
int $mSize
Total uncompressed size.
xdiffAdler32( $s)
Compute a binary "Adler-32" checksum as defined by LibXDiff, i.e.
int $mMaxSize
The maximum uncompressed size before the object becomes sad Should be less than max_allowed_packet.
int $mMaxCount
The maximum number of text items before the object becomes sad.
isHappy()
Helper function for compression jobs Returns true until the object is "full" and ready to be committe...
array null $mDiffs
Array of diffs.
string $mDefaultKey
The key for getText()
Base class for general text storage via the "object" flag in old_flags, or two-part external storage ...
$header