MediaWiki REL1_34
DiffHistoryBlob.php
Go to the documentation of this file.
1<?php
27class DiffHistoryBlob implements HistoryBlob {
29 public $mItems = [];
30
32 public $mSize = 0;
33
42 public $mDiffs;
43
45 public $mDiffMap;
46
50
53
55 public $mFrozen = false;
56
61 public $mMaxSize = 10000000;
62
64 public $mMaxCount = 100;
65
67 const XDL_BDOP_INS = 1;
68 const XDL_BDOP_CPY = 2;
69 const XDL_BDOP_INSB = 3;
70
71 function __construct() {
72 if ( !function_exists( 'gzdeflate' ) ) {
73 throw new MWException( "Need zlib support to read or write DiffHistoryBlob\n" );
74 }
75 }
76
82 function addItem( $text ) {
83 if ( $this->mFrozen ) {
84 throw new MWException( __METHOD__ . ": Cannot add more items after sleep/wakeup" );
85 }
86
87 $this->mItems[] = $text;
88 $this->mSize += strlen( $text );
89 $this->mDiffs = null; // later
90 return count( $this->mItems ) - 1;
91 }
92
97 function getItem( $key ) {
98 return $this->mItems[$key];
99 }
100
104 function setText( $text ) {
105 $this->mDefaultKey = $this->addItem( $text );
106 }
107
111 function getText() {
112 return $this->getItem( $this->mDefaultKey );
113 }
114
118 function compress() {
119 if ( !function_exists( 'xdiff_string_rabdiff' ) ) {
120 throw new MWException( "Need xdiff 1.5+ support to write DiffHistoryBlob\n" );
121 }
122 if ( isset( $this->mDiffs ) ) {
123 // Already compressed
124 return;
125 }
126 if ( $this->mItems === [] ) {
127 return;
128 }
129
130 // Create two diff sequences: one for main text and one for small text
131 $sequences = [
132 'small' => [
133 'tail' => '',
134 'diffs' => [],
135 'map' => [],
136 ],
137 'main' => [
138 'tail' => '',
139 'diffs' => [],
140 'map' => [],
141 ],
142 ];
143 $smallFactor = 0.5;
144
145 $mItemsCount = count( $this->mItems );
146 for ( $i = 0; $i < $mItemsCount; $i++ ) {
147 $text = $this->mItems[$i];
148 if ( $i == 0 ) {
149 $seqName = 'main';
150 } else {
151 $mainTail = $sequences['main']['tail'];
152 if ( strlen( $text ) < strlen( $mainTail ) * $smallFactor ) {
153 $seqName = 'small';
154 } else {
155 $seqName = 'main';
156 }
157 }
158
159 $tail = $sequences[$seqName]['tail'];
160 $diff = $this->diff( $tail, $text );
161 $sequences[$seqName]['diffs'][] = $diff;
162 $sequences[$seqName]['map'][] = $i;
163 $sequences[$seqName]['tail'] = $text;
164 }
165
166 // Knit the sequences together
167 $tail = '';
168 $this->mDiffs = [];
169 $this->mDiffMap = [];
170 foreach ( $sequences as $seq ) {
171 if ( $seq['diffs'] === [] ) {
172 continue;
173 }
174 if ( $tail === '' ) {
175 $this->mDiffs[] = $seq['diffs'][0];
176 } else {
177 $head = $this->patch( '', $seq['diffs'][0] );
178 $this->mDiffs[] = $this->diff( $tail, $head );
179 }
180 $this->mDiffMap[] = $seq['map'][0];
181 $diffsCount = count( $seq['diffs'] );
182 for ( $i = 1; $i < $diffsCount; $i++ ) {
183 $this->mDiffs[] = $seq['diffs'][$i];
184 $this->mDiffMap[] = $seq['map'][$i];
185 }
186 $tail = $seq['tail'];
187 }
188 }
189
195 function diff( $t1, $t2 ) {
196 # Need to do a null concatenation with warnings off, due to bugs in the current version of xdiff
197 # "String is not zero-terminated"
198 Wikimedia\suppressWarnings();
199 $diff = xdiff_string_rabdiff( $t1, $t2 ) . '';
200 Wikimedia\restoreWarnings();
201 return $diff;
202 }
203
209 function patch( $base, $diff ) {
210 if ( function_exists( 'xdiff_string_bpatch' ) ) {
211 Wikimedia\suppressWarnings();
212 $text = xdiff_string_bpatch( $base, $diff ) . '';
213 Wikimedia\restoreWarnings();
214 return $text;
215 }
216
217 # Pure PHP implementation
218
219 $header = unpack( 'Vofp/Vcsize', substr( $diff, 0, 8 ) );
220
221 # Check the checksum if hash extension is available
222 $ofp = $this->xdiffAdler32( $base );
223 if ( $ofp !== false && $ofp !== substr( $diff, 0, 4 ) ) {
224 wfDebug( __METHOD__ . ": incorrect base checksum\n" );
225 return false;
226 }
227 if ( $header['csize'] != strlen( $base ) ) {
228 wfDebug( __METHOD__ . ": incorrect base length\n" );
229 return false;
230 }
231
232 $p = 8;
233 $out = '';
234 while ( $p < strlen( $diff ) ) {
235 $x = unpack( 'Cop', substr( $diff, $p, 1 ) );
236 $op = $x['op'];
237 ++$p;
238 switch ( $op ) {
240 $x = unpack( 'Csize', substr( $diff, $p, 1 ) );
241 $p++;
242 $out .= substr( $diff, $p, $x['size'] );
243 $p += $x['size'];
244 break;
246 $x = unpack( 'Vcsize', substr( $diff, $p, 4 ) );
247 $p += 4;
248 $out .= substr( $diff, $p, $x['csize'] );
249 $p += $x['csize'];
250 break;
252 $x = unpack( 'Voff/Vcsize', substr( $diff, $p, 8 ) );
253 $p += 8;
254 $out .= substr( $base, $x['off'], $x['csize'] );
255 break;
256 default:
257 wfDebug( __METHOD__ . ": invalid op\n" );
258 return false;
259 }
260 }
261 return $out;
262 }
263
271 function xdiffAdler32( $s ) {
272 if ( !function_exists( 'hash' ) ) {
273 return false;
274 }
275
276 static $init;
277 if ( $init === null ) {
278 $init = str_repeat( "\xf0", 205 ) . "\xee" . str_repeat( "\xf0", 67 ) . "\x02";
279 }
280
281 // The real Adler-32 checksum of $init is zero, so it initialises the
282 // state to zero, as it is at the start of LibXDiff's checksum
283 // algorithm. Appending the subject string then simulates LibXDiff.
284 return strrev( hash( 'adler32', $init . $s, true ) );
285 }
286
287 function uncompress() {
288 if ( !$this->mDiffs ) {
289 return;
290 }
291 $tail = '';
292 $mDiffsCount = count( $this->mDiffs );
293 for ( $diffKey = 0; $diffKey < $mDiffsCount; $diffKey++ ) {
294 $textKey = $this->mDiffMap[$diffKey];
295 $text = $this->patch( $tail, $this->mDiffs[$diffKey] );
296 $this->mItems[$textKey] = $text;
297 $tail = $text;
298 }
299 }
300
304 function __sleep() {
305 $this->compress();
306 if ( $this->mItems === [] ) {
307 $info = false;
308 } else {
309 // Take forward differences to improve the compression ratio for sequences
310 $map = '';
311 $prev = 0;
312 foreach ( $this->mDiffMap as $i ) {
313 if ( $map !== '' ) {
314 $map .= ',';
315 }
316 $map .= $i - $prev;
317 $prev = $i;
318 }
319 $info = [
320 'diffs' => $this->mDiffs,
321 'map' => $map
322 ];
323 }
324 if ( isset( $this->mDefaultKey ) ) {
325 $info['default'] = $this->mDefaultKey;
326 }
327 $this->mCompressed = gzdeflate( serialize( $info ) );
328 return [ 'mCompressed' ];
329 }
330
331 function __wakeup() {
332 // addItem() doesn't work if mItems is partially filled from mDiffs
333 $this->mFrozen = true;
334 $info = unserialize( gzinflate( $this->mCompressed ) );
335 $this->mCompressed = null;
336
337 if ( !$info ) {
338 // Empty object
339 return;
340 }
341
342 if ( isset( $info['default'] ) ) {
343 $this->mDefaultKey = $info['default'];
344 }
345 $this->mDiffs = $info['diffs'];
346 if ( isset( $info['base'] ) ) {
347 // Old format
348 $this->mDiffMap = range( 0, count( $this->mDiffs ) - 1 );
349 array_unshift( $this->mDiffs,
350 pack( 'VVCV', 0, 0, self::XDL_BDOP_INSB, strlen( $info['base'] ) ) .
351 $info['base'] );
352 } else {
353 // New format
354 $map = explode( ',', $info['map'] );
355 $cur = 0;
356 $this->mDiffMap = [];
357 foreach ( $map as $i ) {
358 $cur += $i;
359 $this->mDiffMap[] = $cur;
360 }
361 }
362 $this->uncompress();
363 }
364
371 function isHappy() {
372 return $this->mSize < $this->mMaxSize
373 && count( $this->mItems ) < $this->mMaxCount;
374 }
375
376}
serialize()
unserialize( $serialized)
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Diff-based history compression Requires xdiff 1.5+ and zlib.
patch( $base, $diff)
string $mCompressed
Compressed storage.
array $mDiffMap
The diff map, see above.
bool $mFrozen
True if the object is locked against further writes.
int $mSize
Total uncompressed size.
array $mItems
Uncompressed item cache.
xdiffAdler32( $s)
Compute a binary "Adler-32" checksum as defined by LibXDiff, i.e.
int $mMaxSize
The maximum uncompressed size before the object becomes sad Should be less than max_allowed_packet.
int $mMaxCount
The maximum number of text items before the object becomes sad.
isHappy()
Helper function for compression jobs Returns true until the object is "full" and ready to be committe...
int $mDefaultKey
The key for getText()
array $mDiffs
Array of diffs.
const XDL_BDOP_INS
Constants from xdiff.h.
MediaWiki exception.
Base class for general text storage via the "object" flag in old_flags, or two-part external storage ...
$header