MediaWiki 1.41.2
DiffHistoryBlob.php
Go to the documentation of this file.
1<?php
30class DiffHistoryBlob implements HistoryBlob {
32 public $mItems = [];
33
35 public $mSize = 0;
36
45 public $mDiffs;
46
48 public $mDiffMap;
49
53
56
58 public $mFrozen = false;
59
64 public $mMaxSize = 10000000;
65
67 public $mMaxCount = 100;
68
70 private const XDL_BDOP_INS = 1;
71 private const XDL_BDOP_CPY = 2;
72 private const XDL_BDOP_INSB = 3;
73
74 public function __construct() {
75 if ( !function_exists( 'gzdeflate' ) ) {
76 throw new MWException( "Need zlib support to read or write DiffHistoryBlob\n" );
77 }
78 }
79
85 public function addItem( $text ) {
86 if ( $this->mFrozen ) {
87 throw new MWException( __METHOD__ . ": Cannot add more items after sleep/wakeup" );
88 }
89
90 $this->mItems[] = $text;
91 $this->mSize += strlen( $text );
92 $this->mDiffs = null; // later
93 return (string)( count( $this->mItems ) - 1 );
94 }
95
100 public function getItem( $key ) {
101 return $this->mItems[(int)$key];
102 }
103
107 public function setText( $text ) {
108 $this->mDefaultKey = $this->addItem( $text );
109 }
110
114 public function getText() {
115 return $this->getItem( $this->mDefaultKey );
116 }
117
121 private function compress() {
122 if ( !function_exists( 'xdiff_string_rabdiff' ) ) {
123 throw new MWException( "Need xdiff support to write DiffHistoryBlob\n" );
124 }
125 if ( isset( $this->mDiffs ) ) {
126 // Already compressed
127 return;
128 }
129 if ( $this->mItems === [] ) {
130 return;
131 }
132
133 // Create two diff sequences: one for main text and one for small text
134 $sequences = [
135 'small' => [
136 'tail' => '',
137 'diffs' => [],
138 'map' => [],
139 ],
140 'main' => [
141 'tail' => '',
142 'diffs' => [],
143 'map' => [],
144 ],
145 ];
146 $smallFactor = 0.5;
147
148 $mItemsCount = count( $this->mItems );
149 for ( $i = 0; $i < $mItemsCount; $i++ ) {
150 $text = $this->mItems[$i];
151 if ( $i == 0 ) {
152 $seqName = 'main';
153 } else {
154 $mainTail = $sequences['main']['tail'];
155 if ( strlen( $text ) < strlen( $mainTail ) * $smallFactor ) {
156 $seqName = 'small';
157 } else {
158 $seqName = 'main';
159 }
160 }
161
162 $tail = $sequences[$seqName]['tail'];
163 $diff = $this->diff( $tail, $text );
164 $sequences[$seqName]['diffs'][] = $diff;
165 $sequences[$seqName]['map'][] = $i;
166 $sequences[$seqName]['tail'] = $text;
167 }
168
169 // Knit the sequences together
170 $tail = '';
171 $this->mDiffs = [];
172 $this->mDiffMap = [];
173 foreach ( $sequences as $seq ) {
174 if ( $seq['diffs'] === [] ) {
175 continue;
176 }
177 if ( $tail === '' ) {
178 $this->mDiffs[] = $seq['diffs'][0];
179 } else {
180 $head = $this->patch( '', $seq['diffs'][0] );
181 $this->mDiffs[] = $this->diff( $tail, $head );
182 }
183 $this->mDiffMap[] = $seq['map'][0];
184 $diffsCount = count( $seq['diffs'] );
185 for ( $i = 1; $i < $diffsCount; $i++ ) {
186 $this->mDiffs[] = $seq['diffs'][$i];
187 $this->mDiffMap[] = $seq['map'][$i];
188 }
189 $tail = $seq['tail'];
190 }
191 }
192
198 private function diff( $t1, $t2 ) {
199 return xdiff_string_rabdiff( $t1, $t2 );
200 }
201
207 private function patch( $base, $diff ) {
208 if ( function_exists( 'xdiff_string_bpatch' ) ) {
209 return xdiff_string_bpatch( $base, $diff );
210 }
211
212 # Pure PHP implementation
213
214 $header = unpack( 'Vofp/Vcsize', substr( $diff, 0, 8 ) );
215
216 # Check the checksum
217 $ofp = $this->xdiffAdler32( $base );
218 if ( $ofp !== substr( $diff, 0, 4 ) ) {
219 wfDebug( __METHOD__ . ": incorrect base checksum" );
220 return false;
221 }
222 if ( $header['csize'] != strlen( $base ) ) {
223 wfDebug( __METHOD__ . ": incorrect base length" );
224 return false;
225 }
226
227 $p = 8;
228 $out = '';
229 while ( $p < strlen( $diff ) ) {
230 $x = unpack( 'Cop', substr( $diff, $p, 1 ) );
231 $op = $x['op'];
232 ++$p;
233 switch ( $op ) {
234 case self::XDL_BDOP_INS:
235 $x = unpack( 'Csize', substr( $diff, $p, 1 ) );
236 $p++;
237 $out .= substr( $diff, $p, $x['size'] );
238 $p += $x['size'];
239 break;
240 case self::XDL_BDOP_INSB:
241 $x = unpack( 'Vcsize', substr( $diff, $p, 4 ) );
242 $p += 4;
243 $out .= substr( $diff, $p, $x['csize'] );
244 $p += $x['csize'];
245 break;
246 case self::XDL_BDOP_CPY:
247 $x = unpack( 'Voff/Vcsize', substr( $diff, $p, 8 ) );
248 $p += 8;
249 $out .= substr( $base, $x['off'], $x['csize'] );
250 break;
251 default:
252 wfDebug( __METHOD__ . ": invalid op" );
253 return false;
254 }
255 }
256 return $out;
257 }
258
266 public function xdiffAdler32( $s ) {
267 static $init;
268 if ( $init === null ) {
269 $init = str_repeat( "\xf0", 205 ) . "\xee" . str_repeat( "\xf0", 67 ) . "\x02";
270 }
271
272 // The real Adler-32 checksum of $init is zero, so it initialises the
273 // state to zero, as it is at the start of LibXDiff's checksum
274 // algorithm. Appending the subject string then simulates LibXDiff.
275 return strrev( hash( 'adler32', $init . $s, true ) );
276 }
277
278 private function uncompress() {
279 if ( !$this->mDiffs ) {
280 return;
281 }
282 $tail = '';
283 $mDiffsCount = count( $this->mDiffs );
284 for ( $diffKey = 0; $diffKey < $mDiffsCount; $diffKey++ ) {
285 $textKey = $this->mDiffMap[$diffKey];
286 $text = $this->patch( $tail, $this->mDiffs[$diffKey] );
287 $this->mItems[$textKey] = $text;
288 $tail = $text;
289 }
290 }
291
295 public function __sleep() {
296 $this->compress();
297 if ( $this->mItems === [] ) {
298 $info = false;
299 } else {
300 // Take forward differences to improve the compression ratio for sequences
301 $map = '';
302 $prev = 0;
303 foreach ( $this->mDiffMap as $i ) {
304 if ( $map !== '' ) {
305 $map .= ',';
306 }
307 $map .= $i - $prev;
308 $prev = $i;
309 }
310 $info = [
311 'diffs' => $this->mDiffs,
312 'map' => $map
313 ];
314 }
315 if ( isset( $this->mDefaultKey ) ) {
316 $info['default'] = $this->mDefaultKey;
317 }
318 $this->mCompressed = gzdeflate( serialize( $info ) );
319 return [ 'mCompressed' ];
320 }
321
322 public function __wakeup() {
323 // addItem() doesn't work if mItems is partially filled from mDiffs
324 $this->mFrozen = true;
325 $info = HistoryBlobUtils::unserializeArray( gzinflate( $this->mCompressed ) );
326 $this->mCompressed = null;
327
328 if ( !$info ) {
329 // Empty object
330 return;
331 }
332
333 if ( isset( $info['default'] ) ) {
334 $this->mDefaultKey = $info['default'];
335 }
336 $this->mDiffs = $info['diffs'];
337 if ( isset( $info['base'] ) ) {
338 // Old format
339 $this->mDiffMap = range( 0, count( $this->mDiffs ) - 1 );
340 array_unshift( $this->mDiffs,
341 pack( 'VVCV', 0, 0, self::XDL_BDOP_INSB, strlen( $info['base'] ) ) .
342 $info['base'] );
343 } else {
344 // New format
345 $map = explode( ',', $info['map'] );
346 $cur = 0;
347 $this->mDiffMap = [];
348 foreach ( $map as $i ) {
349 $cur += $i;
350 $this->mDiffMap[] = $cur;
351 }
352 }
353 $this->uncompress();
354 }
355
362 public function isHappy() {
363 return $this->mSize < $this->mMaxSize
364 && count( $this->mItems ) < $this->mMaxCount;
365 }
366
367}
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Diff-based history compression Requires xdiff and zlib.
string[] $mItems
Uncompressed item cache.
string null $mCompressed
Compressed storage.
array $mDiffMap
The diff map, see above.
bool $mFrozen
True if the object is locked against further writes.
int $mSize
Total uncompressed size.
xdiffAdler32( $s)
Compute a binary "Adler-32" checksum as defined by LibXDiff, i.e.
int $mMaxSize
The maximum uncompressed size before the object becomes sad Should be less than max_allowed_packet.
int $mMaxCount
The maximum number of text items before the object becomes sad.
isHappy()
Helper function for compression jobs Returns true until the object is "full" and ready to be committe...
array null $mDiffs
Array of diffs.
string $mDefaultKey
The key for getText()
MediaWiki exception.
Base class for general text storage via the "object" flag in old_flags, or two-part external storage ...
$header