MediaWiki 1.39.10
DiffHistoryBlob.php
Go to the documentation of this file.
1<?php
27class DiffHistoryBlob implements HistoryBlob {
29 public $mItems = [];
30
32 public $mSize = 0;
33
42 public $mDiffs;
43
45 public $mDiffMap;
46
50
53
55 public $mFrozen = false;
56
61 public $mMaxSize = 10000000;
62
64 public $mMaxCount = 100;
65
67 private const XDL_BDOP_INS = 1;
68 private const XDL_BDOP_CPY = 2;
69 private const XDL_BDOP_INSB = 3;
70
71 public function __construct() {
72 if ( !function_exists( 'gzdeflate' ) ) {
73 throw new MWException( "Need zlib support to read or write DiffHistoryBlob\n" );
74 }
75 }
76
82 public function addItem( $text ) {
83 if ( $this->mFrozen ) {
84 throw new MWException( __METHOD__ . ": Cannot add more items after sleep/wakeup" );
85 }
86
87 $this->mItems[] = $text;
88 $this->mSize += strlen( $text );
89 $this->mDiffs = null; // later
90 return (string)( count( $this->mItems ) - 1 );
91 }
92
97 public function getItem( $key ) {
98 return $this->mItems[(int)$key];
99 }
100
104 public function setText( $text ) {
105 $this->mDefaultKey = $this->addItem( $text );
106 }
107
111 public function getText() {
112 return $this->getItem( $this->mDefaultKey );
113 }
114
118 private function compress() {
119 if ( !function_exists( 'xdiff_string_rabdiff' ) ) {
120 throw new MWException( "Need xdiff support to write DiffHistoryBlob\n" );
121 }
122 if ( isset( $this->mDiffs ) ) {
123 // Already compressed
124 return;
125 }
126 if ( $this->mItems === [] ) {
127 return;
128 }
129
130 // Create two diff sequences: one for main text and one for small text
131 $sequences = [
132 'small' => [
133 'tail' => '',
134 'diffs' => [],
135 'map' => [],
136 ],
137 'main' => [
138 'tail' => '',
139 'diffs' => [],
140 'map' => [],
141 ],
142 ];
143 $smallFactor = 0.5;
144
145 $mItemsCount = count( $this->mItems );
146 for ( $i = 0; $i < $mItemsCount; $i++ ) {
147 $text = $this->mItems[$i];
148 if ( $i == 0 ) {
149 $seqName = 'main';
150 } else {
151 $mainTail = $sequences['main']['tail'];
152 if ( strlen( $text ) < strlen( $mainTail ) * $smallFactor ) {
153 $seqName = 'small';
154 } else {
155 $seqName = 'main';
156 }
157 }
158
159 $tail = $sequences[$seqName]['tail'];
160 $diff = $this->diff( $tail, $text );
161 $sequences[$seqName]['diffs'][] = $diff;
162 $sequences[$seqName]['map'][] = $i;
163 $sequences[$seqName]['tail'] = $text;
164 }
165
166 // Knit the sequences together
167 $tail = '';
168 $this->mDiffs = [];
169 $this->mDiffMap = [];
170 foreach ( $sequences as $seq ) {
171 if ( $seq['diffs'] === [] ) {
172 continue;
173 }
174 if ( $tail === '' ) {
175 $this->mDiffs[] = $seq['diffs'][0];
176 } else {
177 $head = $this->patch( '', $seq['diffs'][0] );
178 $this->mDiffs[] = $this->diff( $tail, $head );
179 }
180 $this->mDiffMap[] = $seq['map'][0];
181 $diffsCount = count( $seq['diffs'] );
182 for ( $i = 1; $i < $diffsCount; $i++ ) {
183 $this->mDiffs[] = $seq['diffs'][$i];
184 $this->mDiffMap[] = $seq['map'][$i];
185 }
186 $tail = $seq['tail'];
187 }
188 }
189
195 private function diff( $t1, $t2 ) {
196 return xdiff_string_rabdiff( $t1, $t2 );
197 }
198
204 private function patch( $base, $diff ) {
205 if ( function_exists( 'xdiff_string_bpatch' ) ) {
206 return xdiff_string_bpatch( $base, $diff );
207 }
208
209 # Pure PHP implementation
210
211 $header = unpack( 'Vofp/Vcsize', substr( $diff, 0, 8 ) );
212
213 # Check the checksum
214 $ofp = $this->xdiffAdler32( $base );
215 if ( $ofp !== substr( $diff, 0, 4 ) ) {
216 wfDebug( __METHOD__ . ": incorrect base checksum" );
217 return false;
218 }
219 if ( $header['csize'] != strlen( $base ) ) {
220 wfDebug( __METHOD__ . ": incorrect base length" );
221 return false;
222 }
223
224 $p = 8;
225 $out = '';
226 while ( $p < strlen( $diff ) ) {
227 $x = unpack( 'Cop', substr( $diff, $p, 1 ) );
228 $op = $x['op'];
229 ++$p;
230 switch ( $op ) {
231 case self::XDL_BDOP_INS:
232 $x = unpack( 'Csize', substr( $diff, $p, 1 ) );
233 $p++;
234 $out .= substr( $diff, $p, $x['size'] );
235 $p += $x['size'];
236 break;
237 case self::XDL_BDOP_INSB:
238 $x = unpack( 'Vcsize', substr( $diff, $p, 4 ) );
239 $p += 4;
240 $out .= substr( $diff, $p, $x['csize'] );
241 $p += $x['csize'];
242 break;
243 case self::XDL_BDOP_CPY:
244 $x = unpack( 'Voff/Vcsize', substr( $diff, $p, 8 ) );
245 $p += 8;
246 $out .= substr( $base, $x['off'], $x['csize'] );
247 break;
248 default:
249 wfDebug( __METHOD__ . ": invalid op" );
250 return false;
251 }
252 }
253 return $out;
254 }
255
263 public function xdiffAdler32( $s ) {
264 static $init;
265 if ( $init === null ) {
266 $init = str_repeat( "\xf0", 205 ) . "\xee" . str_repeat( "\xf0", 67 ) . "\x02";
267 }
268
269 // The real Adler-32 checksum of $init is zero, so it initialises the
270 // state to zero, as it is at the start of LibXDiff's checksum
271 // algorithm. Appending the subject string then simulates LibXDiff.
272 return strrev( hash( 'adler32', $init . $s, true ) );
273 }
274
275 private function uncompress() {
276 if ( !$this->mDiffs ) {
277 return;
278 }
279 $tail = '';
280 $mDiffsCount = count( $this->mDiffs );
281 for ( $diffKey = 0; $diffKey < $mDiffsCount; $diffKey++ ) {
282 $textKey = $this->mDiffMap[$diffKey];
283 $text = $this->patch( $tail, $this->mDiffs[$diffKey] );
284 $this->mItems[$textKey] = $text;
285 $tail = $text;
286 }
287 }
288
292 public function __sleep() {
293 $this->compress();
294 if ( $this->mItems === [] ) {
295 $info = false;
296 } else {
297 // Take forward differences to improve the compression ratio for sequences
298 $map = '';
299 $prev = 0;
300 foreach ( $this->mDiffMap as $i ) {
301 if ( $map !== '' ) {
302 $map .= ',';
303 }
304 $map .= $i - $prev;
305 $prev = $i;
306 }
307 $info = [
308 'diffs' => $this->mDiffs,
309 'map' => $map
310 ];
311 }
312 if ( isset( $this->mDefaultKey ) ) {
313 $info['default'] = $this->mDefaultKey;
314 }
315 $this->mCompressed = gzdeflate( serialize( $info ) );
316 return [ 'mCompressed' ];
317 }
318
319 public function __wakeup() {
320 // addItem() doesn't work if mItems is partially filled from mDiffs
321 $this->mFrozen = true;
322 $info = unserialize( gzinflate( $this->mCompressed ) );
323 $this->mCompressed = null;
324
325 if ( !$info ) {
326 // Empty object
327 return;
328 }
329
330 if ( isset( $info['default'] ) ) {
331 $this->mDefaultKey = $info['default'];
332 }
333 $this->mDiffs = $info['diffs'];
334 if ( isset( $info['base'] ) ) {
335 // Old format
336 $this->mDiffMap = range( 0, count( $this->mDiffs ) - 1 );
337 array_unshift( $this->mDiffs,
338 pack( 'VVCV', 0, 0, self::XDL_BDOP_INSB, strlen( $info['base'] ) ) .
339 $info['base'] );
340 } else {
341 // New format
342 $map = explode( ',', $info['map'] );
343 $cur = 0;
344 $this->mDiffMap = [];
345 foreach ( $map as $i ) {
346 $cur += $i;
347 $this->mDiffMap[] = $cur;
348 }
349 }
350 $this->uncompress();
351 }
352
359 public function isHappy() {
360 return $this->mSize < $this->mMaxSize
361 && count( $this->mItems ) < $this->mMaxCount;
362 }
363
364}
serialize()
unserialize( $serialized)
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Diff-based history compression Requires xdiff and zlib.
string[] $mItems
Uncompressed item cache.
string null $mCompressed
Compressed storage.
array $mDiffMap
The diff map, see above.
bool $mFrozen
True if the object is locked against further writes.
int $mSize
Total uncompressed size.
xdiffAdler32( $s)
Compute a binary "Adler-32" checksum as defined by LibXDiff, i.e.
int $mMaxSize
The maximum uncompressed size before the object becomes sad Should be less than max_allowed_packet.
int $mMaxCount
The maximum number of text items before the object becomes sad.
isHappy()
Helper function for compression jobs Returns true until the object is "full" and ready to be committe...
array null $mDiffs
Array of diffs.
string $mDefaultKey
The key for getText()
MediaWiki exception.
Base class for general text storage via the "object" flag in old_flags, or two-part external storage ...
foreach( $mmfl['setupFiles'] as $fileName) if($queue) if(empty( $mmfl['quiet'])) $s
$header