MediaWiki master
DiffHistoryBlob.php
Go to the documentation of this file.
1<?php
30class DiffHistoryBlob implements HistoryBlob {
32 public $mItems = [];
33
35 public $mSize = 0;
36
45 public $mDiffs;
46
48 public $mDiffMap;
49
53
56
58 public $mFrozen = false;
59
64 public $mMaxSize = 10_000_000;
65
67 public $mMaxCount = 100;
68
70 private const XDL_BDOP_INS = 1;
71 private const XDL_BDOP_CPY = 2;
72 private const XDL_BDOP_INSB = 3;
73
74 public function __construct() {
75 if ( !function_exists( 'gzdeflate' ) ) {
76 throw new RuntimeException( "Need zlib support to read or write DiffHistoryBlob\n" );
77 }
78 }
79
84 public function addItem( $text ) {
85 if ( $this->mFrozen ) {
86 throw new BadMethodCallException( __METHOD__ . ": Cannot add more items after sleep/wakeup" );
87 }
88
89 $this->mItems[] = $text;
90 $this->mSize += strlen( $text );
91 $this->mDiffs = null; // later
92 return (string)( count( $this->mItems ) - 1 );
93 }
94
99 public function getItem( $key ) {
100 return $this->mItems[(int)$key];
101 }
102
106 public function setText( $text ) {
107 $this->mDefaultKey = $this->addItem( $text );
108 }
109
113 public function getText() {
114 return $this->getItem( $this->mDefaultKey );
115 }
116
117 private function compress() {
118 if ( !function_exists( 'xdiff_string_rabdiff' ) ) {
119 throw new RuntimeException( "Need xdiff support to write DiffHistoryBlob\n" );
120 }
121 if ( isset( $this->mDiffs ) ) {
122 // Already compressed
123 return;
124 }
125 if ( $this->mItems === [] ) {
126 return;
127 }
128
129 // Create two diff sequences: one for main text and one for small text
130 $sequences = [
131 'small' => [
132 'tail' => '',
133 'diffs' => [],
134 'map' => [],
135 ],
136 'main' => [
137 'tail' => '',
138 'diffs' => [],
139 'map' => [],
140 ],
141 ];
142 $smallFactor = 0.5;
143
144 $mItemsCount = count( $this->mItems );
145 for ( $i = 0; $i < $mItemsCount; $i++ ) {
146 $text = $this->mItems[$i];
147 if ( $i == 0 ) {
148 $seqName = 'main';
149 } else {
150 $mainTail = $sequences['main']['tail'];
151 if ( strlen( $text ) < strlen( $mainTail ) * $smallFactor ) {
152 $seqName = 'small';
153 } else {
154 $seqName = 'main';
155 }
156 }
157
158 $tail = $sequences[$seqName]['tail'];
159 $diff = $this->diff( $tail, $text );
160 $sequences[$seqName]['diffs'][] = $diff;
161 $sequences[$seqName]['map'][] = $i;
162 $sequences[$seqName]['tail'] = $text;
163 }
164
165 // Knit the sequences together
166 $tail = '';
167 $this->mDiffs = [];
168 $this->mDiffMap = [];
169 foreach ( $sequences as $seq ) {
170 if ( $seq['diffs'] === [] ) {
171 continue;
172 }
173 if ( $tail === '' ) {
174 $this->mDiffs[] = $seq['diffs'][0];
175 } else {
176 $head = $this->patch( '', $seq['diffs'][0] );
177 $this->mDiffs[] = $this->diff( $tail, $head );
178 }
179 $this->mDiffMap[] = $seq['map'][0];
180 $diffsCount = count( $seq['diffs'] );
181 for ( $i = 1; $i < $diffsCount; $i++ ) {
182 $this->mDiffs[] = $seq['diffs'][$i];
183 $this->mDiffMap[] = $seq['map'][$i];
184 }
185 $tail = $seq['tail'];
186 }
187 }
188
194 private function diff( $t1, $t2 ) {
195 return xdiff_string_rabdiff( $t1, $t2 );
196 }
197
203 private function patch( $base, $diff ) {
204 if ( function_exists( 'xdiff_string_bpatch' ) ) {
205 return xdiff_string_bpatch( $base, $diff );
206 }
207
208 # Pure PHP implementation
209
210 $header = unpack( 'Vofp/Vcsize', substr( $diff, 0, 8 ) );
211
212 # Check the checksum
213 $ofp = $this->xdiffAdler32( $base );
214 if ( $ofp !== substr( $diff, 0, 4 ) ) {
215 wfDebug( __METHOD__ . ": incorrect base checksum" );
216 return false;
217 }
218 if ( $header['csize'] != strlen( $base ) ) {
219 wfDebug( __METHOD__ . ": incorrect base length" );
220 return false;
221 }
222
223 $p = 8;
224 $out = '';
225 while ( $p < strlen( $diff ) ) {
226 $x = unpack( 'Cop', substr( $diff, $p, 1 ) );
227 $op = $x['op'];
228 ++$p;
229 switch ( $op ) {
230 case self::XDL_BDOP_INS:
231 $x = unpack( 'Csize', substr( $diff, $p, 1 ) );
232 $p++;
233 $out .= substr( $diff, $p, $x['size'] );
234 $p += $x['size'];
235 break;
236 case self::XDL_BDOP_INSB:
237 $x = unpack( 'Vcsize', substr( $diff, $p, 4 ) );
238 $p += 4;
239 $out .= substr( $diff, $p, $x['csize'] );
240 $p += $x['csize'];
241 break;
242 case self::XDL_BDOP_CPY:
243 $x = unpack( 'Voff/Vcsize', substr( $diff, $p, 8 ) );
244 $p += 8;
245 $out .= substr( $base, $x['off'], $x['csize'] );
246 break;
247 default:
248 wfDebug( __METHOD__ . ": invalid op" );
249 return false;
250 }
251 }
252 return $out;
253 }
254
262 public function xdiffAdler32( $s ) {
263 static $init;
264 if ( $init === null ) {
265 $init = str_repeat( "\xf0", 205 ) . "\xee" . str_repeat( "\xf0", 67 ) . "\x02";
266 }
267
268 // The real Adler-32 checksum of $init is zero, so it initialises the
269 // state to zero, as it is at the start of LibXDiff's checksum
270 // algorithm. Appending the subject string then simulates LibXDiff.
271 return strrev( hash( 'adler32', $init . $s, true ) );
272 }
273
274 private function uncompress() {
275 if ( !$this->mDiffs ) {
276 return;
277 }
278 $tail = '';
279 $mDiffsCount = count( $this->mDiffs );
280 for ( $diffKey = 0; $diffKey < $mDiffsCount; $diffKey++ ) {
281 $textKey = $this->mDiffMap[$diffKey];
282 $text = $this->patch( $tail, $this->mDiffs[$diffKey] );
283 $this->mItems[$textKey] = $text;
284 $tail = $text;
285 }
286 }
287
291 public function __sleep() {
292 $this->compress();
293 if ( $this->mItems === [] ) {
294 $info = false;
295 } else {
296 // Take forward differences to improve the compression ratio for sequences
297 $map = '';
298 $prev = 0;
299 foreach ( $this->mDiffMap as $i ) {
300 if ( $map !== '' ) {
301 $map .= ',';
302 }
303 $map .= $i - $prev;
304 $prev = $i;
305 }
306 $info = [
307 'diffs' => $this->mDiffs,
308 'map' => $map
309 ];
310 }
311 if ( isset( $this->mDefaultKey ) ) {
312 $info['default'] = $this->mDefaultKey;
313 }
314 $this->mCompressed = gzdeflate( serialize( $info ) );
315 return [ 'mCompressed' ];
316 }
317
318 public function __wakeup() {
319 // addItem() doesn't work if mItems is partially filled from mDiffs
320 $this->mFrozen = true;
321 $info = HistoryBlobUtils::unserializeArray( gzinflate( $this->mCompressed ) );
322 $this->mCompressed = null;
323
324 if ( !$info ) {
325 // Empty object
326 return;
327 }
328
329 if ( isset( $info['default'] ) ) {
330 $this->mDefaultKey = $info['default'];
331 }
332 $this->mDiffs = $info['diffs'];
333 if ( isset( $info['base'] ) ) {
334 // Old format
335 $this->mDiffMap = range( 0, count( $this->mDiffs ) - 1 );
336 array_unshift( $this->mDiffs,
337 pack( 'VVCV', 0, 0, self::XDL_BDOP_INSB, strlen( $info['base'] ) ) .
338 $info['base'] );
339 } else {
340 // New format
341 $map = explode( ',', $info['map'] );
342 $cur = 0;
343 $this->mDiffMap = [];
344 foreach ( $map as $i ) {
345 $cur += $i;
346 $this->mDiffMap[] = $cur;
347 }
348 }
349 $this->uncompress();
350 }
351
358 public function isHappy() {
359 return $this->mSize < $this->mMaxSize
360 && count( $this->mItems ) < $this->mMaxCount;
361 }
362
363}
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Diff-based history compression Requires xdiff and zlib.
string[] $mItems
Uncompressed item cache.
string null $mCompressed
Compressed storage.
array $mDiffMap
The diff map, see above.
bool $mFrozen
True if the object is locked against further writes.
int $mSize
Total uncompressed size.
xdiffAdler32( $s)
Compute a binary "Adler-32" checksum as defined by LibXDiff, i.e.
int $mMaxSize
The maximum uncompressed size before the object becomes sad Should be less than max_allowed_packet.
int $mMaxCount
The maximum number of text items before the object becomes sad.
isHappy()
Helper function for compression jobs Returns true until the object is "full" and ready to be committe...
array null $mDiffs
Array of diffs.
string $mDefaultKey
The key for getText()
Base class for general text storage via the "object" flag in old_flags, or two-part external storage ...
$header