MediaWiki REL1_31
HistoryBlob.php
Go to the documentation of this file.
1<?php
/**
 * Contract for objects that hold one or more text items and designate one
 * of them as the "default text".
 */
interface HistoryBlob {
	/**
	 * Add an item of text to the blob.
	 *
	 * @param string $text
	 * @return mixed Key under which the text can later be fetched with getItem().
	 *   NOTE(review): the implementations in this file return an md5 hash or a
	 *   list index; the exact type is implementation-defined.
	 */
	public function addItem( $text );

	/**
	 * Get an item by its key.
	 *
	 * @param mixed $key A key previously returned by addItem()
	 * @return string|bool The stored text; implementations are expected to
	 *   signal a missing key with false (TODO confirm for every implementation)
	 */
	public function getItem( $key );

	/**
	 * Store a text and make it the default text returned by getText().
	 *
	 * @param string $text
	 */
	public function setText( $text );

	/**
	 * Get the default text.
	 *
	 * @return string|bool
	 */
	public function getText();
}
68
/**
 * Concatenated gzip (CGZ) storage.
 *
 * Several texts are kept together in one array keyed by their md5 hash; the
 * serialized array is gzdeflate()d as a whole when the object is compressed.
 */
class ConcatenatedGzipHistoryBlob implements HistoryBlob {
	public $mVersion = 0, $mCompressed = false, $mItems = [], $mDefaultHash = '';

	/** @var int Sum of strlen() of all items added to this object */
	public $mSize = 0;

	/** @var int Size threshold used by isHappy() */
	public $mMaxSize = 10000000;

	/** @var int Item-count threshold used by isHappy() */
	public $mMaxCount = 100;

	/**
	 * @throws MWException If the zlib functions are unavailable
	 */
	public function __construct() {
		if ( !function_exists( 'gzdeflate' ) ) {
			throw new MWException( "Need zlib support to read or write this "
				. "kind of history object (ConcatenatedGzipHistoryBlob)\n" );
		}
	}

	/**
	 * Add a text to the blob, unless an identical text is already stored.
	 *
	 * @param string $text
	 * @return string md5 hash of $text, which is also its key
	 */
	public function addItem( $text ) {
		$this->uncompress();
		$hash = md5( $text );
		if ( !isset( $this->mItems[$hash] ) ) {
			$this->mItems[$hash] = $text;
			$this->mSize += strlen( $text );
		}
		return $hash;
	}

	/**
	 * Get the text stored under the given hash.
	 *
	 * @param string $hash
	 * @return string|bool The text, or false if the hash is not present
	 */
	public function getItem( $hash ) {
		$this->uncompress();
		if ( array_key_exists( $hash, $this->mItems ) ) {
			return $this->mItems[$hash];
		} else {
			return false;
		}
	}

	/**
	 * Add a text and remember its hash as the default item.
	 *
	 * @param string $text
	 */
	public function setText( $text ) {
		$this->uncompress();
		$this->mDefaultHash = $this->addItem( $text );
	}

	/**
	 * Get the default text.
	 *
	 * @return string|bool The text, or false if no default item is stored
	 */
	public function getText() {
		$this->uncompress();
		return $this->getItem( $this->mDefaultHash );
	}

	/**
	 * Remove an item.
	 *
	 * @param string $hash
	 */
	public function removeItem( $hash ) {
		// mItems is a deflated string while compressed, so it has to be
		// expanded before it can be indexed by hash (as addItem/getItem do).
		$this->uncompress();
		if ( !isset( $this->mItems[$hash] ) ) {
			// Nothing stored under this hash; leave the size counter alone.
			return;
		}
		$this->mSize -= strlen( $this->mItems[$hash] );
		unset( $this->mItems[$hash] );
	}

	/**
	 * Compress the bulk data in the object: mItems is replaced by the
	 * deflated serialization of the item array.
	 */
	public function compress() {
		if ( !$this->mCompressed ) {
			$this->mItems = gzdeflate( serialize( $this->mItems ) );
			$this->mCompressed = true;
		}
	}

	/**
	 * Uncompress bulk data: the inverse of compress().
	 */
	public function uncompress() {
		if ( $this->mCompressed ) {
			$this->mItems = unserialize( gzinflate( $this->mItems ) );
			$this->mCompressed = false;
		}
	}

	/**
	 * Compress before serialization. Note that mSize and the limits are not
	 * part of the serialized state.
	 *
	 * @return array Names of the properties to serialize
	 */
	public function __sleep() {
		$this->compress();
		return [ 'mVersion', 'mCompressed', 'mItems', 'mDefaultHash' ];
	}

	public function __wakeup() {
		$this->uncompress();
	}

	/**
	 * Helper function for compression jobs.
	 *
	 * @return bool True while both the accumulated size and the item count
	 *   are below their maximums
	 */
	public function isHappy() {
		return $this->mSize < $this->mMaxSize
			&& count( $this->mItems ) < $this->mMaxCount;
	}
}
183
/**
 * Pointer object for an item within a blob object stored in the text table.
 */
class HistoryBlobStub {
	/**
	 * @var array One-step cache holding the most recently loaded base blob,
	 *   keyed by old_id, so consecutive lookups in the same blob skip the DB.
	 */
	protected static $blobCache = [];

	/** @var int old_id of the text row holding the base blob */
	public $mOldId;

	/** @var string Key of this item inside the base blob */
	public $mHash;

	/** @var int Location of the referring object, see setReferrer() */
	public $mRef;

	/**
	 * @param string $hash The content hash of the text
	 * @param int $oldid Accepted for interface compatibility but not used
	 *   here; the location is set through setLocation()
	 */
	public function __construct( $hash = '', $oldid = 0 ) {
		$this->mHash = $hash;
	}

	/**
	 * Set the location (old_id) of the main object to which this object points.
	 *
	 * @param int $id
	 */
	public function setLocation( $id ) {
		$this->mOldId = $id;
	}

	/**
	 * Set the location (old_id) of the referring object.
	 *
	 * @param int $id
	 */
	public function setReferrer( $id ) {
		$this->mRef = $id;
	}

	/**
	 * Get the location of the referring object.
	 *
	 * @return int
	 */
	public function getReferrer() {
		return $this->mRef;
	}

	/**
	 * Load the base blob (from the one-step cache or the text table) and
	 * return the item this stub points to.
	 *
	 * @return string|bool The text, or false if the row is missing, is not an
	 *   'object' row, has an unusable external URL, or cannot be unserialized
	 *   into an object
	 */
	public function getText() {
		if ( isset( self::$blobCache[$this->mOldId] ) ) {
			$obj = self::$blobCache[$this->mOldId];
		} else {
			$dbr = wfGetDB( DB_REPLICA );
			$row = $dbr->selectRow(
				'text',
				[ 'old_flags', 'old_text' ],
				[ 'old_id' => $this->mOldId ]
			);

			if ( !$row ) {
				return false;
			}

			$flags = explode( ',', $row->old_flags );
			if ( in_array( 'external', $flags ) ) {
				// old_text holds a URL; the real content lives in external storage.
				$url = $row->old_text;
				$parts = explode( '://', $url, 2 );
				if ( !isset( $parts[1] ) || $parts[1] == '' ) {
					return false;
				}
				$row->old_text = ExternalStore::fetchFromURL( $url );
			}

			if ( !in_array( 'object', $flags ) ) {
				return false;
			}

			if ( in_array( 'gzip', $flags ) ) {
				// This shouldn't happen, but a bug in the compress script
				// may at times gzip-compress a HistoryBlob object row.
				$obj = unserialize( gzinflate( $row->old_text ) );
			} else {
				$obj = unserialize( $row->old_text );
			}

			if ( !is_object( $obj ) ) {
				// Correct for old double-serialization bug.
				$obj = unserialize( $obj );
			}

			if ( !is_object( $obj ) ) {
				// Still not an object: the row is unusable. Fail like the other
				// error paths instead of calling a method on a non-object.
				return false;
			}

			// Save this item for reference; if pulling many
			// items in a row we'll likely use it again.
			$obj->uncompress();
			self::$blobCache = [ $this->mOldId => $obj ];
		}

		return $obj->getItem( $this->mHash );
	}

	/**
	 * Get the content hash.
	 *
	 * @return string
	 */
	public function getHash() {
		return $this->mHash;
	}
}
302
/**
 * Stub that points at a row of the 'cur' table instead of holding text itself.
 */
class HistoryBlobCurStub {
	/** @var int cur_id of the row this stub points to */
	public $mCurId;

	/**
	 * @param int $curid The cur_id pointed to
	 */
	public function __construct( $curid = 0 ) {
		$this->mCurId = $curid;
	}

	/**
	 * Set the location (cur_id) of the main object to which this object points.
	 *
	 * @param int $id
	 */
	public function setLocation( $id ) {
		$this->mCurId = $id;
	}

	/**
	 * Fetch cur_text of the referenced row.
	 *
	 * @return string|bool The text, or false if no row matches mCurId
	 */
	public function getText() {
		$dbr = wfGetDB( DB_REPLICA );
		$row = $dbr->selectRow( 'cur', [ 'cur_text' ], [ 'cur_id' => $this->mCurId ] );
		if ( !$row ) {
			return false;
		}
		return $row->cur_text;
	}
}
344
/**
 * Diff-based history compression.
 * Requires zlib, and xdiff 1.5+ for writing; reading falls back to a pure
 * PHP patch implementation when xdiff is not installed.
 */
class DiffHistoryBlob implements HistoryBlob {
	/** @var array Uncompressed item cache */
	public $mItems = [];

	/** @var int Total uncompressed size */
	public $mSize = 0;

	/**
	 * @var array|null Array of binary diffs. uncompress() applies the first
	 *   one to the empty string and every following one to the text produced
	 *   by its predecessor.
	 */
	public $mDiffs;

	/**
	 * @var array The diff map: for each entry of mDiffs, the mItems key of
	 *   the text that diff produces.
	 */
	public $mDiffMap;

	/** @var int The key for getText() */
	public $mDefaultKey;

	/** @var string Compressed storage */
	public $mCompressed;

	/** @var bool True if the object is locked against further writes */
	public $mFrozen = false;

	/**
	 * @var int The maximum uncompressed size before the object becomes sad.
	 *   Should be less than max_allowed_packet.
	 */
	public $mMaxSize = 10000000;

	/** @var int The maximum number of text items before the object becomes sad */
	public $mMaxCount = 100;

	/** Constants from xdiff.h: opcodes of the binary patch format */
	const XDL_BDOP_INS = 1;
	const XDL_BDOP_CPY = 2;
	const XDL_BDOP_INSB = 3;

	/**
	 * @throws MWException If the zlib functions are unavailable
	 */
	public function __construct() {
		if ( !function_exists( 'gzdeflate' ) ) {
			throw new MWException( "Need zlib support to read or write DiffHistoryBlob\n" );
		}
	}

	/**
	 * Append a text to the blob.
	 *
	 * @param string $text
	 * @return int Index of the new item in mItems
	 * @throws MWException If the object was restored by __wakeup()
	 */
	public function addItem( $text ) {
		if ( $this->mFrozen ) {
			throw new MWException( __METHOD__ . ": Cannot add more items after sleep/wakeup" );
		}

		$this->mItems[] = $text;
		$this->mSize += strlen( $text );
		$this->mDiffs = null; // later
		return count( $this->mItems ) - 1;
	}

	/**
	 * Get the text stored under the given index.
	 *
	 * @param int $key
	 * @return string
	 */
	public function getItem( $key ) {
		return $this->mItems[$key];
	}

	/**
	 * Append a text and remember its index as the default item.
	 *
	 * @param string $text
	 */
	public function setText( $text ) {
		$this->mDefaultKey = $this->addItem( $text );
	}

	/**
	 * Get the default text.
	 *
	 * @return string
	 */
	public function getText() {
		return $this->getItem( $this->mDefaultKey );
	}

	/**
	 * Build mDiffs and mDiffMap from mItems. Does nothing if the diffs
	 * already exist or there are no items.
	 *
	 * @throws MWException If xdiff_string_rabdiff() is unavailable
	 */
	public function compress() {
		if ( !function_exists( 'xdiff_string_rabdiff' ) ) {
			throw new MWException( "Need xdiff 1.5+ support to write DiffHistoryBlob\n" );
		}
		if ( isset( $this->mDiffs ) ) {
			// Already compressed
			return;
		}
		if ( !count( $this->mItems ) ) {
			// Empty
			return;
		}

		// Create two diff sequences: one for main text and one for small text
		$sequences = [
			'small' => [
				'tail' => '',
				'diffs' => [],
				'map' => [],
			],
			'main' => [
				'tail' => '',
				'diffs' => [],
				'map' => [],
			],
		];
		$smallFactor = 0.5;

		$mItemsCount = count( $this->mItems );
		for ( $i = 0; $i < $mItemsCount; $i++ ) {
			$text = $this->mItems[$i];
			if ( $i == 0 ) {
				$seqName = 'main';
			} else {
				// A text shorter than $smallFactor times the latest main text
				// is diffed against the 'small' sequence instead.
				$mainTail = $sequences['main']['tail'];
				if ( strlen( $text ) < strlen( $mainTail ) * $smallFactor ) {
					$seqName = 'small';
				} else {
					$seqName = 'main';
				}
			}
			$seq =& $sequences[$seqName];
			$tail = $seq['tail'];
			$diff = $this->diff( $tail, $text );
			$seq['diffs'][] = $diff;
			$seq['map'][] = $i;
			$seq['tail'] = $text;
		}
		unset( $seq ); // unlink dangerous alias

		// Knit the sequences together
		$tail = '';
		$this->mDiffs = [];
		$this->mDiffMap = [];
		foreach ( $sequences as $seq ) {
			if ( !count( $seq['diffs'] ) ) {
				continue;
			}
			if ( $tail === '' ) {
				$this->mDiffs[] = $seq['diffs'][0];
			} else {
				// The sequence's first diff is relative to the empty string;
				// rebuild its text and re-diff it against the previous tail.
				$head = $this->patch( '', $seq['diffs'][0] );
				$this->mDiffs[] = $this->diff( $tail, $head );
			}
			$this->mDiffMap[] = $seq['map'][0];
			$diffsCount = count( $seq['diffs'] );
			for ( $i = 1; $i < $diffsCount; $i++ ) {
				$this->mDiffs[] = $seq['diffs'][$i];
				$this->mDiffMap[] = $seq['map'][$i];
			}
			$tail = $seq['tail'];
		}
	}

	/**
	 * Create a binary diff that turns $t1 into $t2.
	 *
	 * @param string $t1
	 * @param string $t2
	 * @return string
	 */
	public function diff( $t1, $t2 ) {
		# Need to do a null concatenation with warnings off, due to bugs in the current version of xdiff
		# "String is not zero-terminated"
		Wikimedia\suppressWarnings();
		$diff = xdiff_string_rabdiff( $t1, $t2 ) . '';
		Wikimedia\restoreWarnings();
		return $diff;
	}

	/**
	 * Apply a binary diff to $base.
	 *
	 * @param string $base
	 * @param string $diff
	 * @return bool|string The patched text; the pure PHP implementation
	 *   returns false on a checksum/length mismatch or an unknown opcode
	 */
	public function patch( $base, $diff ) {
		if ( function_exists( 'xdiff_string_bpatch' ) ) {
			Wikimedia\suppressWarnings();
			$text = xdiff_string_bpatch( $base, $diff ) . '';
			Wikimedia\restoreWarnings();
			return $text;
		}

		# Pure PHP implementation

		// 8-byte header: 4-byte checksum of the base, then its 32-bit LE length
		$header = unpack( 'Vofp/Vcsize', substr( $diff, 0, 8 ) );

		# Check the checksum if hash extension is available
		$ofp = $this->xdiffAdler32( $base );
		if ( $ofp !== false && $ofp !== substr( $diff, 0, 4 ) ) {
			wfDebug( __METHOD__ . ": incorrect base checksum\n" );
			return false;
		}
		if ( $header['csize'] != strlen( $base ) ) {
			wfDebug( __METHOD__ . ": incorrect base length\n" );
			return false;
		}

		$p = 8;
		$out = '';
		while ( $p < strlen( $diff ) ) {
			$x = unpack( 'Cop', substr( $diff, $p, 1 ) );
			$op = $x['op'];
			++$p;
			switch ( $op ) {
				case self::XDL_BDOP_INS:
					// Literal insert with a 1-byte length
					$x = unpack( 'Csize', substr( $diff, $p, 1 ) );
					$p++;
					$out .= substr( $diff, $p, $x['size'] );
					$p += $x['size'];
					break;
				case self::XDL_BDOP_INSB:
					// Literal insert with a 32-bit LE length
					$x = unpack( 'Vcsize', substr( $diff, $p, 4 ) );
					$p += 4;
					$out .= substr( $diff, $p, $x['csize'] );
					$p += $x['csize'];
					break;
				case self::XDL_BDOP_CPY:
					// Copy a range (offset, length) out of the base text
					$x = unpack( 'Voff/Vcsize', substr( $diff, $p, 8 ) );
					$p += 8;
					$out .= substr( $base, $x['off'], $x['csize'] );
					break;
				default:
					wfDebug( __METHOD__ . ": invalid op\n" );
					return false;
			}
		}
		return $out;
	}

	/**
	 * Compute a binary "Adler-32" checksum as defined by LibXDiff: the state
	 * starts at zero and the four result bytes are reversed.
	 *
	 * @param string $s
	 * @return string|bool Four raw bytes, or false if hash() is unavailable
	 */
	public function xdiffAdler32( $s ) {
		if ( !function_exists( 'hash' ) ) {
			return false;
		}

		static $init;
		if ( $init === null ) {
			$init = str_repeat( "\xf0", 205 ) . "\xee" . str_repeat( "\xf0", 67 ) . "\x02";
		}

		// The real Adler-32 checksum of $init is zero, so it initialises the
		// state to zero, as it is at the start of LibXDiff's checksum
		// algorithm. Appending the subject string then simulates LibXDiff.
		return strrev( hash( 'adler32', $init . $s, true ) );
	}

	/**
	 * Rebuild mItems by applying the diffs in order, starting from the
	 * empty string. Does nothing if there are no diffs.
	 */
	public function uncompress() {
		if ( !$this->mDiffs ) {
			return;
		}
		$tail = '';
		$mDiffsCount = count( $this->mDiffs );
		for ( $diffKey = 0; $diffKey < $mDiffsCount; $diffKey++ ) {
			$textKey = $this->mDiffMap[$diffKey];
			$text = $this->patch( $tail, $this->mDiffs[$diffKey] );
			$this->mItems[$textKey] = $text;
			$tail = $text;
		}
	}

	/**
	 * Compress and pack diffs, map and default key into mCompressed, which
	 * is the only property that gets serialized.
	 *
	 * @return array
	 */
	public function __sleep() {
		$this->compress();
		if ( !count( $this->mItems ) ) {
			// Empty object
			$info = false;
		} else {
			// Take forward differences to improve the compression ratio for sequences
			$map = '';
			$prev = 0;
			foreach ( $this->mDiffMap as $i ) {
				if ( $map !== '' ) {
					$map .= ',';
				}
				$map .= $i - $prev;
				$prev = $i;
			}
			$info = [
				'diffs' => $this->mDiffs,
				'map' => $map
			];
		}
		if ( isset( $this->mDefaultKey ) ) {
			$info['default'] = $this->mDefaultKey;
		}
		$this->mCompressed = gzdeflate( serialize( $info ) );
		return [ 'mCompressed' ];
	}

	/**
	 * Unpack mCompressed, rebuild the diff map and restore the items.
	 * The object is frozen afterwards: addItem() will throw.
	 */
	public function __wakeup() {
		// addItem() doesn't work if mItems is partially filled from mDiffs
		$this->mFrozen = true;
		$info = unserialize( gzinflate( $this->mCompressed ) );
		unset( $this->mCompressed );

		if ( !$info ) {
			// Empty object
			return;
		}

		if ( isset( $info['default'] ) ) {
			$this->mDefaultKey = $info['default'];
		}
		$this->mDiffs = $info['diffs'];
		if ( isset( $info['base'] ) ) {
			// Old format: a plain base text followed by diffs. Turn the base
			// into a diff against the empty string (zero checksum, zero
			// length, one INSB op) so uncompress() can treat all alike.
			$this->mDiffMap = range( 0, count( $this->mDiffs ) - 1 );
			array_unshift( $this->mDiffs,
				pack( 'VVCV', 0, 0, self::XDL_BDOP_INSB, strlen( $info['base'] ) ) .
				$info['base'] );
		} else {
			// New format: the map is a comma-separated list of forward
			// differences; accumulate them back into absolute keys.
			$map = explode( ',', $info['map'] );
			$cur = 0;
			$this->mDiffMap = [];
			foreach ( $map as $i ) {
				$cur += $i;
				$this->mDiffMap[] = $cur;
			}
		}
		$this->uncompress();
	}

	/**
	 * Helper function for compression jobs.
	 *
	 * @return bool True while both the accumulated size and the item count
	 *   are below their maximums
	 */
	public function isHappy() {
		return $this->mSize < $this->mMaxSize
			&& count( $this->mItems ) < $this->mMaxCount;
	}

}
702
// phpcs:ignore Generic.CodeAnalysis.UnconditionalIfStatement.Found
if ( false ) {
	// Blobs generated by MediaWiki < 1.5 on PHP 4 were serialized with the
	// class name coerced to lowercase. We can improve efficiency by adding
	// autoload entries for the lowercase variants of these classes (T166759).
	// The code below is never executed, but it is picked up by the AutoloadGenerator
	// parser, which scans for class_alias() calls.
	class_alias( ConcatenatedGzipHistoryBlob::class, 'concatenatedgziphistoryblob' );
	class_alias( HistoryBlobCurStub::class, 'historyblobcurstub' );
	class_alias( HistoryBlobStub::class, 'historyblobstub' );
}
serialize()
unserialize( $serialized)
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
Concatenated gzip (CGZ) storage Improves compression ratio by concatenating like objects before gzipp...
compress()
Compress the bulk data in the object.
isHappy()
Helper function for compression jobs Returns true until the object is "full" and ready to be committe...
uncompress()
Uncompress bulk data.
removeItem( $hash)
Remove an item.
Diff-based history compression Requires xdiff 1.5+ and zlib.
diff( $t1, $t2)
patch( $base, $diff)
string $mCompressed
Compressed storage.
array $mDiffMap
The diff map, see above.
bool $mFrozen
True if the object is locked against further writes.
int $mSize
Total uncompressed size.
array $mItems
Uncompressed item cache.
xdiffAdler32( $s)
Compute a binary "Adler-32" checksum as defined by LibXDiff, i.e. with the state initialised to zero and the result bytes in reversed order.
int $mMaxSize
The maximum uncompressed size before the object becomes sad Should be less than max_allowed_packet.
int $mMaxCount
The maximum number of text items before the object becomes sad.
isHappy()
Helper function for compression jobs Returns true until the object is "full" and ready to be committe...
int $mDefaultKey
The key for getText()
array $mDiffs
Array of diffs.
const XDL_BDOP_INS
Constants from xdiff.h.
To speed up conversion from 1.4 to 1.5 schema, text rows can refer to the leftover cur table as the b...
setLocation( $id)
Sets the location (cur_id) of the main object to which this object points.
__construct( $curid=0)
Pointer object for an item within a CGZ blob stored in the text table.
setLocation( $id)
Sets the location (old_id) of the main object to which this object points.
getReferrer()
Gets the location of the referring object.
static array $blobCache
One-step cache variable to hold base blobs; operations that pull multiple revisions may often pull mu...
__construct( $hash='', $oldid=0)
getHash()
Get the content hash.
setReferrer( $id)
Sets the location (old_id) of the referring object.
MediaWiki exception.
also included in $newHeader if any indicating whether we should show just the diff
Definition hooks.txt:1259
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output $out
Definition hooks.txt:864
Base class for general text storage via the "object" flag in old_flags, or two-part external storage ...
setText( $text)
Set the "default text" This concept is an odd property of the current DB schema, whereby each text it...
getText()
Get default text.
getItem( $key)
Get item by key, or false if the key is not present.
addItem( $text)
Adds an item of text, returns a stub object which points to the item.
const DB_REPLICA
Definition defines.php:25
$header