MediaWiki REL1_28
HistoryBlob.php
Go to the documentation of this file.
1<?php
/**
 * Contract for containers that hold one or more text items and expose one
 * of them as the "default text".
 */
interface HistoryBlob {
	/**
	 * Add an item of text to the container.
	 *
	 * @param string $text
	 * @return mixed Key identifying the stored item; pass it to getItem()
	 */
	public function addItem( $text );

	/**
	 * Fetch a previously stored item.
	 *
	 * @param mixed $key Key returned by addItem()
	 * @return string|bool The text, or false if the key is not present
	 */
	public function getItem( $key );

	/**
	 * Store $text and mark it as the default text of this container.
	 *
	 * @param string $text
	 */
	public function setText( $text );

	/**
	 * Get the default text.
	 *
	 * @return string|bool
	 */
	public function getText();
}
68
/** @var int Format version; written out by __sleep() */
public $mVersion = 0;

/** @var bool True while $mItems holds the deflated, serialized item array */
public $mCompressed = false;

/** @var array|string Map of md5 hash => text, or the deflated string when compressed */
public $mItems = [];

/** @var string Hash of the item registered through setText() */
public $mDefaultHash = '';

/** @var int Running total of the byte length of the stored items */
public $mSize = 0;

/** @var int isHappy() reports false once $mSize reaches this value */
public $mMaxSize = 10000000;

/** @var int isHappy() reports false once the item count reaches this value */
public $mMaxCount = 100;
78
/**
 * Verify that zlib is available; this class cannot work without it.
 *
 * @throws MWException if gzdeflate() is not defined
 */
public function __construct() {
	if ( function_exists( 'gzdeflate' ) ) {
		return;
	}

	throw new MWException( "Need zlib support to read or write this "
		. "kind of history object (ConcatenatedGzipHistoryBlob)\n" );
}
88
/**
 * Store a text item, keyed by its md5 hash. Adding the same text twice
 * keeps a single copy and does not grow $mSize again.
 *
 * @param string $text
 * @return string md5 hash of $text, usable with getItem()
 */
public function addItem( $text ) {
	$this->uncompress();

	$key = md5( $text );
	if ( isset( $this->mItems[$key] ) ) {
		// Already stored
		return $key;
	}

	$this->mItems[$key] = $text;
	$this->mSize += strlen( $text );
	return $key;
}
102
/**
 * Look up an item by hash.
 *
 * @param string $hash Hash returned by addItem()
 * @return string|bool The stored text, or false when the hash is unknown
 */
public function getItem( $hash ) {
	$this->uncompress();

	return array_key_exists( $hash, $this->mItems )
		? $this->mItems[$hash]
		: false;
}
115
/**
 * Store $text and remember its hash as the default item.
 *
 * @param string $text
 */
public function setText( $text ) {
	$this->uncompress();

	$defaultHash = $this->addItem( $text );
	$this->mDefaultHash = $defaultHash;
}
124
/**
 * Return the default item, i.e. the one registered through setText().
 *
 * @return string|bool The text, or false when no such item is stored
 */
public function getText() {
	$this->uncompress();

	$defaultHash = $this->mDefaultHash;
	return $this->getItem( $defaultHash );
}
132
/**
 * Remove an item and subtract its length from $mSize.
 *
 * The item array is uncompressed first, like every other accessor of this
 * class does; otherwise $mItems could still be the deflated string.
 * Removing an unknown hash is a silent no-op.
 *
 * @param string $hash Hash returned by addItem()
 */
public function removeItem( $hash ) {
	$this->uncompress();

	if ( !array_key_exists( $hash, $this->mItems ) ) {
		// Nothing stored under this hash; leave the size counter alone
		return;
	}

	$this->mSize -= strlen( $this->mItems[$hash] );
	unset( $this->mItems[$hash] );
}
142
/**
 * Replace the item array with its serialized, deflated form.
 * Does nothing when the object is already compressed.
 */
public function compress() {
	if ( $this->mCompressed ) {
		return;
	}

	$serialized = serialize( $this->mItems );
	$this->mItems = gzdeflate( $serialized );
	$this->mCompressed = true;
}
152
/**
 * Restore the item array from its deflated, serialized form.
 * Does nothing when the object is not compressed.
 */
public function uncompress() {
	if ( !$this->mCompressed ) {
		return;
	}

	$serialized = gzinflate( $this->mItems );
	$this->mItems = unserialize( $serialized );
	$this->mCompressed = false;
}
162
/**
 * Serialization hook: compress the items, then list the properties to
 * persist. $mSize and the limits are not part of the serialized form.
 *
 * @return array Property names to serialize
 */
public function __sleep() {
	$this->compress();

	$persisted = [ 'mVersion', 'mCompressed', 'mItems', 'mDefaultHash' ];
	return $persisted;
}
170
/**
 * Unserialization hook: expand the item array so it is ready for use.
 */
public function __wakeup() {
	$this->uncompress();
}
174
/**
 * Report whether the blob is still below both of its limits.
 *
 * @return bool True while $mSize is under $mMaxSize and the item count is
 *  under $mMaxCount
 */
public function isHappy() {
	if ( $this->mSize >= $this->mMaxSize ) {
		return false;
	}

	return count( $this->mItems ) < $this->mMaxCount;
}
185}
186
/**
 * @var array Cache of the most recently loaded base blob, keyed by old_id.
 *  getText() replaces the whole array, so it holds at most one entry.
 */
protected static $blobCache = [];

/** @var int old_id of the text row holding the blob; set by setLocation() */
public $mOldId;

/** @var string Key of this item inside the blob; passed to getItem() */
public $mHash;

/** @var int Id of the referring object; set by setReferrer() */
public $mRef;
207
/**
 * @param string $hash Key of the item inside the blob
 * @param int $oldid Accepted for compatibility but not used here; the
 *  location is assigned through setLocation()
 */
public function __construct( $hash = '', $oldid = 0 ) {
	$this->mHash = $hash;
}
215
/**
 * Set the location (old_id) of the row holding the blob this stub points to.
 *
 * @param int $id
 */
public function setLocation( $id ) {
	$this->mOldId = $id;
}
224
/**
 * Record the id of the object that refers to this stub.
 *
 * @param int $id
 */
public function setReferrer( $id ) {
	$this->mRef = $id;
}
232
/**
 * Get the id recorded by setReferrer().
 *
 * @return int
 */
public function getReferrer() {
	return $this->mRef;
}
240
/**
 * Load the blob this stub points to and return the stub's item from it.
 *
 * The blob is read from the 'text' table row identified by $mOldId, unless
 * it is the one blob currently held in self::$blobCache. Rows flagged
 * 'external' are fetched through ExternalStore first.
 *
 * @return string|bool The item text, or false if the row is missing, is
 *  not flagged 'object', has an unusable external URL, or does not
 *  unserialize to an object
 */
function getText() {
	if ( isset( self::$blobCache[$this->mOldId] ) ) {
		$obj = self::$blobCache[$this->mOldId];
	} else {
		$dbr = wfGetDB( DB_REPLICA );
		$row = $dbr->selectRow(
			'text',
			[ 'old_flags', 'old_text' ],
			[ 'old_id' => $this->mOldId ]
		);

		if ( !$row ) {
			return false;
		}

		$flags = explode( ',', $row->old_flags );
		if ( in_array( 'external', $flags, true ) ) {
			$url = $row->old_text;
			$parts = explode( '://', $url, 2 );
			if ( !isset( $parts[1] ) || $parts[1] == '' ) {
				return false;
			}
			$row->old_text = ExternalStore::fetchFromURL( $url );
		}

		if ( !in_array( 'object', $flags, true ) ) {
			return false;
		}

		if ( in_array( 'gzip', $flags, true ) ) {
			// This shouldn't happen, but a bug in the compress script
			// may at times gzip-compress a HistoryBlob object row.
			$obj = unserialize( gzinflate( $row->old_text ) );
		} else {
			$obj = unserialize( $row->old_text );
		}

		if ( !is_object( $obj ) && is_string( $obj ) ) {
			// Correct for old double-serialization bug.
			$obj = unserialize( $obj );
		}

		if ( !is_object( $obj ) ) {
			// Fetch or unserialization failed; there is no blob to query
			return false;
		}

		// Save this item for reference; if pulling many
		// items in a row we'll likely use it again.
		$obj->uncompress();
		self::$blobCache = [ $this->mOldId => $obj ];
	}

	return $obj->getItem( $this->mHash );
}
295
/**
 * Get the key of this stub's item inside the blob.
 *
 * @return string
 */
public function getHash() {
	return $this->mHash;
}
304}
305
/** @var int cur_id of the row in the 'cur' table that holds the text */
public $mCurId;
317
/**
 * @param int $curid cur_id of the row this stub points to
 */
public function __construct( $curid = 0 ) {
	$this->mCurId = $curid;
}
324
/**
 * Set the location (cur_id) of the row this stub points to.
 *
 * @param int $id
 */
public function setLocation( $id ) {
	$this->mCurId = $id;
}
334
/**
 * Read the text from the 'cur' table row identified by $mCurId.
 *
 * @return string|bool The cur_text value, or false if the row does not exist
 */
function getText() {
	$dbr = wfGetDB( DB_REPLICA );
	$row = $dbr->selectRow( 'cur', [ 'cur_text' ], [ 'cur_id' => $this->mCurId ] );
	if ( !$row ) {
		return false;
	}
	return $row->cur_text;
}
346}
347
/**
 * Diff-based history compression.
 *
 * Texts are stored as a chain of binary diffs, each one applied to the text
 * produced by the previous diff. zlib is always required; xdiff 1.5+ is
 * required to create diffs, while patches can be applied in pure PHP.
 */
class DiffHistoryBlob implements HistoryBlob {
	/** @var array Uncompressed item cache */
	public $mItems = [];

	/** @var int Total uncompressed size */
	public $mSize = 0;

	/**
	 * @var array|null Array of diffs. The first diff is applied to the empty
	 *  string, each later one to the result of its predecessor. Null until
	 *  compress() or __wakeup() fills it in.
	 */
	public $mDiffs;

	/** @var array Maps each index of $mDiffs to the key of the text in $mItems */
	public $mDiffMap;

	/** @var int The key for getText() */
	public $mDefaultKey;

	/** @var string Compressed storage; only populated during serialization */
	public $mCompressed;

	/** @var bool True if the object is locked against further writes */
	public $mFrozen = false;

	/** @var int isHappy() reports false once $mSize reaches this value */
	public $mMaxSize = 10000000;

	/** @var int isHappy() reports false once the item count reaches this value */
	public $mMaxCount = 100;

	/** Opcodes of the binary diff format, as handled by patch() */
	const XDL_BDOP_INS = 1;
	const XDL_BDOP_CPY = 2;
	const XDL_BDOP_INSB = 3;

	/**
	 * @throws MWException if zlib is unavailable
	 */
	public function __construct() {
		if ( !function_exists( 'gzdeflate' ) ) {
			throw new MWException( "Need zlib support to read or write DiffHistoryBlob\n" );
		}
	}

	/**
	 * Append a text item.
	 *
	 * @param string $text
	 * @return int Index of the new item, usable with getItem()
	 * @throws MWException if the object was restored by __wakeup()
	 */
	public function addItem( $text ) {
		if ( $this->mFrozen ) {
			throw new MWException( __METHOD__ . ": Cannot add more items after sleep/wakeup" );
		}

		$this->mItems[] = $text;
		$this->mSize += strlen( $text );
		$this->mDiffs = null; // later
		return count( $this->mItems ) - 1;
	}

	/**
	 * Look up an item by key.
	 *
	 * @param int $key Key returned by addItem()
	 * @return string|bool The text, or false if the key is not present
	 */
	public function getItem( $key ) {
		// Null keys (no default set) are treated as missing rather than as ''
		if ( $key === null || !array_key_exists( $key, $this->mItems ) ) {
			return false;
		}
		return $this->mItems[$key];
	}

	/**
	 * Store $text and remember its key as the default item.
	 *
	 * @param string $text
	 */
	public function setText( $text ) {
		$this->mDefaultKey = $this->addItem( $text );
	}

	/**
	 * Return the default item.
	 *
	 * @return string|bool The text, or false if no default item is stored
	 */
	public function getText() {
		return $this->getItem( $this->mDefaultKey );
	}

	/**
	 * Build $mDiffs and $mDiffMap from $mItems.
	 *
	 * Items are split into a 'main' and a 'small' sequence (an item goes to
	 * 'small' when it is shorter than half the current main tail), each
	 * sequence is diffed independently, and the two chains are then joined.
	 *
	 * @throws MWException if there is something to compress and xdiff is
	 *  unavailable
	 */
	public function compress() {
		if ( isset( $this->mDiffs ) ) {
			// Already compressed
			return;
		}
		if ( !count( $this->mItems ) ) {
			// Empty
			return;
		}
		// Only demand xdiff once we know diffs actually have to be created,
		// so that a blob which was merely read can be serialized again.
		if ( !function_exists( 'xdiff_string_rabdiff' ) ) {
			throw new MWException( "Need xdiff 1.5+ support to write DiffHistoryBlob\n" );
		}

		// Create two diff sequences: one for main text and one for small text
		$sequences = [
			'small' => [
				'tail' => '',
				'diffs' => [],
				'map' => [],
			],
			'main' => [
				'tail' => '',
				'diffs' => [],
				'map' => [],
			],
		];
		$smallFactor = 0.5;

		$mItemsCount = count( $this->mItems );
		for ( $i = 0; $i < $mItemsCount; $i++ ) {
			$text = $this->mItems[$i];
			if ( $i == 0 ) {
				$seqName = 'main';
			} else {
				$mainTail = $sequences['main']['tail'];
				if ( strlen( $text ) < strlen( $mainTail ) * $smallFactor ) {
					$seqName = 'small';
				} else {
					$seqName = 'main';
				}
			}
			$seq =& $sequences[$seqName];
			$tail = $seq['tail'];
			$diff = $this->diff( $tail, $text );
			$seq['diffs'][] = $diff;
			$seq['map'][] = $i;
			$seq['tail'] = $text;
		}
		unset( $seq ); // unlink dangerous alias

		// Knit the sequences together
		$tail = '';
		$this->mDiffs = [];
		$this->mDiffMap = [];
		foreach ( $sequences as $seq ) {
			if ( !count( $seq['diffs'] ) ) {
				continue;
			}
			if ( $tail === '' ) {
				$this->mDiffs[] = $seq['diffs'][0];
			} else {
				// The first diff of this sequence was made against '';
				// re-base it onto the tail of the previous sequence.
				$head = $this->patch( '', $seq['diffs'][0] );
				$this->mDiffs[] = $this->diff( $tail, $head );
			}
			$this->mDiffMap[] = $seq['map'][0];
			$diffsCount = count( $seq['diffs'] );
			for ( $i = 1; $i < $diffsCount; $i++ ) {
				$this->mDiffs[] = $seq['diffs'][$i];
				$this->mDiffMap[] = $seq['map'][$i];
			}
			$tail = $seq['tail'];
		}
	}

	/**
	 * Create a binary diff that turns $t1 into $t2.
	 *
	 * @param string $t1
	 * @param string $t2
	 * @return string
	 */
	public function diff( $t1, $t2 ) {
		# Need to do a null concatenation with warnings off, due to bugs in the current version of xdiff
		# "String is not zero-terminated"
		MediaWiki\suppressWarnings();
		$diff = xdiff_string_rabdiff( $t1, $t2 ) . '';
		MediaWiki\restoreWarnings();
		return $diff;
	}

	/**
	 * Apply a binary diff to $base.
	 *
	 * Uses xdiff_string_bpatch() when available and a pure PHP
	 * implementation otherwise.
	 *
	 * @param string $base
	 * @param string $diff
	 * @return bool|string The patched text; the pure PHP path returns false
	 *  on a truncated header, checksum or length mismatch, or unknown opcode
	 */
	public function patch( $base, $diff ) {
		if ( function_exists( 'xdiff_string_bpatch' ) ) {
			MediaWiki\suppressWarnings();
			$text = xdiff_string_bpatch( $base, $diff ) . '';
			MediaWiki\restoreWarnings();
			return $text;
		}

		# Pure PHP implementation

		if ( strlen( $diff ) < 8 ) {
			// Too short to hold the checksum and base-length header
			wfDebug( __METHOD__ . ": truncated diff header\n" );
			return false;
		}

		$header = unpack( 'Vofp/Vcsize', substr( $diff, 0, 8 ) );

		# Check the checksum if hash extension is available
		$ofp = $this->xdiffAdler32( $base );
		if ( $ofp !== false && $ofp !== substr( $diff, 0, 4 ) ) {
			wfDebug( __METHOD__ . ": incorrect base checksum\n" );
			return false;
		}
		if ( $header['csize'] != strlen( $base ) ) {
			wfDebug( __METHOD__ . ": incorrect base length\n" );
			return false;
		}

		$p = 8;
		$out = '';
		while ( $p < strlen( $diff ) ) {
			$x = unpack( 'Cop', substr( $diff, $p, 1 ) );
			$op = $x['op'];
			++$p;
			switch ( $op ) {
				case self::XDL_BDOP_INS:
					// Insert: 1-byte length followed by literal data
					$x = unpack( 'Csize', substr( $diff, $p, 1 ) );
					$p++;
					$out .= substr( $diff, $p, $x['size'] );
					$p += $x['size'];
					break;
				case self::XDL_BDOP_INSB:
					// Insert big: 4-byte length followed by literal data
					$x = unpack( 'Vcsize', substr( $diff, $p, 4 ) );
					$p += 4;
					$out .= substr( $diff, $p, $x['csize'] );
					$p += $x['csize'];
					break;
				case self::XDL_BDOP_CPY:
					// Copy: 4-byte offset and 4-byte length into $base
					$x = unpack( 'Voff/Vcsize', substr( $diff, $p, 8 ) );
					$p += 8;
					$out .= substr( $base, $x['off'], $x['csize'] );
					break;
				default:
					wfDebug( __METHOD__ . ": invalid op\n" );
					return false;
			}
		}
		return $out;
	}

	/**
	 * Compute the checksum stored in the first four bytes of a diff: an
	 * Adler-32 variant whose state starts at zero, returned as four raw
	 * bytes in the byte order patch() compares against.
	 *
	 * @param string $s
	 * @return string|bool Four raw bytes, or false if hash() is unavailable
	 */
	public function xdiffAdler32( $s ) {
		if ( !function_exists( 'hash' ) ) {
			return false;
		}

		static $init;
		if ( $init === null ) {
			$init = str_repeat( "\xf0", 205 ) . "\xee" . str_repeat( "\xf0", 67 ) . "\x02";
		}

		// The real Adler-32 checksum of $init is zero, so it initialises the
		// state to zero, as it is at the start of LibXDiff's checksum
		// algorithm. Appending the subject string then simulates LibXDiff.
		return strrev( hash( 'adler32', $init . $s, true ) );
	}

	/**
	 * Rebuild $mItems by applying the diff chain in order.
	 * Does nothing when there are no diffs.
	 */
	public function uncompress() {
		if ( !$this->mDiffs ) {
			return;
		}
		$tail = '';
		$mDiffsCount = count( $this->mDiffs );
		for ( $diffKey = 0; $diffKey < $mDiffsCount; $diffKey++ ) {
			$textKey = $this->mDiffMap[$diffKey];
			$text = $this->patch( $tail, $this->mDiffs[$diffKey] );
			$this->mItems[$textKey] = $text;
			$tail = $text;
		}
	}

	/**
	 * Serialization hook: compress, then pack the diffs, the delta-encoded
	 * diff map and the default key into the single deflated $mCompressed
	 * property.
	 *
	 * @return array Property names to serialize
	 */
	public function __sleep() {
		$this->compress();
		if ( !count( $this->mItems ) ) {
			// Empty object
			$info = false;
		} else {
			// Take forward differences to improve the compression ratio for sequences
			$map = '';
			$prev = 0;
			foreach ( $this->mDiffMap as $i ) {
				if ( $map !== '' ) {
					$map .= ',';
				}
				$map .= $i - $prev;
				$prev = $i;
			}
			$info = [
				'diffs' => $this->mDiffs,
				'map' => $map
			];
		}
		if ( isset( $this->mDefaultKey ) ) {
			$info['default'] = $this->mDefaultKey;
		}
		$this->mCompressed = gzdeflate( serialize( $info ) );
		return [ 'mCompressed' ];
	}

	/**
	 * Unserialization hook: unpack $mCompressed, restore the diffs and the
	 * diff map (both the old 'base' format and the current 'map' format are
	 * recognised), rebuild the items and freeze the object.
	 */
	public function __wakeup() {
		// addItem() doesn't work if mItems is partially filled from mDiffs
		$this->mFrozen = true;
		$info = unserialize( gzinflate( $this->mCompressed ) );
		unset( $this->mCompressed );

		if ( !$info ) {
			// Empty object
			return;
		}

		if ( isset( $info['default'] ) ) {
			$this->mDefaultKey = $info['default'];
		}
		$this->mDiffs = $info['diffs'];
		if ( isset( $info['base'] ) ) {
			// Old format
			$this->mDiffMap = range( 0, count( $this->mDiffs ) - 1 );
			array_unshift( $this->mDiffs,
				pack( 'VVCV', 0, 0, self::XDL_BDOP_INSB, strlen( $info['base'] ) ) .
				$info['base'] );
		} else {
			// New format
			$map = explode( ',', $info['map'] );
			$cur = 0;
			$this->mDiffMap = [];
			foreach ( $map as $i ) {
				$cur += $i;
				$this->mDiffMap[] = $cur;
			}
		}
		$this->uncompress();
	}

	/**
	 * Report whether the blob is still below both of its limits.
	 *
	 * @return bool True while $mSize is under $mMaxSize and the item count
	 *  is under $mMaxCount
	 */
	public function isHappy() {
		return $this->mSize < $this->mMaxSize
			&& count( $this->mItems ) < $this->mMaxCount;
	}

}
serialize()
unserialize( $serialized)
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
Concatenated gzip (CGZ) storage. Improves compression ratio by concatenating like objects before gzipp...
compress()
Compress the bulk data in the object.
isHappy()
Helper function for compression jobs Returns true until the object is "full" and ready to be committe...
uncompress()
Uncompress bulk data.
removeItem( $hash)
Remove an item.
Diff-based history compression. Requires xdiff 1.5+ and zlib.
diff( $t1, $t2)
patch( $base, $diff)
string $mCompressed
Compressed storage.
array $mDiffMap
The diff map, see above.
bool $mFrozen
True if the object is locked against further writes.
int $mSize
Total uncompressed size.
array $mItems
Uncompressed item cache.
xdiffAdler32( $s)
Compute a binary "Adler-32" checksum as defined by LibXDiff, i.e.
int $mMaxSize
The maximum uncompressed size before the object becomes sad. Should be less than max_allowed_packet.
int $mMaxCount
The maximum number of text items before the object becomes sad.
isHappy()
Helper function for compression jobs Returns true until the object is "full" and ready to be committe...
int $mDefaultKey
The key for getText()
array $mDiffs
Array of diffs.
const XDL_BDOP_INS
Constants from xdiff.h.
static fetchFromURL( $url, array $params=[])
Fetch data from given URL.
To speed up conversion from 1.4 to 1.5 schema, text rows can refer to the leftover cur table as the b...
setLocation( $id)
Sets the location (cur_id) of the main object to which this object points.
__construct( $curid=0)
Pointer object for an item within a CGZ blob stored in the text table.
setLocation( $id)
Sets the location (old_id) of the main object to which this object points.
getReferrer()
Gets the location of the referring object.
static array $blobCache
One-step cache variable to hold base blobs; operations that pull multiple revisions may often pull mu...
__construct( $hash='', $oldid=0)
getHash()
Get the content hash.
setReferrer( $id)
Sets the location (old_id) of the referring object.
MediaWiki exception.
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
the array() calling protocol came about after MediaWiki 1.4rc1.
also included in $newHeader if any indicating whether we should show just the diff
Definition hooks.txt:1217
it s the revision text itself In either if gzip is the revision text is gzipped $flags
Definition hooks.txt:2710
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output $out
Definition hooks.txt:886
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition injection.txt:37
Base class for general text storage via the "object" flag in old_flags, or two-part external storage ...
setText( $text)
Set the "default text". This concept is an odd property of the current DB schema, whereby each text it...
getText()
Get default text.
getItem( $key)
Get item by key, or false if the key is not present.
addItem( $text)
Adds an item of text, returns a stub object which points to the item.
const DB_REPLICA
Definition defines.php:22
$header