MediaWiki  master
DiffHistoryBlob.php
Go to the documentation of this file.
1 <?php
27 class DiffHistoryBlob implements HistoryBlob {
29  public $mItems = [];
30 
32  public $mSize = 0;
33 
42  public $mDiffs;
43 
45  public $mDiffMap;
46 
49  public $mDefaultKey;
50 
52  public $mCompressed;
53 
55  public $mFrozen = false;
56 
61  public $mMaxSize = 10000000;
62 
64  public $mMaxCount = 100;
65 
67  private const XDL_BDOP_INS = 1;
68  private const XDL_BDOP_CPY = 2;
69  private const XDL_BDOP_INSB = 3;
70 
71  public function __construct() {
72  if ( !function_exists( 'gzdeflate' ) ) {
73  throw new MWException( "Need zlib support to read or write DiffHistoryBlob\n" );
74  }
75  }
76 
82  public function addItem( $text ) {
83  if ( $this->mFrozen ) {
84  throw new MWException( __METHOD__ . ": Cannot add more items after sleep/wakeup" );
85  }
86 
87  $this->mItems[] = $text;
88  $this->mSize += strlen( $text );
89  $this->mDiffs = null; // later
90  return (string)( count( $this->mItems ) - 1 );
91  }
92 
97  public function getItem( $key ) {
98  return $this->mItems[(int)$key];
99  }
100 
104  public function setText( $text ) {
105  $this->mDefaultKey = $this->addItem( $text );
106  }
107 
111  public function getText() {
112  return $this->getItem( $this->mDefaultKey );
113  }
114 
118  private function compress() {
119  if ( !function_exists( 'xdiff_string_rabdiff' ) ) {
120  throw new MWException( "Need xdiff support to write DiffHistoryBlob\n" );
121  }
122  if ( isset( $this->mDiffs ) ) {
123  // Already compressed
124  return;
125  }
126  if ( $this->mItems === [] ) {
127  return;
128  }
129 
130  // Create two diff sequences: one for main text and one for small text
131  $sequences = [
132  'small' => [
133  'tail' => '',
134  'diffs' => [],
135  'map' => [],
136  ],
137  'main' => [
138  'tail' => '',
139  'diffs' => [],
140  'map' => [],
141  ],
142  ];
143  $smallFactor = 0.5;
144 
145  $mItemsCount = count( $this->mItems );
146  for ( $i = 0; $i < $mItemsCount; $i++ ) {
147  $text = $this->mItems[$i];
148  if ( $i == 0 ) {
149  $seqName = 'main';
150  } else {
151  $mainTail = $sequences['main']['tail'];
152  if ( strlen( $text ) < strlen( $mainTail ) * $smallFactor ) {
153  $seqName = 'small';
154  } else {
155  $seqName = 'main';
156  }
157  }
158 
159  $tail = $sequences[$seqName]['tail'];
160  $diff = $this->diff( $tail, $text );
161  $sequences[$seqName]['diffs'][] = $diff;
162  $sequences[$seqName]['map'][] = $i;
163  $sequences[$seqName]['tail'] = $text;
164  }
165 
166  // Knit the sequences together
167  $tail = '';
168  $this->mDiffs = [];
169  $this->mDiffMap = [];
170  foreach ( $sequences as $seq ) {
171  if ( $seq['diffs'] === [] ) {
172  continue;
173  }
174  if ( $tail === '' ) {
175  $this->mDiffs[] = $seq['diffs'][0];
176  } else {
177  $head = $this->patch( '', $seq['diffs'][0] );
178  $this->mDiffs[] = $this->diff( $tail, $head );
179  }
180  $this->mDiffMap[] = $seq['map'][0];
181  $diffsCount = count( $seq['diffs'] );
182  for ( $i = 1; $i < $diffsCount; $i++ ) {
183  $this->mDiffs[] = $seq['diffs'][$i];
184  $this->mDiffMap[] = $seq['map'][$i];
185  }
186  $tail = $seq['tail'];
187  }
188  }
189 
195  private function diff( $t1, $t2 ) {
196  return xdiff_string_rabdiff( $t1, $t2 );
197  }
198 
204  private function patch( $base, $diff ) {
205  if ( function_exists( 'xdiff_string_bpatch' ) ) {
206  return xdiff_string_bpatch( $base, $diff );
207  }
208 
209  # Pure PHP implementation
210 
211  $header = unpack( 'Vofp/Vcsize', substr( $diff, 0, 8 ) );
212 
213  # Check the checksum
214  $ofp = $this->xdiffAdler32( $base );
215  if ( $ofp !== substr( $diff, 0, 4 ) ) {
216  wfDebug( __METHOD__ . ": incorrect base checksum" );
217  return false;
218  }
219  if ( $header['csize'] != strlen( $base ) ) {
220  wfDebug( __METHOD__ . ": incorrect base length" );
221  return false;
222  }
223 
224  $p = 8;
225  $out = '';
226  while ( $p < strlen( $diff ) ) {
227  $x = unpack( 'Cop', substr( $diff, $p, 1 ) );
228  $op = $x['op'];
229  ++$p;
230  switch ( $op ) {
231  case self::XDL_BDOP_INS:
232  $x = unpack( 'Csize', substr( $diff, $p, 1 ) );
233  $p++;
234  $out .= substr( $diff, $p, $x['size'] );
235  $p += $x['size'];
236  break;
237  case self::XDL_BDOP_INSB:
238  $x = unpack( 'Vcsize', substr( $diff, $p, 4 ) );
239  $p += 4;
240  $out .= substr( $diff, $p, $x['csize'] );
241  $p += $x['csize'];
242  break;
243  case self::XDL_BDOP_CPY:
244  $x = unpack( 'Voff/Vcsize', substr( $diff, $p, 8 ) );
245  $p += 8;
246  $out .= substr( $base, $x['off'], $x['csize'] );
247  break;
248  default:
249  wfDebug( __METHOD__ . ": invalid op" );
250  return false;
251  }
252  }
253  return $out;
254  }
255 
263  public function xdiffAdler32( $s ) {
264  static $init;
265  if ( $init === null ) {
266  $init = str_repeat( "\xf0", 205 ) . "\xee" . str_repeat( "\xf0", 67 ) . "\x02";
267  }
268 
269  // The real Adler-32 checksum of $init is zero, so it initialises the
270  // state to zero, as it is at the start of LibXDiff's checksum
271  // algorithm. Appending the subject string then simulates LibXDiff.
272  return strrev( hash( 'adler32', $init . $s, true ) );
273  }
274 
275  private function uncompress() {
276  if ( !$this->mDiffs ) {
277  return;
278  }
279  $tail = '';
280  $mDiffsCount = count( $this->mDiffs );
281  for ( $diffKey = 0; $diffKey < $mDiffsCount; $diffKey++ ) {
282  $textKey = $this->mDiffMap[$diffKey];
283  $text = $this->patch( $tail, $this->mDiffs[$diffKey] );
284  $this->mItems[$textKey] = $text;
285  $tail = $text;
286  }
287  }
288 
292  public function __sleep() {
293  $this->compress();
294  if ( $this->mItems === [] ) {
295  $info = false;
296  } else {
297  // Take forward differences to improve the compression ratio for sequences
298  $map = '';
299  $prev = 0;
300  foreach ( $this->mDiffMap as $i ) {
301  if ( $map !== '' ) {
302  $map .= ',';
303  }
304  $map .= $i - $prev;
305  $prev = $i;
306  }
307  $info = [
308  'diffs' => $this->mDiffs,
309  'map' => $map
310  ];
311  }
312  if ( isset( $this->mDefaultKey ) ) {
313  $info['default'] = $this->mDefaultKey;
314  }
315  $this->mCompressed = gzdeflate( serialize( $info ) );
316  return [ 'mCompressed' ];
317  }
318 
319  public function __wakeup() {
320  // addItem() doesn't work if mItems is partially filled from mDiffs
321  $this->mFrozen = true;
322  $info = unserialize( gzinflate( $this->mCompressed ) );
323  $this->mCompressed = null;
324 
325  if ( !$info ) {
326  // Empty object
327  return;
328  }
329 
330  if ( isset( $info['default'] ) ) {
331  $this->mDefaultKey = $info['default'];
332  }
333  $this->mDiffs = $info['diffs'];
334  if ( isset( $info['base'] ) ) {
335  // Old format
336  $this->mDiffMap = range( 0, count( $this->mDiffs ) - 1 );
337  array_unshift( $this->mDiffs,
338  pack( 'VVCV', 0, 0, self::XDL_BDOP_INSB, strlen( $info['base'] ) ) .
339  $info['base'] );
340  } else {
341  // New format
342  $map = explode( ',', $info['map'] );
343  $cur = 0;
344  $this->mDiffMap = [];
345  foreach ( $map as $i ) {
346  $cur += $i;
347  $this->mDiffMap[] = $cur;
348  }
349  }
350  $this->uncompress();
351  }
352 
359  public function isHappy() {
360  return $this->mSize < $this->mMaxSize
361  && count( $this->mItems ) < $this->mMaxCount;
362  }
363 
364 }
serialize()
unserialize( $serialized)
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Diff-based history compression Requires xdiff and zlib.
patch( $base, $diff)
string[] $mItems
Uncompressed item cache.
string null $mCompressed
Compressed storage.
array $mDiffMap
The diff map, see above.
bool $mFrozen
True if the object is locked against further writes.
int $mSize
Total uncompressed size.
xdiffAdler32( $s)
Compute a binary "Adler-32" checksum as defined by LibXDiff, i.e.
int $mMaxSize
The maximum uncompressed size before the object becomes sad Should be less than max_allowed_packet.
int $mMaxCount
The maximum number of text items before the object becomes sad.
isHappy()
Helper function for compression jobs Returns true until the object is "full" and ready to be committe...
array null $mDiffs
Array of diffs.
const XDL_BDOP_INS
Constants from xdiff.h.
string $mDefaultKey
The key for getText()
MediaWiki exception.
Definition: MWException.php:29
Base class for general text storage via the "object" flag in old_flags, or two-part external storage ...
Definition: HistoryBlob.php:28
foreach( $mmfl['setupFiles'] as $fileName) if( $queue) if(empty( $mmfl['quiet'])) $s
$header