MediaWiki  1.27.2
WikiDiff3.php
Go to the documentation of this file.
1 <?php
39 class WikiDiff3 {
40 
41  // Input variables
42  private $from;
43  private $to;
44  private $m;
45  private $n;
46 
47  private $tooLong;
48  private $powLimit;
49 
50  // State variables
51  private $maxDifferences;
53 
54  // Output variables
55  public $length;
56  public $removed;
57  public $added;
59 
60  function __construct( $tooLong = 2000000, $powLimit = 1.45 ) {
61  $this->tooLong = $tooLong;
62  $this->powLimit = $powLimit;
63  }
64 
65  public function diff( /*array*/ $from, /*array*/ $to ) {
66  // remember initial lengths
67  $m = count( $from );
68  $n = count( $to );
69 
70  $this->heuristicUsed = false;
71 
72  // output
73  $removed = $m > 0 ? array_fill( 0, $m, true ) : [];
74  $added = $n > 0 ? array_fill( 0, $n, true ) : [];
75 
76  // reduce the complexity for the next step (intentionally done twice)
77  // remove common tokens at the start
78  $i = 0;
79  while ( $i < $m && $i < $n && $from[$i] === $to[$i] ) {
80  $removed[$i] = $added[$i] = false;
81  unset( $from[$i], $to[$i] );
82  ++$i;
83  }
84 
85  // remove common tokens at the end
86  $j = 1;
87  while ( $i + $j <= $m && $i + $j <= $n && $from[$m - $j] === $to[$n - $j] ) {
88  $removed[$m - $j] = $added[$n - $j] = false;
89  unset( $from[$m - $j], $to[$n - $j] );
90  ++$j;
91  }
92 
93  $this->from = $newFromIndex = $this->to = $newToIndex = [];
94 
95  // remove tokens not in both sequences
96  $shared = [];
97  foreach ( $from as $key ) {
98  $shared[$key] = false;
99  }
100 
101  foreach ( $to as $index => &$el ) {
102  if ( array_key_exists( $el, $shared ) ) {
103  // keep it
104  $this->to[] = $el;
105  $shared[$el] = true;
106  $newToIndex[] = $index;
107  }
108  }
109  foreach ( $from as $index => &$el ) {
110  if ( $shared[$el] ) {
111  // keep it
112  $this->from[] = $el;
113  $newFromIndex[] = $index;
114  }
115  }
116 
117  unset( $shared, $from, $to );
118 
119  $this->m = count( $this->from );
120  $this->n = count( $this->to );
121 
122  $this->removed = $this->m > 0 ? array_fill( 0, $this->m, true ) : [];
123  $this->added = $this->n > 0 ? array_fill( 0, $this->n, true ) : [];
124 
125  if ( $this->m == 0 || $this->n == 0 ) {
126  $this->length = 0;
127  } else {
128  $this->maxDifferences = ceil( ( $this->m + $this->n ) / 2.0 );
129  if ( $this->m * $this->n > $this->tooLong ) {
130  // limit complexity to D^POW_LIMIT for long sequences
131  $this->maxDifferences = floor( pow( $this->maxDifferences, $this->powLimit - 1.0 ) );
132  wfDebug( "Limiting max number of differences to $this->maxDifferences\n" );
133  }
134 
135  /*
136  * The common prefixes and suffixes are always part of some LCS, include
137  * them now to reduce our search space
138  */
139  $max = min( $this->m, $this->n );
140  for ( $forwardBound = 0; $forwardBound < $max
141  && $this->from[$forwardBound] === $this->to[$forwardBound];
142  ++$forwardBound
143  ) {
144  $this->removed[$forwardBound] = $this->added[$forwardBound] = false;
145  }
146 
147  $backBoundL1 = $this->m - 1;
148  $backBoundL2 = $this->n - 1;
149 
150  while ( $backBoundL1 >= $forwardBound && $backBoundL2 >= $forwardBound
151  && $this->from[$backBoundL1] === $this->to[$backBoundL2]
152  ) {
153  $this->removed[$backBoundL1--] = $this->added[$backBoundL2--] = false;
154  }
155 
156  $temp = array_fill( 0, $this->m + $this->n + 1, 0 );
157  $V = [ $temp, $temp ];
158  $snake = [ 0, 0, 0 ];
159 
160  $this->length = $forwardBound + $this->m - $backBoundL1 - 1
161  + $this->lcs_rec(
162  $forwardBound,
163  $backBoundL1,
164  $forwardBound,
165  $backBoundL2,
166  $V,
167  $snake
168  );
169  }
170 
171  $this->m = $m;
172  $this->n = $n;
173 
174  $this->length += $i + $j - 1;
175 
176  foreach ( $this->removed as $key => &$removed_elem ) {
177  if ( !$removed_elem ) {
178  $removed[$newFromIndex[$key]] = false;
179  }
180  }
181  foreach ( $this->added as $key => &$added_elem ) {
182  if ( !$added_elem ) {
183  $added[$newToIndex[$key]] = false;
184  }
185  }
186  $this->removed = $removed;
187  $this->added = $added;
188  }
189 
190  function diff_range( $from_lines, $to_lines ) {
191  // Diff and store locally
192  $this->diff( $from_lines, $to_lines );
193  unset( $from_lines, $to_lines );
194 
195  $ranges = [];
196  $xi = $yi = 0;
197  while ( $xi < $this->m || $yi < $this->n ) {
198  // Matching "snake".
199  while ( $xi < $this->m && $yi < $this->n
200  && !$this->removed[$xi]
201  && !$this->added[$yi]
202  ) {
203  ++$xi;
204  ++$yi;
205  }
206  // Find deletes & adds.
207  $xstart = $xi;
208  while ( $xi < $this->m && $this->removed[$xi] ) {
209  ++$xi;
210  }
211 
212  $ystart = $yi;
213  while ( $yi < $this->n && $this->added[$yi] ) {
214  ++$yi;
215  }
216 
217  if ( $xi > $xstart || $yi > $ystart ) {
218  $ranges[] = new RangeDifference( $xstart, $xi, $ystart, $yi );
219  }
220  }
221 
222  return $ranges;
223  }
224 
225  private function lcs_rec( $bottoml1, $topl1, $bottoml2, $topl2, &$V, &$snake ) {
226  // check that both sequences are non-empty
227  if ( $bottoml1 > $topl1 || $bottoml2 > $topl2 ) {
228  return 0;
229  }
230 
231  $d = $this->find_middle_snake( $bottoml1, $topl1, $bottoml2,
232  $topl2, $V, $snake );
233 
234  // need to store these so we don't lose them when they're
235  // overwritten by the recursion
236  $len = $snake[2];
237  $startx = $snake[0];
238  $starty = $snake[1];
239 
240  // the middle snake is part of the LCS, store it
241  for ( $i = 0; $i < $len; ++$i ) {
242  $this->removed[$startx + $i] = $this->added[$starty + $i] = false;
243  }
244 
245  if ( $d > 1 ) {
246  return $len
247  + $this->lcs_rec( $bottoml1, $startx - 1, $bottoml2,
248  $starty - 1, $V, $snake )
249  + $this->lcs_rec( $startx + $len, $topl1, $starty + $len,
250  $topl2, $V, $snake );
251  } elseif ( $d == 1 ) {
252  /*
253  * In this case the sequences differ by exactly 1 line. We have
254  * already saved all the lines after the difference in the for loop
255  * above, now we need to save all the lines before the difference.
256  */
257  $max = min( $startx - $bottoml1, $starty - $bottoml2 );
258  for ( $i = 0; $i < $max; ++$i ) {
259  $this->removed[$bottoml1 + $i] =
260  $this->added[$bottoml2 + $i] = false;
261  }
262 
263  return $max + $len;
264  }
265 
266  return $len;
267  }
268 
269  private function find_middle_snake( $bottoml1, $topl1, $bottoml2, $topl2, &$V, &$snake ) {
270  $from = &$this->from;
271  $to = &$this->to;
272  $V0 = &$V[0];
273  $V1 = &$V[1];
274  $snake0 = &$snake[0];
275  $snake1 = &$snake[1];
276  $snake2 = &$snake[2];
277  $bottoml1_min_1 = $bottoml1 - 1;
278  $bottoml2_min_1 = $bottoml2 - 1;
279  $N = $topl1 - $bottoml1_min_1;
280  $M = $topl2 - $bottoml2_min_1;
281  $delta = $N - $M;
282  $maxabsx = $N + $bottoml1;
283  $maxabsy = $M + $bottoml2;
284  $limit = min( $this->maxDifferences, ceil( ( $N + $M ) / 2 ) );
285 
286  // value_to_add_forward: a 0 or 1 that we add to the start
287  // offset to make it odd/even
288  if ( ( $M & 1 ) == 1 ) {
289  $value_to_add_forward = 1;
290  } else {
291  $value_to_add_forward = 0;
292  }
293 
294  if ( ( $N & 1 ) == 1 ) {
295  $value_to_add_backward = 1;
296  } else {
297  $value_to_add_backward = 0;
298  }
299 
300  $start_forward = -$M;
301  $end_forward = $N;
302  $start_backward = -$N;
303  $end_backward = $M;
304 
305  $limit_min_1 = $limit - 1;
306  $limit_plus_1 = $limit + 1;
307 
308  $V0[$limit_plus_1] = 0;
309  $V1[$limit_min_1] = $N;
310  $limit = min( $this->maxDifferences, ceil( ( $N + $M ) / 2 ) );
311 
312  if ( ( $delta & 1 ) == 1 ) {
313  for ( $d = 0; $d <= $limit; ++$d ) {
314  $start_diag = max( $value_to_add_forward + $start_forward, -$d );
315  $end_diag = min( $end_forward, $d );
316  $value_to_add_forward = 1 - $value_to_add_forward;
317 
318  // compute forward furthest reaching paths
319  for ( $k = $start_diag; $k <= $end_diag; $k += 2 ) {
320  if ( $k == -$d || ( $k < $d
321  && $V0[$limit_min_1 + $k] < $V0[$limit_plus_1 + $k] )
322  ) {
323  $x = $V0[$limit_plus_1 + $k];
324  } else {
325  $x = $V0[$limit_min_1 + $k] + 1;
326  }
327 
328  $absx = $snake0 = $x + $bottoml1;
329  $absy = $snake1 = $x - $k + $bottoml2;
330 
331  while ( $absx < $maxabsx && $absy < $maxabsy && $from[$absx] === $to[$absy] ) {
332  ++$absx;
333  ++$absy;
334  }
335  $x = $absx - $bottoml1;
336 
337  $snake2 = $absx - $snake0;
338  $V0[$limit + $k] = $x;
339  if ( $k >= $delta - $d + 1 && $k <= $delta + $d - 1
340  && $x >= $V1[$limit + $k - $delta]
341  ) {
342  return 2 * $d - 1;
343  }
344 
345  // check to see if we can cut down the diagonal range
346  if ( $x >= $N && $end_forward > $k - 1 ) {
347  $end_forward = $k - 1;
348  } elseif ( $absy - $bottoml2 >= $M ) {
349  $start_forward = $k + 1;
350  $value_to_add_forward = 0;
351  }
352  }
353 
354  $start_diag = max( $value_to_add_backward + $start_backward, -$d );
355  $end_diag = min( $end_backward, $d );
356  $value_to_add_backward = 1 - $value_to_add_backward;
357 
358  // compute backward furthest reaching paths
359  for ( $k = $start_diag; $k <= $end_diag; $k += 2 ) {
360  if ( $k == $d
361  || ( $k != -$d && $V1[$limit_min_1 + $k] < $V1[$limit_plus_1 + $k] )
362  ) {
363  $x = $V1[$limit_min_1 + $k];
364  } else {
365  $x = $V1[$limit_plus_1 + $k] - 1;
366  }
367 
368  $y = $x - $k - $delta;
369 
370  $snake2 = 0;
371  while ( $x > 0 && $y > 0
372  && $from[$x + $bottoml1_min_1] === $to[$y + $bottoml2_min_1]
373  ) {
374  --$x;
375  --$y;
376  ++$snake2;
377  }
378  $V1[$limit + $k] = $x;
379 
380  // check to see if we can cut down our diagonal range
381  if ( $x <= 0 ) {
382  $start_backward = $k + 1;
383  $value_to_add_backward = 0;
384  } elseif ( $y <= 0 && $end_backward > $k - 1 ) {
385  $end_backward = $k - 1;
386  }
387  }
388  }
389  } else {
390  for ( $d = 0; $d <= $limit; ++$d ) {
391  $start_diag = max( $value_to_add_forward + $start_forward, -$d );
392  $end_diag = min( $end_forward, $d );
393  $value_to_add_forward = 1 - $value_to_add_forward;
394 
395  // compute forward furthest reaching paths
396  for ( $k = $start_diag; $k <= $end_diag; $k += 2 ) {
397  if ( $k == -$d
398  || ( $k < $d && $V0[$limit_min_1 + $k] < $V0[$limit_plus_1 + $k] )
399  ) {
400  $x = $V0[$limit_plus_1 + $k];
401  } else {
402  $x = $V0[$limit_min_1 + $k] + 1;
403  }
404 
405  $absx = $snake0 = $x + $bottoml1;
406  $absy = $snake1 = $x - $k + $bottoml2;
407 
408  while ( $absx < $maxabsx && $absy < $maxabsy && $from[$absx] === $to[$absy] ) {
409  ++$absx;
410  ++$absy;
411  }
412  $x = $absx - $bottoml1;
413  $snake2 = $absx - $snake0;
414  $V0[$limit + $k] = $x;
415 
416  // check to see if we can cut down the diagonal range
417  if ( $x >= $N && $end_forward > $k - 1 ) {
418  $end_forward = $k - 1;
419  } elseif ( $absy - $bottoml2 >= $M ) {
420  $start_forward = $k + 1;
421  $value_to_add_forward = 0;
422  }
423  }
424 
425  $start_diag = max( $value_to_add_backward + $start_backward, -$d );
426  $end_diag = min( $end_backward, $d );
427  $value_to_add_backward = 1 - $value_to_add_backward;
428 
429  // compute backward furthest reaching paths
430  for ( $k = $start_diag; $k <= $end_diag; $k += 2 ) {
431  if ( $k == $d
432  || ( $k != -$d && $V1[$limit_min_1 + $k] < $V1[$limit_plus_1 + $k] )
433  ) {
434  $x = $V1[$limit_min_1 + $k];
435  } else {
436  $x = $V1[$limit_plus_1 + $k] - 1;
437  }
438 
439  $y = $x - $k - $delta;
440 
441  $snake2 = 0;
442  while ( $x > 0 && $y > 0
443  && $from[$x + $bottoml1_min_1] === $to[$y + $bottoml2_min_1]
444  ) {
445  --$x;
446  --$y;
447  ++$snake2;
448  }
449  $V1[$limit + $k] = $x;
450 
451  if ( $k >= -$delta - $d && $k <= $d - $delta
452  && $x <= $V0[$limit + $k + $delta]
453  ) {
454  $snake0 = $bottoml1 + $x;
455  $snake1 = $bottoml2 + $y;
456 
457  return 2 * $d;
458  }
459 
460  // check to see if we can cut down our diagonal range
461  if ( $x <= 0 ) {
462  $start_backward = $k + 1;
463  $value_to_add_backward = 0;
464  } elseif ( $y <= 0 && $end_backward > $k - 1 ) {
465  $end_backward = $k - 1;
466  }
467  }
468  }
469  }
470  /*
471  * computing the true LCS is too expensive, instead find the diagonal
472  * with the most progress and pretend a midle snake of length 0 occurs
473  * there.
474  */
475 
476  $most_progress = self::findMostProgress( $M, $N, $limit, $V );
477 
478  $snake0 = $bottoml1 + $most_progress[0];
479  $snake1 = $bottoml2 + $most_progress[1];
480  $snake2 = 0;
481  wfDebug( "Computing the LCS is too expensive. Using a heuristic.\n" );
482  $this->heuristicUsed = true;
483 
484  return 5; /*
485  * HACK: since we didn't really finish the LCS computation
486  * we don't really know the length of the SES. We don't do
487  * anything with the result anyway, unless it's <=1. We know
488  * for a fact SES > 1 so 5 is as good a number as any to
489  * return here
490  */
491  }
492 
493  private static function findMostProgress( $M, $N, $limit, $V ) {
494  $delta = $N - $M;
495 
496  if ( ( $M & 1 ) == ( $limit & 1 ) ) {
497  $forward_start_diag = max( -$M, -$limit );
498  } else {
499  $forward_start_diag = max( 1 - $M, -$limit );
500  }
501 
502  $forward_end_diag = min( $N, $limit );
503 
504  if ( ( $N & 1 ) == ( $limit & 1 ) ) {
505  $backward_start_diag = max( -$N, -$limit );
506  } else {
507  $backward_start_diag = max( 1 - $N, -$limit );
508  }
509 
510  $backward_end_diag = -min( $M, $limit );
511 
512  $temp = [ 0, 0, 0 ];
513 
514  $max_progress = array_fill( 0, ceil( max( $forward_end_diag - $forward_start_diag,
515  $backward_end_diag - $backward_start_diag ) / 2 ), $temp );
516  $num_progress = 0; // the 1st entry is current, it is initialized
517  // with 0s
518 
519  // first search the forward diagonals
520  for ( $k = $forward_start_diag; $k <= $forward_end_diag; $k += 2 ) {
521  $x = $V[0][$limit + $k];
522  $y = $x - $k;
523  if ( $x > $N || $y > $M ) {
524  continue;
525  }
526 
527  $progress = $x + $y;
528  if ( $progress > $max_progress[0][2] ) {
529  $num_progress = 0;
530  $max_progress[0][0] = $x;
531  $max_progress[0][1] = $y;
532  $max_progress[0][2] = $progress;
533  } elseif ( $progress == $max_progress[0][2] ) {
534  ++$num_progress;
535  $max_progress[$num_progress][0] = $x;
536  $max_progress[$num_progress][1] = $y;
537  $max_progress[$num_progress][2] = $progress;
538  }
539  }
540 
541  $max_progress_forward = true; // initially the maximum
542  // progress is in the forward
543  // direction
544 
545  // now search the backward diagonals
546  for ( $k = $backward_start_diag; $k <= $backward_end_diag; $k += 2 ) {
547  $x = $V[1][$limit + $k];
548  $y = $x - $k - $delta;
549  if ( $x < 0 || $y < 0 ) {
550  continue;
551  }
552 
553  $progress = $N - $x + $M - $y;
554  if ( $progress > $max_progress[0][2] ) {
555  $num_progress = 0;
556  $max_progress_forward = false;
557  $max_progress[0][0] = $x;
558  $max_progress[0][1] = $y;
559  $max_progress[0][2] = $progress;
560  } elseif ( $progress == $max_progress[0][2] && !$max_progress_forward ) {
561  ++$num_progress;
562  $max_progress[$num_progress][0] = $x;
563  $max_progress[$num_progress][1] = $y;
564  $max_progress[$num_progress][2] = $progress;
565  }
566  }
567 
568  // return the middle diagonal with maximal progress.
569  return $max_progress[(int)floor( $num_progress / 2 )];
570  }
571 
575  public function getLcsLength() {
576  if ( $this->heuristicUsed && !$this->lcsLengthCorrectedForHeuristic ) {
577  $this->lcsLengthCorrectedForHeuristic = true;
578  $this->length = $this->m - array_sum( $this->added );
579  }
580 
581  return $this->length;
582  }
583 
584 }
585 
593 
595  public $leftstart;
596 
598  public $leftend;
599 
601  public $leftlength;
602 
604  public $rightstart;
605 
607  public $rightend;
608 
610  public $rightlength;
611 
613  $this->leftstart = $leftstart;
614  $this->leftend = $leftend;
615  $this->leftlength = $leftend - $leftstart;
616  $this->rightstart = $rightstart;
617  $this->rightend = $rightend;
618  $this->rightlength = $rightend - $rightstart;
619  }
620 
621 }
getLcsLength()
Definition: WikiDiff3.php:575
__construct($leftstart, $leftend, $rightstart, $rightend)
Definition: WikiDiff3.php:612
magic word the default is to use $key to get the and $key value or $key value text $key value html to format the value $key
Definition: hooks.txt:2321
diff_range($from_lines, $to_lines)
Definition: WikiDiff3.php:190
Alternative representation of a set of changes, by the index ranges that are changed.
Definition: WikiDiff3.php:592
This diff implementation is mainly lifted from the LCS algorithm of the Eclipse project which in turn...
Definition: WikiDiff3.php:39
find_middle_snake($bottoml1, $topl1, $bottoml2, $topl2, &$V, &$snake)
Definition: WikiDiff3.php:269
__construct($tooLong=2000000, $powLimit=1.45)
Definition: WikiDiff3.php:60
wfDebug($text, $dest= 'all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for and distribution as defined by Sections through of this document Licensor shall mean the copyright owner or entity authorized by the copyright owner that is granting the License Legal Entity shall mean the union of the acting entity and all other entities that control are controlled by or are under common control with that entity For the purposes of this definition control direct or to cause the direction or management of such whether by contract or including but not limited to software source documentation and configuration files Object form shall mean any form resulting from mechanical transformation or translation of a Source including but not limited to compiled object generated and conversions to other media types Work shall mean the work of whether in Source or Object made available under the as indicated by a copyright notice that is included in or attached to the whether in Source or Object that is based or other modifications as a an original work of authorship For the purposes of this Derivative Works shall not include works that remain separable from
static findMostProgress($M, $N, $limit, $V)
Definition: WikiDiff3.php:493
lcs_rec($bottoml1, $topl1, $bottoml2, $topl2, &$V, &$snake)
Definition: WikiDiff3.php:225
while(($__line=Maintenance::readconsole())!==false) print n
Definition: eval.php:64
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
diff($from, $to)
Definition: WikiDiff3.php:65
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context the output can only depend on parameters provided to this hook not on global state indicating whether full HTML should be generated If generation of HTML may be but other information should still be present in the ParserOutput object to manipulate or replace but no entry for that model exists in $wgContentHandlers if desired whether it is OK to use $contentModel on $title Handler functions that modify $ok should generally return false to prevent further hooks from further modifying $ok inclusive $limit
Definition: hooks.txt:1004
$lcsLengthCorrectedForHeuristic
Definition: WikiDiff3.php:52