MediaWiki  1.23.0
WikiDiff3.php
Go to the documentation of this file.
1 <?php
39 class WikiDiff3 {
40 
41  // Input variables
42  private $from;
43  private $to;
44  private $m;
45  private $n;
46 
47  private $tooLong;
48  private $powLimit;
49 
50  // State variables
51  private $maxDifferences;
53 
54  // Output variables
55  public $length;
56  public $removed;
57  public $added;
59 
60  function __construct( $tooLong = 2000000, $powLimit = 1.45 ) {
61  $this->tooLong = $tooLong;
62  $this->powLimit = $powLimit;
63  }
64 
65  public function diff( /*array*/ $from, /*array*/ $to ) {
66  // remember initial lengths
67  $m = count( $from );
68  $n = count( $to );
69 
70  $this->heuristicUsed = false;
71 
72  // output
73  $removed = $m > 0 ? array_fill( 0, $m, true ) : array();
74  $added = $n > 0 ? array_fill( 0, $n, true ) : array();
75 
76  // reduce the complexity for the next step (intentionally done twice)
77  // remove common tokens at the start
78  $i = 0;
79  while ( $i < $m && $i < $n && $from[$i] === $to[$i] ) {
80  $removed[$i] = $added[$i] = false;
81  unset( $from[$i], $to[$i] );
82  ++$i;
83  }
84 
85  // remove common tokens at the end
86  $j = 1;
87  while ( $i + $j <= $m && $i + $j <= $n && $from[$m - $j] === $to[$n - $j] ) {
88  $removed[$m - $j] = $added[$n - $j] = false;
89  unset( $from[$m - $j], $to[$n - $j] );
90  ++$j;
91  }
92 
93  $this->from = $newFromIndex = $this->to = $newToIndex = array();
94 
95  // remove tokens not in both sequences
96  $shared = array();
97  foreach ( $from as $key ) {
98  $shared[$key] = false;
99  }
100 
101  foreach ( $to as $index => &$el ) {
102  if ( array_key_exists( $el, $shared ) ) {
103  // keep it
104  $this->to[] = $el;
105  $shared[$el] = true;
106  $newToIndex[] = $index;
107  }
108  }
109  foreach ( $from as $index => &$el ) {
110  if ( $shared[$el] ) {
111  // keep it
112  $this->from[] = $el;
113  $newFromIndex[] = $index;
114  }
115  }
116 
117  unset( $shared, $from, $to );
118 
119  $this->m = count( $this->from );
120  $this->n = count( $this->to );
121 
122  $this->removed = $this->m > 0 ? array_fill( 0, $this->m, true ) : array();
123  $this->added = $this->n > 0 ? array_fill( 0, $this->n, true ) : array();
124 
125  if ( $this->m == 0 || $this->n == 0 ) {
126  $this->length = 0;
127  } else {
128  $this->maxDifferences = ceil( ( $this->m + $this->n ) / 2.0 );
129  if ( $this->m * $this->n > $this->tooLong ) {
130  // limit complexity to D^POW_LIMIT for long sequences
131  $this->maxDifferences = floor( pow( $this->maxDifferences, $this->powLimit - 1.0 ) );
132  wfDebug( "Limiting max number of differences to $this->maxDifferences\n" );
133  }
134 
135  /*
136  * The common prefixes and suffixes are always part of some LCS, include
137  * them now to reduce our search space
138  */
139  $max = min( $this->m, $this->n );
140  for ( $forwardBound = 0; $forwardBound < $max
141  && $this->from[$forwardBound] === $this->to[$forwardBound];
142  ++$forwardBound
143  ) {
144  $this->removed[$forwardBound] = $this->added[$forwardBound] = false;
145  }
146 
147  $backBoundL1 = $this->m - 1;
148  $backBoundL2 = $this->n - 1;
149 
150  while ( $backBoundL1 >= $forwardBound && $backBoundL2 >= $forwardBound
151  && $this->from[$backBoundL1] === $this->to[$backBoundL2]
152  ) {
153  $this->removed[$backBoundL1--] = $this->added[$backBoundL2--] = false;
154  }
155 
156  $temp = array_fill( 0, $this->m + $this->n + 1, 0 );
157  $V = array( $temp, $temp );
158  $snake = array( 0, 0, 0 );
159 
160  $this->length = $forwardBound + $this->m - $backBoundL1 - 1
161  + $this->lcs_rec(
162  $forwardBound,
163  $backBoundL1,
164  $forwardBound,
165  $backBoundL2,
166  $V,
167  $snake
168  );
169  }
170 
171  $this->m = $m;
172  $this->n = $n;
173 
174  $this->length += $i + $j - 1;
175 
176  foreach ( $this->removed as $key => &$removed_elem ) {
177  if ( !$removed_elem ) {
178  $removed[$newFromIndex[$key]] = false;
179  }
180  }
181  foreach ( $this->added as $key => &$added_elem ) {
182  if ( !$added_elem ) {
183  $added[$newToIndex[$key]] = false;
184  }
185  }
186  $this->removed = $removed;
187  $this->added = $added;
188  }
189 
190  function diff_range( $from_lines, $to_lines ) {
191  // Diff and store locally
192  $this->diff( $from_lines, $to_lines );
193  unset( $from_lines, $to_lines );
194 
195  $ranges = array();
196  $xi = $yi = 0;
197  while ( $xi < $this->m || $yi < $this->n ) {
198  // Matching "snake".
199  while ( $xi < $this->m && $yi < $this->n
200  && !$this->removed[$xi]
201  && !$this->added[$yi]
202  ) {
203  ++$xi;
204  ++$yi;
205  }
206  // Find deletes & adds.
207  $xstart = $xi;
208  while ( $xi < $this->m && $this->removed[$xi] ) {
209  ++$xi;
210  }
211 
212  $ystart = $yi;
213  while ( $yi < $this->n && $this->added[$yi] ) {
214  ++$yi;
215  }
216 
217  if ( $xi > $xstart || $yi > $ystart ) {
218  $ranges[] = new RangeDifference( $xstart, $xi, $ystart, $yi );
219  }
220  }
221 
222  return $ranges;
223  }
224 
225  private function lcs_rec( $bottoml1, $topl1, $bottoml2, $topl2, &$V, &$snake ) {
226  // check that both sequences are non-empty
227  if ( $bottoml1 > $topl1 || $bottoml2 > $topl2 ) {
228  return 0;
229  }
230 
231  $d = $this->find_middle_snake( $bottoml1, $topl1, $bottoml2,
232  $topl2, $V, $snake );
233 
234  // need to store these so we don't lose them when they're
235  // overwritten by the recursion
236  $len = $snake[2];
237  $startx = $snake[0];
238  $starty = $snake[1];
239 
240  // the middle snake is part of the LCS, store it
241  for ( $i = 0; $i < $len; ++$i ) {
242  $this->removed[$startx + $i] = $this->added[$starty + $i] = false;
243  }
244 
245  if ( $d > 1 ) {
246  return $len
247  + $this->lcs_rec( $bottoml1, $startx - 1, $bottoml2,
248  $starty - 1, $V, $snake )
249  + $this->lcs_rec( $startx + $len, $topl1, $starty + $len,
250  $topl2, $V, $snake );
251  } elseif ( $d == 1 ) {
252  /*
253  * In this case the sequences differ by exactly 1 line. We have
254  * already saved all the lines after the difference in the for loop
255  * above, now we need to save all the lines before the difference.
256  */
257  $max = min( $startx - $bottoml1, $starty - $bottoml2 );
258  for ( $i = 0; $i < $max; ++$i ) {
259  $this->removed[$bottoml1 + $i] =
260  $this->added[$bottoml2 + $i] = false;
261  }
262 
263  return $max + $len;
264  }
265 
266  return $len;
267  }
268 
269  private function find_middle_snake( $bottoml1, $topl1, $bottoml2, $topl2, &$V, &$snake ) {
270  $from = &$this->from;
271  $to = &$this->to;
272  $V0 = &$V[0];
273  $V1 = &$V[1];
274  $snake0 = &$snake[0];
275  $snake1 = &$snake[1];
276  $snake2 = &$snake[2];
277  $bottoml1_min_1 = $bottoml1 - 1;
278  $bottoml2_min_1 = $bottoml2 - 1;
279  $N = $topl1 - $bottoml1_min_1;
280  $M = $topl2 - $bottoml2_min_1;
281  $delta = $N - $M;
282  $maxabsx = $N + $bottoml1;
283  $maxabsy = $M + $bottoml2;
284  $limit = min( $this->maxDifferences, ceil( ( $N + $M ) / 2 ) );
285 
286  // value_to_add_forward: a 0 or 1 that we add to the start
287  // offset to make it odd/even
288  if ( ( $M & 1 ) == 1 ) {
289  $value_to_add_forward = 1;
290  } else {
291  $value_to_add_forward = 0;
292  }
293 
294  if ( ( $N & 1 ) == 1 ) {
295  $value_to_add_backward = 1;
296  } else {
297  $value_to_add_backward = 0;
298  }
299 
300  $start_forward = -$M;
301  $end_forward = $N;
302  $start_backward = -$N;
303  $end_backward = $M;
304 
305  $limit_min_1 = $limit - 1;
306  $limit_plus_1 = $limit + 1;
307 
308  $V0[$limit_plus_1] = 0;
309  $V1[$limit_min_1] = $N;
310  $limit = min( $this->maxDifferences, ceil( ( $N + $M ) / 2 ) );
311 
312  if ( ( $delta & 1 ) == 1 ) {
313  for ( $d = 0; $d <= $limit; ++$d ) {
314  $start_diag = max( $value_to_add_forward + $start_forward, -$d );
315  $end_diag = min( $end_forward, $d );
316  $value_to_add_forward = 1 - $value_to_add_forward;
317 
318  // compute forward furthest reaching paths
319  for ( $k = $start_diag; $k <= $end_diag; $k += 2 ) {
320  if ( $k == -$d || ( $k < $d
321  && $V0[$limit_min_1 + $k] < $V0[$limit_plus_1 + $k] )
322  ) {
323  $x = $V0[$limit_plus_1 + $k];
324  } else {
325  $x = $V0[$limit_min_1 + $k] + 1;
326  }
327 
328  $absx = $snake0 = $x + $bottoml1;
329  $absy = $snake1 = $x - $k + $bottoml2;
330 
331  while ( $absx < $maxabsx && $absy < $maxabsy && $from[$absx] === $to[$absy] ) {
332  ++$absx;
333  ++$absy;
334  }
335  $x = $absx - $bottoml1;
336 
337  $snake2 = $absx - $snake0;
338  $V0[$limit + $k] = $x;
339  if ( $k >= $delta - $d + 1 && $k <= $delta + $d - 1
340  && $x >= $V1[$limit + $k - $delta]
341  ) {
342  return 2 * $d - 1;
343  }
344 
345  // check to see if we can cut down the diagonal range
346  if ( $x >= $N && $end_forward > $k - 1 ) {
347  $end_forward = $k - 1;
348  } elseif ( $absy - $bottoml2 >= $M ) {
349  $start_forward = $k + 1;
350  $value_to_add_forward = 0;
351  }
352  }
353 
354  $start_diag = max( $value_to_add_backward + $start_backward, -$d );
355  $end_diag = min( $end_backward, $d );
356  $value_to_add_backward = 1 - $value_to_add_backward;
357 
358  // compute backward furthest reaching paths
359  for ( $k = $start_diag; $k <= $end_diag; $k += 2 ) {
360  if ( $k == $d
361  || ( $k != -$d && $V1[$limit_min_1 + $k] < $V1[$limit_plus_1 + $k] )
362  ) {
363  $x = $V1[$limit_min_1 + $k];
364  } else {
365  $x = $V1[$limit_plus_1 + $k] - 1;
366  }
367 
368  $y = $x - $k - $delta;
369 
370  $snake2 = 0;
371  while ( $x > 0 && $y > 0
372  && $from[$x + $bottoml1_min_1] === $to[$y + $bottoml2_min_1]
373  ) {
374  --$x;
375  --$y;
376  ++$snake2;
377  }
378  $V1[$limit + $k] = $x;
379 
380  // check to see if we can cut down our diagonal range
381  if ( $x <= 0 ) {
382  $start_backward = $k + 1;
383  $value_to_add_backward = 0;
384  } elseif ( $y <= 0 && $end_backward > $k - 1 ) {
385  $end_backward = $k - 1;
386  }
387  }
388  }
389  } else {
390  for ( $d = 0; $d <= $limit; ++$d ) {
391  $start_diag = max( $value_to_add_forward + $start_forward, -$d );
392  $end_diag = min( $end_forward, $d );
393  $value_to_add_forward = 1 - $value_to_add_forward;
394 
395  // compute forward furthest reaching paths
396  for ( $k = $start_diag; $k <= $end_diag; $k += 2 ) {
397  if ( $k == -$d
398  || ( $k < $d && $V0[$limit_min_1 + $k] < $V0[$limit_plus_1 + $k] )
399  ) {
400  $x = $V0[$limit_plus_1 + $k];
401  } else {
402  $x = $V0[$limit_min_1 + $k] + 1;
403  }
404 
405  $absx = $snake0 = $x + $bottoml1;
406  $absy = $snake1 = $x - $k + $bottoml2;
407 
408  while ( $absx < $maxabsx && $absy < $maxabsy && $from[$absx] === $to[$absy] ) {
409  ++$absx;
410  ++$absy;
411  }
412  $x = $absx - $bottoml1;
413  $snake2 = $absx - $snake0;
414  $V0[$limit + $k] = $x;
415 
416  // check to see if we can cut down the diagonal range
417  if ( $x >= $N && $end_forward > $k - 1 ) {
418  $end_forward = $k - 1;
419  } elseif ( $absy - $bottoml2 >= $M ) {
420  $start_forward = $k + 1;
421  $value_to_add_forward = 0;
422  }
423  }
424 
425  $start_diag = max( $value_to_add_backward + $start_backward, -$d );
426  $end_diag = min( $end_backward, $d );
427  $value_to_add_backward = 1 - $value_to_add_backward;
428 
429  // compute backward furthest reaching paths
430  for ( $k = $start_diag; $k <= $end_diag; $k += 2 ) {
431  if ( $k == $d
432  || ( $k != -$d && $V1[$limit_min_1 + $k] < $V1[$limit_plus_1 + $k] )
433  ) {
434  $x = $V1[$limit_min_1 + $k];
435  } else {
436  $x = $V1[$limit_plus_1 + $k] - 1;
437  }
438 
439  $y = $x - $k - $delta;
440 
441  $snake2 = 0;
442  while ( $x > 0 && $y > 0
443  && $from[$x + $bottoml1_min_1] === $to[$y + $bottoml2_min_1]
444  ) {
445  --$x;
446  --$y;
447  ++$snake2;
448  }
449  $V1[$limit + $k] = $x;
450 
451  if ( $k >= -$delta - $d && $k <= $d - $delta
452  && $x <= $V0[$limit + $k + $delta]
453  ) {
454  $snake0 = $bottoml1 + $x;
455  $snake1 = $bottoml2 + $y;
456 
457  return 2 * $d;
458  }
459 
460  // check to see if we can cut down our diagonal range
461  if ( $x <= 0 ) {
462  $start_backward = $k + 1;
463  $value_to_add_backward = 0;
464  } elseif ( $y <= 0 && $end_backward > $k - 1 ) {
465  $end_backward = $k - 1;
466  }
467  }
468  }
469  }
470  /*
471  * computing the true LCS is too expensive, instead find the diagonal
472  * with the most progress and pretend a midle snake of length 0 occurs
473  * there.
474  */
475 
476  $most_progress = self::findMostProgress( $M, $N, $limit, $V );
477 
478  $snake0 = $bottoml1 + $most_progress[0];
479  $snake1 = $bottoml2 + $most_progress[1];
480  $snake2 = 0;
481  wfDebug( "Computing the LCS is too expensive. Using a heuristic.\n" );
482  $this->heuristicUsed = true;
483 
484  return 5; /*
485  * HACK: since we didn't really finish the LCS computation
486  * we don't really know the length of the SES. We don't do
487  * anything with the result anyway, unless it's <=1. We know
488  * for a fact SES > 1 so 5 is as good a number as any to
489  * return here
490  */
491  }
492 
493  private static function findMostProgress( $M, $N, $limit, $V ) {
494  $delta = $N - $M;
495 
496  if ( ( $M & 1 ) == ( $limit & 1 ) ) {
497  $forward_start_diag = max( -$M, -$limit );
498  } else {
499  $forward_start_diag = max( 1 - $M, -$limit );
500  }
501 
502  $forward_end_diag = min( $N, $limit );
503 
504  if ( ( $N & 1 ) == ( $limit & 1 ) ) {
505  $backward_start_diag = max( -$N, -$limit );
506  } else {
507  $backward_start_diag = max( 1 - $N, -$limit );
508  }
509 
510  $backward_end_diag = -min( $M, $limit );
511 
512  $temp = array( 0, 0, 0 );
513 
514  $max_progress = array_fill( 0, ceil( max( $forward_end_diag - $forward_start_diag,
515  $backward_end_diag - $backward_start_diag ) / 2 ), $temp );
516  $num_progress = 0; // the 1st entry is current, it is initialized
517  // with 0s
518 
519  // first search the forward diagonals
520  for ( $k = $forward_start_diag; $k <= $forward_end_diag; $k += 2 ) {
521  $x = $V[0][$limit + $k];
522  $y = $x - $k;
523  if ( $x > $N || $y > $M ) {
524  continue;
525  }
526 
527  $progress = $x + $y;
528  if ( $progress > $max_progress[0][2] ) {
529  $num_progress = 0;
530  $max_progress[0][0] = $x;
531  $max_progress[0][1] = $y;
532  $max_progress[0][2] = $progress;
533  } elseif ( $progress == $max_progress[0][2] ) {
534  ++$num_progress;
535  $max_progress[$num_progress][0] = $x;
536  $max_progress[$num_progress][1] = $y;
537  $max_progress[$num_progress][2] = $progress;
538  }
539  }
540 
541  $max_progress_forward = true; // initially the maximum
542  // progress is in the forward
543  // direction
544 
545  // now search the backward diagonals
546  for ( $k = $backward_start_diag; $k <= $backward_end_diag; $k += 2 ) {
547  $x = $V[1][$limit + $k];
548  $y = $x - $k - $delta;
549  if ( $x < 0 || $y < 0 ) {
550  continue;
551  }
552 
553  $progress = $N - $x + $M - $y;
554  if ( $progress > $max_progress[0][2] ) {
555  $num_progress = 0;
556  $max_progress_forward = false;
557  $max_progress[0][0] = $x;
558  $max_progress[0][1] = $y;
559  $max_progress[0][2] = $progress;
560  } elseif ( $progress == $max_progress[0][2] && !$max_progress_forward ) {
561  ++$num_progress;
562  $max_progress[$num_progress][0] = $x;
563  $max_progress[$num_progress][1] = $y;
564  $max_progress[$num_progress][2] = $progress;
565  }
566  }
567 
568  // return the middle diagonal with maximal progress.
569  return $max_progress[(int)floor( $num_progress / 2 )];
570  }
571 
575  public function getLcsLength() {
576  if ( $this->heuristicUsed && !$this->lcsLengthCorrectedForHeuristic ) {
577  $this->lcsLengthCorrectedForHeuristic = true;
578  $this->length = $this->m - array_sum( $this->added );
579  }
580 
581  return $this->length;
582  }
583 
584 }
585 
593 
595  public $leftstart;
596 
598  public $leftend;
599 
601  public $leftlength;
602 
604  public $rightstart;
605 
607  public $rightend;
608 
610  public $rightlength;
611 
613  $this->leftstart = $leftstart;
614  $this->leftend = $leftend;
615  $this->leftlength = $leftend - $leftstart;
616  $this->rightstart = $rightstart;
617  $this->rightend = $rightend;
618  $this->rightlength = $rightend - $rightstart;
619  }
620 
621 }
WikiDiff3\$maxDifferences
$maxDifferences
Definition: WikiDiff3.php:51
WikiDiff3\$n
$n
Definition: WikiDiff3.php:45
php
skin txt MediaWiki includes four core it has been set as the default in MediaWiki since the replacing Monobook it had been been the default skin since before being replaced by Vector largely rewritten in while keeping its appearance Several legacy skins were removed in the as the burden of supporting them became too heavy to bear Those in etc for skin dependent CSS etc for skin dependent JavaScript These can also be customised on a per user by etc This feature has led to a wide variety of user styles becoming that gallery is a good place to ending in php
Definition: skin.txt:62
WikiDiff3
This diff implementation is mainly lifted from the LCS algorithm of the Eclipse project which in turn...
Definition: WikiDiff3.php:39
WikiDiff3\$length
$length
Definition: WikiDiff3.php:55
RangeDifference\$leftend
int $leftend
Definition: WikiDiff3.php:596
RangeDifference\__construct
__construct( $leftstart, $leftend, $rightstart, $rightend)
Definition: WikiDiff3.php:606
WikiDiff3\$lcsLengthCorrectedForHeuristic
$lcsLengthCorrectedForHeuristic
Definition: WikiDiff3.php:52
RangeDifference\$rightstart
int $rightstart
Definition: WikiDiff3.php:600
RangeDifference\$rightlength
int $rightlength
Definition: WikiDiff3.php:604
WikiDiff3\diff
diff($from, $to)
Definition: WikiDiff3.php:65
$limit
if( $sleep) $limit
Definition: importImages.php:99
n
if(! $in) print Initializing normalization quick check tables n
Definition: UtfNormalGenerate.php:36
WikiDiff3\$heuristicUsed
$heuristicUsed
Definition: WikiDiff3.php:58
WikiDiff3\$tooLong
$tooLong
Definition: WikiDiff3.php:47
WikiDiff3\findMostProgress
static findMostProgress( $M, $N, $limit, $V)
Definition: WikiDiff3.php:493
array
the array() calling protocol came about after MediaWiki 1.4rc1.
List of Api Query prop modules.
WikiDiff3\$to
$to
Definition: WikiDiff3.php:43
WikiDiff3\$m
$m
Definition: WikiDiff3.php:44
WikiDiff3\$removed
$removed
Definition: WikiDiff3.php:56
RangeDifference\$rightend
int $rightend
Definition: WikiDiff3.php:602
RangeDifference\$leftlength
int $leftlength
Definition: WikiDiff3.php:598
wfDebug
wfDebug( $text, $dest='all')
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Definition: GlobalFunctions.php:933
WikiDiff3\find_middle_snake
find_middle_snake( $bottoml1, $topl1, $bottoml2, $topl2, &$V, &$snake)
Definition: WikiDiff3.php:269
WikiDiff3\$added
$added
Definition: WikiDiff3.php:57
WikiDiff3\getLcsLength
getLcsLength()
Definition: WikiDiff3.php:575
WikiDiff3\$powLimit
$powLimit
Definition: WikiDiff3.php:48
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
WikiDiff3\$from
$from
Definition: WikiDiff3.php:42
from
Please log in again after you receive it</td >< td > s a saved copy from
Definition: All_system_messages.txt:3297
RangeDifference\$leftstart
int $leftstart
Definition: WikiDiff3.php:594
WikiDiff3\lcs_rec
lcs_rec( $bottoml1, $topl1, $bottoml2, $topl2, &$V, &$snake)
Definition: WikiDiff3.php:225
RangeDifference
Alternative representation of a set of changes, by the index ranges that are changed.
Definition: WikiDiff3.php:592
WikiDiff3\diff_range
diff_range( $from_lines, $to_lines)
Definition: WikiDiff3.php:190
WikiDiff3\__construct
__construct( $tooLong=2000000, $powLimit=1.45)
Definition: WikiDiff3.php:60