MediaWiki REL1_31
DiffEngine.php
Go to the documentation of this file.
1<?php
26
45
46 // Input variables
47 private $from;
48 private $to;
49 private $m;
50 private $n;
51
52 private $tooLong;
53 private $powLimit;
54
55 protected $bailoutComplexity = 0;
56
57 // State variables
60
61 // Output variables
62 public $length;
63 public $removed;
64 public $added;
66
67 function __construct( $tooLong = 2000000, $powLimit = 1.45 ) {
68 $this->tooLong = $tooLong;
69 $this->powLimit = $powLimit;
70 }
71
81 public function diff( $from_lines, $to_lines ) {
82 // Diff and store locally
84
85 // Merge edits when possible
86 $this->shiftBoundaries( $from_lines, $this->removed, $this->added );
87 $this->shiftBoundaries( $to_lines, $this->added, $this->removed );
88
89 // Compute the edit operations.
90 $n_from = count( $from_lines );
91 $n_to = count( $to_lines );
92
93 $edits = [];
94 $xi = $yi = 0;
95 while ( $xi < $n_from || $yi < $n_to ) {
98
99 // Skip matching "snake".
100 $copy = [];
101 while ( $xi < $n_from && $yi < $n_to
102 && !$this->removed[$xi] && !$this->added[$yi]
103 ) {
104 $copy[] = $from_lines[$xi++];
105 ++$yi;
106 }
107 if ( $copy ) {
108 $edits[] = new DiffOpCopy( $copy );
109 }
110
111 // Find deletes & adds.
112 $delete = [];
113 while ( $xi < $n_from && $this->removed[$xi] ) {
114 $delete[] = $from_lines[$xi++];
115 }
116
117 $add = [];
118 while ( $yi < $n_to && $this->added[$yi] ) {
119 $add[] = $to_lines[$yi++];
120 }
121
122 if ( $delete && $add ) {
123 $edits[] = new DiffOpChange( $delete, $add );
124 } elseif ( $delete ) {
125 $edits[] = new DiffOpDelete( $delete );
126 } elseif ( $add ) {
127 $edits[] = new DiffOpAdd( $add );
128 }
129 }
130
131 return $edits;
132 }
133
138 public function setBailoutComplexity( $value ) {
139 $this->bailoutComplexity = $value;
140 }
141
160 $i = 0;
161 $j = 0;
162
163 assert( count( $lines ) == count( $changed ) );
164 $len = count( $lines );
165 $other_len = count( $other_changed );
166
167 while ( 1 ) {
168 /*
169 * Scan forwards to find beginning of another run of changes.
170 * Also keep track of the corresponding point in the other file.
171 *
172 * Throughout this code, $i and $j are adjusted together so that
173 * the first $i elements of $changed and the first $j elements
174 * of $other_changed both contain the same number of zeros
175 * (unchanged lines).
176 * Furthermore, $j is always kept so that $j == $other_len or
177 * $other_changed[$j] == false.
178 */
179 while ( $j < $other_len && $other_changed[$j] ) {
180 $j++;
181 }
182
183 while ( $i < $len && !$changed[$i] ) {
185 $i++;
186 $j++;
187 while ( $j < $other_len && $other_changed[$j] ) {
188 $j++;
189 }
190 }
191
192 if ( $i == $len ) {
193 break;
194 }
195
196 $start = $i;
197
198 // Find the end of this run of changes.
199 while ( ++$i < $len && $changed[$i] ) {
200 continue;
201 }
202
203 do {
204 /*
205 * Record the length of this run of changes, so that
206 * we can later determine whether the run has grown.
207 */
208 $runlength = $i - $start;
209
210 /*
211 * Move the changed region back, so long as the
212 * previous unchanged line matches the last changed one.
213 * This merges with previous changed regions.
214 */
215 while ( $start > 0 && $lines[$start - 1] == $lines[$i - 1] ) {
216 $changed[--$start] = 1;
217 $changed[--$i] = false;
218 while ( $start > 0 && $changed[$start - 1] ) {
219 $start--;
220 }
221 assert( $j > 0 );
222 while ( $other_changed[--$j] ) {
223 continue;
224 }
225 assert( $j >= 0 && !$other_changed[$j] );
226 }
227
228 /*
229 * Set CORRESPONDING to the end of the changed run, at the last
230 * point where it corresponds to a changed run in the other file.
231 * CORRESPONDING == LEN means no such point has been found.
232 */
234
235 /*
236 * Move the changed region forward, so long as the
237 * first changed line matches the following unchanged one.
238 * This merges with following changed regions.
239 * Do this second, so that if there are no merges,
240 * the changed region is moved forward as far as possible.
241 */
242 while ( $i < $len && $lines[$start] == $lines[$i] ) {
243 $changed[$start++] = false;
244 $changed[$i++] = 1;
245 while ( $i < $len && $changed[$i] ) {
246 $i++;
247 }
248
250 $j++;
251 if ( $j < $other_len && $other_changed[$j] ) {
253 while ( $j < $other_len && $other_changed[$j] ) {
254 $j++;
255 }
256 }
257 }
258 } while ( $runlength != $i - $start );
259
260 /*
261 * If possible, move the fully-merged run of changes
262 * back to a corresponding run in the other file.
263 */
264 while ( $corresponding < $i ) {
265 $changed[--$start] = 1;
266 $changed[--$i] = 0;
267 assert( $j > 0 );
268 while ( $other_changed[--$j] ) {
269 continue;
270 }
271 assert( $j >= 0 && !$other_changed[$j] );
272 }
273 }
274 }
275
281 protected function diffInternal( array $from, array $to ) {
282 // remember initial lengths
283 $m = count( $from );
284 $n = count( $to );
285
286 $this->heuristicUsed = false;
287
288 // output
289 $removed = $m > 0 ? array_fill( 0, $m, true ) : [];
290 $added = $n > 0 ? array_fill( 0, $n, true ) : [];
291
292 // reduce the complexity for the next step (intentionally done twice)
293 // remove common tokens at the start
294 $i = 0;
295 while ( $i < $m && $i < $n && $from[$i] === $to[$i] ) {
296 $removed[$i] = $added[$i] = false;
297 unset( $from[$i], $to[$i] );
298 ++$i;
299 }
300
301 // remove common tokens at the end
302 $j = 1;
303 while ( $i + $j <= $m && $i + $j <= $n && $from[$m - $j] === $to[$n - $j] ) {
304 $removed[$m - $j] = $added[$n - $j] = false;
305 unset( $from[$m - $j], $to[$n - $j] );
306 ++$j;
307 }
308
309 $this->from = $newFromIndex = $this->to = $newToIndex = [];
310
311 // remove tokens not in both sequences
312 $shared = [];
313 foreach ( $from as $key ) {
314 $shared[$key] = false;
315 }
316
317 foreach ( $to as $index => &$el ) {
318 if ( array_key_exists( $el, $shared ) ) {
319 // keep it
320 $this->to[] = $el;
321 $shared[$el] = true;
322 $newToIndex[] = $index;
323 }
324 }
325 foreach ( $from as $index => &$el ) {
326 if ( $shared[$el] ) {
327 // keep it
328 $this->from[] = $el;
329 $newFromIndex[] = $index;
330 }
331 }
332
333 unset( $shared, $from, $to );
334
335 $this->m = count( $this->from );
336 $this->n = count( $this->to );
337
338 if ( $this->bailoutComplexity > 0 && $this->m * $this->n > $this->bailoutComplexity ) {
339 throw new ComplexityException();
340 }
341
342 $this->removed = $this->m > 0 ? array_fill( 0, $this->m, true ) : [];
343 $this->added = $this->n > 0 ? array_fill( 0, $this->n, true ) : [];
344
345 if ( $this->m == 0 || $this->n == 0 ) {
346 $this->length = 0;
347 } else {
348 $this->maxDifferences = ceil( ( $this->m + $this->n ) / 2.0 );
349 if ( $this->m * $this->n > $this->tooLong ) {
350 // limit complexity to D^POW_LIMIT for long sequences
351 $this->maxDifferences = floor( pow( $this->maxDifferences, $this->powLimit - 1.0 ) );
352 wfDebug( "Limiting max number of differences to $this->maxDifferences\n" );
353 }
354
355 /*
356 * The common prefixes and suffixes are always part of some LCS, include
357 * them now to reduce our search space
358 */
359 $max = min( $this->m, $this->n );
360 for ( $forwardBound = 0; $forwardBound < $max
361 && $this->from[$forwardBound] === $this->to[$forwardBound];
363 ) {
364 $this->removed[$forwardBound] = $this->added[$forwardBound] = false;
365 }
366
367 $backBoundL1 = $this->m - 1;
368 $backBoundL2 = $this->n - 1;
369
371 && $this->from[$backBoundL1] === $this->to[$backBoundL2]
372 ) {
373 $this->removed[$backBoundL1--] = $this->added[$backBoundL2--] = false;
374 }
375
376 $temp = array_fill( 0, $this->m + $this->n + 1, 0 );
377 $V = [ $temp, $temp ];
378 $snake = [ 0, 0, 0 ];
379
380 $this->length = $forwardBound + $this->m - $backBoundL1 - 1
381 + $this->lcs_rec(
385 $backBoundL2,
386 $V,
387 $snake
388 );
389 }
390
391 $this->m = $m;
392 $this->n = $n;
393
394 $this->length += $i + $j - 1;
395
396 foreach ( $this->removed as $key => &$removed_elem ) {
397 if ( !$removed_elem ) {
398 $removed[$newFromIndex[$key]] = false;
399 }
400 }
401 foreach ( $this->added as $key => &$added_elem ) {
402 if ( !$added_elem ) {
403 $added[$newToIndex[$key]] = false;
404 }
405 }
406 $this->removed = $removed;
407 $this->added = $added;
408 }
409
411 // Diff and store locally
412 $this->diff( $from_lines, $to_lines );
414
415 $ranges = [];
416 $xi = $yi = 0;
417 while ( $xi < $this->m || $yi < $this->n ) {
418 // Matching "snake".
419 while ( $xi < $this->m && $yi < $this->n
420 && !$this->removed[$xi]
421 && !$this->added[$yi]
422 ) {
423 ++$xi;
424 ++$yi;
425 }
426 // Find deletes & adds.
427 $xstart = $xi;
428 while ( $xi < $this->m && $this->removed[$xi] ) {
429 ++$xi;
430 }
431
432 $ystart = $yi;
433 while ( $yi < $this->n && $this->added[$yi] ) {
434 ++$yi;
435 }
436
437 if ( $xi > $xstart || $yi > $ystart ) {
439 }
440 }
441
442 return $ranges;
443 }
444
445 private function lcs_rec( $bottoml1, $topl1, $bottoml2, $topl2, &$V, &$snake ) {
446 // check that both sequences are non-empty
447 if ( $bottoml1 > $topl1 || $bottoml2 > $topl2 ) {
448 return 0;
449 }
450
452 $topl2, $V, $snake );
453
454 // need to store these so we don't lose them when they're
455 // overwritten by the recursion
456 $len = $snake[2];
457 $startx = $snake[0];
458 $starty = $snake[1];
459
460 // the middle snake is part of the LCS, store it
461 for ( $i = 0; $i < $len; ++$i ) {
462 $this->removed[$startx + $i] = $this->added[$starty + $i] = false;
463 }
464
465 if ( $d > 1 ) {
466 return $len
467 + $this->lcs_rec( $bottoml1, $startx - 1, $bottoml2,
468 $starty - 1, $V, $snake )
469 + $this->lcs_rec( $startx + $len, $topl1, $starty + $len,
470 $topl2, $V, $snake );
471 } elseif ( $d == 1 ) {
472 /*
473 * In this case the sequences differ by exactly 1 line. We have
474 * already saved all the lines after the difference in the for loop
475 * above, now we need to save all the lines before the difference.
476 */
477 $max = min( $startx - $bottoml1, $starty - $bottoml2 );
478 for ( $i = 0; $i < $max; ++$i ) {
479 $this->removed[$bottoml1 + $i] =
480 $this->added[$bottoml2 + $i] = false;
481 }
482
483 return $max + $len;
484 }
485
486 return $len;
487 }
488
491 $to = &$this->to;
492 $V0 = &$V[0];
493 $V1 = &$V[1];
494 $snake0 = &$snake[0];
495 $snake1 = &$snake[1];
496 $snake2 = &$snake[2];
501 $delta = $N - $M;
504 $limit = min( $this->maxDifferences, ceil( ( $N + $M ) / 2 ) );
505
506 // value_to_add_forward: a 0 or 1 that we add to the start
507 // offset to make it odd/even
508 if ( ( $M & 1 ) == 1 ) {
510 } else {
512 }
513
514 if ( ( $N & 1 ) == 1 ) {
516 } else {
518 }
519
524
525 $limit_min_1 = $limit - 1;
526 $limit_plus_1 = $limit + 1;
527
528 $V0[$limit_plus_1] = 0;
530 $limit = min( $this->maxDifferences, ceil( ( $N + $M ) / 2 ) );
531
532 if ( ( $delta & 1 ) == 1 ) {
533 for ( $d = 0; $d <= $limit; ++$d ) {
535 $end_diag = min( $end_forward, $d );
537
538 // compute forward furthest reaching paths
539 for ( $k = $start_diag; $k <= $end_diag; $k += 2 ) {
540 if ( $k == -$d || ( $k < $d
542 ) {
543 $x = $V0[$limit_plus_1 + $k];
544 } else {
545 $x = $V0[$limit_min_1 + $k] + 1;
546 }
547
549 $absy = $snake1 = $x - $k + $bottoml2;
550
551 while ( $absx < $maxabsx && $absy < $maxabsy && $from[$absx] === $to[$absy] ) {
552 ++$absx;
553 ++$absy;
554 }
555 $x = $absx - $bottoml1;
556
558 $V0[$limit + $k] = $x;
559 if ( $k >= $delta - $d + 1 && $k <= $delta + $d - 1
560 && $x >= $V1[$limit + $k - $delta]
561 ) {
562 return 2 * $d - 1;
563 }
564
565 // check to see if we can cut down the diagonal range
566 if ( $x >= $N && $end_forward > $k - 1 ) {
567 $end_forward = $k - 1;
568 } elseif ( $absy - $bottoml2 >= $M ) {
569 $start_forward = $k + 1;
571 }
572 }
573
575 $end_diag = min( $end_backward, $d );
577
578 // compute backward furthest reaching paths
579 for ( $k = $start_diag; $k <= $end_diag; $k += 2 ) {
580 if ( $k == $d
581 || ( $k != -$d && $V1[$limit_min_1 + $k] < $V1[$limit_plus_1 + $k] )
582 ) {
583 $x = $V1[$limit_min_1 + $k];
584 } else {
585 $x = $V1[$limit_plus_1 + $k] - 1;
586 }
587
588 $y = $x - $k - $delta;
589
590 $snake2 = 0;
591 while ( $x > 0 && $y > 0
593 ) {
594 --$x;
595 --$y;
596 ++$snake2;
597 }
598 $V1[$limit + $k] = $x;
599
600 // check to see if we can cut down our diagonal range
601 if ( $x <= 0 ) {
602 $start_backward = $k + 1;
605 $end_backward = $k - 1;
606 }
607 }
608 }
609 } else {
610 for ( $d = 0; $d <= $limit; ++$d ) {
612 $end_diag = min( $end_forward, $d );
614
615 // compute forward furthest reaching paths
616 for ( $k = $start_diag; $k <= $end_diag; $k += 2 ) {
617 if ( $k == -$d
618 || ( $k < $d && $V0[$limit_min_1 + $k] < $V0[$limit_plus_1 + $k] )
619 ) {
620 $x = $V0[$limit_plus_1 + $k];
621 } else {
622 $x = $V0[$limit_min_1 + $k] + 1;
623 }
624
626 $absy = $snake1 = $x - $k + $bottoml2;
627
628 while ( $absx < $maxabsx && $absy < $maxabsy && $from[$absx] === $to[$absy] ) {
629 ++$absx;
630 ++$absy;
631 }
632 $x = $absx - $bottoml1;
634 $V0[$limit + $k] = $x;
635
636 // check to see if we can cut down the diagonal range
637 if ( $x >= $N && $end_forward > $k - 1 ) {
638 $end_forward = $k - 1;
639 } elseif ( $absy - $bottoml2 >= $M ) {
640 $start_forward = $k + 1;
642 }
643 }
644
646 $end_diag = min( $end_backward, $d );
648
649 // compute backward furthest reaching paths
650 for ( $k = $start_diag; $k <= $end_diag; $k += 2 ) {
651 if ( $k == $d
652 || ( $k != -$d && $V1[$limit_min_1 + $k] < $V1[$limit_plus_1 + $k] )
653 ) {
654 $x = $V1[$limit_min_1 + $k];
655 } else {
656 $x = $V1[$limit_plus_1 + $k] - 1;
657 }
658
659 $y = $x - $k - $delta;
660
661 $snake2 = 0;
662 while ( $x > 0 && $y > 0
664 ) {
665 --$x;
666 --$y;
667 ++$snake2;
668 }
669 $V1[$limit + $k] = $x;
670
671 if ( $k >= -$delta - $d && $k <= $d - $delta
672 && $x <= $V0[$limit + $k + $delta]
673 ) {
674 $snake0 = $bottoml1 + $x;
675 $snake1 = $bottoml2 + $y;
676
677 return 2 * $d;
678 }
679
680 // check to see if we can cut down our diagonal range
681 if ( $x <= 0 ) {
682 $start_backward = $k + 1;
685 $end_backward = $k - 1;
686 }
687 }
688 }
689 }
690 /*
691 * computing the true LCS is too expensive, instead find the diagonal
692 * with the most progress and pretend a midle snake of length 0 occurs
693 * there.
694 */
695
697
700 $snake2 = 0;
701 wfDebug( "Computing the LCS is too expensive. Using a heuristic.\n" );
702 $this->heuristicUsed = true;
703
704 return 5; /*
705 * HACK: since we didn't really finish the LCS computation
706 * we don't really know the length of the SES. We don't do
707 * anything with the result anyway, unless it's <=1. We know
708 * for a fact SES > 1 so 5 is as good a number as any to
709 * return here
710 */
711 }
712
713 private static function findMostProgress( $M, $N, $limit, $V ) {
714 $delta = $N - $M;
715
716 if ( ( $M & 1 ) == ( $limit & 1 ) ) {
717 $forward_start_diag = max( -$M, -$limit );
718 } else {
719 $forward_start_diag = max( 1 - $M, -$limit );
720 }
721
722 $forward_end_diag = min( $N, $limit );
723
724 if ( ( $N & 1 ) == ( $limit & 1 ) ) {
725 $backward_start_diag = max( -$N, -$limit );
726 } else {
727 $backward_start_diag = max( 1 - $N, -$limit );
728 }
729
730 $backward_end_diag = -min( $M, $limit );
731
732 $temp = [ 0, 0, 0 ];
733
736 $num_progress = 0; // the 1st entry is current, it is initialized
737 // with 0s
738
739 // first search the forward diagonals
740 for ( $k = $forward_start_diag; $k <= $forward_end_diag; $k += 2 ) {
741 $x = $V[0][$limit + $k];
742 $y = $x - $k;
743 if ( $x > $N || $y > $M ) {
744 continue;
745 }
746
747 $progress = $x + $y;
748 if ( $progress > $max_progress[0][2] ) {
749 $num_progress = 0;
750 $max_progress[0][0] = $x;
751 $max_progress[0][1] = $y;
752 $max_progress[0][2] = $progress;
753 } elseif ( $progress == $max_progress[0][2] ) {
757 $max_progress[$num_progress][2] = $progress;
758 }
759 }
760
761 $max_progress_forward = true; // initially the maximum
762 // progress is in the forward
763 // direction
764
765 // now search the backward diagonals
766 for ( $k = $backward_start_diag; $k <= $backward_end_diag; $k += 2 ) {
767 $x = $V[1][$limit + $k];
768 $y = $x - $k - $delta;
769 if ( $x < 0 || $y < 0 ) {
770 continue;
771 }
772
773 $progress = $N - $x + $M - $y;
774 if ( $progress > $max_progress[0][2] ) {
775 $num_progress = 0;
776 $max_progress_forward = false;
777 $max_progress[0][0] = $x;
778 $max_progress[0][1] = $y;
779 $max_progress[0][2] = $progress;
780 } elseif ( $progress == $max_progress[0][2] && !$max_progress_forward ) {
784 $max_progress[$num_progress][2] = $progress;
785 }
786 }
787
788 // return the middle diagonal with maximal progress.
789 return $max_progress[(int)floor( $num_progress / 2 )];
790 }
791
795 public function getLcsLength() {
796 if ( $this->heuristicUsed && !$this->lcsLengthCorrectedForHeuristic ) {
797 $this->lcsLengthCorrectedForHeuristic = true;
798 $this->length = $this->m - array_sum( $this->added );
799 }
800
801 return $this->length;
802 }
803
804}
805
813
816
818 public $leftend;
819
822
825
827 public $rightend;
828
831
833 $this->leftstart = $leftstart;
834 $this->leftend = $leftend;
835 $this->leftlength = $leftend - $leftstart;
836 $this->rightstart = $rightstart;
837 $this->rightend = $rightend;
838 $this->rightlength = $rightend - $rightstart;
839 }
840
841}
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for and distribution as defined by Sections through of this document Licensor shall mean the copyright owner or entity authorized by the copyright owner that is granting the License Legal Entity shall mean the union of the acting entity and all other entities that control are controlled by or are under common control with that entity For the purposes of this definition control direct or to cause the direction or management of such whether by contract or including but not limited to software source documentation and configuration files Object form shall mean any form resulting from mechanical transformation or translation of a Source including but not limited to compiled object generated and conversions to other media types Work shall mean the work of whether in Source or Object made available under the as indicated by a copyright notice that is included in or attached to the whether in Source or Object that is based or other modifications as a an original work of authorship For the purposes of this Derivative Works shall not include works that remain separable from
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
This diff implementation is mainly lifted from the LCS algorithm of the Eclipse project which in turn...
setBailoutComplexity( $value)
Sets the complexity (in comparison operations) that can't be exceeded.
diff_range( $from_lines, $to_lines)
__construct( $tooLong=2000000, $powLimit=1.45)
lcs_rec( $bottoml1, $topl1, $bottoml2, $topl2, &$V, &$snake)
shiftBoundaries(array $lines, array &$changed, array $other_changed)
Adjust inserts/deletes of identical lines to join changes as much as possible.
diff( $from_lines, $to_lines)
Performs diff.
diffInternal(array $from, array $to)
$lcsLengthCorrectedForHeuristic
find_middle_snake( $bottoml1, $topl1, $bottoml2, $topl2, &$V, &$snake)
static findMostProgress( $M, $N, $limit, $V)
Extends DiffOp.
Extends DiffOp.
Extends DiffOp.
Extends DiffOp.
Alternative representation of a set of changes, by the index ranges that are changed.
__construct( $leftstart, $leftend, $rightstart, $rightend)
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
the array() calling protocol came about after MediaWiki 1.4rc1.
also included in $newHeader if any indicating whether we should show just the diff
Definition hooks.txt:1259
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition injection.txt:37
$lines
Definition router.php:61