MediaWiki REL1_27
DairikiDiff.php
Go to the documentation of this file.
1<?php
37abstract class DiffOp {
38
42 public $type;
43
47 public $orig;
48
52 public $closing;
53
57 public function getType() {
58 return $this->type;
59 }
60
64 public function getOrig() {
65 return $this->orig;
66 }
67
72 public function getClosing( $i = null ) {
73 if ( $i === null ) {
74 return $this->closing;
75 }
76 if ( array_key_exists( $i, $this->closing ) ) {
77 return $this->closing[$i];
78 }
79 return null;
80 }
81
82 abstract public function reverse();
83
87 public function norig() {
88 return $this->orig ? count( $this->orig ) : 0;
89 }
90
94 public function nclosing() {
95 return $this->closing ? count( $this->closing ) : 0;
96 }
97}
98
106class DiffOpCopy extends DiffOp {
107 public $type = 'copy';
108
109 public function __construct( $orig, $closing = false ) {
110 if ( !is_array( $closing ) ) {
111 $closing = $orig;
112 }
113 $this->orig = $orig;
114 $this->closing = $closing;
115 }
116
120 public function reverse() {
121 return new DiffOpCopy( $this->closing, $this->orig );
122 }
123}
124
132class DiffOpDelete extends DiffOp {
133 public $type = 'delete';
134
135 public function __construct( $lines ) {
136 $this->orig = $lines;
137 $this->closing = false;
138 }
139
143 public function reverse() {
144 return new DiffOpAdd( $this->orig );
145 }
146}
147
155class DiffOpAdd extends DiffOp {
156 public $type = 'add';
157
158 public function __construct( $lines ) {
159 $this->closing = $lines;
160 $this->orig = false;
161 }
162
166 public function reverse() {
167 return new DiffOpDelete( $this->closing );
168 }
169}
170
178class DiffOpChange extends DiffOp {
179 public $type = 'change';
180
181 public function __construct( $orig, $closing ) {
182 $this->orig = $orig;
183 $this->closing = $closing;
184 }
185
189 public function reverse() {
190 return new DiffOpChange( $this->closing, $this->orig );
191 }
192}
193
219 const MAX_XREF_LENGTH = 10000;
220
222
223 protected $xv = [], $yv = [];
224 protected $xind = [], $yind = [];
225
226 protected $seq = [], $in_seq = [];
227
228 protected $lcs = 0;
229
236 public function diff( $from_lines, $to_lines ) {
237
238 // Diff and store locally
239 $this->diffLocal( $from_lines, $to_lines );
240
241 // Merge edits when possible
242 $this->shiftBoundaries( $from_lines, $this->xchanged, $this->ychanged );
243 $this->shiftBoundaries( $to_lines, $this->ychanged, $this->xchanged );
244
245 // Compute the edit operations.
246 $n_from = count( $from_lines );
247 $n_to = count( $to_lines );
248
249 $edits = [];
250 $xi = $yi = 0;
251 while ( $xi < $n_from || $yi < $n_to ) {
252 assert( $yi < $n_to || $this->xchanged[$xi] );
253 assert( $xi < $n_from || $this->ychanged[$yi] );
254
255 // Skip matching "snake".
256 $copy = [];
257 while ( $xi < $n_from && $yi < $n_to
258 && !$this->xchanged[$xi] && !$this->ychanged[$yi]
259 ) {
260 $copy[] = $from_lines[$xi++];
261 ++$yi;
262 }
263 if ( $copy ) {
264 $edits[] = new DiffOpCopy( $copy );
265 }
266
267 // Find deletes & adds.
268 $delete = [];
269 while ( $xi < $n_from && $this->xchanged[$xi] ) {
270 $delete[] = $from_lines[$xi++];
271 }
272
273 $add = [];
274 while ( $yi < $n_to && $this->ychanged[$yi] ) {
275 $add[] = $to_lines[$yi++];
276 }
277
278 if ( $delete && $add ) {
279 $edits[] = new DiffOpChange( $delete, $add );
280 } elseif ( $delete ) {
281 $edits[] = new DiffOpDelete( $delete );
282 } elseif ( $add ) {
283 $edits[] = new DiffOpAdd( $add );
284 }
285 }
286
287 return $edits;
288 }
289
294 private function diffLocal( $from_lines, $to_lines ) {
295 $wikidiff3 = new WikiDiff3();
296 $wikidiff3->diff( $from_lines, $to_lines );
297 $this->xchanged = $wikidiff3->removed;
298 $this->ychanged = $wikidiff3->added;
299 }
300
314 private function shiftBoundaries( $lines, &$changed, $other_changed ) {
315 $i = 0;
316 $j = 0;
317
318 assert( count( $lines ) == count( $changed ) );
319 $len = count( $lines );
320 $other_len = count( $other_changed );
321
322 while ( 1 ) {
323 /*
324 * Scan forwards to find beginning of another run of changes.
325 * Also keep track of the corresponding point in the other file.
326 *
327 * Throughout this code, $i and $j are adjusted together so that
328 * the first $i elements of $changed and the first $j elements
329 * of $other_changed both contain the same number of zeros
330 * (unchanged lines).
331 * Furthermore, $j is always kept so that $j == $other_len or
332 * $other_changed[$j] == false.
333 */
334 while ( $j < $other_len && $other_changed[$j] ) {
335 $j++;
336 }
337
338 while ( $i < $len && !$changed[$i] ) {
339 assert( $j < $other_len && ! $other_changed[$j] );
340 $i++;
341 $j++;
342 while ( $j < $other_len && $other_changed[$j] ) {
343 $j++;
344 }
345 }
346
347 if ( $i == $len ) {
348 break;
349 }
350
351 $start = $i;
352
353 // Find the end of this run of changes.
354 while ( ++$i < $len && $changed[$i] ) {
355 continue;
356 }
357
358 do {
359 /*
360 * Record the length of this run of changes, so that
361 * we can later determine whether the run has grown.
362 */
363 $runlength = $i - $start;
364
365 /*
366 * Move the changed region back, so long as the
367 * previous unchanged line matches the last changed one.
368 * This merges with previous changed regions.
369 */
370 while ( $start > 0 && $lines[$start - 1] == $lines[$i - 1] ) {
371 $changed[--$start] = 1;
372 $changed[--$i] = false;
373 while ( $start > 0 && $changed[$start - 1] ) {
374 $start--;
375 }
376 assert( $j > 0 );
377 while ( $other_changed[--$j] ) {
378 continue;
379 }
380 assert( $j >= 0 && !$other_changed[$j] );
381 }
382
383 /*
384 * Set CORRESPONDING to the end of the changed run, at the last
385 * point where it corresponds to a changed run in the other file.
386 * CORRESPONDING == LEN means no such point has been found.
387 */
388 $corresponding = $j < $other_len ? $i : $len;
389
390 /*
391 * Move the changed region forward, so long as the
392 * first changed line matches the following unchanged one.
393 * This merges with following changed regions.
394 * Do this second, so that if there are no merges,
395 * the changed region is moved forward as far as possible.
396 */
397 while ( $i < $len && $lines[$start] == $lines[$i] ) {
398 $changed[$start++] = false;
399 $changed[$i++] = 1;
400 while ( $i < $len && $changed[$i] ) {
401 $i++;
402 }
403
404 assert( $j < $other_len && ! $other_changed[$j] );
405 $j++;
406 if ( $j < $other_len && $other_changed[$j] ) {
407 $corresponding = $i;
408 while ( $j < $other_len && $other_changed[$j] ) {
409 $j++;
410 }
411 }
412 }
413 } while ( $runlength != $i - $start );
414
415 /*
416 * If possible, move the fully-merged run of changes
417 * back to a corresponding run in the other file.
418 */
419 while ( $corresponding < $i ) {
420 $changed[--$start] = 1;
421 $changed[--$i] = 0;
422 assert( $j > 0 );
423 while ( $other_changed[--$j] ) {
424 continue;
425 }
426 assert( $j >= 0 && !$other_changed[$j] );
427 }
428 }
429 }
430}
431
438class Diff {
439
443 public $edits;
444
453 public function __construct( $from_lines, $to_lines ) {
454 $eng = new DiffEngine;
455 $this->edits = $eng->diff( $from_lines, $to_lines );
456 }
457
461 public function getEdits() {
462 return $this->edits;
463 }
464
476 public function reverse() {
477 $rev = $this;
478 $rev->edits = [];
480 foreach ( $this->edits as $edit ) {
481 $rev->edits[] = $edit->reverse();
482 }
483
484 return $rev;
485 }
486
492 public function isEmpty() {
493 foreach ( $this->edits as $edit ) {
494 if ( $edit->type != 'copy' ) {
495 return false;
496 }
497 }
498
499 return true;
500 }
501
509 public function lcs() {
510 $lcs = 0;
511 foreach ( $this->edits as $edit ) {
512 if ( $edit->type == 'copy' ) {
513 $lcs += count( $edit->orig );
514 }
515 }
516
517 return $lcs;
518 }
519
528 public function orig() {
529 $lines = [];
530
531 foreach ( $this->edits as $edit ) {
532 if ( $edit->orig ) {
533 array_splice( $lines, count( $lines ), 0, $edit->orig );
534 }
535 }
536
537 return $lines;
538 }
539
548 public function closing() {
549 $lines = [];
550
551 foreach ( $this->edits as $edit ) {
552 if ( $edit->closing ) {
553 array_splice( $lines, count( $lines ), 0, $edit->closing );
554 }
555 }
556
557 return $lines;
558 }
559}
560
566class MappedDiff extends Diff {
587 public function __construct( $from_lines, $to_lines,
588 $mapped_from_lines, $mapped_to_lines ) {
589
590 assert( count( $from_lines ) == count( $mapped_from_lines ) );
591 assert( count( $to_lines ) == count( $mapped_to_lines ) );
592
593 parent::__construct( $mapped_from_lines, $mapped_to_lines );
594
595 $xi = $yi = 0;
596 $editCount = count( $this->edits );
597 for ( $i = 0; $i < $editCount; $i++ ) {
598 $orig = &$this->edits[$i]->orig;
599 if ( is_array( $orig ) ) {
600 $orig = array_slice( $from_lines, $xi, count( $orig ) );
601 $xi += count( $orig );
602 }
603
604 $closing = &$this->edits[$i]->closing;
605 if ( is_array( $closing ) ) {
606 $closing = array_slice( $to_lines, $yi, count( $closing ) );
607 $yi += count( $closing );
608 }
609 }
610 }
611}
612
623 public $insClass = ' class="diffchange diffchange-inline"';
624 public $delClass = ' class="diffchange diffchange-inline"';
625
626 private $lines = [];
627 private $line = '';
628 private $group = '';
629 private $tag = '';
630
634 private function flushGroup( $new_tag ) {
635 if ( $this->group !== '' ) {
636 if ( $this->tag == 'ins' ) {
637 $this->line .= "<ins{$this->insClass}>" .
638 htmlspecialchars( $this->group ) . '</ins>';
639 } elseif ( $this->tag == 'del' ) {
640 $this->line .= "<del{$this->delClass}>" .
641 htmlspecialchars( $this->group ) . '</del>';
642 } else {
643 $this->line .= htmlspecialchars( $this->group );
644 }
645 }
646 $this->group = '';
647 $this->tag = $new_tag;
648 }
649
653 private function flushLine( $new_tag ) {
654 $this->flushGroup( $new_tag );
655 if ( $this->line != '' ) {
656 array_push( $this->lines, $this->line );
657 } else {
658 # make empty lines visible by inserting an NBSP
659 array_push( $this->lines, '&#160;' );
660 }
661 $this->line = '';
662 }
663
668 public function addWords( $words, $tag = '' ) {
669 if ( $tag != $this->tag ) {
670 $this->flushGroup( $tag );
671 }
672
673 foreach ( $words as $word ) {
674 // new-line should only come as first char of word.
675 if ( $word == '' ) {
676 continue;
677 }
678 if ( $word[0] == "\n" ) {
679 $this->flushLine( $tag );
680 $word = substr( $word, 1 );
681 }
682 assert( !strstr( $word, "\n" ) );
683 $this->group .= $word;
684 }
685 }
686
690 public function getLines() {
691 $this->flushLine( '~done' );
692
693 return $this->lines;
694 }
695}
696
703 const MAX_LINE_LENGTH = 10000;
704
709 public function __construct( $orig_lines, $closing_lines ) {
710
711 list( $orig_words, $orig_stripped ) = $this->split( $orig_lines );
712 list( $closing_words, $closing_stripped ) = $this->split( $closing_lines );
713
714 parent::__construct( $orig_words, $closing_words,
715 $orig_stripped, $closing_stripped );
716 }
717
723 private function split( $lines ) {
724
725 $words = [];
726 $stripped = [];
727 $first = true;
728 foreach ( $lines as $line ) {
729 # If the line is too long, just pretend the entire line is one big word
730 # This prevents resource exhaustion problems
731 if ( $first ) {
732 $first = false;
733 } else {
734 $words[] = "\n";
735 $stripped[] = "\n";
736 }
737 if ( strlen( $line ) > self::MAX_LINE_LENGTH ) {
738 $words[] = $line;
739 $stripped[] = $line;
740 } else {
741 $m = [];
742 if ( preg_match_all( '/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
743 $line, $m )
744 ) {
745 foreach ( $m[0] as $word ) {
746 $words[] = $word;
747 }
748 foreach ( $m[1] as $stripped_word ) {
749 $stripped[] = $stripped_word;
750 }
751 }
752 }
753 }
754
755 return [ $words, $stripped ];
756 }
757
761 public function orig() {
762 $orig = new HWLDFWordAccumulator;
763
764 foreach ( $this->edits as $edit ) {
765 if ( $edit->type == 'copy' ) {
766 $orig->addWords( $edit->orig );
767 } elseif ( $edit->orig ) {
768 $orig->addWords( $edit->orig, 'del' );
769 }
770 }
771 $lines = $orig->getLines();
772
773 return $lines;
774 }
775
779 public function closing() {
780 $closing = new HWLDFWordAccumulator;
781
782 foreach ( $this->edits as $edit ) {
783 if ( $edit->type == 'copy' ) {
784 $closing->addWords( $edit->closing );
785 } elseif ( $edit->closing ) {
786 $closing->addWords( $edit->closing, 'ins' );
787 }
788 }
789 $lines = $closing->getLines();
790
791 return $lines;
792 }
793
794}
$i
Definition Parser.php:1694
$line
Definition cdb.php:59
Class used internally by Diff to actually compute the diffs.
const MAX_XREF_LENGTH
diffLocal( $from_lines, $to_lines)
shiftBoundaries( $lines, &$changed, $other_changed)
Adjust inserts/deletes of identical lines to join changes as much as possible.
diff( $from_lines, $to_lines)
Extends DiffOp.
__construct( $lines)
Extends DiffOp.
__construct( $orig, $closing)
Extends DiffOp.
__construct( $orig, $closing=false)
Extends DiffOp.
__construct( $lines)
The base class for all other DiffOp classes.
string[] $orig
getClosing( $i=null)
string $type
string[] $closing
Class representing a 'diff' between two sequences of strings.
reverse()
Compute reversed Diff.
orig()
Get the original set of lines.
isEmpty()
Check for empty diff.
lcs()
Compute the length of the Longest Common Subsequence (LCS).
closing()
Get the closing set of lines.
DiffOp[] $edits
__construct( $from_lines, $to_lines)
Constructor.
Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3.
addWords( $words, $tag='')
__construct( $from_lines, $to_lines, $mapped_from_lines, $mapped_to_lines)
Constructor.
This diff implementation is mainly lifted from the LCS algorithm of the Eclipse project which in turn...
Definition WikiDiff3.php:39
const MAX_LINE_LENGTH
__construct( $orig_lines, $closing_lines)
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global then executing the whole list after the page is displayed We don t do anything smart like collating updates to the same table or such because the list is almost always going to have just one item on if so it s not worth the trouble Since there is a job queue in the jobs which is used to update link tables of transcluding pages after edits
Definition deferred.txt:17
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition deferred.txt:11
I won t presume to tell you how to I m just describing the methods I chose to use for myself If you do choose to follow these it will probably be easier for you to collaborate with others on the but if you want to contribute without by all means do which work well I also use K &R brace matching style I know that s a religious issue for so if you want to use a style that puts opening braces on the next line
Definition design.txt:80
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
presenting them properly to the user as errors is done by the caller return true use this to change the list i e etc $rev
Definition hooks.txt:1597
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition injection.txt:37
$lines
Definition router.php:66