MediaWiki  1.27.2
DairikiDiff.php
Go to the documentation of this file.
1 <?php
37 abstract class DiffOp {
38 
42  public $type;
43 
47  public $orig;
48 
52  public $closing;
53 
57  public function getType() {
58  return $this->type;
59  }
60 
64  public function getOrig() {
65  return $this->orig;
66  }
67 
72  public function getClosing( $i = null ) {
73  if ( $i === null ) {
74  return $this->closing;
75  }
76  if ( array_key_exists( $i, $this->closing ) ) {
77  return $this->closing[$i];
78  }
79  return null;
80  }
81 
82  abstract public function reverse();
83 
87  public function norig() {
88  return $this->orig ? count( $this->orig ) : 0;
89  }
90 
94  public function nclosing() {
95  return $this->closing ? count( $this->closing ) : 0;
96  }
97 }
98 
106 class DiffOpCopy extends DiffOp {
107  public $type = 'copy';
108 
109  public function __construct( $orig, $closing = false ) {
110  if ( !is_array( $closing ) ) {
111  $closing = $orig;
112  }
113  $this->orig = $orig;
114  $this->closing = $closing;
115  }
116 
120  public function reverse() {
121  return new DiffOpCopy( $this->closing, $this->orig );
122  }
123 }
124 
132 class DiffOpDelete extends DiffOp {
133  public $type = 'delete';
134 
135  public function __construct( $lines ) {
136  $this->orig = $lines;
137  $this->closing = false;
138  }
139 
143  public function reverse() {
144  return new DiffOpAdd( $this->orig );
145  }
146 }
147 
155 class DiffOpAdd extends DiffOp {
156  public $type = 'add';
157 
158  public function __construct( $lines ) {
159  $this->closing = $lines;
160  $this->orig = false;
161  }
162 
166  public function reverse() {
167  return new DiffOpDelete( $this->closing );
168  }
169 }
170 
178 class DiffOpChange extends DiffOp {
179  public $type = 'change';
180 
181  public function __construct( $orig, $closing ) {
182  $this->orig = $orig;
183  $this->closing = $closing;
184  }
185 
189  public function reverse() {
190  return new DiffOpChange( $this->closing, $this->orig );
191  }
192 }
193 
218 class DiffEngine {
219  const MAX_XREF_LENGTH = 10000;
220 
221  protected $xchanged, $ychanged;
222 
223  protected $xv = [], $yv = [];
224  protected $xind = [], $yind = [];
225 
226  protected $seq = [], $in_seq = [];
227 
228  protected $lcs = 0;
229 
236  public function diff( $from_lines, $to_lines ) {
237 
238  // Diff and store locally
239  $this->diffLocal( $from_lines, $to_lines );
240 
241  // Merge edits when possible
242  $this->shiftBoundaries( $from_lines, $this->xchanged, $this->ychanged );
243  $this->shiftBoundaries( $to_lines, $this->ychanged, $this->xchanged );
244 
245  // Compute the edit operations.
246  $n_from = count( $from_lines );
247  $n_to = count( $to_lines );
248 
249  $edits = [];
250  $xi = $yi = 0;
251  while ( $xi < $n_from || $yi < $n_to ) {
252  assert( $yi < $n_to || $this->xchanged[$xi] );
253  assert( $xi < $n_from || $this->ychanged[$yi] );
254 
255  // Skip matching "snake".
256  $copy = [];
257  while ( $xi < $n_from && $yi < $n_to
258  && !$this->xchanged[$xi] && !$this->ychanged[$yi]
259  ) {
260  $copy[] = $from_lines[$xi++];
261  ++$yi;
262  }
263  if ( $copy ) {
264  $edits[] = new DiffOpCopy( $copy );
265  }
266 
267  // Find deletes & adds.
268  $delete = [];
269  while ( $xi < $n_from && $this->xchanged[$xi] ) {
270  $delete[] = $from_lines[$xi++];
271  }
272 
273  $add = [];
274  while ( $yi < $n_to && $this->ychanged[$yi] ) {
275  $add[] = $to_lines[$yi++];
276  }
277 
278  if ( $delete && $add ) {
279  $edits[] = new DiffOpChange( $delete, $add );
280  } elseif ( $delete ) {
281  $edits[] = new DiffOpDelete( $delete );
282  } elseif ( $add ) {
283  $edits[] = new DiffOpAdd( $add );
284  }
285  }
286 
287  return $edits;
288  }
289 
294  private function diffLocal( $from_lines, $to_lines ) {
295  $wikidiff3 = new WikiDiff3();
296  $wikidiff3->diff( $from_lines, $to_lines );
297  $this->xchanged = $wikidiff3->removed;
298  $this->ychanged = $wikidiff3->added;
299  }
300 
314  private function shiftBoundaries( $lines, &$changed, $other_changed ) {
315  $i = 0;
316  $j = 0;
317 
318  assert( count( $lines ) == count( $changed ) );
319  $len = count( $lines );
320  $other_len = count( $other_changed );
321 
322  while ( 1 ) {
323  /*
324  * Scan forwards to find beginning of another run of changes.
325  * Also keep track of the corresponding point in the other file.
326  *
327  * Throughout this code, $i and $j are adjusted together so that
328  * the first $i elements of $changed and the first $j elements
329  * of $other_changed both contain the same number of zeros
330  * (unchanged lines).
331  * Furthermore, $j is always kept so that $j == $other_len or
332  * $other_changed[$j] == false.
333  */
334  while ( $j < $other_len && $other_changed[$j] ) {
335  $j++;
336  }
337 
338  while ( $i < $len && !$changed[$i] ) {
339  assert( $j < $other_len && ! $other_changed[$j] );
340  $i++;
341  $j++;
342  while ( $j < $other_len && $other_changed[$j] ) {
343  $j++;
344  }
345  }
346 
347  if ( $i == $len ) {
348  break;
349  }
350 
351  $start = $i;
352 
353  // Find the end of this run of changes.
354  while ( ++$i < $len && $changed[$i] ) {
355  continue;
356  }
357 
358  do {
359  /*
360  * Record the length of this run of changes, so that
361  * we can later determine whether the run has grown.
362  */
363  $runlength = $i - $start;
364 
365  /*
366  * Move the changed region back, so long as the
367  * previous unchanged line matches the last changed one.
368  * This merges with previous changed regions.
369  */
370  while ( $start > 0 && $lines[$start - 1] == $lines[$i - 1] ) {
371  $changed[--$start] = 1;
372  $changed[--$i] = false;
373  while ( $start > 0 && $changed[$start - 1] ) {
374  $start--;
375  }
376  assert( $j > 0 );
377  while ( $other_changed[--$j] ) {
378  continue;
379  }
380  assert( $j >= 0 && !$other_changed[$j] );
381  }
382 
383  /*
384  * Set CORRESPONDING to the end of the changed run, at the last
385  * point where it corresponds to a changed run in the other file.
386  * CORRESPONDING == LEN means no such point has been found.
387  */
388  $corresponding = $j < $other_len ? $i : $len;
389 
390  /*
391  * Move the changed region forward, so long as the
392  * first changed line matches the following unchanged one.
393  * This merges with following changed regions.
394  * Do this second, so that if there are no merges,
395  * the changed region is moved forward as far as possible.
396  */
397  while ( $i < $len && $lines[$start] == $lines[$i] ) {
398  $changed[$start++] = false;
399  $changed[$i++] = 1;
400  while ( $i < $len && $changed[$i] ) {
401  $i++;
402  }
403 
404  assert( $j < $other_len && ! $other_changed[$j] );
405  $j++;
406  if ( $j < $other_len && $other_changed[$j] ) {
407  $corresponding = $i;
408  while ( $j < $other_len && $other_changed[$j] ) {
409  $j++;
410  }
411  }
412  }
413  } while ( $runlength != $i - $start );
414 
415  /*
416  * If possible, move the fully-merged run of changes
417  * back to a corresponding run in the other file.
418  */
419  while ( $corresponding < $i ) {
420  $changed[--$start] = 1;
421  $changed[--$i] = 0;
422  assert( $j > 0 );
423  while ( $other_changed[--$j] ) {
424  continue;
425  }
426  assert( $j >= 0 && !$other_changed[$j] );
427  }
428  }
429  }
430 }
431 
438 class Diff {
439 
443  public $edits;
444 
453  public function __construct( $from_lines, $to_lines ) {
454  $eng = new DiffEngine;
455  $this->edits = $eng->diff( $from_lines, $to_lines );
456  }
457 
461  public function getEdits() {
462  return $this->edits;
463  }
464 
476  public function reverse() {
477  $rev = $this;
478  $rev->edits = [];
480  foreach ( $this->edits as $edit ) {
481  $rev->edits[] = $edit->reverse();
482  }
483 
484  return $rev;
485  }
486 
492  public function isEmpty() {
493  foreach ( $this->edits as $edit ) {
494  if ( $edit->type != 'copy' ) {
495  return false;
496  }
497  }
498 
499  return true;
500  }
501 
509  public function lcs() {
510  $lcs = 0;
511  foreach ( $this->edits as $edit ) {
512  if ( $edit->type == 'copy' ) {
513  $lcs += count( $edit->orig );
514  }
515  }
516 
517  return $lcs;
518  }
519 
528  public function orig() {
529  $lines = [];
530 
531  foreach ( $this->edits as $edit ) {
532  if ( $edit->orig ) {
533  array_splice( $lines, count( $lines ), 0, $edit->orig );
534  }
535  }
536 
537  return $lines;
538  }
539 
548  public function closing() {
549  $lines = [];
550 
551  foreach ( $this->edits as $edit ) {
552  if ( $edit->closing ) {
553  array_splice( $lines, count( $lines ), 0, $edit->closing );
554  }
555  }
556 
557  return $lines;
558  }
559 }
560 
566 class MappedDiff extends Diff {
587  public function __construct( $from_lines, $to_lines,
588  $mapped_from_lines, $mapped_to_lines ) {
589 
590  assert( count( $from_lines ) == count( $mapped_from_lines ) );
591  assert( count( $to_lines ) == count( $mapped_to_lines ) );
592 
593  parent::__construct( $mapped_from_lines, $mapped_to_lines );
594 
595  $xi = $yi = 0;
596  $editCount = count( $this->edits );
597  for ( $i = 0; $i < $editCount; $i++ ) {
598  $orig = &$this->edits[$i]->orig;
599  if ( is_array( $orig ) ) {
600  $orig = array_slice( $from_lines, $xi, count( $orig ) );
601  $xi += count( $orig );
602  }
603 
604  $closing = &$this->edits[$i]->closing;
605  if ( is_array( $closing ) ) {
606  $closing = array_slice( $to_lines, $yi, count( $closing ) );
607  $yi += count( $closing );
608  }
609  }
610  }
611 }
612 
623  public $insClass = ' class="diffchange diffchange-inline"';
624  public $delClass = ' class="diffchange diffchange-inline"';
625 
626  private $lines = [];
627  private $line = '';
628  private $group = '';
629  private $tag = '';
630 
634  private function flushGroup( $new_tag ) {
635  if ( $this->group !== '' ) {
636  if ( $this->tag == 'ins' ) {
637  $this->line .= "<ins{$this->insClass}>" .
638  htmlspecialchars( $this->group ) . '</ins>';
639  } elseif ( $this->tag == 'del' ) {
640  $this->line .= "<del{$this->delClass}>" .
641  htmlspecialchars( $this->group ) . '</del>';
642  } else {
643  $this->line .= htmlspecialchars( $this->group );
644  }
645  }
646  $this->group = '';
647  $this->tag = $new_tag;
648  }
649 
653  private function flushLine( $new_tag ) {
654  $this->flushGroup( $new_tag );
655  if ( $this->line != '' ) {
656  array_push( $this->lines, $this->line );
657  } else {
658  # make empty lines visible by inserting an NBSP
659  array_push( $this->lines, '&#160;' );
660  }
661  $this->line = '';
662  }
663 
668  public function addWords( $words, $tag = '' ) {
669  if ( $tag != $this->tag ) {
670  $this->flushGroup( $tag );
671  }
672 
673  foreach ( $words as $word ) {
674  // new-line should only come as first char of word.
675  if ( $word == '' ) {
676  continue;
677  }
678  if ( $word[0] == "\n" ) {
679  $this->flushLine( $tag );
680  $word = substr( $word, 1 );
681  }
682  assert( !strstr( $word, "\n" ) );
683  $this->group .= $word;
684  }
685  }
686 
690  public function getLines() {
691  $this->flushLine( '~done' );
692 
693  return $this->lines;
694  }
695 }
696 
702 class WordLevelDiff extends MappedDiff {
703  const MAX_LINE_LENGTH = 10000;
704 
709  public function __construct( $orig_lines, $closing_lines ) {
710 
711  list( $orig_words, $orig_stripped ) = $this->split( $orig_lines );
712  list( $closing_words, $closing_stripped ) = $this->split( $closing_lines );
713 
714  parent::__construct( $orig_words, $closing_words,
715  $orig_stripped, $closing_stripped );
716  }
717 
723  private function split( $lines ) {
724 
725  $words = [];
726  $stripped = [];
727  $first = true;
728  foreach ( $lines as $line ) {
729  # If the line is too long, just pretend the entire line is one big word
730  # This prevents resource exhaustion problems
731  if ( $first ) {
732  $first = false;
733  } else {
734  $words[] = "\n";
735  $stripped[] = "\n";
736  }
737  if ( strlen( $line ) > self::MAX_LINE_LENGTH ) {
738  $words[] = $line;
739  $stripped[] = $line;
740  } else {
741  $m = [];
742  if ( preg_match_all( '/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
743  $line, $m )
744  ) {
745  foreach ( $m[0] as $word ) {
746  $words[] = $word;
747  }
748  foreach ( $m[1] as $stripped_word ) {
749  $stripped[] = $stripped_word;
750  }
751  }
752  }
753  }
754 
755  return [ $words, $stripped ];
756  }
757 
761  public function orig() {
762  $orig = new HWLDFWordAccumulator;
763 
764  foreach ( $this->edits as $edit ) {
765  if ( $edit->type == 'copy' ) {
766  $orig->addWords( $edit->orig );
767  } elseif ( $edit->orig ) {
768  $orig->addWords( $edit->orig, 'del' );
769  }
770  }
771  $lines = $orig->getLines();
772 
773  return $lines;
774  }
775 
779  public function closing() {
780  $closing = new HWLDFWordAccumulator;
781 
782  foreach ( $this->edits as $edit ) {
783  if ( $edit->type == 'copy' ) {
784  $closing->addWords( $edit->closing );
785  } elseif ( $edit->closing ) {
786  $closing->addWords( $edit->closing, 'ins' );
787  }
788  }
789  $lines = $closing->getLines();
790 
791  return $lines;
792  }
793 
794 }
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
string[] $orig
Definition: DairikiDiff.php:47
shiftBoundaries($lines, &$changed, $other_changed)
Adjust inserts/deletes of identical lines to join changes as much as possible.
Extends DiffOp.
__construct($from_lines, $to_lines, $mapped_from_lines, $mapped_to_lines)
Constructor.
closing()
Get the closing set of lines.
The base class for all other DiffOp classes.
Definition: DairikiDiff.php:37
Extends DiffOp.
getEdits()
__construct($from_lines, $to_lines)
Constructor.
This diff implementation is mainly lifted from the LCS algorithm of the Eclipse project which in turn...
Definition: WikiDiff3.php:39
getClosing($i=null)
Definition: DairikiDiff.php:72
reverse()
Compute reversed Diff.
lcs()
Compute the length of the Longest Common Subsequence (LCS).
__construct($orig, $closing)
Extends DiffOp.
orig()
Get the original set of lines.
__construct($lines)
nclosing()
Definition: DairikiDiff.php:94
isEmpty()
Check for empty diff.
string $type
Definition: DairikiDiff.php:42
Class representing a 'diff' between two sequences of strings.
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global then executing the whole list after the page is displayed We don t do anything smart like collating updates to the same table or such because the list is almost always going to have just one item on if so it s not worth the trouble Since there is a job queue in the jobs which is used to update link tables of transcluding pages after edits
Definition: deferred.txt:11
__construct($lines)
const MAX_LINE_LENGTH
presenting them properly to the user as errors is done by the caller return true use this to change the list i e etc $rev
Definition: hooks.txt:1584
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
Class used internally by Diff to actually compute the diffs.
Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3.
__construct($orig_lines, $closing_lines)
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
$lines
Definition: router.php:66
addWords($words, $tag= '')
diffLocal($from_lines, $to_lines)
__construct($orig, $closing=false)
const MAX_XREF_LENGTH
$line
Definition: cdb.php:59
I won t presume to tell you how to I m just describing the methods I chose to use for myself If you do choose to follow these it will probably be easier for you to collaborate with others on the but if you want to contribute without by all means do which work well I also use K &R brace matching style I know that s a religious issue for so if you want to use a style that puts opening braces on the next line
Definition: design.txt:79
DiffOp[] $edits
string[] $closing
Definition: DairikiDiff.php:52
Extends DiffOp.
diff($from_lines, $to_lines)