MediaWiki  1.34.0
WordLevelDiff.php
Go to the documentation of this file.
1 <?php
28 
/**
 * Performs a word-level diff on several lines.
 *
 * The input lines are tokenised into words; the parent Diff is run on the
 * tokens with trailing whitespace removed, and the resulting edits are then
 * re-populated with the original (whitespace-preserving) tokens.
 */
class WordLevelDiff extends \Diff {
	/**
	 * Complexity limit above which the diff is abandoned.
	 * NOTE(review): not read in this class; presumably consumed by the
	 * parent Diff constructor, which can throw ComplexityException.
	 *
	 * @var int
	 */
	protected $bailoutComplexity = 40000000; // Roughly 6K x 6K words changed

	/**
	 * @param string[] $linesBefore Lines of the old text
	 * @param string[] $linesAfter Lines of the new text
	 */
	public function __construct( $linesBefore, $linesAfter ) {
		list( $wordsBefore, $wordsBeforeStripped ) = $this->split( $linesBefore );
		list( $wordsAfter, $wordsAfterStripped ) = $this->split( $linesAfter );

		try {
			parent::__construct( $wordsBeforeStripped, $wordsAfterStripped );
		} catch ( ComplexityException $ex ) {
			// Too hard to diff, just show whole paragraph(s) as changed.
			// The fallback op must hold *words*, not lines: the loop below sizes
			// every edit by its element count and slices that many entries out
			// of the word arrays. Seeding it with the line arrays would cut the
			// output down to the first count( lines ) words.
			$this->edits = [ new DiffOpChange( $wordsBeforeStripped, $wordsAfterStripped ) ];
		}

		// Swap the stripped words in each edit for the unstripped ones.
		// $xi / $yi are the running offsets into $wordsBefore / $wordsAfter.
		$xi = $yi = 0;
		foreach ( $this->edits as $edit ) {
			// A side that is not an array has no words on it and is left untouched.
			if ( is_array( $edit->orig ) ) {
				$edit->orig = array_slice( $wordsBefore, $xi, count( $edit->orig ) );
				$xi += count( $edit->orig );
			}

			if ( is_array( $edit->closing ) ) {
				$edit->closing = array_slice( $wordsAfter, $yi, count( $edit->closing ) );
				$yi += count( $edit->closing );
			}
		}
	}

	/**
	 * Tokenise lines into words.
	 *
	 * Two parallel lists of equal length are built: the full tokens, which may
	 * carry one trailing non-newline whitespace character, and the same tokens
	 * without that trailing character. A "\n" token is inserted into both
	 * lists between consecutive lines.
	 *
	 * @param string[] $lines
	 * @return array[] [ full words, stripped words ]
	 */
	private function split( $lines ) {
		$words = [];
		$stripped = [];
		$first = true;
		foreach ( $lines as $line ) {
			if ( $first ) {
				$first = false;
			} else {
				// Line separator token, identical in both lists
				$words[] = "\n";
				$stripped[] = "\n";
			}

			// Group 1 is a run of non-newline whitespace, a run of word
			// characters (ASCII alphanumerics, "_" and bytes 0x80-0xff), or any
			// single character. The whole match additionally swallows at most
			// one following non-newline whitespace character.
			// NOTE(review): "(?!< \n)" is a negative lookahead for "<\n" (the
			// /x flag drops the space), which can never fail in front of
			// [^\S\n]. It looks like a lookbehind "(?<!\n)" was intended;
			// kept byte-identical to preserve matching behaviour - confirm.
			$m = [];
			if ( preg_match_all( '/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
				$line, $m ) ) {
				foreach ( $m[0] as $word ) {
					$words[] = $word;
				}
				foreach ( $m[1] as $strippedWord ) {
					$stripped[] = $strippedWord;
				}
			}
		}

		return [ $words, $stripped ];
	}

	/**
	 * Render the old side of the diff: copied words are added untagged,
	 * every other non-empty original side is added with the 'del' tag.
	 *
	 * @return array Result of WordAccumulator::getLines()
	 */
	public function orig() {
		$accumulator = new WordAccumulator;

		foreach ( $this->edits as $edit ) {
			if ( $edit->type == 'copy' ) {
				$accumulator->addWords( $edit->orig );
			} elseif ( $edit->orig ) {
				$accumulator->addWords( $edit->orig, 'del' );
			}
		}

		return $accumulator->getLines();
	}

	/**
	 * Render the new side of the diff: copied words are added untagged,
	 * every other non-empty closing side is added with the 'ins' tag.
	 *
	 * @return array Result of WordAccumulator::getLines()
	 */
	public function closing() {
		$accumulator = new WordAccumulator;

		foreach ( $this->edits as $edit ) {
			if ( $edit->type == 'copy' ) {
				$accumulator->addWords( $edit->closing );
			} elseif ( $edit->closing ) {
				$accumulator->addWords( $edit->closing, 'ins' );
			}
		}

		return $accumulator->getLines();
	}

}
WordLevelDiff\__construct
__construct( $linesBefore, $linesAfter)
Definition: WordLevelDiff.php:44
WordLevelDiff\orig
orig()
Definition: WordLevelDiff.php:106
WordLevelDiff\closing
closing()
Definition: WordLevelDiff.php:124
WordLevelDiff
Performs a word-level diff on several lines.
Definition: WordLevelDiff.php:34
DiffOpChange
Extends DiffOp.
Definition: DiffOpChange.php:35
WordLevelDiff\split
split( $lines)
Definition: WordLevelDiff.php:77
MediaWiki\Diff\WordAccumulator
Stores, escapes and formats the results of word-level diff.
Definition: WordAccumulator.php:34
$lines
$lines
Definition: router.php:61
MediaWiki\Diff\WordAccumulator\addWords
addWords( $words, $tag='')
Definition: WordAccumulator.php:78
$line
$line
Definition: cdb.php:59
WordLevelDiff\$bailoutComplexity
$bailoutComplexity
Definition: WordLevelDiff.php:38
MediaWiki\Diff\ComplexityException
Definition: ComplexityException.php:26
Diff
Class representing a 'diff' between two sequences of strings.
Definition: Diff.php:32