MediaWiki  master
WordLevelDiff.php
Go to the documentation of this file.
1 <?php
26 namespace Wikimedia\Diff;
27 
/**
 * Performs a word-level diff on several lines.
 *
 * The input lines are tokenised into words, the parent Diff is computed on
 * the whitespace-stripped tokens, and the resulting edits are then re-mapped
 * onto the unstripped tokens so that the original spacing is retained.
 */
class WordLevelDiff extends Diff {
	/**
	 * Complexity limit for the word-level comparison.
	 * NOTE(review): presumably read by the parent Diff when it runs its
	 * engine; that code is not visible from this file - confirm.
	 *
	 * @var int
	 */
	protected $bailoutComplexity = 40000000; // Roughly 6K x 6K words changed

	/**
	 * @param string[] $linesBefore Lines of the old text
	 * @param string[] $linesAfter Lines of the new text
	 */
	public function __construct( $linesBefore, $linesAfter ) {
		[ $wordsBefore, $wordsBeforeStripped ] = $this->split( $linesBefore );
		[ $wordsAfter, $wordsAfterStripped ] = $this->split( $linesAfter );

		try {
			parent::__construct( $wordsBeforeStripped, $wordsAfterStripped );
		} catch ( ComplexityException $ex ) {
			// Too hard to diff, just show whole paragraph(s) as changed.
			// The operation is seeded with the word arrays (not the line arrays):
			// the loop below re-slices by count(), so seeding it with lines would
			// keep only the first count( $lines ) words of each side.
			$this->edits = [ new DiffOpChange( $wordsBefore, $wordsAfter ) ];
		}

		// The edits hold stripped words; swap in the matching run of unstripped
		// words for each side, advancing a cursor per side.
		$xi = 0;
		$yi = 0;
		foreach ( $this->edits as $edit ) {
			// Edits are objects, so these property writes update $this->edits in place.
			if ( is_array( $edit->orig ) ) {
				$edit->orig = array_slice( $wordsBefore, $xi, count( $edit->orig ) );
				$xi += count( $edit->orig );
			}

			if ( is_array( $edit->closing ) ) {
				$edit->closing = array_slice( $wordsAfter, $yi, count( $edit->closing ) );
				$yi += count( $edit->closing );
			}
		}
	}

	/**
	 * Tokenise lines into words.
	 *
	 * A "\n" token is inserted between consecutive lines in both result lists.
	 *
	 * @param string[] $lines
	 * @return array[] Two parallel lists: [ full matches (each word plus at most
	 *  one trailing non-newline whitespace character), captured words without
	 *  that trailing character ]
	 */
	private function split( $lines ) {
		$words = [];
		$stripped = [];
		$first = true;
		foreach ( $lines as $line ) {
			if ( !$first ) {
				// Line separator between consecutive lines
				$words[] = "\n";
				$stripped[] = "\n";
			}
			$first = false;

			// A word is a run of non-newline whitespace, a run of
			// [0-9_A-Za-z\x80-\xff] bytes, or any single character.
			// NOTE(review): under /x, "(?!< \n)" is a negative lookahead for the
			// literal "<\n"; since the next atom must match a whitespace character
			// it can never fail. Possibly meant as the lookbehind "(?<! \n)" -
			// left untouched here so the tokenisation does not change.
			$m = [];
			if ( preg_match_all( '/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
				$line, $m ) ) {
				foreach ( $m[0] as $word ) {
					$words[] = $word;
				}
				foreach ( $m[1] as $strippedWord ) {
					$stripped[] = $strippedWord;
				}
			}
		}

		return [ $words, $stripped ];
	}

	/**
	 * Build the "before" side: copied words are added untagged, any other
	 * non-empty original words are added with the 'del' tag.
	 *
	 * @return array Result of WordAccumulator::getLines()
	 */
	public function orig() {
		$accumulator = new WordAccumulator;

		foreach ( $this->edits as $edit ) {
			if ( $edit->type === 'copy' ) {
				$accumulator->addWords( $edit->orig );
			} elseif ( $edit->orig ) {
				$accumulator->addWords( $edit->orig, 'del' );
			}
		}

		return $accumulator->getLines();
	}

	/**
	 * Build the "after" side: copied words are added untagged, any other
	 * non-empty closing words are added with the 'ins' tag.
	 *
	 * @return array Result of WordAccumulator::getLines()
	 */
	public function closing() {
		$accumulator = new WordAccumulator;

		foreach ( $this->edits as $edit ) {
			if ( $edit->type === 'copy' ) {
				$accumulator->addWords( $edit->closing );
			} elseif ( $edit->closing ) {
				$accumulator->addWords( $edit->closing, 'ins' );
			}
		}

		return $accumulator->getLines();
	}

}
143 
// Register the global name 'WordLevelDiff' as an alias of the namespaced class above.
// NOTE(review): presumably kept so callers using the un-namespaced name keep working - confirm.
class_alias( WordLevelDiff::class, 'WordLevelDiff' );
Class representing a 'diff' between two sequences of strings.
Definition: Diff.php:34
Stores, escapes and formats the results of word-level diff.
Performs a word-level diff on several lines.
__construct( $linesBefore, $linesAfter)
if(!file_exists( $CREDITS)) $lines