MediaWiki  master
WordLevelDiff.php
Go to the documentation of this file.
1 <?php
28 
/**
 * Performs a word-level diff on several lines.
 *
 * Each input line is split into word tokens, the parent diff is run on the
 * tokens, and orig()/closing() feed the resulting edits into a
 * WordAccumulator to produce the output lines for each side.
 */
class WordLevelDiff extends \Diff {
	/**
	 * Complexity limit for the word diff; exceeding it surfaces here as a
	 * ComplexityException (see the constructor).
	 * NOTE(review): presumably consumed by the parent constructor - confirm.
	 */
	protected $bailoutComplexity = 40000000; // Roughly 6K x 6K words changed

	/**
	 * Splits both sides into words, diffs them, and restores the original
	 * (unstripped) words into the resulting edits.
	 *
	 * @param string[] $linesBefore Lines of the old text
	 * @param string[] $linesAfter Lines of the new text
	 */
	public function __construct( $linesBefore, $linesAfter ) {
		[ $wordsBefore, $wordsBeforeStripped ] = $this->split( $linesBefore );
		[ $wordsAfter, $wordsAfterStripped ] = $this->split( $linesAfter );

		try {
			parent::__construct( $wordsBeforeStripped, $wordsAfterStripped );
		} catch ( ComplexityException $ex ) {
			// Too hard to diff, just show whole paragraph(s) as changed.
			// The fallback edit must hold the *word* lists: the loop below
			// re-slices the word arrays using count() of each edit side, so
			// passing the line arrays here would truncate the output to as
			// many words as there are lines.
			$this->edits = [ new DiffOpChange( $wordsBefore, $wordsAfter ) ];
		}

		// The diff ran on the stripped words. Walk the edits in order and
		// substitute the matching run of full words (including any trailing
		// whitespace character captured by split()) on each side.
		$xi = 0;
		$yi = 0;
		foreach ( $this->edits as $edit ) {
			// A side that is not an array is left untouched.
			if ( is_array( $edit->orig ) ) {
				$edit->orig = array_slice( $wordsBefore, $xi, count( $edit->orig ) );
				$xi += count( $edit->orig );
			}

			if ( is_array( $edit->closing ) ) {
				$edit->closing = array_slice( $wordsAfter, $yi, count( $edit->closing ) );
				$yi += count( $edit->closing );
			}
		}
	}

	/**
	 * Splits lines into word tokens.
	 *
	 * A "\n" token is inserted between consecutive lines in both lists.
	 *
	 * @param string[] $lines
	 * @return array[] Two parallel lists: [ $words, $stripped ]. $words holds
	 *  the full regex matches, $stripped holds capture group 1 only, i.e. the
	 *  token without the optional trailing whitespace character.
	 */
	private function split( $lines ) {
		$words = [];
		$stripped = [];
		$first = true;
		foreach ( $lines as $line ) {
			if ( $first ) {
				$first = false;
			} else {
				$words[] = "\n";
				$stripped[] = "\n";
			}
			$m = [];
			// A token is a run of non-newline whitespace, a run of word
			// characters (ASCII alphanumerics, underscore, bytes 0x80-0xff),
			// or any single character; optionally followed by one non-newline
			// whitespace character that is kept in $words but not $stripped.
			// NOTE(review): in /x mode "(?!< \n)" parses as a negative
			// lookahead for "<" followed by a newline, not as a lookbehind
			// (which would be spelled "(?<! \n)"). Left unchanged on purpose
			// to preserve tokenization - confirm the intent before touching.
			if ( preg_match_all( '/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
				$line, $m ) ) {
				foreach ( $m[0] as $word ) {
					$words[] = $word;
				}
				foreach ( $m[1] as $stripped_word ) {
					$stripped[] = $stripped_word;
				}
			}
		}

		return [ $words, $stripped ];
	}

	/**
	 * Builds the lines for the "before" side of the diff.
	 *
	 * Words from 'copy' edits are added untagged; non-empty orig sides of
	 * all other edits are added with the 'del' tag.
	 *
	 * @return mixed Whatever WordAccumulator::getLines() returns
	 */
	public function orig() {
		$orig = new WordAccumulator;

		foreach ( $this->edits as $edit ) {
			if ( $edit->type === 'copy' ) {
				$orig->addWords( $edit->orig );
			} elseif ( $edit->orig ) {
				$orig->addWords( $edit->orig, 'del' );
			}
		}

		return $orig->getLines();
	}

	/**
	 * Builds the lines for the "after" side of the diff.
	 *
	 * Words from 'copy' edits are added untagged; non-empty closing sides of
	 * all other edits are added with the 'ins' tag.
	 *
	 * @return mixed Whatever WordAccumulator::getLines() returns
	 */
	public function closing() {
		$closing = new WordAccumulator;

		foreach ( $this->edits as $edit ) {
			if ( $edit->type === 'copy' ) {
				$closing->addWords( $edit->closing );
			} elseif ( $edit->closing ) {
				$closing->addWords( $edit->closing, 'ins' );
			}
		}

		return $closing->getLines();
	}

}
Extends DiffOp.
Class representing a 'diff' between two sequences of strings.
Definition: Diff.php:32
Stores, escapes and formats the results of word-level diff.
Performs a word-level diff on several lines.
__construct( $linesBefore, $linesAfter)
if(!file_exists( $CREDITS)) $lines