MediaWiki master
WordLevelDiff.php
Go to the documentation of this file.
1<?php
12namespace Wikimedia\Diff;
13
/**
 * Performs a word-level diff on several lines.
 *
 * The input lines are tokenised into words, the tokens are diffed by the
 * parent Diff class, and orig() / closing() render each side of the result
 * through a WordAccumulator.
 */
class WordLevelDiff extends Diff {
	/**
	 * Complexity limit for the word-level diff.
	 *
	 * NOTE(review): presumably this is the threshold at which the parent
	 * constructor throws ComplexityException - confirm in Diff.
	 *
	 * @var int
	 */
	protected $bailoutComplexity = 40_000_000; // Roughly 6K x 6K words changed

	/**
	 * Diff two sets of lines word by word.
	 *
	 * Both inputs are split into words. The whitespace-stripped tokens are
	 * handed to the parent constructor, and afterwards every resulting edit
	 * has its tokens swapped for the corresponding unstripped words, so that
	 * the original spacing is kept.
	 *
	 * @param string[] $linesBefore Lines of the old text
	 * @param string[] $linesAfter Lines of the new text
	 */
	public function __construct( $linesBefore, $linesAfter ) {
		[ $wordsBefore, $wordsBeforeStripped ] = $this->split( $linesBefore );
		[ $wordsAfter, $wordsAfterStripped ] = $this->split( $linesAfter );

		try {
			parent::__construct( $wordsBeforeStripped, $wordsAfterStripped );
		} catch ( ComplexityException ) {
			// Too hard to diff, just show whole paragraph(s) as changed.
			// The op has to be built from the word lists, not from the lines:
			// the loop below sizes its slices of $wordsBefore / $wordsAfter by
			// count( orig ) / count( closing ), so an op holding N lines would
			// be cut down to the first N words only.
			$this->edits = [ new DiffOpChange( $wordsBefore, $wordsAfter ) ];
		}

		// Walk both word lists in step with the edits, replacing the stripped
		// tokens of each edit by the unstripped words at the same positions.
		$xi = $yi = 0;
		foreach ( $this->edits as $edit ) {
			if ( is_array( $edit->orig ) ) {
				$edit->orig = array_slice( $wordsBefore, $xi, count( $edit->orig ) );
				$xi += count( $edit->orig );
			}

			if ( is_array( $edit->closing ) ) {
				$edit->closing = array_slice( $wordsAfter, $yi, count( $edit->closing ) );
				$yi += count( $edit->closing );
			}
		}
	}

	/**
	 * Split lines into word tokens.
	 *
	 * A "\n" token is inserted between consecutive lines (not before the
	 * first one) in both returned lists, which therefore always have the
	 * same length.
	 *
	 * @param string[] $lines
	 * @return array[] Two lists: [ words including an optional trailing
	 *  whitespace character, the same words without it ]
	 */
	private function split( $lines ) {
		$words = [];
		$stripped = [];
		$first = true;
		foreach ( $lines as $line ) {
			if ( $first ) {
				$first = false;
			} else {
				$words[] = "\n";
				$stripped[] = "\n";
			}

			// Group 1 is a run of non-newline whitespace, a run of word bytes
			// (ASCII alphanumerics, "_" and bytes 0x80-0xff), or any single
			// character. The whole match additionally takes at most one
			// following non-newline whitespace character.
			// NOTE(review): under /x, "(?!< \n)" is a negative lookahead for
			// "<" followed by a newline, which cannot fail directly in front
			// of [^\S\n]. It looks like a mistyped lookbehind "(?<! \n)".
			// Left unchanged so that tokenisation stays exactly as it was.
			if ( preg_match_all( '/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
				$line, $m ) ) {
				foreach ( $m[0] as $word ) {
					$words[] = $word;
				}
				foreach ( $m[1] as $strippedWord ) {
					$stripped[] = $strippedWord;
				}
			}
		}

		return [ $words, $stripped ];
	}

	/**
	 * Render the "before" side of the diff.
	 *
	 * Words of 'copy' edits are added untagged; the non-empty orig words of
	 * any other edit are added with the 'del' tag.
	 *
	 * @return mixed The result of WordAccumulator::getLines()
	 *  (presumably string[] - confirm in WordAccumulator)
	 */
	public function orig() {
		$orig = new WordAccumulator;

		foreach ( $this->edits as $edit ) {
			if ( $edit->type === 'copy' ) {
				$orig->addWords( $edit->orig );
			} elseif ( $edit->orig ) {
				$orig->addWords( $edit->orig, 'del' );
			}
		}

		return $orig->getLines();
	}

	/**
	 * Render the "after" side of the diff.
	 *
	 * Words of 'copy' edits are added untagged; the non-empty closing words
	 * of any other edit are added with the 'ins' tag.
	 *
	 * @return mixed The result of WordAccumulator::getLines()
	 *  (presumably string[] - confirm in WordAccumulator)
	 */
	public function closing() {
		$closing = new WordAccumulator;

		foreach ( $this->edits as $edit ) {
			if ( $edit->type === 'copy' ) {
				$closing->addWords( $edit->closing );
			} elseif ( $edit->closing ) {
				$closing->addWords( $edit->closing, 'ins' );
			}
		}

		return $closing->getLines();
	}

}

// Also expose the class under the un-namespaced name 'WordLevelDiff'.
class_alias( WordLevelDiff::class, 'WordLevelDiff' );
Class representing a 'diff' between two sequences of strings.
Definition Diff.php:20
Stores, escapes and formats the results of word-level diff.
Performs a word-level diff on several lines.
__construct( $linesBefore, $linesAfter)