MediaWiki master
WordLevelDiff.php
Go to the documentation of this file.
1<?php
26namespace Wikimedia\Diff;
27
/**
 * Performs a word-level diff on several lines.
 *
 * The input lines are tokenised into words, the stripped form of each word
 * is diffed by the parent class, and the resulting edits are then mapped
 * back onto the unstripped words (which keep one trailing whitespace
 * character) so that orig() and closing() can render them.
 */
class WordLevelDiff extends Diff {
	/**
	 * Complexity limit handed to the parent diff; when it is exceeded the
	 * parent constructor throws ComplexityException (caught below).
	 *
	 * @var int
	 */
	protected $bailoutComplexity = 40_000_000; // Roughly 6K x 6K words changed

	/**
	 * @param string[] $linesBefore Lines of the old text
	 * @param string[] $linesAfter Lines of the new text
	 */
	public function __construct( $linesBefore, $linesAfter ) {
		[ $wordsBefore, $wordsBeforeStripped ] = $this->split( $linesBefore );
		[ $wordsAfter, $wordsAfterStripped ] = $this->split( $linesAfter );

		try {
			parent::__construct( $wordsBeforeStripped, $wordsAfterStripped );
		} catch ( ComplexityException $ex ) {
			// Too hard to diff, just show whole paragraph(s) as changed.
			// The op must hold *words*, not lines: the loop below sizes its
			// slices of $wordsBefore/$wordsAfter by the element count of each
			// edit, so a line-based op would be cut down to the first few
			// words instead of covering the whole paragraph.
			$this->edits = [ new DiffOpChange( $wordsBeforeStripped, $wordsAfterStripped ) ];
		}

		// Swap the stripped words held by every edit for the matching run of
		// unstripped words, walking both word lists in step with the edits.
		$xi = 0;
		$yi = 0;
		foreach ( $this->edits as $edit ) {
			if ( is_array( $edit->orig ) ) {
				$edit->orig = array_slice( $wordsBefore, $xi, count( $edit->orig ) );
				$xi += count( $edit->orig );
			}

			if ( is_array( $edit->closing ) ) {
				$edit->closing = array_slice( $wordsAfter, $yi, count( $edit->closing ) );
				$yi += count( $edit->closing );
			}
		}
	}

	/**
	 * Split lines into words.
	 *
	 * A "\n" token is inserted between consecutive lines in both returned
	 * lists, so the two lists always have the same length.
	 *
	 * @param string[] $lines
	 * @return array[] Two-element list: the full tokens (each optionally
	 *  followed by one whitespace character), and the same tokens without
	 *  that trailing whitespace
	 */
	private function split( $lines ) {
		$words = [];
		$stripped = [];
		$first = true;
		foreach ( $lines as $line ) {
			if ( $first ) {
				$first = false;
			} else {
				// Line separator between this line and the previous one
				$words[] = "\n";
				$stripped[] = "\n";
			}

			// A token is a run of non-newline whitespace, a run of word
			// bytes (ASCII alphanumerics, "_" and any byte >= 0x80), or any
			// single other character; it may absorb one following
			// non-newline whitespace character.
			// NOTE(review): under /x the group "(?!< \n)" is a negative
			// lookahead for "<" followed by a newline, which cannot reject
			// anything because the next character must be whitespace. It
			// looks like a typo for the lookbehind "(?<! \n)". The pattern
			// is kept byte-for-byte because changing it would alter
			// tokenisation - confirm before touching.
			$m = [];
			if ( preg_match_all( '/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
				$line, $m ) ) {
				// $m[0] holds the full matches, $m[1] the first capture group
				array_push( $words, ...$m[0] );
				array_push( $stripped, ...$m[1] );
			}
		}

		return [ $words, $stripped ];
	}

	/**
	 * Build the "before" side of the diff: copied words are added untagged,
	 * every other non-empty original run is tagged 'del'.
	 *
	 * @return string[] Lines as produced by WordAccumulator::getLines()
	 */
	public function orig() {
		$orig = new WordAccumulator();

		foreach ( $this->edits as $edit ) {
			if ( $edit->type === 'copy' ) {
				$orig->addWords( $edit->orig );
			} elseif ( $edit->orig ) {
				$orig->addWords( $edit->orig, 'del' );
			}
		}

		return $orig->getLines();
	}

	/**
	 * Build the "after" side of the diff: copied words are added untagged,
	 * every other non-empty closing run is tagged 'ins'.
	 *
	 * @return string[] Lines as produced by WordAccumulator::getLines()
	 */
	public function closing() {
		$closing = new WordAccumulator();

		foreach ( $this->edits as $edit ) {
			if ( $edit->type === 'copy' ) {
				$closing->addWords( $edit->closing );
			} elseif ( $edit->closing ) {
				$closing->addWords( $edit->closing, 'ins' );
			}
		}

		return $closing->getLines();
	}

}
143
// Registers the un-namespaced name 'WordLevelDiff' as an alias of this class.
// NOTE(review): presumably kept so that code still using the old global class
// name keeps working - confirm before removing.
145class_alias( WordLevelDiff::class, 'WordLevelDiff' );
Class representing a 'diff' between two sequences of strings.
Definition Diff.php:34
Stores, escapes and formats the results of word-level diff.
Performs a word-level diff on several lines.
__construct( $linesBefore, $linesAfter)
if(!file_exists( $CREDITS)) $lines