Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
98.02% |
99 / 101 |
|
85.71% |
6 / 7 |
CRAP | |
0.00% |
0 / 1 |
AnnotatedHtmlDiffFormatter | |
98.02% |
99 / 101 |
|
85.71% |
6 / 7 |
31 | |
0.00% |
0 / 1 |
format | |
100.00% |
63 / 63 |
|
100.00% |
1 / 1 |
14 | |||
rTrimmedWordLevelDiff | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
commonRTrim | |
80.00% |
8 / 10 |
|
0.00% |
0 / 1 |
5.20 | |||
getOriginalInlineDiff | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
4 | |||
getClosingInlineDiff | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
4 | |||
getWordAccumulator | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
composeHtml | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
2 |
1 | <?php |
2 | |
3 | namespace TwoColConflict; |
4 | |
5 | use Wikimedia\Diff\ComplexityException; |
6 | use Wikimedia\Diff\Diff; |
7 | use Wikimedia\Diff\WordAccumulator; |
8 | use Wikimedia\Diff\WordLevelDiff; |
9 | |
10 | /** |
11 | * @license GPL-2.0-or-later |
12 | * @author Christoph Jauera <christoph.jauera@wikimedia.de> |
13 | */ |
class AnnotatedHtmlDiffFormatter {

	/**
	 * Builds a row-by-row description of the line-level diff between two texts.
	 *
	 * The diff itself is computed between $oldLines and $preSaveTransformedLines. $newLines is
	 * only used to fill the "newtext" of "add" and "change" rows, and is sliced with the line
	 * offsets of the closing side of that diff.
	 * NOTE(review): this presumably requires $newLines and $preSaveTransformedLines to have the
	 * same number of lines — confirm against callers.
	 *
	 * Each returned row has an "action" key. "copy" rows carry only "copytext"; "add", "delete"
	 * and "change" rows carry "oldhtml", "oldtext", "newhtml" and "newtext", where the text of
	 * the missing side of an "add" or "delete" is null.
	 *
	 * @param string[] $oldLines
	 * @param string[] $newLines
	 * @param string[] $preSaveTransformedLines
	 *
	 * @throws ComplexityException
	 * @return array[] List of changes, each of which include an HTML representation of the diff,
	 *  and the original wikitext. Note the HTML does not use <br> but relies on `white-space:
	 *  pre-line` being set!
	 * TODO: "preSavedTransformedLines" is still warty.
	 */
	public function format(
		array $oldLines,
		array $newLines,
		array $preSaveTransformedLines
	): array {
		$changes = [];
		// Offset into $newLines of the first line covered by the current edit
		$newLine = 0;
		$diff = new Diff( $oldLines, $preSaveTransformedLines );

		foreach ( $diff->getEdits() as $edit ) {
			switch ( $edit->getType() ) {
				case 'add':
					$changes[] = [
						'action' => 'add',
						'oldhtml' => "\u{00A0}",
						'oldtext' => null,
						'newhtml' => '<ins class="mw-twocolconflict-diffchange">' .
							$this->composeHtml( $edit->getClosing() ) . '</ins>',
						'newtext' => implode( "\n",
							array_slice( $newLines, $newLine, $edit->nclosing() ) ),
					];
					break;

				case 'delete':
					$changes[] = [
						'action' => 'delete',
						'oldhtml' => '<del class="mw-twocolconflict-diffchange">' .
							$this->composeHtml( $edit->getOrig() ) . '</del>',
						'oldtext' => implode( "\n", $edit->getOrig() ),
						'newhtml' => "\u{00A0}",
						'newtext' => null,
					];
					break;

				case 'change':
					$wordLevelDiff = $this->rTrimmedWordLevelDiff(
						$edit->getOrig(),
						$edit->getClosing()
					);
					$changes[] = [
						'action' => 'change',
						'oldhtml' => $this->getOriginalInlineDiff( $wordLevelDiff ),
						'oldtext' => implode( "\n", $edit->getOrig() ),
						'newhtml' => $this->getClosingInlineDiff( $wordLevelDiff ),
						'newtext' => implode( "\n",
							array_slice( $newLines, $newLine, $edit->nclosing() ) ),
					];
					break;

				case 'copy':
					$changes[] = [
						'action' => 'copy',
						// Warning, this must be unescaped Wikitext, not escaped HTML!
						'copytext' => implode( "\n", $edit->getOrig() ),
					];
					break;
			}

			$newLine += $edit->nclosing();
		}

		return $this->mergeEmptyCopyRows( $changes );
	}

	/**
	 * Tries to merge unchanged newline-only rows into a more meaningful neighboring row.
	 *
	 * A "copy" row whose text consists of nothing but newlines (or is empty) is folded into an
	 * adjacent row that has both "oldtext" and "newtext" set. Only the texts of that row are
	 * extended; its "oldhtml" and "newhtml" are left as they are. Rows without such a neighbor
	 * are kept.
	 *
	 * @param array[] $changes Rows as built by format()
	 *
	 * @return array[] Remaining rows, re-indexed from 0
	 */
	private function mergeEmptyCopyRows( array $changes ): array {
		// The loop walks a snapshot of the rows, while all lookups and writes below go to the
		// live array.
		foreach ( $changes as $i => $row ) {
			if ( !isset( $row['copytext'] ) || trim( $row['copytext'], "\n" ) !== '' ) {
				continue;
			}

			$extra = "\n" . $row['copytext'];

			// Prefer adding extra empty lines to the end of the previous row
			foreach ( [ -1, 1 ] as $offset ) {
				$neighbor = $i + $offset;
				if ( !isset( $changes[$neighbor]['oldtext'] ) ||
					!isset( $changes[$neighbor]['newtext'] )
				) {
					continue;
				}

				if ( $offset < 0 ) {
					$changes[$neighbor]['oldtext'] .= $extra;
					$changes[$neighbor]['newtext'] .= $extra;
				} else {
					$changes[$neighbor]['oldtext'] = $extra . $changes[$neighbor]['oldtext'];
					$changes[$neighbor]['newtext'] = $extra . $changes[$neighbor]['newtext'];
				}
				unset( $changes[$i] );
				break;
			}
		}

		return array_values( $changes );
	}

	/**
	 * Creates a word-level diff after stripping the trailing whitespace that the last line of
	 * both sides has in common.
	 *
	 * @param string[] $before
	 * @param string[] $after
	 *
	 * @return WordLevelDiff
	 */
	private function rTrimmedWordLevelDiff( array $before, array $after ): WordLevelDiff {
		// Without a last line on both sides there is nothing to trim. key() would return null
		// for an empty array, which must not reach the string parameters of commonRTrim().
		if ( $before !== [] && $after !== [] ) {
			end( $before );
			end( $after );
			$this->commonRTrim( $before[key( $before )], $after[key( $after )] );
		}
		return new WordLevelDiff( $before, $after );
	}

	/**
	 * Trims identical sequences of whitespace from the end of both lines.
	 *
	 * Works byte by byte from the end and stops at the first position where the two strings
	 * differ, or where the shared byte is not whitespace according to ctype_space().
	 *
	 * @param string &$before
	 * @param string &$after
	 */
	private function commonRTrim( string &$before, string &$after ): void {
		// Lengths of the leading parts that are kept
		$uncommonBefore = strlen( $before );
		$uncommonAfter = strlen( $after );
		while ( $uncommonBefore > 0 &&
			$uncommonAfter > 0 &&
			$before[$uncommonBefore - 1] === $after[$uncommonAfter - 1] &&
			ctype_space( $after[$uncommonAfter - 1] )
		) {
			$uncommonBefore--;
			$uncommonAfter--;
		}
		$before = substr( $before, 0, $uncommonBefore );
		$after = substr( $after, 0, $uncommonAfter );
	}

	/**
	 * Composes lines from a WordLevelDiff and marks removed words.
	 *
	 * @param WordLevelDiff $diff
	 *
	 * @return string Composed HTML string with inline markup
	 */
	private function getOriginalInlineDiff( WordLevelDiff $diff ): string {
		return $this->composeInlineDiff( $diff, 'orig', 'del' );
	}

	/**
	 * Composes lines from a WordLevelDiff and marks added words.
	 *
	 * @param WordLevelDiff $diff
	 *
	 * @return string Composed HTML string with inline markup
	 */
	private function getClosingInlineDiff( WordLevelDiff $diff ): string {
		return $this->composeInlineDiff( $diff, 'closing', 'ins' );
	}

	/**
	 * Composes the lines of one side of a WordLevelDiff, marking the words that are not
	 * shared with the other side.
	 *
	 * @param WordLevelDiff $diff
	 * @param string $side Name of the edit property holding the words, "orig" or "closing"
	 * @param string $tag Tag passed to the WordAccumulator for non-copy words, "del" or "ins"
	 *
	 * @return string Composed HTML string with inline markup
	 */
	private function composeInlineDiff( WordLevelDiff $diff, string $side, string $tag ): string {
		$wordAccumulator = $this->getWordAccumulator();

		foreach ( $diff->getEdits() as $edit ) {
			$words = $edit->$side;
			if ( $edit->type === 'copy' ) {
				$wordAccumulator->addWords( $words );
			} elseif ( $words ) {
				// Edits that have nothing on this side are skipped entirely
				$wordAccumulator->addWords( $words, $tag );
			}
		}
		return implode( "\n", $wordAccumulator->getLines() );
	}

	/**
	 * @return WordAccumulator Accumulator with the class attribute for both <ins> and <del>
	 *  set to this extension's diff-change class
	 */
	private function getWordAccumulator(): WordAccumulator {
		$wordAccumulator = new WordAccumulator();
		$wordAccumulator->insClass = ' class="mw-twocolconflict-diffchange"';
		$wordAccumulator->delClass = ' class="mw-twocolconflict-diffchange"';
		return $wordAccumulator;
	}

	/**
	 * Joins lines with newlines and escapes the result for use in HTML.
	 *
	 * @param string[] $lines
	 *
	 * @return string HTML without <br>, relying on `white-space: pre-line` being set
	 */
	private function composeHtml( array $lines ): string {
		$visibleLines = array_map(
			static function ( string $line ): string {
				// Replace empty lines with a non-breaking space
				return $line === '' ? "\u{00A0}" : $line;
			},
			$lines
		);
		return htmlspecialchars( implode( "\n", $visibleLines ) );
	}

}