Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
98.02% covered (success)
98.02%
99 / 101
85.71% covered (warning)
85.71%
6 / 7
CRAP
0.00% covered (danger)
0.00%
0 / 1
AnnotatedHtmlDiffFormatter
98.02% covered (success)
98.02%
99 / 101
85.71% covered (warning)
85.71%
6 / 7
31
0.00% covered (danger)
0.00%
0 / 1
 format
100.00% covered (success)
100.00%
63 / 63
100.00% covered (success)
100.00%
1 / 1
14
 rTrimmedWordLevelDiff
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
1
 commonRTrim
80.00% covered (warning)
80.00%
8 / 10
0.00% covered (danger)
0.00%
0 / 1
5.20
 getOriginalInlineDiff
100.00% covered (success)
100.00%
7 / 7
100.00% covered (success)
100.00%
1 / 1
4
 getClosingInlineDiff
100.00% covered (success)
100.00%
7 / 7
100.00% covered (success)
100.00%
1 / 1
4
 getWordAccumulator
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
1
 composeHtml
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
2
1<?php
2
3namespace TwoColConflict;
4
5use Wikimedia\Diff\ComplexityException;
6use Wikimedia\Diff\Diff;
7use Wikimedia\Diff\WordAccumulator;
8use Wikimedia\Diff\WordLevelDiff;
9
/**
 * Turns a line-based diff between two lists of wikitext lines into a list of rows, each holding
 * an HTML representation of one edit together with the wikitext the row stands for.
 *
 * @license GPL-2.0-or-later
 * @author Christoph Jauera <christoph.jauera@wikimedia.de>
 */
class AnnotatedHtmlDiffFormatter {

    /**
     * Bytes commonRTrim() treats as whitespace. A fixed ASCII set is used on purpose instead of
     * the locale-dependent ctype_space(): the trimming works byte by byte, so a locale that
     * classifies bytes >= 0x80 as whitespace could otherwise cut a multi-byte UTF-8 character
     * in half.
     */
    private const ASCII_WHITESPACE = " \t\n\r\v\f";

    /**
     * Diffs $oldLines against $preSaveTransformedLines and describes every edit as one row.
     *
     * Row shapes, by 'action':
     *  - 'add': 'oldhtml' is a single non-breaking space, 'oldtext' is null, 'newhtml' holds the
     *    added lines wrapped in <ins>, 'newtext' the matching slice of $newLines.
     *  - 'delete': 'oldhtml' holds the removed lines wrapped in <del>, 'oldtext' the removed
     *    lines, 'newhtml' is a single non-breaking space, 'newtext' is null.
     *  - 'change': 'oldhtml'/'newhtml' hold word-level inline diffs, 'oldtext' the original
     *    lines, 'newtext' the matching slice of $newLines.
     *  - 'copy': only 'copytext', the unchanged lines as raw wikitext.
     *
     * Unchanged rows that consist of nothing but newlines are merged into a neighbouring
     * 'change' row when there is one.
     *
     * @param string[] $oldLines
     * @param string[] $newLines Source of the 'newtext' values. It is sliced with offsets
     *  computed from the diff against $preSaveTransformedLines, so the two lists are assumed
     *  to be aligned line by line.
     * @param string[] $preSaveTransformedLines Lines the diff (and thus the HTML) is based on
     *
     * @throws ComplexityException
     * @return array[] List of changes, each of which include an HTML representation of the diff,
     *  and the original wikitext. Note the HTML does not use <br> but relies on `white-space:
     *  pre-line` being set!
     * TODO: "preSavedTransformedLines" is still warty.
     */
    public function format(
        array $oldLines,
        array $newLines,
        array $preSaveTransformedLines
    ): array {
        $changes = [];
        // Offset of the first line on the "new" side that no edit has consumed yet
        $newLine = 0;
        $diff = new Diff( $oldLines, $preSaveTransformedLines );

        foreach ( $diff->getEdits() as $edit ) {
            switch ( $edit->getType() ) {
                case 'add':
                    $changes[] = [
                        'action' => 'add',
                        'oldhtml' => "\u{00A0}",
                        'oldtext' => null,
                        'newhtml' => '<ins class="mw-twocolconflict-diffchange">' .
                            $this->composeHtml( $edit->getClosing() ) . '</ins>',
                        'newtext' => implode( "\n",
                            array_slice( $newLines, $newLine, $edit->nclosing() ) ),
                    ];
                    break;

                case 'delete':
                    $changes[] = [
                        'action' => 'delete',
                        'oldhtml' => '<del class="mw-twocolconflict-diffchange">' .
                            $this->composeHtml( $edit->getOrig() ) . '</del>',
                        'oldtext' => implode( "\n", $edit->getOrig() ),
                        'newhtml' => "\u{00A0}",
                        'newtext' => null,
                    ];
                    break;

                case 'change':
                    $wordLevelDiff = $this->rTrimmedWordLevelDiff( $edit->getOrig(), $edit->getClosing() );
                    $changes[] = [
                        'action' => 'change',
                        'oldhtml' => $this->getOriginalInlineDiff( $wordLevelDiff ),
                        'oldtext' => implode( "\n", $edit->getOrig() ),
                        'newhtml' => $this->getClosingInlineDiff( $wordLevelDiff ),
                        'newtext' => implode( "\n",
                            array_slice( $newLines, $newLine, $edit->nclosing() ) ),
                    ];
                    break;

                case 'copy':
                    $changes[] = [
                        'action' => 'copy',
                        // Warning, this must be unescaped Wikitext, not escaped HTML!
                        'copytext' => implode( "\n", $edit->getOrig() ),
                    ];
                    break;
            }

            $newLine += $edit->nclosing();
        }

        return $this->mergeEmptyCopyRows( $changes );
    }

    /**
     * Tries to merge unchanged newline-only rows into a more meaningful row.
     *
     * A row qualifies as merge target only when both its 'oldtext' and 'newtext' are set and
     * not null, which among the rows built by format() is true for 'change' rows only. The
     * HTML of the target row is left untouched; only the wikitext grows.
     *
     * @param array[] $changes Rows as built by format()
     *
     * @return array[] Remaining rows, re-indexed from 0
     */
    private function mergeEmptyCopyRows( array $changes ): array {
        foreach ( $changes as $i => $row ) {
            if ( !isset( $row['copytext'] ) || trim( $row['copytext'], "\n" ) !== '' ) {
                continue;
            }

            // Prefer adding extra empty lines to the end of the previous row
            foreach ( [ -1, 1 ] as $offset ) {
                $neighbor = $i + $offset;
                // Also false when there is no row at that position (any more)
                if ( !isset( $changes[$neighbor]['oldtext'], $changes[$neighbor]['newtext'] ) ) {
                    continue;
                }

                $extra = "\n" . $row['copytext'];
                if ( $offset < 0 ) {
                    $changes[$neighbor]['oldtext'] .= $extra;
                    $changes[$neighbor]['newtext'] .= $extra;
                } else {
                    $changes[$neighbor]['oldtext'] = $extra . $changes[$neighbor]['oldtext'];
                    $changes[$neighbor]['newtext'] = $extra . $changes[$neighbor]['newtext'];
                }
                unset( $changes[$i] );
                break;
            }
        }

        return array_values( $changes );
    }

    /**
     * Creates a word-level diff of the two line lists, after removing the whitespace both
     * last lines share at their very end.
     *
     * @param string[] $before
     * @param string[] $after
     *
     * @return WordLevelDiff
     */
    private function rTrimmedWordLevelDiff( array $before, array $after ): WordLevelDiff {
        // With an empty list there is no last line to trim. Skipping also avoids indexing the
        // array with the null key() returns in that case.
        if ( $before !== [] && $after !== [] ) {
            end( $before );
            end( $after );
            $this->commonRTrim( $before[key( $before )], $after[key( $after )] );
        }
        return new WordLevelDiff( $before, $after );
    }

    /**
     * Trims identical sequences of whitespace from the end of both lines.
     *
     * Works from the end towards the start and stops at the first position where the two lines
     * differ, or where the shared byte is not ASCII whitespace. Both arguments are modified in
     * place.
     *
     * @param string &$before
     * @param string &$after
     */
    private function commonRTrim( string &$before, string &$after ): void {
        // Both counters hold the length of the part that is kept
        $uncommonBefore = strlen( $before );
        $uncommonAfter = strlen( $after );
        while ( $uncommonBefore > 0 &&
            $uncommonAfter > 0 &&
            $before[$uncommonBefore - 1] === $after[$uncommonAfter - 1] &&
            strpos( self::ASCII_WHITESPACE, $after[$uncommonAfter - 1] ) !== false
        ) {
            $uncommonBefore--;
            $uncommonAfter--;
        }
        $before = substr( $before, 0, $uncommonBefore );
        $after = substr( $after, 0, $uncommonAfter );
    }

    /**
     * Composes lines from a WordLevelDiff and marks removed words.
     *
     * Copied words are added unmarked, words from the original side of every other edit are
     * tagged 'del'. Edits without anything on the original side are skipped.
     *
     * @param WordLevelDiff $diff
     *
     * @return string Composed HTML string with inline markup
     */
    private function getOriginalInlineDiff( WordLevelDiff $diff ): string {
        $wordAccumulator = $this->getWordAccumulator();

        foreach ( $diff->getEdits() as $edit ) {
            if ( $edit->type === 'copy' ) {
                $wordAccumulator->addWords( $edit->orig );
            } elseif ( $edit->orig ) {
                $wordAccumulator->addWords( $edit->orig, 'del' );
            }
        }
        return implode( "\n", $wordAccumulator->getLines() );
    }

    /**
     * Composes lines from a WordLevelDiff and marks added words.
     *
     * Copied words are added unmarked, words from the closing side of every other edit are
     * tagged 'ins'. Edits without anything on the closing side are skipped.
     *
     * @param WordLevelDiff $diff
     *
     * @return string Composed HTML string with inline markup
     */
    private function getClosingInlineDiff( WordLevelDiff $diff ): string {
        $wordAccumulator = $this->getWordAccumulator();

        foreach ( $diff->getEdits() as $edit ) {
            if ( $edit->type === 'copy' ) {
                $wordAccumulator->addWords( $edit->closing );
            } elseif ( $edit->closing ) {
                $wordAccumulator->addWords( $edit->closing, 'ins' );
            }
        }
        return implode( "\n", $wordAccumulator->getLines() );
    }

    /**
     * @return WordAccumulator A fresh accumulator with the same class attribute configured for
     *  both its insertion and its deletion markup
     */
    private function getWordAccumulator(): WordAccumulator {
        $wordAccumulator = new WordAccumulator();
        $wordAccumulator->insClass = ' class="mw-twocolconflict-diffchange"';
        $wordAccumulator->delClass = ' class="mw-twocolconflict-diffchange"';
        return $wordAccumulator;
    }

    /**
     * Joins the lines with newlines and escapes the result for use in HTML.
     *
     * @param string[] $lines
     *
     * @return string HTML without <br>, relying on `white-space: pre-line` being set
     */
    private function composeHtml( array $lines ): string {
        return htmlspecialchars( implode( "\n", array_map(
            static function ( string $line ): string {
                // Replace empty lines with a non-breaking space
                return $line === '' ? "\u{00A0}" : $line;
            },
            $lines
        ) ) );
    }

}