Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
86.54% |
45 / 52 |
|
75.00% |
9 / 12 |
CRAP | |
0.00% |
0 / 1 |
DiffMatrix | |
86.54% |
45 / 52 |
|
75.00% |
9 / 12 |
20.98 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
calculateDiffMatrix | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
3 | |||
getDiffOps | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
3 | |||
hasDiffOps | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
3 | |||
getEditCountByRow | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getEditCountByCol | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getIndicesOfRemovedItems | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getIndicesOfAddedItems | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getIndicesOfMax | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
1 | |||
getNormalizer | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
zeroArray | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
__toString | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 |
1 | <?php |
2 | /** |
3 | * WikiLambda DiffMatrix utility class to compute differences |
4 | * between two lists with different number of items. |
5 | * |
6 | * @file |
7 | * @ingroup Extensions |
8 | * @copyright 2020– Abstract Wikipedia team; see AUTHORS.txt |
9 | * @license MIT |
10 | */ |
11 | |
12 | namespace MediaWiki\Extension\WikiLambda\Diff; |
13 | |
14 | use Diff\DiffOp\Diff\Diff; |
15 | use Diff\DiffOp\DiffOp; |
16 | |
17 | class DiffMatrix { |
18 | |
19 | private ZObjectDiffer $zObjectDiffer; |
20 | |
21 | /** @var array */ |
22 | private $oldArray; |
23 | |
24 | /** @var array */ |
25 | private $newArray; |
26 | |
27 | /** @var array Matrix with all the DiffOps found for every combination of old and new items. */ |
28 | private $diffMatrix = []; |
29 | |
30 | /** @var array Matrix with all the DiffOps count for every combination of old and new items. */ |
31 | private $diffCountMatrix = []; |
32 | |
33 | /** @var array List of sums of edit counts for every row. */ |
34 | private $editCountByRow; |
35 | |
36 | /** @var array List of sums of edit counts for every column. */ |
37 | private $editCountByCol; |
38 | |
39 | /** |
40 | * Creates a DiffMatrix object between an array of old values and an array of new values. |
41 | * This class also exposes utilities to operate on row and col edit counts and find items |
42 | * that have been deleted, added or changed positions. |
43 | * |
44 | * @param ZObjectDiffer $zObjectDiffer injected ZObjectDiffer to calculate diff between list items |
45 | * @param array $oldArray |
46 | * @param array $newArray |
47 | */ |
48 | public function __construct( ZObjectDiffer $zObjectDiffer, array $oldArray, array $newArray ) { |
49 | $this->zObjectDiffer = $zObjectDiffer; |
50 | $this->oldArray = $oldArray; |
51 | $this->newArray = $newArray; |
52 | $this->calculateDiffMatrix(); |
53 | } |
54 | |
55 | /** |
56 | * Iterates over rows and colums and generates both |
57 | * the matrix of DiffOps and the matrix of edit counts. |
58 | */ |
59 | private function calculateDiffMatrix(): void { |
60 | // Initialize edit count by row and by column |
61 | $this->editCountByRow = $this->zeroArray( count( $this->oldArray ) ); |
62 | $this->editCountByCol = $this->zeroArray( count( $this->newArray ) ); |
63 | |
64 | // For every old item... |
65 | for ( $i = 0; $i < count( $this->oldArray ); $i++ ) { |
66 | $oldItem = $this->oldArray[ $i ]; |
67 | |
68 | // ... we calculate its diff with every new item. |
69 | for ( $j = 0; $j < count( $this->newArray ); $j++ ) { |
70 | $newItem = $this->newArray[ $j ]; |
71 | $itemDiff = $this->zObjectDiffer->doDiff( $oldItem, $newItem ); |
72 | $itemEditCount = count( $itemDiff ); |
73 | |
74 | // We set the diff and diff count collections of this class |
75 | $this->diffMatrix[ $i ][ $j ] = $itemDiff; |
76 | $this->diffCountMatrix[ $i ][ $j ] = $itemEditCount; |
77 | $this->editCountByRow[ $i ] += $itemEditCount; |
78 | $this->editCountByCol[ $j ] += $itemEditCount; |
79 | } |
80 | } |
81 | } |
82 | |
83 | /** |
84 | * Get the set of DiffOps saved in the matrix by row and column indices |
85 | * |
86 | * @param int $row |
87 | * @param int $col |
88 | * @return DiffOp |
89 | */ |
90 | public function getDiffOps( int $row, int $col ): DiffOp { |
91 | return ( |
92 | ( $row >= count( $this->diffMatrix ) ) || |
93 | ( $col >= count( $this->diffMatrix[ $row ] ) ) |
94 | ) ? new Diff( [] ) : $this->diffMatrix[ $row ][ $col ]; |
95 | } |
96 | |
97 | /** |
98 | * Whether the matrix position given by row and colum registers |
99 | * any diffs or not |
100 | * |
101 | * @param int $row |
102 | * @param int $col |
103 | * @return bool |
104 | */ |
105 | public function hasDiffOps( int $row, int $col ): bool { |
106 | return ( |
107 | ( $row >= count( $this->diffCountMatrix ) ) || |
108 | ( $col >= count( $this->diffCountMatrix[ $row ] ) ) |
109 | ) ? false : ( $this->diffCountMatrix[ $row ][ $col ] > 0 ); |
110 | } |
111 | |
112 | /** |
113 | * Get the set of edit counts for every row. |
114 | * |
115 | * @return int[] |
116 | */ |
117 | public function getEditCountByRow(): array { |
118 | return $this->editCountByRow; |
119 | } |
120 | |
121 | /** |
122 | * Get the set of edit counts for every column. |
123 | * |
124 | * @return int[] |
125 | */ |
126 | public function getEditCountByCol(): array { |
127 | return $this->editCountByCol; |
128 | } |
129 | |
130 | /** |
131 | * Return the indices of the rows (old values) that were most edited, |
132 | * which will be the ones most likely removed in the case that oldArray |
133 | * has more items than newArray. |
134 | * The number of indices returned is always the difference between |
135 | * number of old items and number of new items. |
136 | * |
137 | * @return int[] |
138 | */ |
139 | public function getIndicesOfRemovedItems(): array { |
140 | $numItems = count( $this->oldArray ) - count( $this->newArray ); |
141 | return $this->getIndicesOfMax( $this->editCountByRow, $numItems ); |
142 | } |
143 | |
144 | /** |
145 | * Return the indices of the cols (new values) that were most edited, |
146 | * which will be the ones most likely added in the case that oldArray |
147 | * has less items than newArray. |
148 | * The number of indices returned is always the difference between |
149 | * number of new items and number of old items. |
150 | * |
151 | * @return int[] |
152 | */ |
153 | public function getIndicesOfAddedItems(): array { |
154 | $numItems = count( $this->newArray ) - count( $this->oldArray ); |
155 | return $this->getIndicesOfMax( $this->editCountByCol, $numItems ); |
156 | } |
157 | |
158 | /** |
159 | * Helper function to get the indices of the n highest values from a |
160 | * given array. In case of two equal values, the returned index will |
161 | * be the first one found. |
162 | * |
163 | * @param int[] $vector |
164 | * @param int $numItems |
165 | * @return array |
166 | */ |
167 | private function getIndicesOfMax( array $vector, int $numItems ): array { |
168 | $vectorCopy = array_merge( [], $vector ); |
169 | uasort( |
170 | $vectorCopy, |
171 | static function ( int $a, int $b ) { |
172 | return $b <=> $a; |
173 | } |
174 | ); |
175 | return array_slice( array_keys( $vectorCopy ), 0, $numItems ); |
176 | } |
177 | |
178 | /** |
179 | * Return integer that calculates the correct row or column index |
180 | * to access a particular matrix element depending on the items |
181 | * that have been removed or added in the diff. |
182 | * |
183 | * @param int[] $indices |
184 | * @param int $index |
185 | * @return int |
186 | */ |
187 | public function getNormalizer( array $indices, int $index ): int { |
188 | return count( array_filter( |
189 | $indices, static function ( int $i ) use ( $index ) { |
190 | return ( $i < $index ); |
191 | } |
192 | ) ); |
193 | } |
194 | |
195 | /** |
196 | * Return an array of n number of zeros. This function helps us |
197 | * initialize the arrays of edit count by row and by column. |
198 | * |
199 | * @param int $n |
200 | * @return int[] |
201 | */ |
202 | private function zeroArray( int $n ): array { |
203 | $zeroArray = []; |
204 | for ( $i = 0; $i < $n; $i++ ) { |
205 | $zeroArray[] = 0; |
206 | } |
207 | return $zeroArray; |
208 | } |
209 | |
210 | /** |
211 | * Returns a string representing the matrix of edit counts. |
212 | * |
213 | * @return string |
214 | */ |
215 | public function __toString(): string { |
216 | $string = ""; |
217 | foreach ( $this->diffCountMatrix as $row ) { |
218 | $string .= json_encode( $row ); |
219 | $string .= "\n"; |
220 | } |
221 | return $string; |
222 | } |
223 | } |