Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 350 |
|
0.00% |
0 / 9 |
CRAP | |
0.00% |
0 / 1 |
DiffEngine | |
0.00% |
0 / 349 |
|
0.00% |
0 / 9 |
24806 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
diff | |
0.00% |
0 / 28 |
|
0.00% |
0 / 1 |
342 | |||
setBailoutComplexity | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
shiftBoundaries | |
0.00% |
0 / 42 |
|
0.00% |
0 / 1 |
992 | |||
diffInternal | |
0.00% |
0 / 73 |
|
0.00% |
0 / 1 |
930 | |||
lcs_rec | |
0.00% |
0 / 20 |
|
0.00% |
0 / 1 |
56 | |||
find_middle_snake | |
0.00% |
0 / 131 |
|
0.00% |
0 / 1 |
2756 | |||
findMostProgress | |
0.00% |
0 / 48 |
|
0.00% |
0 / 1 |
210 | |||
getLcsLength | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
12 |
1 | <?php |
2 | /** |
3 | * New version of the difference engine |
4 | * |
5 | * Copyright © 2008 Guy Van den Broeck <guy@guyvdb.eu> |
6 | * |
7 | * This program is free software; you can redistribute it and/or modify |
8 | * it under the terms of the GNU General Public License as published by |
9 | * the Free Software Foundation; either version 2 of the License, or |
10 | * (at your option) any later version. |
11 | * |
12 | * This program is distributed in the hope that it will be useful, |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | * GNU General Public License for more details. |
16 | * |
17 | * You should have received a copy of the GNU General Public License |
18 | * along with this program; if not, write to the Free Software |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
20 | * http://www.gnu.org/copyleft/gpl.html |
21 | * |
22 | * @file |
23 | * @ingroup DifferenceEngine |
24 | */ |
25 | |
26 | namespace Wikimedia\Diff; |
27 | |
28 | // FIXME: Don't use assert() in this file |
29 | // phpcs:disable MediaWiki.Usage.ForbiddenFunctions.assert |
30 | |
31 | /** |
32 | * This diff implementation is mainly lifted from the LCS algorithm of the Eclipse project which |
33 | * in turn is based on Myers' "An O(ND) difference algorithm and its variations" |
34 | * (http://citeseer.ist.psu.edu/myers86ond.html) with range compression (see Wu et al.'s |
35 | * "An O(NP) Sequence Comparison Algorithm"). |
36 | * |
37 | * This implementation supports an upper bound on the execution time. |
38 | * |
39 | * Some ideas (and a bit of code) are from analyze.c, from GNU |
40 | * diffutils-2.7, which can be found at: |
41 | * ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz |
42 | * |
43 | * Complexity: O((M + N)D) worst case time, O(M + N + D^2) expected time, O(M + N) space |
44 | * |
45 | * @author Guy Van den Broeck, Geoffrey T. Dairiki, Tim Starling |
46 | * @ingroup DifferenceEngine |
47 | */ |
48 | class DiffEngine { |
49 | |
50 | // Input variables |
51 | /** @var string[] */ |
52 | private $from; |
53 | /** @var string[] */ |
54 | private $to; |
55 | /** @var int */ |
56 | private $m; |
57 | /** @var int */ |
58 | private $n; |
59 | |
60 | /** @var int */ |
61 | private $tooLong; |
62 | /** @var float */ |
63 | private $powLimit; |
64 | |
65 | /** @var int */ |
66 | protected $bailoutComplexity = 0; |
67 | |
68 | // State variables |
69 | /** @var float */ |
70 | private $maxDifferences; |
71 | /** @var bool */ |
72 | private $lcsLengthCorrectedForHeuristic = false; |
73 | |
74 | // Output variables |
75 | /** @var int */ |
76 | public $length; |
77 | /** @var array */ |
78 | public $removed; |
79 | /** @var array */ |
80 | public $added; |
81 | /** @var bool */ |
82 | public $heuristicUsed; |
83 | |
84 | /** |
85 | * @param int $tooLong |
86 | * @param float $powLimit |
87 | */ |
88 | public function __construct( $tooLong = 2_000_000, $powLimit = 1.45 ) { |
89 | $this->tooLong = $tooLong; |
90 | $this->powLimit = $powLimit; |
91 | } |
92 | |
93 | /** |
94 | * Performs diff |
95 | * |
96 | * @param string[] $from_lines |
97 | * @param string[] $to_lines |
98 | * @throws ComplexityException |
99 | * |
100 | * @return DiffOp[] |
101 | */ |
102 | public function diff( $from_lines, $to_lines ) { |
103 | // Diff and store locally |
104 | $this->diffInternal( $from_lines, $to_lines ); |
105 | |
106 | // Merge edits when possible |
107 | $this->shiftBoundaries( $from_lines, $this->removed, $this->added ); |
108 | $this->shiftBoundaries( $to_lines, $this->added, $this->removed ); |
109 | |
110 | // Compute the edit operations. |
111 | $n_from = count( $from_lines ); |
112 | $n_to = count( $to_lines ); |
113 | |
114 | $edits = []; |
115 | $xi = $yi = 0; |
116 | while ( $xi < $n_from || $yi < $n_to ) { |
117 | assert( $yi < $n_to || $this->removed[$xi] ); |
118 | assert( $xi < $n_from || $this->added[$yi] ); |
119 | |
120 | // Skip matching "snake". |
121 | $copy = []; |
122 | while ( $xi < $n_from && $yi < $n_to |
123 | && !$this->removed[$xi] && !$this->added[$yi] |
124 | ) { |
125 | $copy[] = $from_lines[$xi++]; |
126 | ++$yi; |
127 | } |
128 | if ( $copy ) { |
129 | $edits[] = new DiffOpCopy( $copy ); |
130 | } |
131 | |
132 | // Find deletes & adds. |
133 | $delete = []; |
134 | while ( $xi < $n_from && $this->removed[$xi] ) { |
135 | $delete[] = $from_lines[$xi++]; |
136 | } |
137 | |
138 | $add = []; |
139 | while ( $yi < $n_to && $this->added[$yi] ) { |
140 | $add[] = $to_lines[$yi++]; |
141 | } |
142 | |
143 | if ( $delete && $add ) { |
144 | $edits[] = new DiffOpChange( $delete, $add ); |
145 | } elseif ( $delete ) { |
146 | $edits[] = new DiffOpDelete( $delete ); |
147 | } elseif ( $add ) { |
148 | $edits[] = new DiffOpAdd( $add ); |
149 | } |
150 | } |
151 | |
152 | return $edits; |
153 | } |
154 | |
155 | /** |
156 | * Sets the complexity (in comparison operations) that can't be exceeded |
157 | * @param int $value |
158 | */ |
159 | public function setBailoutComplexity( $value ) { |
160 | $this->bailoutComplexity = $value; |
161 | } |
162 | |
163 | /** |
164 | * Adjust inserts/deletes of identical lines to join changes |
165 | * as much as possible. |
166 | * |
167 | * We do something when a run of changed lines include a |
168 | * line at one end and has an excluded, identical line at the other. |
169 | * We are free to choose which identical line is included. |
170 | * `compareseq' usually chooses the one at the beginning, |
171 | * but usually it is cleaner to consider the following identical line |
172 | * to be the "change". |
173 | * |
174 | * This is extracted verbatim from analyze.c (GNU diffutils-2.7). |
175 | * |
176 | * @param string[] $lines |
177 | * @param string[] &$changed |
178 | * @param string[] $other_changed |
179 | */ |
180 | private function shiftBoundaries( array $lines, array &$changed, array $other_changed ) { |
181 | $i = 0; |
182 | $j = 0; |
183 | |
184 | assert( count( $lines ) == count( $changed ) ); |
185 | $len = count( $lines ); |
186 | $other_len = count( $other_changed ); |
187 | |
188 | while ( 1 ) { |
189 | /* |
190 | * Scan forwards to find beginning of another run of changes. |
191 | * Also keep track of the corresponding point in the other file. |
192 | * |
193 | * Throughout this code, $i and $j are adjusted together so that |
194 | * the first $i elements of $changed and the first $j elements |
195 | * of $other_changed both contain the same number of zeros |
196 | * (unchanged lines). |
197 | * Furthermore, $j is always kept so that $j == $other_len or |
198 | * $other_changed[$j] == false. |
199 | */ |
200 | while ( $j < $other_len && $other_changed[$j] ) { |
201 | $j++; |
202 | } |
203 | |
204 | while ( $i < $len && !$changed[$i] ) { |
205 | assert( $j < $other_len && !$other_changed[$j] ); |
206 | $i++; |
207 | $j++; |
208 | while ( $j < $other_len && $other_changed[$j] ) { |
209 | $j++; |
210 | } |
211 | } |
212 | |
213 | if ( $i == $len ) { |
214 | break; |
215 | } |
216 | |
217 | $start = $i; |
218 | |
219 | // Find the end of this run of changes. |
220 | while ( ++$i < $len && $changed[$i] ) { |
221 | continue; |
222 | } |
223 | |
224 | do { |
225 | /* |
226 | * Record the length of this run of changes, so that |
227 | * we can later determine whether the run has grown. |
228 | */ |
229 | $runlength = $i - $start; |
230 | |
231 | /* |
232 | * Move the changed region back, so long as the |
233 | * previous unchanged line matches the last changed one. |
234 | * This merges with previous changed regions. |
235 | */ |
236 | while ( $start > 0 && $lines[$start - 1] == $lines[$i - 1] ) { |
237 | $changed[--$start] = 1; |
238 | $changed[--$i] = false; |
239 | // @phan-suppress-next-line PhanPluginLoopVariableReuse |
240 | while ( $start > 0 && $changed[$start - 1] ) { |
241 | $start--; |
242 | } |
243 | assert( $j > 0 ); |
244 | while ( $other_changed[--$j] ) { |
245 | continue; |
246 | } |
247 | assert( $j >= 0 && !$other_changed[$j] ); |
248 | } |
249 | |
250 | /* |
251 | * Set CORRESPONDING to the end of the changed run, at the last |
252 | * point where it corresponds to a changed run in the other file. |
253 | * CORRESPONDING == LEN means no such point has been found. |
254 | */ |
255 | $corresponding = $j < $other_len ? $i : $len; |
256 | |
257 | /* |
258 | * Move the changed region forward, so long as the |
259 | * first changed line matches the following unchanged one. |
260 | * This merges with following changed regions. |
261 | * Do this second, so that if there are no merges, |
262 | * the changed region is moved forward as far as possible. |
263 | */ |
264 | while ( $i < $len && $lines[$start] == $lines[$i] ) { |
265 | $changed[$start++] = false; |
266 | $changed[$i++] = 1; |
267 | while ( $i < $len && $changed[$i] ) { |
268 | $i++; |
269 | } |
270 | |
271 | assert( $j < $other_len && !$other_changed[$j] ); |
272 | $j++; |
273 | if ( $j < $other_len && $other_changed[$j] ) { |
274 | $corresponding = $i; |
275 | while ( $j < $other_len && $other_changed[$j] ) { |
276 | $j++; |
277 | } |
278 | } |
279 | } |
280 | } while ( $runlength != $i - $start ); |
281 | |
282 | /* |
283 | * If possible, move the fully-merged run of changes |
284 | * back to a corresponding run in the other file. |
285 | */ |
286 | while ( $corresponding < $i ) { |
287 | $changed[--$start] = 1; |
288 | $changed[--$i] = 0; |
289 | assert( $j > 0 ); |
290 | while ( $other_changed[--$j] ) { |
291 | continue; |
292 | } |
293 | assert( $j >= 0 && !$other_changed[$j] ); |
294 | } |
295 | } |
296 | } |
297 | |
298 | /** |
299 | * @param string[] $from |
300 | * @param string[] $to |
301 | * @throws ComplexityException |
302 | */ |
303 | protected function diffInternal( array $from, array $to ) { |
304 | // remember initial lengths |
305 | $m = count( $from ); |
306 | $n = count( $to ); |
307 | |
308 | $this->heuristicUsed = false; |
309 | |
310 | // output |
311 | $removed = $m > 0 ? array_fill( 0, $m, true ) : []; |
312 | $added = $n > 0 ? array_fill( 0, $n, true ) : []; |
313 | |
314 | // reduce the complexity for the next step (intentionally done twice) |
315 | // remove common tokens at the start |
316 | $i = 0; |
317 | while ( $i < $m && $i < $n && $from[$i] === $to[$i] ) { |
318 | $removed[$i] = $added[$i] = false; |
319 | unset( $from[$i], $to[$i] ); |
320 | ++$i; |
321 | } |
322 | |
323 | // remove common tokens at the end |
324 | $j = 1; |
325 | while ( $i + $j <= $m && $i + $j <= $n && $from[$m - $j] === $to[$n - $j] ) { |
326 | $removed[$m - $j] = $added[$n - $j] = false; |
327 | unset( $from[$m - $j], $to[$n - $j] ); |
328 | ++$j; |
329 | } |
330 | |
331 | $this->from = $newFromIndex = $this->to = $newToIndex = []; |
332 | |
333 | // remove tokens not in both sequences |
334 | $shared = []; |
335 | foreach ( $from as $key ) { |
336 | $shared[$key] = false; |
337 | } |
338 | |
339 | foreach ( $to as $index => &$el ) { |
340 | if ( array_key_exists( $el, $shared ) ) { |
341 | // keep it |
342 | $this->to[] = $el; |
343 | $shared[$el] = true; |
344 | $newToIndex[] = $index; |
345 | } |
346 | } |
347 | foreach ( $from as $index => &$el ) { |
348 | if ( $shared[$el] ) { |
349 | // keep it |
350 | $this->from[] = $el; |
351 | $newFromIndex[] = $index; |
352 | } |
353 | } |
354 | |
355 | unset( $shared, $from, $to ); |
356 | |
357 | $this->m = count( $this->from ); |
358 | $this->n = count( $this->to ); |
359 | |
360 | if ( $this->bailoutComplexity > 0 && $this->m * $this->n > $this->bailoutComplexity ) { |
361 | throw new ComplexityException(); |
362 | } |
363 | |
364 | $this->removed = $this->m > 0 ? array_fill( 0, $this->m, true ) : []; |
365 | $this->added = $this->n > 0 ? array_fill( 0, $this->n, true ) : []; |
366 | |
367 | if ( $this->m == 0 || $this->n == 0 ) { |
368 | $this->length = 0; |
369 | } else { |
370 | $this->maxDifferences = ceil( ( $this->m + $this->n ) / 2.0 ); |
371 | if ( $this->m * $this->n > $this->tooLong ) { |
372 | // limit complexity to D^POW_LIMIT for long sequences |
373 | $this->maxDifferences = floor( $this->maxDifferences ** ( $this->powLimit - 1.0 ) ); |
374 | } |
375 | |
376 | /* |
377 | * The common prefixes and suffixes are always part of some LCS, include |
378 | * them now to reduce our search space |
379 | */ |
380 | $max = min( $this->m, $this->n ); |
381 | for ( $forwardBound = 0; $forwardBound < $max |
382 | && $this->from[$forwardBound] === $this->to[$forwardBound]; |
383 | ++$forwardBound |
384 | ) { |
385 | $this->removed[$forwardBound] = $this->added[$forwardBound] = false; |
386 | } |
387 | |
388 | $backBoundL1 = $this->m - 1; |
389 | $backBoundL2 = $this->n - 1; |
390 | |
391 | while ( $backBoundL1 >= $forwardBound && $backBoundL2 >= $forwardBound |
392 | && $this->from[$backBoundL1] === $this->to[$backBoundL2] |
393 | ) { |
394 | $this->removed[$backBoundL1--] = $this->added[$backBoundL2--] = false; |
395 | } |
396 | |
397 | $temp = array_fill( 0, $this->m + $this->n + 1, 0 ); |
398 | $V = [ $temp, $temp ]; |
399 | $snake = [ 0, 0, 0 ]; |
400 | |
401 | $this->length = $forwardBound + $this->m - $backBoundL1 - 1 |
402 | + $this->lcs_rec( |
403 | $forwardBound, |
404 | $backBoundL1, |
405 | $forwardBound, |
406 | $backBoundL2, |
407 | $V, |
408 | $snake |
409 | ); |
410 | } |
411 | |
412 | $this->m = $m; |
413 | $this->n = $n; |
414 | |
415 | $this->length += $i + $j - 1; |
416 | |
417 | foreach ( $this->removed as $key => &$removed_elem ) { |
418 | if ( !$removed_elem ) { |
419 | $removed[$newFromIndex[$key]] = false; |
420 | } |
421 | } |
422 | foreach ( $this->added as $key => &$added_elem ) { |
423 | if ( !$added_elem ) { |
424 | $added[$newToIndex[$key]] = false; |
425 | } |
426 | } |
427 | $this->removed = $removed; |
428 | $this->added = $added; |
429 | } |
430 | |
431 | /** |
432 | * @param int $bottoml1 |
433 | * @param int $topl1 |
434 | * @param int $bottoml2 |
435 | * @param int $topl2 |
436 | * @param array &$V |
437 | * @param array &$snake |
438 | * @return int |
439 | */ |
440 | private function lcs_rec( $bottoml1, $topl1, $bottoml2, $topl2, &$V, &$snake ) { |
441 | // check that both sequences are non-empty |
442 | if ( $bottoml1 > $topl1 || $bottoml2 > $topl2 ) { |
443 | return 0; |
444 | } |
445 | |
446 | $d = $this->find_middle_snake( $bottoml1, $topl1, $bottoml2, |
447 | $topl2, $V, $snake ); |
448 | |
449 | // need to store these so we don't lose them when they're |
450 | // overwritten by the recursion |
451 | [ $startx, $starty, $len ] = $snake; |
452 | |
453 | // the middle snake is part of the LCS, store it |
454 | for ( $i = 0; $i < $len; ++$i ) { |
455 | $this->removed[$startx + $i] = $this->added[$starty + $i] = false; |
456 | } |
457 | |
458 | if ( $d > 1 ) { |
459 | return $len |
460 | + $this->lcs_rec( $bottoml1, $startx - 1, $bottoml2, |
461 | $starty - 1, $V, $snake ) |
462 | + $this->lcs_rec( $startx + $len, $topl1, $starty + $len, |
463 | $topl2, $V, $snake ); |
464 | } elseif ( $d == 1 ) { |
465 | /* |
466 | * In this case the sequences differ by exactly 1 line. We have |
467 | * already saved all the lines after the difference in the for loop |
468 | * above, now we need to save all the lines before the difference. |
469 | */ |
470 | $max = min( $startx - $bottoml1, $starty - $bottoml2 ); |
471 | for ( $i = 0; $i < $max; ++$i ) { |
472 | $this->removed[$bottoml1 + $i] = |
473 | $this->added[$bottoml2 + $i] = false; |
474 | } |
475 | |
476 | return $max + $len; |
477 | } |
478 | |
479 | return $len; |
480 | } |
481 | |
482 | /** |
483 | * @param int $bottoml1 |
484 | * @param int $topl1 |
485 | * @param int $bottoml2 |
486 | * @param int $topl2 |
487 | * @param array &$V |
488 | * @param array &$snake |
489 | * @return int |
490 | */ |
491 | private function find_middle_snake( $bottoml1, $topl1, $bottoml2, $topl2, &$V, &$snake ) { |
492 | $from = &$this->from; |
493 | $to = &$this->to; |
494 | $V0 = &$V[0]; |
495 | $V1 = &$V[1]; |
496 | $snake0 = &$snake[0]; |
497 | $snake1 = &$snake[1]; |
498 | $snake2 = &$snake[2]; |
499 | $bottoml1_min_1 = $bottoml1 - 1; |
500 | $bottoml2_min_1 = $bottoml2 - 1; |
501 | $N = $topl1 - $bottoml1_min_1; |
502 | $M = $topl2 - $bottoml2_min_1; |
503 | $delta = $N - $M; |
504 | $maxabsx = $N + $bottoml1; |
505 | $maxabsy = $M + $bottoml2; |
506 | $limit = min( $this->maxDifferences, ceil( ( $N + $M ) / 2 ) ); |
507 | |
508 | // value_to_add_forward: a 0 or 1 that we add to the start |
509 | // offset to make it odd/even |
510 | if ( $M & 1 ) { |
511 | $value_to_add_forward = 1; |
512 | } else { |
513 | $value_to_add_forward = 0; |
514 | } |
515 | |
516 | if ( $N & 1 ) { |
517 | $value_to_add_backward = 1; |
518 | } else { |
519 | $value_to_add_backward = 0; |
520 | } |
521 | |
522 | $start_forward = -$M; |
523 | $end_forward = $N; |
524 | $start_backward = -$N; |
525 | $end_backward = $M; |
526 | |
527 | $limit_min_1 = $limit - 1; |
528 | $limit_plus_1 = $limit + 1; |
529 | |
530 | $V0[$limit_plus_1] = 0; |
531 | $V1[$limit_min_1] = $N; |
532 | $limit = min( $this->maxDifferences, ceil( ( $N + $M ) / 2 ) ); |
533 | |
534 | if ( $delta & 1 ) { |
535 | for ( $d = 0; $d <= $limit; ++$d ) { |
536 | $start_diag = max( $value_to_add_forward + $start_forward, -$d ); |
537 | $end_diag = min( $end_forward, $d ); |
538 | $value_to_add_forward = 1 - $value_to_add_forward; |
539 | |
540 | // compute forward furthest reaching paths |
541 | for ( $k = $start_diag; $k <= $end_diag; $k += 2 ) { |
542 | if ( $k == -$d || ( $k < $d |
543 | && $V0[$limit_min_1 + $k] < $V0[$limit_plus_1 + $k] ) |
544 | ) { |
545 | $x = $V0[$limit_plus_1 + $k]; |
546 | } else { |
547 | $x = $V0[$limit_min_1 + $k] + 1; |
548 | } |
549 | |
550 | $absx = $snake0 = $x + $bottoml1; |
551 | $absy = $snake1 = $x - $k + $bottoml2; |
552 | |
553 | while ( $absx < $maxabsx && $absy < $maxabsy && $from[$absx] === $to[$absy] ) { |
554 | ++$absx; |
555 | ++$absy; |
556 | } |
557 | $x = $absx - $bottoml1; |
558 | |
559 | $snake2 = $absx - $snake0; |
560 | $V0[$limit + $k] = $x; |
561 | if ( $k >= $delta - $d + 1 && $k <= $delta + $d - 1 |
562 | && $x >= $V1[$limit + $k - $delta] |
563 | ) { |
564 | return 2 * $d - 1; |
565 | } |
566 | |
567 | // check to see if we can cut down the diagonal range |
568 | if ( $x >= $N && $end_forward > $k - 1 ) { |
569 | $end_forward = $k - 1; |
570 | } elseif ( $absy - $bottoml2 >= $M ) { |
571 | $start_forward = $k + 1; |
572 | $value_to_add_forward = 0; |
573 | } |
574 | } |
575 | |
576 | $start_diag = max( $value_to_add_backward + $start_backward, -$d ); |
577 | $end_diag = min( $end_backward, $d ); |
578 | $value_to_add_backward = 1 - $value_to_add_backward; |
579 | |
580 | // compute backward furthest reaching paths |
581 | for ( $k = $start_diag; $k <= $end_diag; $k += 2 ) { |
582 | if ( $k == $d |
583 | || ( $k != -$d && $V1[$limit_min_1 + $k] < $V1[$limit_plus_1 + $k] ) |
584 | ) { |
585 | $x = $V1[$limit_min_1 + $k]; |
586 | } else { |
587 | $x = $V1[$limit_plus_1 + $k] - 1; |
588 | } |
589 | |
590 | $y = $x - $k - $delta; |
591 | |
592 | $snake2 = 0; |
593 | while ( $x > 0 && $y > 0 |
594 | && $from[$x + $bottoml1_min_1] === $to[$y + $bottoml2_min_1] |
595 | ) { |
596 | --$x; |
597 | --$y; |
598 | ++$snake2; |
599 | } |
600 | $V1[$limit + $k] = $x; |
601 | |
602 | // check to see if we can cut down our diagonal range |
603 | if ( $x <= 0 ) { |
604 | $start_backward = $k + 1; |
605 | $value_to_add_backward = 0; |
606 | } elseif ( $y <= 0 && $end_backward > $k - 1 ) { |
607 | $end_backward = $k - 1; |
608 | } |
609 | } |
610 | } |
611 | } else { |
612 | for ( $d = 0; $d <= $limit; ++$d ) { |
613 | $start_diag = max( $value_to_add_forward + $start_forward, -$d ); |
614 | $end_diag = min( $end_forward, $d ); |
615 | $value_to_add_forward = 1 - $value_to_add_forward; |
616 | |
617 | // compute forward furthest reaching paths |
618 | for ( $k = $start_diag; $k <= $end_diag; $k += 2 ) { |
619 | if ( $k == -$d |
620 | || ( $k < $d && $V0[$limit_min_1 + $k] < $V0[$limit_plus_1 + $k] ) |
621 | ) { |
622 | $x = $V0[$limit_plus_1 + $k]; |
623 | } else { |
624 | $x = $V0[$limit_min_1 + $k] + 1; |
625 | } |
626 | |
627 | $absx = $snake0 = $x + $bottoml1; |
628 | $absy = $snake1 = $x - $k + $bottoml2; |
629 | |
630 | while ( $absx < $maxabsx && $absy < $maxabsy && $from[$absx] === $to[$absy] ) { |
631 | ++$absx; |
632 | ++$absy; |
633 | } |
634 | $x = $absx - $bottoml1; |
635 | $snake2 = $absx - $snake0; |
636 | $V0[$limit + $k] = $x; |
637 | |
638 | // check to see if we can cut down the diagonal range |
639 | if ( $x >= $N && $end_forward > $k - 1 ) { |
640 | $end_forward = $k - 1; |
641 | } elseif ( $absy - $bottoml2 >= $M ) { |
642 | $start_forward = $k + 1; |
643 | $value_to_add_forward = 0; |
644 | } |
645 | } |
646 | |
647 | $start_diag = max( $value_to_add_backward + $start_backward, -$d ); |
648 | $end_diag = min( $end_backward, $d ); |
649 | $value_to_add_backward = 1 - $value_to_add_backward; |
650 | |
651 | // compute backward furthest reaching paths |
652 | for ( $k = $start_diag; $k <= $end_diag; $k += 2 ) { |
653 | if ( $k == $d |
654 | || ( $k != -$d && $V1[$limit_min_1 + $k] < $V1[$limit_plus_1 + $k] ) |
655 | ) { |
656 | $x = $V1[$limit_min_1 + $k]; |
657 | } else { |
658 | $x = $V1[$limit_plus_1 + $k] - 1; |
659 | } |
660 | |
661 | $y = $x - $k - $delta; |
662 | |
663 | $snake2 = 0; |
664 | while ( $x > 0 && $y > 0 |
665 | && $from[$x + $bottoml1_min_1] === $to[$y + $bottoml2_min_1] |
666 | ) { |
667 | --$x; |
668 | --$y; |
669 | ++$snake2; |
670 | } |
671 | $V1[$limit + $k] = $x; |
672 | |
673 | if ( $k >= -$delta - $d && $k <= $d - $delta |
674 | && $x <= $V0[$limit + $k + $delta] |
675 | ) { |
676 | $snake0 = $bottoml1 + $x; |
677 | $snake1 = $bottoml2 + $y; |
678 | |
679 | return 2 * $d; |
680 | } |
681 | |
682 | // check to see if we can cut down our diagonal range |
683 | if ( $x <= 0 ) { |
684 | $start_backward = $k + 1; |
685 | $value_to_add_backward = 0; |
686 | } elseif ( $y <= 0 && $end_backward > $k - 1 ) { |
687 | $end_backward = $k - 1; |
688 | } |
689 | } |
690 | } |
691 | } |
692 | /* |
693 | * computing the true LCS is too expensive, instead find the diagonal |
694 | * with the most progress and pretend a middle snake of length 0 occurs |
695 | * there. |
696 | */ |
697 | |
698 | $most_progress = self::findMostProgress( $M, $N, $limit, $V ); |
699 | |
700 | $snake0 = $bottoml1 + $most_progress[0]; |
701 | $snake1 = $bottoml2 + $most_progress[1]; |
702 | $snake2 = 0; |
703 | // Computing the LCS is too expensive. Using a heuristic. |
704 | $this->heuristicUsed = true; |
705 | |
706 | return 5; /* |
707 | * HACK: since we didn't really finish the LCS computation |
708 | * we don't really know the length of the SES. We don't do |
709 | * anything with the result anyway, unless it's <=1. We know |
710 | * for a fact SES > 1 so 5 is as good a number as any to |
711 | * return here |
712 | */ |
713 | } |
714 | |
715 | /** |
716 | * @param int $M |
717 | * @param int $N |
718 | * @param int $limit |
719 | * @param array $V |
720 | * @return array |
721 | */ |
722 | private static function findMostProgress( $M, $N, $limit, $V ) { |
723 | $delta = $N - $M; |
724 | |
725 | if ( ( $M & 1 ) == ( $limit & 1 ) ) { |
726 | $forward_start_diag = max( -$M, -$limit ); |
727 | } else { |
728 | $forward_start_diag = max( 1 - $M, -$limit ); |
729 | } |
730 | |
731 | $forward_end_diag = min( $N, $limit ); |
732 | |
733 | if ( ( $N & 1 ) == ( $limit & 1 ) ) { |
734 | $backward_start_diag = max( -$N, -$limit ); |
735 | } else { |
736 | $backward_start_diag = max( 1 - $N, -$limit ); |
737 | } |
738 | |
739 | $backward_end_diag = -min( $M, $limit ); |
740 | |
741 | $temp = [ 0, 0, 0 ]; |
742 | |
743 | $max_progress = array_fill( 0, ceil( max( $forward_end_diag - $forward_start_diag, |
744 | $backward_end_diag - $backward_start_diag ) / 2 ), $temp ); |
745 | $num_progress = 0; // the 1st entry is current, it is initialized |
746 | // with 0s |
747 | |
748 | // first search the forward diagonals |
749 | for ( $k = $forward_start_diag; $k <= $forward_end_diag; $k += 2 ) { |
750 | $x = $V[0][$limit + $k]; |
751 | $y = $x - $k; |
752 | if ( $x > $N || $y > $M ) { |
753 | continue; |
754 | } |
755 | |
756 | $progress = $x + $y; |
757 | if ( $progress > $max_progress[0][2] ) { |
758 | $num_progress = 0; |
759 | $max_progress[0][0] = $x; |
760 | $max_progress[0][1] = $y; |
761 | $max_progress[0][2] = $progress; |
762 | } elseif ( $progress == $max_progress[0][2] ) { |
763 | ++$num_progress; |
764 | $max_progress[$num_progress][0] = $x; |
765 | $max_progress[$num_progress][1] = $y; |
766 | $max_progress[$num_progress][2] = $progress; |
767 | } |
768 | } |
769 | |
770 | $max_progress_forward = true; // initially the maximum |
771 | // progress is in the forward |
772 | // direction |
773 | |
774 | // now search the backward diagonals |
775 | for ( $k = $backward_start_diag; $k <= $backward_end_diag; $k += 2 ) { |
776 | $x = $V[1][$limit + $k]; |
777 | $y = $x - $k - $delta; |
778 | if ( $x < 0 || $y < 0 ) { |
779 | continue; |
780 | } |
781 | |
782 | $progress = $N - $x + $M - $y; |
783 | if ( $progress > $max_progress[0][2] ) { |
784 | $num_progress = 0; |
785 | $max_progress_forward = false; |
786 | $max_progress[0][0] = $x; |
787 | $max_progress[0][1] = $y; |
788 | $max_progress[0][2] = $progress; |
789 | } elseif ( $progress == $max_progress[0][2] && !$max_progress_forward ) { |
790 | ++$num_progress; |
791 | $max_progress[$num_progress][0] = $x; |
792 | $max_progress[$num_progress][1] = $y; |
793 | $max_progress[$num_progress][2] = $progress; |
794 | } |
795 | } |
796 | |
797 | // return the middle diagonal with maximal progress. |
798 | return $max_progress[(int)floor( $num_progress / 2 )]; |
799 | } |
800 | |
801 | /** |
802 | * @return int |
803 | */ |
804 | public function getLcsLength() { |
805 | if ( $this->heuristicUsed && !$this->lcsLengthCorrectedForHeuristic ) { |
806 | $this->lcsLengthCorrectedForHeuristic = true; |
807 | $this->length = $this->m - array_sum( $this->added ); |
808 | } |
809 | |
810 | return $this->length; |
811 | } |
812 | |
813 | } |
814 | |
815 | /** @deprecated class alias since 1.41 */ |
816 | class_alias( DiffEngine::class, 'DiffEngine' ); |