Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 350 |
|
0.00% |
0 / 9 |
CRAP | |
0.00% |
0 / 1 |
| DiffEngine | |
0.00% |
0 / 349 |
|
0.00% |
0 / 9 |
24806 | |
0.00% |
0 / 1 |
| __construct | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
| diff | |
0.00% |
0 / 28 |
|
0.00% |
0 / 1 |
342 | |||
| setBailoutComplexity | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| shiftBoundaries | |
0.00% |
0 / 42 |
|
0.00% |
0 / 1 |
992 | |||
| diffInternal | |
0.00% |
0 / 73 |
|
0.00% |
0 / 1 |
930 | |||
| lcs_rec | |
0.00% |
0 / 20 |
|
0.00% |
0 / 1 |
56 | |||
| find_middle_snake | |
0.00% |
0 / 131 |
|
0.00% |
0 / 1 |
2756 | |||
| findMostProgress | |
0.00% |
0 / 48 |
|
0.00% |
0 / 1 |
210 | |||
| getLcsLength | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
12 | |||
| 1 | <?php |
| 2 | /** |
| 3 | * New version of the difference engine |
| 4 | * |
| 5 | * Copyright © 2008 Guy Van den Broeck <guy@guyvdb.eu> |
| 6 | * |
| 7 | * @license GPL-2.0-or-later |
| 8 | * @file |
| 9 | * @ingroup DifferenceEngine |
| 10 | */ |
| 11 | |
| 12 | namespace Wikimedia\Diff; |
| 13 | |
| 14 | // FIXME: Don't use assert() in this file |
| 15 | // phpcs:disable MediaWiki.Usage.ForbiddenFunctions.assert |
| 16 | |
| 17 | /** |
| 18 | * This diff implementation is mainly lifted from the LCS algorithm of the Eclipse project which |
| 19 | * in turn is based on Myers' "An O(ND) difference algorithm and its variations" |
| 20 | * (http://citeseer.ist.psu.edu/myers86ond.html) with range compression (see Wu et al.'s |
| 21 | * "An O(NP) Sequence Comparison Algorithm"). |
| 22 | * |
| 23 | * This implementation supports an upper bound on the execution time. |
| 24 | * |
| 25 | * Some ideas (and a bit of code) are from analyze.c, from GNU |
| 26 | * diffutils-2.7, which can be found at: |
| 27 | * ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz |
| 28 | * |
| 29 | * Complexity: O((M + N)D) worst case time, O(M + N + D^2) expected time, O(M + N) space |
| 30 | * |
| 31 | * @author Guy Van den Broeck, Geoffrey T. Dairiki, Tim Starling |
| 32 | * @ingroup DifferenceEngine |
| 33 | */ |
| 34 | class DiffEngine { |
| 35 | |
| 36 | // Input variables |
| 37 | /** @var string[] */ |
| 38 | private $from; |
| 39 | /** @var string[] */ |
| 40 | private $to; |
| 41 | /** @var int */ |
| 42 | private $m; |
| 43 | /** @var int */ |
| 44 | private $n; |
| 45 | |
| 46 | /** @var int */ |
| 47 | private $tooLong; |
| 48 | /** @var float */ |
| 49 | private $powLimit; |
| 50 | |
| 51 | /** @var int */ |
| 52 | protected $bailoutComplexity = 0; |
| 53 | |
| 54 | // State variables |
| 55 | /** @var float */ |
| 56 | private $maxDifferences; |
| 57 | /** @var bool */ |
| 58 | private $lcsLengthCorrectedForHeuristic = false; |
| 59 | |
| 60 | // Output variables |
| 61 | /** @var int */ |
| 62 | public $length; |
| 63 | /** @var array */ |
| 64 | public $removed; |
| 65 | /** @var array */ |
| 66 | public $added; |
| 67 | /** @var bool */ |
| 68 | public $heuristicUsed; |
| 69 | |
| 70 | /** |
| 71 | * @param int $tooLong |
| 72 | * @param float $powLimit |
| 73 | */ |
| 74 | public function __construct( $tooLong = 2_000_000, $powLimit = 1.45 ) { |
| 75 | $this->tooLong = $tooLong; |
| 76 | $this->powLimit = $powLimit; |
| 77 | } |
| 78 | |
| 79 | /** |
| 80 | * Performs diff |
| 81 | * |
| 82 | * @param string[] $from_lines |
| 83 | * @param string[] $to_lines |
| 84 | * @throws ComplexityException |
| 85 | * |
| 86 | * @return DiffOp[] |
| 87 | */ |
| 88 | public function diff( $from_lines, $to_lines ) { |
| 89 | // Diff and store locally |
| 90 | $this->diffInternal( $from_lines, $to_lines ); |
| 91 | |
| 92 | // Merge edits when possible |
| 93 | $this->shiftBoundaries( $from_lines, $this->removed, $this->added ); |
| 94 | $this->shiftBoundaries( $to_lines, $this->added, $this->removed ); |
| 95 | |
| 96 | // Compute the edit operations. |
| 97 | $n_from = count( $from_lines ); |
| 98 | $n_to = count( $to_lines ); |
| 99 | |
| 100 | $edits = []; |
| 101 | $xi = $yi = 0; |
| 102 | while ( $xi < $n_from || $yi < $n_to ) { |
| 103 | assert( $yi < $n_to || $this->removed[$xi] ); |
| 104 | assert( $xi < $n_from || $this->added[$yi] ); |
| 105 | |
| 106 | // Skip matching "snake". |
| 107 | $copy = []; |
| 108 | while ( $xi < $n_from && $yi < $n_to |
| 109 | && !$this->removed[$xi] && !$this->added[$yi] |
| 110 | ) { |
| 111 | $copy[] = $from_lines[$xi++]; |
| 112 | ++$yi; |
| 113 | } |
| 114 | if ( $copy ) { |
| 115 | $edits[] = new DiffOpCopy( $copy ); |
| 116 | } |
| 117 | |
| 118 | // Find deletes & adds. |
| 119 | $delete = []; |
| 120 | while ( $xi < $n_from && $this->removed[$xi] ) { |
| 121 | $delete[] = $from_lines[$xi++]; |
| 122 | } |
| 123 | |
| 124 | $add = []; |
| 125 | while ( $yi < $n_to && $this->added[$yi] ) { |
| 126 | $add[] = $to_lines[$yi++]; |
| 127 | } |
| 128 | |
| 129 | if ( $delete && $add ) { |
| 130 | $edits[] = new DiffOpChange( $delete, $add ); |
| 131 | } elseif ( $delete ) { |
| 132 | $edits[] = new DiffOpDelete( $delete ); |
| 133 | } elseif ( $add ) { |
| 134 | $edits[] = new DiffOpAdd( $add ); |
| 135 | } |
| 136 | } |
| 137 | |
| 138 | return $edits; |
| 139 | } |
| 140 | |
| 141 | /** |
| 142 | * Sets the complexity (in comparison operations) that can't be exceeded |
| 143 | * @param int $value |
| 144 | */ |
| 145 | public function setBailoutComplexity( $value ) { |
| 146 | $this->bailoutComplexity = $value; |
| 147 | } |
| 148 | |
| 149 | /** |
| 150 | * Adjust inserts/deletes of identical lines to join changes |
| 151 | * as much as possible. |
| 152 | * |
| 153 | * We do something when a run of changed lines include a |
| 154 | * line at one end and has an excluded, identical line at the other. |
| 155 | * We are free to choose which identical line is included. |
| 156 | * `compareseq' usually chooses the one at the beginning, |
| 157 | * but usually it is cleaner to consider the following identical line |
| 158 | * to be the "change". |
| 159 | * |
| 160 | * This is extracted verbatim from analyze.c (GNU diffutils-2.7). |
| 161 | * |
| 162 | * @param string[] $lines |
| 163 | * @param string[] &$changed |
| 164 | * @param string[] $other_changed |
| 165 | */ |
| 166 | private function shiftBoundaries( array $lines, array &$changed, array $other_changed ) { |
| 167 | $i = 0; |
| 168 | $j = 0; |
| 169 | |
| 170 | assert( count( $lines ) == count( $changed ) ); |
| 171 | $len = count( $lines ); |
| 172 | $other_len = count( $other_changed ); |
| 173 | |
| 174 | while ( 1 ) { |
| 175 | /* |
| 176 | * Scan forwards to find beginning of another run of changes. |
| 177 | * Also keep track of the corresponding point in the other file. |
| 178 | * |
| 179 | * Throughout this code, $i and $j are adjusted together so that |
| 180 | * the first $i elements of $changed and the first $j elements |
| 181 | * of $other_changed both contain the same number of zeros |
| 182 | * (unchanged lines). |
| 183 | * Furthermore, $j is always kept so that $j == $other_len or |
| 184 | * $other_changed[$j] == false. |
| 185 | */ |
| 186 | while ( $j < $other_len && $other_changed[$j] ) { |
| 187 | $j++; |
| 188 | } |
| 189 | |
| 190 | while ( $i < $len && !$changed[$i] ) { |
| 191 | assert( $j < $other_len && !$other_changed[$j] ); |
| 192 | $i++; |
| 193 | $j++; |
| 194 | while ( $j < $other_len && $other_changed[$j] ) { |
| 195 | $j++; |
| 196 | } |
| 197 | } |
| 198 | |
| 199 | if ( $i == $len ) { |
| 200 | break; |
| 201 | } |
| 202 | |
| 203 | $start = $i; |
| 204 | |
| 205 | // Find the end of this run of changes. |
| 206 | while ( ++$i < $len && $changed[$i] ) { |
| 207 | continue; |
| 208 | } |
| 209 | |
| 210 | do { |
| 211 | /* |
| 212 | * Record the length of this run of changes, so that |
| 213 | * we can later determine whether the run has grown. |
| 214 | */ |
| 215 | $runlength = $i - $start; |
| 216 | |
| 217 | /* |
| 218 | * Move the changed region back, so long as the |
| 219 | * previous unchanged line matches the last changed one. |
| 220 | * This merges with previous changed regions. |
| 221 | */ |
| 222 | while ( $start > 0 && $lines[$start - 1] == $lines[$i - 1] ) { |
| 223 | $changed[--$start] = 1; |
| 224 | $changed[--$i] = false; |
| 225 | // @phan-suppress-next-line PhanPluginLoopVariableReuse |
| 226 | while ( $start > 0 && $changed[$start - 1] ) { |
| 227 | $start--; |
| 228 | } |
| 229 | assert( $j > 0 ); |
| 230 | while ( $other_changed[--$j] ) { |
| 231 | continue; |
| 232 | } |
| 233 | assert( $j >= 0 && !$other_changed[$j] ); |
| 234 | } |
| 235 | |
| 236 | /* |
| 237 | * Set CORRESPONDING to the end of the changed run, at the last |
| 238 | * point where it corresponds to a changed run in the other file. |
| 239 | * CORRESPONDING == LEN means no such point has been found. |
| 240 | */ |
| 241 | $corresponding = $j < $other_len ? $i : $len; |
| 242 | |
| 243 | /* |
| 244 | * Move the changed region forward, so long as the |
| 245 | * first changed line matches the following unchanged one. |
| 246 | * This merges with following changed regions. |
| 247 | * Do this second, so that if there are no merges, |
| 248 | * the changed region is moved forward as far as possible. |
| 249 | */ |
| 250 | while ( $i < $len && $lines[$start] == $lines[$i] ) { |
| 251 | $changed[$start++] = false; |
| 252 | $changed[$i++] = 1; |
| 253 | while ( $i < $len && $changed[$i] ) { |
| 254 | $i++; |
| 255 | } |
| 256 | |
| 257 | assert( $j < $other_len && !$other_changed[$j] ); |
| 258 | $j++; |
| 259 | if ( $j < $other_len && $other_changed[$j] ) { |
| 260 | $corresponding = $i; |
| 261 | while ( $j < $other_len && $other_changed[$j] ) { |
| 262 | $j++; |
| 263 | } |
| 264 | } |
| 265 | } |
| 266 | } while ( $runlength != $i - $start ); |
| 267 | |
| 268 | /* |
| 269 | * If possible, move the fully-merged run of changes |
| 270 | * back to a corresponding run in the other file. |
| 271 | */ |
| 272 | while ( $corresponding < $i ) { |
| 273 | $changed[--$start] = 1; |
| 274 | $changed[--$i] = 0; |
| 275 | assert( $j > 0 ); |
| 276 | while ( $other_changed[--$j] ) { |
| 277 | continue; |
| 278 | } |
| 279 | assert( $j >= 0 && !$other_changed[$j] ); |
| 280 | } |
| 281 | } |
| 282 | } |
| 283 | |
| 284 | /** |
| 285 | * @param string[] $from |
| 286 | * @param string[] $to |
| 287 | * @throws ComplexityException |
| 288 | */ |
| 289 | protected function diffInternal( array $from, array $to ) { |
| 290 | // remember initial lengths |
| 291 | $m = count( $from ); |
| 292 | $n = count( $to ); |
| 293 | |
| 294 | $this->heuristicUsed = false; |
| 295 | |
| 296 | // output |
| 297 | $removed = $m > 0 ? array_fill( 0, $m, true ) : []; |
| 298 | $added = $n > 0 ? array_fill( 0, $n, true ) : []; |
| 299 | |
| 300 | // reduce the complexity for the next step (intentionally done twice) |
| 301 | // remove common tokens at the start |
| 302 | $i = 0; |
| 303 | while ( $i < $m && $i < $n && $from[$i] === $to[$i] ) { |
| 304 | $removed[$i] = $added[$i] = false; |
| 305 | unset( $from[$i], $to[$i] ); |
| 306 | ++$i; |
| 307 | } |
| 308 | |
| 309 | // remove common tokens at the end |
| 310 | $j = 1; |
| 311 | while ( $i + $j <= $m && $i + $j <= $n && $from[$m - $j] === $to[$n - $j] ) { |
| 312 | $removed[$m - $j] = $added[$n - $j] = false; |
| 313 | unset( $from[$m - $j], $to[$n - $j] ); |
| 314 | ++$j; |
| 315 | } |
| 316 | |
| 317 | $this->from = $newFromIndex = $this->to = $newToIndex = []; |
| 318 | |
| 319 | // remove tokens not in both sequences |
| 320 | $shared = []; |
| 321 | foreach ( $from as $key ) { |
| 322 | $shared[$key] = false; |
| 323 | } |
| 324 | |
| 325 | foreach ( $to as $index => &$el ) { |
| 326 | if ( array_key_exists( $el, $shared ) ) { |
| 327 | // keep it |
| 328 | $this->to[] = $el; |
| 329 | $shared[$el] = true; |
| 330 | $newToIndex[] = $index; |
| 331 | } |
| 332 | } |
| 333 | foreach ( $from as $index => &$el ) { |
| 334 | if ( $shared[$el] ) { |
| 335 | // keep it |
| 336 | $this->from[] = $el; |
| 337 | $newFromIndex[] = $index; |
| 338 | } |
| 339 | } |
| 340 | |
| 341 | unset( $shared, $from, $to ); |
| 342 | |
| 343 | $this->m = count( $this->from ); |
| 344 | $this->n = count( $this->to ); |
| 345 | |
| 346 | if ( $this->bailoutComplexity > 0 && $this->m * $this->n > $this->bailoutComplexity ) { |
| 347 | throw new ComplexityException(); |
| 348 | } |
| 349 | |
| 350 | $this->removed = $this->m > 0 ? array_fill( 0, $this->m, true ) : []; |
| 351 | $this->added = $this->n > 0 ? array_fill( 0, $this->n, true ) : []; |
| 352 | |
| 353 | if ( $this->m == 0 || $this->n == 0 ) { |
| 354 | $this->length = 0; |
| 355 | } else { |
| 356 | $this->maxDifferences = ceil( ( $this->m + $this->n ) / 2.0 ); |
| 357 | if ( $this->m * $this->n > $this->tooLong ) { |
| 358 | // limit complexity to D^POW_LIMIT for long sequences |
| 359 | $this->maxDifferences = floor( $this->maxDifferences ** ( $this->powLimit - 1.0 ) ); |
| 360 | } |
| 361 | |
| 362 | /* |
| 363 | * The common prefixes and suffixes are always part of some LCS, include |
| 364 | * them now to reduce our search space |
| 365 | */ |
| 366 | $max = min( $this->m, $this->n ); |
| 367 | for ( $forwardBound = 0; $forwardBound < $max |
| 368 | && $this->from[$forwardBound] === $this->to[$forwardBound]; |
| 369 | ++$forwardBound |
| 370 | ) { |
| 371 | $this->removed[$forwardBound] = $this->added[$forwardBound] = false; |
| 372 | } |
| 373 | |
| 374 | $backBoundL1 = $this->m - 1; |
| 375 | $backBoundL2 = $this->n - 1; |
| 376 | |
| 377 | while ( $backBoundL1 >= $forwardBound && $backBoundL2 >= $forwardBound |
| 378 | && $this->from[$backBoundL1] === $this->to[$backBoundL2] |
| 379 | ) { |
| 380 | $this->removed[$backBoundL1--] = $this->added[$backBoundL2--] = false; |
| 381 | } |
| 382 | |
| 383 | $temp = array_fill( 0, $this->m + $this->n + 1, 0 ); |
| 384 | $V = [ $temp, $temp ]; |
| 385 | $snake = [ 0, 0, 0 ]; |
| 386 | |
| 387 | $this->length = $forwardBound + $this->m - $backBoundL1 - 1 |
| 388 | + $this->lcs_rec( |
| 389 | $forwardBound, |
| 390 | $backBoundL1, |
| 391 | $forwardBound, |
| 392 | $backBoundL2, |
| 393 | $V, |
| 394 | $snake |
| 395 | ); |
| 396 | } |
| 397 | |
| 398 | $this->m = $m; |
| 399 | $this->n = $n; |
| 400 | |
| 401 | $this->length += $i + $j - 1; |
| 402 | |
| 403 | foreach ( $this->removed as $key => &$removed_elem ) { |
| 404 | if ( !$removed_elem ) { |
| 405 | $removed[$newFromIndex[$key]] = false; |
| 406 | } |
| 407 | } |
| 408 | foreach ( $this->added as $key => &$added_elem ) { |
| 409 | if ( !$added_elem ) { |
| 410 | $added[$newToIndex[$key]] = false; |
| 411 | } |
| 412 | } |
| 413 | $this->removed = $removed; |
| 414 | $this->added = $added; |
| 415 | } |
| 416 | |
| 417 | /** |
| 418 | * @param int $bottoml1 |
| 419 | * @param int $topl1 |
| 420 | * @param int $bottoml2 |
| 421 | * @param int $topl2 |
| 422 | * @param array &$V |
| 423 | * @param array &$snake |
| 424 | * @return int |
| 425 | */ |
| 426 | private function lcs_rec( $bottoml1, $topl1, $bottoml2, $topl2, &$V, &$snake ) { |
| 427 | // check that both sequences are non-empty |
| 428 | if ( $bottoml1 > $topl1 || $bottoml2 > $topl2 ) { |
| 429 | return 0; |
| 430 | } |
| 431 | |
| 432 | $d = $this->find_middle_snake( $bottoml1, $topl1, $bottoml2, |
| 433 | $topl2, $V, $snake ); |
| 434 | |
| 435 | // need to store these so we don't lose them when they're |
| 436 | // overwritten by the recursion |
| 437 | [ $startx, $starty, $len ] = $snake; |
| 438 | |
| 439 | // the middle snake is part of the LCS, store it |
| 440 | for ( $i = 0; $i < $len; ++$i ) { |
| 441 | $this->removed[$startx + $i] = $this->added[$starty + $i] = false; |
| 442 | } |
| 443 | |
| 444 | if ( $d > 1 ) { |
| 445 | return $len |
| 446 | + $this->lcs_rec( $bottoml1, $startx - 1, $bottoml2, |
| 447 | $starty - 1, $V, $snake ) |
| 448 | + $this->lcs_rec( $startx + $len, $topl1, $starty + $len, |
| 449 | $topl2, $V, $snake ); |
| 450 | } elseif ( $d == 1 ) { |
| 451 | /* |
| 452 | * In this case the sequences differ by exactly 1 line. We have |
| 453 | * already saved all the lines after the difference in the for loop |
| 454 | * above, now we need to save all the lines before the difference. |
| 455 | */ |
| 456 | $max = min( $startx - $bottoml1, $starty - $bottoml2 ); |
| 457 | for ( $i = 0; $i < $max; ++$i ) { |
| 458 | $this->removed[$bottoml1 + $i] = |
| 459 | $this->added[$bottoml2 + $i] = false; |
| 460 | } |
| 461 | |
| 462 | return $max + $len; |
| 463 | } |
| 464 | |
| 465 | return $len; |
| 466 | } |
| 467 | |
| 468 | /** |
| 469 | * @param int $bottoml1 |
| 470 | * @param int $topl1 |
| 471 | * @param int $bottoml2 |
| 472 | * @param int $topl2 |
| 473 | * @param array &$V |
| 474 | * @param array &$snake |
| 475 | * @return int |
| 476 | */ |
| 477 | private function find_middle_snake( $bottoml1, $topl1, $bottoml2, $topl2, &$V, &$snake ) { |
| 478 | $from = &$this->from; |
| 479 | $to = &$this->to; |
| 480 | $V0 = &$V[0]; |
| 481 | $V1 = &$V[1]; |
| 482 | $snake0 = &$snake[0]; |
| 483 | $snake1 = &$snake[1]; |
| 484 | $snake2 = &$snake[2]; |
| 485 | $bottoml1_min_1 = $bottoml1 - 1; |
| 486 | $bottoml2_min_1 = $bottoml2 - 1; |
| 487 | $N = $topl1 - $bottoml1_min_1; |
| 488 | $M = $topl2 - $bottoml2_min_1; |
| 489 | $delta = $N - $M; |
| 490 | $maxabsx = $N + $bottoml1; |
| 491 | $maxabsy = $M + $bottoml2; |
| 492 | $limit = min( $this->maxDifferences, ceil( ( $N + $M ) / 2 ) ); |
| 493 | |
| 494 | // value_to_add_forward: a 0 or 1 that we add to the start |
| 495 | // offset to make it odd/even |
| 496 | if ( $M & 1 ) { |
| 497 | $value_to_add_forward = 1; |
| 498 | } else { |
| 499 | $value_to_add_forward = 0; |
| 500 | } |
| 501 | |
| 502 | if ( $N & 1 ) { |
| 503 | $value_to_add_backward = 1; |
| 504 | } else { |
| 505 | $value_to_add_backward = 0; |
| 506 | } |
| 507 | |
| 508 | $start_forward = -$M; |
| 509 | $end_forward = $N; |
| 510 | $start_backward = -$N; |
| 511 | $end_backward = $M; |
| 512 | |
| 513 | $limit_min_1 = $limit - 1; |
| 514 | $limit_plus_1 = $limit + 1; |
| 515 | |
| 516 | $V0[$limit_plus_1] = 0; |
| 517 | $V1[$limit_min_1] = $N; |
| 518 | $limit = min( $this->maxDifferences, ceil( ( $N + $M ) / 2 ) ); |
| 519 | |
| 520 | if ( $delta & 1 ) { |
| 521 | for ( $d = 0; $d <= $limit; ++$d ) { |
| 522 | $start_diag = max( $value_to_add_forward + $start_forward, -$d ); |
| 523 | $end_diag = min( $end_forward, $d ); |
| 524 | $value_to_add_forward = 1 - $value_to_add_forward; |
| 525 | |
| 526 | // compute forward furthest reaching paths |
| 527 | for ( $k = $start_diag; $k <= $end_diag; $k += 2 ) { |
| 528 | if ( $k == -$d || ( $k < $d |
| 529 | && $V0[$limit_min_1 + $k] < $V0[$limit_plus_1 + $k] ) |
| 530 | ) { |
| 531 | $x = $V0[$limit_plus_1 + $k]; |
| 532 | } else { |
| 533 | $x = $V0[$limit_min_1 + $k] + 1; |
| 534 | } |
| 535 | |
| 536 | $absx = $snake0 = $x + $bottoml1; |
| 537 | $absy = $snake1 = $x - $k + $bottoml2; |
| 538 | |
| 539 | while ( $absx < $maxabsx && $absy < $maxabsy && $from[$absx] === $to[$absy] ) { |
| 540 | ++$absx; |
| 541 | ++$absy; |
| 542 | } |
| 543 | $x = $absx - $bottoml1; |
| 544 | |
| 545 | $snake2 = $absx - $snake0; |
| 546 | $V0[$limit + $k] = $x; |
| 547 | if ( $k >= $delta - $d + 1 && $k <= $delta + $d - 1 |
| 548 | && $x >= $V1[$limit + $k - $delta] |
| 549 | ) { |
| 550 | return 2 * $d - 1; |
| 551 | } |
| 552 | |
| 553 | // check to see if we can cut down the diagonal range |
| 554 | if ( $x >= $N && $end_forward > $k - 1 ) { |
| 555 | $end_forward = $k - 1; |
| 556 | } elseif ( $absy - $bottoml2 >= $M ) { |
| 557 | $start_forward = $k + 1; |
| 558 | $value_to_add_forward = 0; |
| 559 | } |
| 560 | } |
| 561 | |
| 562 | $start_diag = max( $value_to_add_backward + $start_backward, -$d ); |
| 563 | $end_diag = min( $end_backward, $d ); |
| 564 | $value_to_add_backward = 1 - $value_to_add_backward; |
| 565 | |
| 566 | // compute backward furthest reaching paths |
| 567 | for ( $k = $start_diag; $k <= $end_diag; $k += 2 ) { |
| 568 | if ( $k == $d |
| 569 | || ( $k != -$d && $V1[$limit_min_1 + $k] < $V1[$limit_plus_1 + $k] ) |
| 570 | ) { |
| 571 | $x = $V1[$limit_min_1 + $k]; |
| 572 | } else { |
| 573 | $x = $V1[$limit_plus_1 + $k] - 1; |
| 574 | } |
| 575 | |
| 576 | $y = $x - $k - $delta; |
| 577 | |
| 578 | $snake2 = 0; |
| 579 | while ( $x > 0 && $y > 0 |
| 580 | && $from[$x + $bottoml1_min_1] === $to[$y + $bottoml2_min_1] |
| 581 | ) { |
| 582 | --$x; |
| 583 | --$y; |
| 584 | ++$snake2; |
| 585 | } |
| 586 | $V1[$limit + $k] = $x; |
| 587 | |
| 588 | // check to see if we can cut down our diagonal range |
| 589 | if ( $x <= 0 ) { |
| 590 | $start_backward = $k + 1; |
| 591 | $value_to_add_backward = 0; |
| 592 | } elseif ( $y <= 0 && $end_backward > $k - 1 ) { |
| 593 | $end_backward = $k - 1; |
| 594 | } |
| 595 | } |
| 596 | } |
| 597 | } else { |
| 598 | for ( $d = 0; $d <= $limit; ++$d ) { |
| 599 | $start_diag = max( $value_to_add_forward + $start_forward, -$d ); |
| 600 | $end_diag = min( $end_forward, $d ); |
| 601 | $value_to_add_forward = 1 - $value_to_add_forward; |
| 602 | |
| 603 | // compute forward furthest reaching paths |
| 604 | for ( $k = $start_diag; $k <= $end_diag; $k += 2 ) { |
| 605 | if ( $k == -$d |
| 606 | || ( $k < $d && $V0[$limit_min_1 + $k] < $V0[$limit_plus_1 + $k] ) |
| 607 | ) { |
| 608 | $x = $V0[$limit_plus_1 + $k]; |
| 609 | } else { |
| 610 | $x = $V0[$limit_min_1 + $k] + 1; |
| 611 | } |
| 612 | |
| 613 | $absx = $snake0 = $x + $bottoml1; |
| 614 | $absy = $snake1 = $x - $k + $bottoml2; |
| 615 | |
| 616 | while ( $absx < $maxabsx && $absy < $maxabsy && $from[$absx] === $to[$absy] ) { |
| 617 | ++$absx; |
| 618 | ++$absy; |
| 619 | } |
| 620 | $x = $absx - $bottoml1; |
| 621 | $snake2 = $absx - $snake0; |
| 622 | $V0[$limit + $k] = $x; |
| 623 | |
| 624 | // check to see if we can cut down the diagonal range |
| 625 | if ( $x >= $N && $end_forward > $k - 1 ) { |
| 626 | $end_forward = $k - 1; |
| 627 | } elseif ( $absy - $bottoml2 >= $M ) { |
| 628 | $start_forward = $k + 1; |
| 629 | $value_to_add_forward = 0; |
| 630 | } |
| 631 | } |
| 632 | |
| 633 | $start_diag = max( $value_to_add_backward + $start_backward, -$d ); |
| 634 | $end_diag = min( $end_backward, $d ); |
| 635 | $value_to_add_backward = 1 - $value_to_add_backward; |
| 636 | |
| 637 | // compute backward furthest reaching paths |
| 638 | for ( $k = $start_diag; $k <= $end_diag; $k += 2 ) { |
| 639 | if ( $k == $d |
| 640 | || ( $k != -$d && $V1[$limit_min_1 + $k] < $V1[$limit_plus_1 + $k] ) |
| 641 | ) { |
| 642 | $x = $V1[$limit_min_1 + $k]; |
| 643 | } else { |
| 644 | $x = $V1[$limit_plus_1 + $k] - 1; |
| 645 | } |
| 646 | |
| 647 | $y = $x - $k - $delta; |
| 648 | |
| 649 | $snake2 = 0; |
| 650 | while ( $x > 0 && $y > 0 |
| 651 | && $from[$x + $bottoml1_min_1] === $to[$y + $bottoml2_min_1] |
| 652 | ) { |
| 653 | --$x; |
| 654 | --$y; |
| 655 | ++$snake2; |
| 656 | } |
| 657 | $V1[$limit + $k] = $x; |
| 658 | |
| 659 | if ( $k >= -$delta - $d && $k <= $d - $delta |
| 660 | && $x <= $V0[$limit + $k + $delta] |
| 661 | ) { |
| 662 | $snake0 = $bottoml1 + $x; |
| 663 | $snake1 = $bottoml2 + $y; |
| 664 | |
| 665 | return 2 * $d; |
| 666 | } |
| 667 | |
| 668 | // check to see if we can cut down our diagonal range |
| 669 | if ( $x <= 0 ) { |
| 670 | $start_backward = $k + 1; |
| 671 | $value_to_add_backward = 0; |
| 672 | } elseif ( $y <= 0 && $end_backward > $k - 1 ) { |
| 673 | $end_backward = $k - 1; |
| 674 | } |
| 675 | } |
| 676 | } |
| 677 | } |
| 678 | /* |
| 679 | * computing the true LCS is too expensive, instead find the diagonal |
| 680 | * with the most progress and pretend a middle snake of length 0 occurs |
| 681 | * there. |
| 682 | */ |
| 683 | |
| 684 | $most_progress = self::findMostProgress( $M, $N, $limit, $V ); |
| 685 | |
| 686 | $snake0 = $bottoml1 + $most_progress[0]; |
| 687 | $snake1 = $bottoml2 + $most_progress[1]; |
| 688 | $snake2 = 0; |
| 689 | // Computing the LCS is too expensive. Using a heuristic. |
| 690 | $this->heuristicUsed = true; |
| 691 | |
| 692 | return 5; /* |
| 693 | * HACK: since we didn't really finish the LCS computation |
| 694 | * we don't really know the length of the SES. We don't do |
| 695 | * anything with the result anyway, unless it's <=1. We know |
| 696 | * for a fact SES > 1 so 5 is as good a number as any to |
| 697 | * return here |
| 698 | */ |
| 699 | } |
| 700 | |
| 701 | /** |
| 702 | * @param int $M |
| 703 | * @param int $N |
| 704 | * @param int $limit |
| 705 | * @param array $V |
| 706 | * @return array |
| 707 | */ |
| 708 | private static function findMostProgress( $M, $N, $limit, $V ) { |
| 709 | $delta = $N - $M; |
| 710 | |
| 711 | if ( ( $M & 1 ) == ( $limit & 1 ) ) { |
| 712 | $forward_start_diag = max( -$M, -$limit ); |
| 713 | } else { |
| 714 | $forward_start_diag = max( 1 - $M, -$limit ); |
| 715 | } |
| 716 | |
| 717 | $forward_end_diag = min( $N, $limit ); |
| 718 | |
| 719 | if ( ( $N & 1 ) == ( $limit & 1 ) ) { |
| 720 | $backward_start_diag = max( -$N, -$limit ); |
| 721 | } else { |
| 722 | $backward_start_diag = max( 1 - $N, -$limit ); |
| 723 | } |
| 724 | |
| 725 | $backward_end_diag = -min( $M, $limit ); |
| 726 | |
| 727 | $temp = [ 0, 0, 0 ]; |
| 728 | |
| 729 | $max_progress = array_fill( 0, ceil( max( $forward_end_diag - $forward_start_diag, |
| 730 | $backward_end_diag - $backward_start_diag ) / 2 ), $temp ); |
| 731 | $num_progress = 0; // the 1st entry is current, it is initialized |
| 732 | // with 0s |
| 733 | |
| 734 | // first search the forward diagonals |
| 735 | for ( $k = $forward_start_diag; $k <= $forward_end_diag; $k += 2 ) { |
| 736 | $x = $V[0][$limit + $k]; |
| 737 | $y = $x - $k; |
| 738 | if ( $x > $N || $y > $M ) { |
| 739 | continue; |
| 740 | } |
| 741 | |
| 742 | $progress = $x + $y; |
| 743 | if ( $progress > $max_progress[0][2] ) { |
| 744 | $num_progress = 0; |
| 745 | $max_progress[0][0] = $x; |
| 746 | $max_progress[0][1] = $y; |
| 747 | $max_progress[0][2] = $progress; |
| 748 | } elseif ( $progress == $max_progress[0][2] ) { |
| 749 | ++$num_progress; |
| 750 | $max_progress[$num_progress][0] = $x; |
| 751 | $max_progress[$num_progress][1] = $y; |
| 752 | $max_progress[$num_progress][2] = $progress; |
| 753 | } |
| 754 | } |
| 755 | |
| 756 | $max_progress_forward = true; // initially the maximum |
| 757 | // progress is in the forward |
| 758 | // direction |
| 759 | |
| 760 | // now search the backward diagonals |
| 761 | for ( $k = $backward_start_diag; $k <= $backward_end_diag; $k += 2 ) { |
| 762 | $x = $V[1][$limit + $k]; |
| 763 | $y = $x - $k - $delta; |
| 764 | if ( $x < 0 || $y < 0 ) { |
| 765 | continue; |
| 766 | } |
| 767 | |
| 768 | $progress = $N - $x + $M - $y; |
| 769 | if ( $progress > $max_progress[0][2] ) { |
| 770 | $num_progress = 0; |
| 771 | $max_progress_forward = false; |
| 772 | $max_progress[0][0] = $x; |
| 773 | $max_progress[0][1] = $y; |
| 774 | $max_progress[0][2] = $progress; |
| 775 | } elseif ( $progress == $max_progress[0][2] && !$max_progress_forward ) { |
| 776 | ++$num_progress; |
| 777 | $max_progress[$num_progress][0] = $x; |
| 778 | $max_progress[$num_progress][1] = $y; |
| 779 | $max_progress[$num_progress][2] = $progress; |
| 780 | } |
| 781 | } |
| 782 | |
| 783 | // return the middle diagonal with maximal progress. |
| 784 | return $max_progress[(int)floor( $num_progress / 2 )]; |
| 785 | } |
| 786 | |
| 787 | /** |
| 788 | * @return int |
| 789 | */ |
| 790 | public function getLcsLength() { |
| 791 | if ( $this->heuristicUsed && !$this->lcsLengthCorrectedForHeuristic ) { |
| 792 | $this->lcsLengthCorrectedForHeuristic = true; |
| 793 | $this->length = $this->m - array_sum( $this->added ); |
| 794 | } |
| 795 | |
| 796 | return $this->length; |
| 797 | } |
| 798 | |
| 799 | } |
| 800 | |
| 801 | /** @deprecated class alias since 1.41 */ |
| 802 | class_alias( DiffEngine::class, 'DiffEngine' ); |