MediaWiki REL1_27
WikiDiff3.php
Go to the documentation of this file.
1<?php
39class WikiDiff3 {
40
41 // Input variables
42 private $from;
43 private $to;
44 private $m;
45 private $n;
46
47 private $tooLong;
48 private $powLimit;
49
50 // State variables
53
54 // Output variables
55 public $length;
56 public $removed;
57 public $added;
59
60 function __construct( $tooLong = 2000000, $powLimit = 1.45 ) {
61 $this->tooLong = $tooLong;
62 $this->powLimit = $powLimit;
63 }
64
65 public function diff( /*array*/ $from, /*array*/ $to ) {
66 // remember initial lengths
67 $m = count( $from );
68 $n = count( $to );
69
70 $this->heuristicUsed = false;
71
72 // output
73 $removed = $m > 0 ? array_fill( 0, $m, true ) : [];
74 $added = $n > 0 ? array_fill( 0, $n, true ) : [];
75
76 // reduce the complexity for the next step (intentionally done twice)
77 // remove common tokens at the start
78 $i = 0;
79 while ( $i < $m && $i < $n && $from[$i] === $to[$i] ) {
80 $removed[$i] = $added[$i] = false;
81 unset( $from[$i], $to[$i] );
82 ++$i;
83 }
84
85 // remove common tokens at the end
86 $j = 1;
87 while ( $i + $j <= $m && $i + $j <= $n && $from[$m - $j] === $to[$n - $j] ) {
88 $removed[$m - $j] = $added[$n - $j] = false;
89 unset( $from[$m - $j], $to[$n - $j] );
90 ++$j;
91 }
92
93 $this->from = $newFromIndex = $this->to = $newToIndex = [];
94
95 // remove tokens not in both sequences
96 $shared = [];
97 foreach ( $from as $key ) {
98 $shared[$key] = false;
99 }
100
101 foreach ( $to as $index => &$el ) {
102 if ( array_key_exists( $el, $shared ) ) {
103 // keep it
104 $this->to[] = $el;
105 $shared[$el] = true;
106 $newToIndex[] = $index;
107 }
108 }
109 foreach ( $from as $index => &$el ) {
110 if ( $shared[$el] ) {
111 // keep it
112 $this->from[] = $el;
113 $newFromIndex[] = $index;
114 }
115 }
116
117 unset( $shared, $from, $to );
118
119 $this->m = count( $this->from );
120 $this->n = count( $this->to );
121
122 $this->removed = $this->m > 0 ? array_fill( 0, $this->m, true ) : [];
123 $this->added = $this->n > 0 ? array_fill( 0, $this->n, true ) : [];
124
125 if ( $this->m == 0 || $this->n == 0 ) {
126 $this->length = 0;
127 } else {
128 $this->maxDifferences = ceil( ( $this->m + $this->n ) / 2.0 );
129 if ( $this->m * $this->n > $this->tooLong ) {
130 // limit complexity to D^POW_LIMIT for long sequences
131 $this->maxDifferences = floor( pow( $this->maxDifferences, $this->powLimit - 1.0 ) );
132 wfDebug( "Limiting max number of differences to $this->maxDifferences\n" );
133 }
134
135 /*
136 * The common prefixes and suffixes are always part of some LCS, include
137 * them now to reduce our search space
138 */
139 $max = min( $this->m, $this->n );
140 for ( $forwardBound = 0; $forwardBound < $max
141 && $this->from[$forwardBound] === $this->to[$forwardBound];
143 ) {
144 $this->removed[$forwardBound] = $this->added[$forwardBound] = false;
145 }
146
147 $backBoundL1 = $this->m - 1;
148 $backBoundL2 = $this->n - 1;
149
151 && $this->from[$backBoundL1] === $this->to[$backBoundL2]
152 ) {
153 $this->removed[$backBoundL1--] = $this->added[$backBoundL2--] = false;
154 }
155
156 $temp = array_fill( 0, $this->m + $this->n + 1, 0 );
157 $V = [ $temp, $temp ];
158 $snake = [ 0, 0, 0 ];
159
160 $this->length = $forwardBound + $this->m - $backBoundL1 - 1
161 + $this->lcs_rec(
165 $backBoundL2,
166 $V,
167 $snake
168 );
169 }
170
171 $this->m = $m;
172 $this->n = $n;
173
174 $this->length += $i + $j - 1;
175
176 foreach ( $this->removed as $key => &$removed_elem ) {
177 if ( !$removed_elem ) {
178 $removed[$newFromIndex[$key]] = false;
179 }
180 }
181 foreach ( $this->added as $key => &$added_elem ) {
182 if ( !$added_elem ) {
183 $added[$newToIndex[$key]] = false;
184 }
185 }
186 $this->removed = $removed;
187 $this->added = $added;
188 }
189
191 // Diff and store locally
192 $this->diff( $from_lines, $to_lines );
194
195 $ranges = [];
196 $xi = $yi = 0;
197 while ( $xi < $this->m || $yi < $this->n ) {
198 // Matching "snake".
199 while ( $xi < $this->m && $yi < $this->n
200 && !$this->removed[$xi]
201 && !$this->added[$yi]
202 ) {
203 ++$xi;
204 ++$yi;
205 }
206 // Find deletes & adds.
207 $xstart = $xi;
208 while ( $xi < $this->m && $this->removed[$xi] ) {
209 ++$xi;
210 }
211
212 $ystart = $yi;
213 while ( $yi < $this->n && $this->added[$yi] ) {
214 ++$yi;
215 }
216
217 if ( $xi > $xstart || $yi > $ystart ) {
219 }
220 }
221
222 return $ranges;
223 }
224
225 private function lcs_rec( $bottoml1, $topl1, $bottoml2, $topl2, &$V, &$snake ) {
226 // check that both sequences are non-empty
227 if ( $bottoml1 > $topl1 || $bottoml2 > $topl2 ) {
228 return 0;
229 }
230
232 $topl2, $V, $snake );
233
234 // need to store these so we don't lose them when they're
235 // overwritten by the recursion
236 $len = $snake[2];
237 $startx = $snake[0];
238 $starty = $snake[1];
239
240 // the middle snake is part of the LCS, store it
241 for ( $i = 0; $i < $len; ++$i ) {
242 $this->removed[$startx + $i] = $this->added[$starty + $i] = false;
243 }
244
245 if ( $d > 1 ) {
246 return $len
247 + $this->lcs_rec( $bottoml1, $startx - 1, $bottoml2,
248 $starty - 1, $V, $snake )
249 + $this->lcs_rec( $startx + $len, $topl1, $starty + $len,
250 $topl2, $V, $snake );
251 } elseif ( $d == 1 ) {
252 /*
253 * In this case the sequences differ by exactly 1 line. We have
254 * already saved all the lines after the difference in the for loop
255 * above, now we need to save all the lines before the difference.
256 */
257 $max = min( $startx - $bottoml1, $starty - $bottoml2 );
258 for ( $i = 0; $i < $max; ++$i ) {
259 $this->removed[$bottoml1 + $i] =
260 $this->added[$bottoml2 + $i] = false;
261 }
262
263 return $max + $len;
264 }
265
266 return $len;
267 }
268
271 $to = &$this->to;
272 $V0 = &$V[0];
273 $V1 = &$V[1];
274 $snake0 = &$snake[0];
275 $snake1 = &$snake[1];
276 $snake2 = &$snake[2];
281 $delta = $N - $M;
284 $limit = min( $this->maxDifferences, ceil( ( $N + $M ) / 2 ) );
285
286 // value_to_add_forward: a 0 or 1 that we add to the start
287 // offset to make it odd/even
288 if ( ( $M & 1 ) == 1 ) {
290 } else {
292 }
293
294 if ( ( $N & 1 ) == 1 ) {
296 } else {
298 }
299
304
305 $limit_min_1 = $limit - 1;
306 $limit_plus_1 = $limit + 1;
307
308 $V0[$limit_plus_1] = 0;
310 $limit = min( $this->maxDifferences, ceil( ( $N + $M ) / 2 ) );
311
312 if ( ( $delta & 1 ) == 1 ) {
313 for ( $d = 0; $d <= $limit; ++$d ) {
315 $end_diag = min( $end_forward, $d );
317
318 // compute forward furthest reaching paths
319 for ( $k = $start_diag; $k <= $end_diag; $k += 2 ) {
320 if ( $k == -$d || ( $k < $d
322 ) {
323 $x = $V0[$limit_plus_1 + $k];
324 } else {
325 $x = $V0[$limit_min_1 + $k] + 1;
326 }
327
329 $absy = $snake1 = $x - $k + $bottoml2;
330
331 while ( $absx < $maxabsx && $absy < $maxabsy && $from[$absx] === $to[$absy] ) {
332 ++$absx;
333 ++$absy;
334 }
335 $x = $absx - $bottoml1;
336
338 $V0[$limit + $k] = $x;
339 if ( $k >= $delta - $d + 1 && $k <= $delta + $d - 1
340 && $x >= $V1[$limit + $k - $delta]
341 ) {
342 return 2 * $d - 1;
343 }
344
345 // check to see if we can cut down the diagonal range
346 if ( $x >= $N && $end_forward > $k - 1 ) {
347 $end_forward = $k - 1;
348 } elseif ( $absy - $bottoml2 >= $M ) {
349 $start_forward = $k + 1;
351 }
352 }
353
355 $end_diag = min( $end_backward, $d );
357
358 // compute backward furthest reaching paths
359 for ( $k = $start_diag; $k <= $end_diag; $k += 2 ) {
360 if ( $k == $d
361 || ( $k != -$d && $V1[$limit_min_1 + $k] < $V1[$limit_plus_1 + $k] )
362 ) {
363 $x = $V1[$limit_min_1 + $k];
364 } else {
365 $x = $V1[$limit_plus_1 + $k] - 1;
366 }
367
368 $y = $x - $k - $delta;
369
370 $snake2 = 0;
371 while ( $x > 0 && $y > 0
373 ) {
374 --$x;
375 --$y;
376 ++$snake2;
377 }
378 $V1[$limit + $k] = $x;
379
380 // check to see if we can cut down our diagonal range
381 if ( $x <= 0 ) {
382 $start_backward = $k + 1;
385 $end_backward = $k - 1;
386 }
387 }
388 }
389 } else {
390 for ( $d = 0; $d <= $limit; ++$d ) {
392 $end_diag = min( $end_forward, $d );
394
395 // compute forward furthest reaching paths
396 for ( $k = $start_diag; $k <= $end_diag; $k += 2 ) {
397 if ( $k == -$d
398 || ( $k < $d && $V0[$limit_min_1 + $k] < $V0[$limit_plus_1 + $k] )
399 ) {
400 $x = $V0[$limit_plus_1 + $k];
401 } else {
402 $x = $V0[$limit_min_1 + $k] + 1;
403 }
404
406 $absy = $snake1 = $x - $k + $bottoml2;
407
408 while ( $absx < $maxabsx && $absy < $maxabsy && $from[$absx] === $to[$absy] ) {
409 ++$absx;
410 ++$absy;
411 }
412 $x = $absx - $bottoml1;
414 $V0[$limit + $k] = $x;
415
416 // check to see if we can cut down the diagonal range
417 if ( $x >= $N && $end_forward > $k - 1 ) {
418 $end_forward = $k - 1;
419 } elseif ( $absy - $bottoml2 >= $M ) {
420 $start_forward = $k + 1;
422 }
423 }
424
426 $end_diag = min( $end_backward, $d );
428
429 // compute backward furthest reaching paths
430 for ( $k = $start_diag; $k <= $end_diag; $k += 2 ) {
431 if ( $k == $d
432 || ( $k != -$d && $V1[$limit_min_1 + $k] < $V1[$limit_plus_1 + $k] )
433 ) {
434 $x = $V1[$limit_min_1 + $k];
435 } else {
436 $x = $V1[$limit_plus_1 + $k] - 1;
437 }
438
439 $y = $x - $k - $delta;
440
441 $snake2 = 0;
442 while ( $x > 0 && $y > 0
444 ) {
445 --$x;
446 --$y;
447 ++$snake2;
448 }
449 $V1[$limit + $k] = $x;
450
451 if ( $k >= -$delta - $d && $k <= $d - $delta
452 && $x <= $V0[$limit + $k + $delta]
453 ) {
454 $snake0 = $bottoml1 + $x;
455 $snake1 = $bottoml2 + $y;
456
457 return 2 * $d;
458 }
459
460 // check to see if we can cut down our diagonal range
461 if ( $x <= 0 ) {
462 $start_backward = $k + 1;
465 $end_backward = $k - 1;
466 }
467 }
468 }
469 }
470 /*
471 * computing the true LCS is too expensive, instead find the diagonal
472 * with the most progress and pretend a midle snake of length 0 occurs
473 * there.
474 */
475
477
480 $snake2 = 0;
481 wfDebug( "Computing the LCS is too expensive. Using a heuristic.\n" );
482 $this->heuristicUsed = true;
483
484 return 5; /*
485 * HACK: since we didn't really finish the LCS computation
486 * we don't really know the length of the SES. We don't do
487 * anything with the result anyway, unless it's <=1. We know
488 * for a fact SES > 1 so 5 is as good a number as any to
489 * return here
490 */
491 }
492
493 private static function findMostProgress( $M, $N, $limit, $V ) {
494 $delta = $N - $M;
495
496 if ( ( $M & 1 ) == ( $limit & 1 ) ) {
498 } else {
499 $forward_start_diag = max( 1 - $M, -$limit );
500 }
501
502 $forward_end_diag = min( $N, $limit );
503
504 if ( ( $N & 1 ) == ( $limit & 1 ) ) {
506 } else {
508 }
509
510 $backward_end_diag = -min( $M, $limit );
511
512 $temp = [ 0, 0, 0 ];
513
516 $num_progress = 0; // the 1st entry is current, it is initialized
517 // with 0s
518
519 // first search the forward diagonals
520 for ( $k = $forward_start_diag; $k <= $forward_end_diag; $k += 2 ) {
521 $x = $V[0][$limit + $k];
522 $y = $x - $k;
523 if ( $x > $N || $y > $M ) {
524 continue;
525 }
526
527 $progress = $x + $y;
528 if ( $progress > $max_progress[0][2] ) {
529 $num_progress = 0;
530 $max_progress[0][0] = $x;
531 $max_progress[0][1] = $y;
532 $max_progress[0][2] = $progress;
533 } elseif ( $progress == $max_progress[0][2] ) {
538 }
539 }
540
541 $max_progress_forward = true; // initially the maximum
542 // progress is in the forward
543 // direction
544
545 // now search the backward diagonals
546 for ( $k = $backward_start_diag; $k <= $backward_end_diag; $k += 2 ) {
547 $x = $V[1][$limit + $k];
548 $y = $x - $k - $delta;
549 if ( $x < 0 || $y < 0 ) {
550 continue;
551 }
552
553 $progress = $N - $x + $M - $y;
554 if ( $progress > $max_progress[0][2] ) {
555 $num_progress = 0;
556 $max_progress_forward = false;
557 $max_progress[0][0] = $x;
558 $max_progress[0][1] = $y;
559 $max_progress[0][2] = $progress;
565 }
566 }
567
568 // return the middle diagonal with maximal progress.
569 return $max_progress[(int)floor( $num_progress / 2 )];
570 }
571
575 public function getLcsLength() {
576 if ( $this->heuristicUsed && !$this->lcsLengthCorrectedForHeuristic ) {
577 $this->lcsLengthCorrectedForHeuristic = true;
578 $this->length = $this->m - array_sum( $this->added );
579 }
580
581 return $this->length;
582 }
583
584}
585
593
596
598 public $leftend;
599
602
605
607 public $rightend;
608
611
613 $this->leftstart = $leftstart;
614 $this->leftend = $leftend;
615 $this->leftlength = $leftend - $leftstart;
616 $this->rightstart = $rightstart;
617 $this->rightend = $rightend;
618 $this->rightlength = $rightend - $rightstart;
619 }
620
621}
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for and distribution as defined by Sections through of this document Licensor shall mean the copyright owner or entity authorized by the copyright owner that is granting the License Legal Entity shall mean the union of the acting entity and all other entities that control are controlled by or are under common control with that entity For the purposes of this definition control direct or to cause the direction or management of such whether by contract or including but not limited to software source documentation and configuration files Object form shall mean any form resulting from mechanical transformation or translation of a Source including but not limited to compiled object generated and conversions to other media types Work shall mean the work of whether in Source or Object made available under the as indicated by a copyright notice that is included in or attached to the whether in Source or Object that is based or other modifications as a an original work of authorship For the purposes of this Derivative Works shall not include works that remain separable from
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
$i
Definition Parser.php:1694
Alternative representation of a set of changes, by the index ranges that are changed.
__construct( $leftstart, $leftend, $rightstart, $rightend)
This diff implementation is mainly lifted from the LCS algorithm of the Eclipse project which in turn...
Definition WikiDiff3.php:39
diff_range( $from_lines, $to_lines)
lcs_rec( $bottoml1, $topl1, $bottoml2, $topl2, &$V, &$snake)
diff($from, $to)
Definition WikiDiff3.php:65
__construct( $tooLong=2000000, $powLimit=1.45)
Definition WikiDiff3.php:60
$lcsLengthCorrectedForHeuristic
Definition WikiDiff3.php:52
find_middle_snake( $bottoml1, $topl1, $bottoml2, $topl2, &$V, &$snake)
static findMostProgress( $M, $N, $limit, $V)
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context the output can only depend on parameters provided to this hook not on global state indicating whether full HTML should be generated If generation of HTML may be but other information should still be present in the ParserOutput object to manipulate or replace but no entry for that model exists in $wgContentHandlers if desired whether it is OK to use $contentModel on $title Handler functions that modify $ok should generally return false to prevent further hooks from further modifying $ok inclusive $limit
Definition hooks.txt:1081
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition injection.txt:37