LCOV - code coverage report
Current view: top level - src/lib - DiffEngine.h (source / functions) Hit Total Coverage
Test: mediawiki/php/wikidiff2 test coverage report Lines: 235 248 94.8 %
Date: 2023-07-04 10:20:16 Functions: 25 30 83.3 %

          Line data    Source code
       1             : /**
       2             :  * GPL blah blah, see below for history
       3             :  */
       4             : 
       5             : #ifndef DIFFENGINE_H
       6             : #define DIFFENGINE_H
       7             : 
       8             : #include <vector>
       9             : #include <map>
      10             : #include <set>
      11             : #include <utility>
      12             : #include <algorithm>
      13             : #include <cassert>
      14             : #include <string>
      15             : #include <numeric>
      16             : 
      17             : #include "IntSet.h"
      18             : #include "Word.h"
      19             : 
      20             : namespace wikidiff2 {
      21             : 
      22             : #ifdef DEBUG_MOVED_LINES
      23             : inline void debugLog(const char *fmt, ...) {
      24             :     static FILE *f = nullptr;
      25             :     char ch[2048];
      26             :     va_list ap;
      27             :     va_start(ap, fmt);
      28             :     vsnprintf(ch, sizeof(ch), fmt, ap);
      29             :     va_end(ap);
      30             :     if(!f) f = fopen("/tmp/wd2-debug-log.log", "a");
      31             :     if(!f) throw std::runtime_error("failed to open debug log...");
      32             :     fwrite(ch, 1, strlen(ch), f);
      33             :     fflush(f);
      34             : };
      35             : #else
      36             : #define debugLog(...)
      37             : #endif
      38             : 
      39             : /**
      40             :  * Options to be passed to the various constructors
      41             :  */
      42             : struct DiffConfig {
      43             :     /**
      44             :      * Complexity is the product of input sizes, after identical leading and
      45             :      * trailing parts are removed. This is supposed to be proportional to
      46             :      * worst-case runtime. If the complexity is larger than this limit, trivial
      47             :      * output will be produced, and the bailout flag will be set in the Diff.
      48             :      */
      49             :     long long bailoutComplexity;
      50             : };
      51             : 
      52             : /**
      53             :  * Diff operation
      54             :  *
      55             :  * from and to are vectors containing pointers to the objects passed in from_lines and to_lines
      56             :  *
      57             :  * op is one of the following
      58             :  *    copy:    A sequence of lines (in from and to) which are the same in both files.
      59             :  *    del:     A sequence of lines (in from) which were in the first file but not the second.
      60             :  *    add:     A sequence of lines (in to) which were in the second file but not the first.
      61             :  *    change:  A sequence of lines which are different between the two files. Lines from the
      62             :  *             first file are in from, lines from the second are in to. The two vectors need
      63             :  *             not be the same length.
      64             :  */
      65             : template<typename T>
      66             : class DiffOp
      67             : {
      68             :     public:
      69             :         typedef std::vector<const T*, WD2_ALLOCATOR<const T*> > PointerVector;
      70        3997 :         DiffOp(int op_, const PointerVector & from_, const PointerVector & to_)
      71        3997 :             : op(op_), from(from_), to(to_) {}
      72             : 
      73             :         enum {copy, del, add, change};
      74             :         int op;
      75             :         PointerVector from;
      76             :         PointerVector to;
      77             : };
      78             : 
      79             : /**
      80             :  * Basic diff template class. After construction, edits will contain a vector of DiffOpTemplate
      81             :  * objects representing the diff
      82             :  */
      83             : template<typename T>
      84             : class Diff
      85             : {
      86             :     public:
      87             :         typedef std::vector<T, WD2_ALLOCATOR<T> > ValueVector;
      88             :         typedef std::vector<DiffOp<T>, WD2_ALLOCATOR<DiffOp<T>> > DiffOpVector;
      89             : 
      90          47 :         Diff() {}
      91             : 
      92             :         Diff(const DiffConfig & config, const ValueVector & from_lines, const ValueVector & to_lines);
      93             : 
      94        4236 :         void add_edit(const DiffOp<T> & edit) {
      95        4236 :             edits.push_back(edit);
      96        4236 :         }
      97             : 
      98       11921 :         size_t size() const {
      99       11921 :             return edits.size();
     100             :         }
     101             : 
     102         303 :         DiffOp<T> & operator[](int i) {
     103         303 :             return edits[i];
     104             :         }
     105             : 
     106       21891 :         const DiffOp<T> & operator[](int i) const {
     107       21891 :             return edits[i];
     108             :         }
     109             : 
     110          47 :         void swap(Diff<T> & other) {
     111          47 :             edits.swap(other.edits);
     112          47 :         }
     113             : 
     114             :         DiffOpVector edits;
     115             :         bool bailout = false;
     116             : };
     117             : 
     118             : /**
     119             :  * Class used internally by Diff to actually compute the diffs.
     120             :  *
     121             :  * The algorithm used here is mostly lifted from the perl module
     122             :  * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
     123             :  *   http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
     124             :  *
     125             :  * More ideas are taken from:
     126             :  *   http://www.ics.uci.edu/~eppstein/161/960229.html
     127             :  *
     128             :  * Some ideas are (and a bit of code) are from from analyze.c, from GNU
     129             :  * diffutils-2.7, which can be found at:
     130             :  *   ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
     131             :  *
     132             :  * This implementation is largely due to Geoffrey T. Dairiki, who wrote this
     133             :  * diff engine for phpwiki 1-3.3. It was then adopted by MediaWiki.
     134             :  *
     135             :  * Finally, it was ported to C++ by Tim Starling in February 2006
     136             :  *
     137             :  * @access private
     138             :  */
     139             : 
     140             : template<typename T>
     141             : class DiffEngine
     142             : {
     143             :     public:
     144             :         // Vectors
     145             :         typedef std::vector<bool> BoolVector; // skip the allocator here to get the specialisation
     146             :         typedef std::vector<const T*, WD2_ALLOCATOR<const T*> > PointerVector;
     147             :         typedef std::vector<T, WD2_ALLOCATOR<T> > ValueVector;
     148             :         typedef std::vector<int, WD2_ALLOCATOR<int> > IntVector;
     149             :         typedef std::vector<std::pair<int, int>, WD2_ALLOCATOR<std::pair<int, int> > > IntPairVector;
     150             : 
     151             :         // Maps
     152             :         typedef std::map<T, IntVector, std::less<T>, WD2_ALLOCATOR<std::pair<const T, IntVector>>> MatchesMap;
     153             : 
     154             :         // Sets
     155             :         typedef std::set<T, std::less<T>, WD2_ALLOCATOR<T> > ValueSet;
     156             : 
     157         698 :         DiffEngine(const DiffConfig & config_)
     158         698 :             : config(config_), done(false)
     159         698 :         {}
     160             : 
     161             :         void clear();
     162             :         void diff(const ValueVector & from_lines,
     163             :                 const ValueVector & to_lines, Diff<T> & diff);
     164             :         int lcs_pos (int ypos);
     165             :         void compareseq (int xoff, int xlim, int yoff, int ylim);
     166             :         void shift_boundaries (const ValueVector & lines, BoolVector & changed,
     167             :                 const BoolVector & other_changed);
     168             :     protected:
     169             :         int diag (int xoff, int xlim, int yoff, int ylim, int nchunks,
     170             :                 IntPairVector & seps);
     171             : 
     172             :         DiffConfig config;
     173             :         BoolVector xchanged, ychanged;
     174             :         PointerVector xv, yv;
     175             :         IntVector xind, yind;
     176             :         IntVector seq;
     177             :         IntSet in_seq;
     178             :         int lcs;
     179             :         bool done;
     180             :         enum {MAX_CHUNKS=8};
     181             : };
     182             : 
     183             : //-----------------------------------------------------------------------------
     184             : // DiffEngine implementation
     185             : //-----------------------------------------------------------------------------
     186             : template<typename T>
     187           0 : void DiffEngine<T>::clear()
     188             : {
     189           0 :     xchanged.clear();
     190           0 :     ychanged.clear();
     191           0 :     xv.clear();
     192           0 :     yv.clear();
     193           0 :     xind.clear();
     194           0 :     yind.clear();
     195           0 :     seq.clear();
     196           0 :     in_seq.clear();
     197           0 :     done = false;
     198           0 : }
     199             : 
     200             : template<typename T>
     201         698 : void DiffEngine<T>::diff (const ValueVector & from_lines,
     202             :         const ValueVector & to_lines, Diff<T> & diff)
     203             : {
     204         698 :     int n_from = (int)from_lines.size();
     205         698 :     int n_to = (int)to_lines.size();
     206             : 
     207             :     // If this diff engine has been used before for a diff, clear the member variables
     208         698 :     if (done) {
     209           0 :         clear();
     210             :     }
     211         698 :     xchanged.resize(n_from);
     212         698 :     ychanged.resize(n_to);
     213         698 :     seq.resize(std::max(n_from, n_to) + 1);
     214             : 
     215             :     // Skip leading common lines.
     216             :     int skip, endskip;
     217        8880 :     for (skip = 0; skip < n_from && skip < n_to; skip++) {
     218        8764 :         if (from_lines[skip] != to_lines[skip])
     219         582 :             break;
     220        8182 :         xchanged[skip] = ychanged[skip] = false;
     221             :     }
     222             :     // Skip trailing common lines.
     223         698 :     int xi = n_from, yi = n_to;
     224        7628 :     for (endskip = 0; --xi > skip && --yi > skip; endskip++) {
     225        7345 :         if (from_lines[xi] != to_lines[yi])
     226         415 :             break;
     227        6930 :         xchanged[xi] = ychanged[yi] = false;
     228             :     }
     229             : 
     230        1396 :     long long complexity = (long long)(n_from - skip - endskip)
     231         698 :         * (n_to - skip - endskip);
     232             : 
     233             :     // If too complex, just output "whole left side replaced with right"
     234         698 :     if (config.bailoutComplexity > 0 && complexity > config.bailoutComplexity) {
     235           6 :         PointerVector del;
     236           3 :         PointerVector add;
     237             : 
     238       26012 :         for (xi = 0; xi < n_from; xi++) {
     239       26009 :             del.push_back(&from_lines[xi]);
     240             :         }
     241       24012 :         for (yi = 0; yi < n_to; yi++) {
     242       24009 :             add.push_back(&to_lines[yi]);
     243             :         }
     244           3 :         diff.add_edit(DiffOp<T>(DiffOp<T>::change, del, add));
     245           3 :         diff.bailout = true;
     246             : 
     247           3 :         done = true;
     248           3 :         return;
     249             :     }
     250             : 
     251             :     // Ignore lines which do not exist in both files.
     252        1390 :     ValueSet xhash, yhash;
     253       11719 :     for (xi = skip; xi < n_from - endskip; xi++) {
     254       11024 :         xhash.insert(from_lines[xi]);
     255             :     }
     256             : 
     257       16678 :     for (yi = skip; yi < n_to - endskip; yi++) {
     258       15983 :         const T & line = to_lines[yi];
     259       15983 :         if ( (ychanged[yi] = (xhash.find(line) == xhash.end())) )
     260       10487 :             continue;
     261        5496 :         yhash.insert(line);
     262        5496 :         yv.push_back(&line);
     263        5496 :         yind.push_back(yi);
     264             :     }
     265       11719 :     for (xi = skip; xi < n_from - endskip; xi++) {
     266       11024 :         const T & line = from_lines[xi];
     267       11024 :         if ( (xchanged[xi] = (yhash.find(line) == yhash.end())) )
     268        6536 :             continue;
     269        4488 :         xv.push_back(&line);
     270        4488 :         xind.push_back(xi);
     271             :     }
     272             : 
     273             :     // Find the LCS.
     274         695 :     compareseq(0, xv.size(), 0, yv.size());
     275             : 
     276             :     // Merge edits when possible
     277         695 :     shift_boundaries(from_lines, xchanged, ychanged);
     278         695 :     shift_boundaries(to_lines, ychanged, xchanged);
     279             : 
     280             :     // Compute the edit operations.
     281         695 :     xi = yi = 0;
     282        2971 :     while (xi < n_from || yi < n_to) {
     283             :         assert(yi < n_to || xchanged[xi]);
     284             :         assert(xi < n_from || ychanged[yi]);
     285             : 
     286             :         // Skip matching "snake".
     287        4552 :         PointerVector del;
     288        4552 :         PointerVector add;
     289        4552 :         PointerVector empty;
     290       20418 :         while (xi < n_from && yi < n_to && !xchanged[xi] && !ychanged[yi]) {
     291       18142 :             del.push_back(&from_lines[xi]);
     292       18142 :             add.push_back(&to_lines[yi]);
     293       18142 :             ++xi;
     294       18142 :             ++yi;
     295             :         }
     296        2276 :         if (del.size()) {
     297        1829 :             diff.add_edit(DiffOp<T>(DiffOp<T>::copy, del, add));
     298        1829 :             del.clear();
     299        1829 :             add.clear();
     300             :         }
     301             : 
     302             :         // Find deletes & adds.
     303       10268 :         while (xi < n_from && xchanged[xi])
     304        7992 :             del.push_back(&from_lines[xi++]);
     305             : 
     306       15227 :         while (yi < n_to && ychanged[yi])
     307       12951 :             add.push_back(&to_lines[yi++]);
     308             : 
     309        2276 :         if (del.size() && add.size()) {
     310        1102 :             diff.add_edit(DiffOp<T>(DiffOp<T>::change, del, add));
     311        1174 :         } else if (del.size())
     312         425 :             diff.add_edit(DiffOp<T>(DiffOp<T>::del, del, empty));
     313         749 :         else if (add.size())
     314         467 :             diff.add_edit(DiffOp<T>(DiffOp<T>::add, empty, add));
     315             :     }
     316             : 
     317         695 :     done = true;
     318             : }
     319             : 
     320             : /* Divide the Largest Common Subsequence (LCS) of the sequences
     321             :  * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
     322             :  * sized segments.
     323             :  *
     324             :  * Returns (LCS, SEPS). LCS is the length of the LCS. SEPS is an
     325             :  * array of NCHUNKS+1 (X, Y) indexes giving the diving points between
     326             :  * sub sequences.  The first sub-sequence is contained in [X0, X1),
     327             :  * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on.  Note
     328             :  * that (X0, Y0) == (XOFF, YOFF) and
     329             :  * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
     330             :  *
     331             :  * This function assumes that the first lines of the specified portions
     332             :  * of the two files do not match, and likewise that the last lines do not
     333             :  * match.  The caller must trim matching lines from the beginning and end
     334             :  * of the portions it is going to specify.
     335             :  */
     336             : template <typename T>
     337         421 : int DiffEngine<T>::diag (int xoff, int xlim, int yoff, int ylim, int nchunks,
     338             :         IntPairVector & seps)
     339             : {
     340             :     using std::swap;
     341             :     using std::make_pair;
     342             :     using std::copy;
     343         421 :     bool flip = false;
     344         842 :     MatchesMap ymatches;
     345             : 
     346         421 :     if (xlim - xoff > ylim - yoff) {
     347             :         // Things seems faster (I'm not sure I understand why)
     348             :         // when the shortest sequence in X.
     349         143 :         flip = true;
     350         143 :         swap(xoff, yoff);
     351         143 :         swap(xlim, ylim);
     352             :     }
     353             : 
     354         421 :     if (flip)
     355        3663 :         for (int i = ylim - 1; i >= yoff; i--)
     356        3520 :             ymatches[*xv[i]].push_back(i);
     357             :     else
     358        4430 :         for (int i = ylim - 1; i >= yoff; i--)
     359        4152 :             ymatches[*yv[i]].push_back(i);
     360             : 
     361         421 :     int nlines = ylim - yoff;
     362         421 :     lcs = 0;
     363         421 :     seq[0] = yoff - 1;
     364         421 :     in_seq.clear();
     365             : 
     366             :     // 2-d array, line major, chunk minor
     367         421 :     IntVector ymids(nlines * nchunks);
     368             : 
     369         421 :     int numer = xlim - xoff + nchunks - 1;
     370         421 :     int x = xoff, x1, y1;
     371        2305 :     for (int chunk = 0; chunk < nchunks; chunk++) {
     372        1884 :         if (chunk > 0)
     373       12000 :             for (int i = 0; i <= lcs; i++)
     374       10537 :                 ymids.at(i * nchunks + chunk-1) = seq[i];
     375             : 
     376        1884 :         x1 = xoff + (int)((numer + (xlim-xoff)*chunk) / nchunks);
     377        5484 :         for ( ; x < x1; x++) {
     378        3600 :             const T & line = flip ? *yv[x] : *xv[x];
     379        3600 :             typename MatchesMap::iterator iter = ymatches.find(line);
     380        3600 :             if (iter == ymatches.end())
     381         288 :                 continue;
     382        3312 :             IntVector * pMatches = &(iter->second);
     383        3312 :             IntVector::iterator y;
     384        3312 :             int k = 0;
     385             : 
     386        3512 :             for (y = pMatches->begin(); y != pMatches->end(); ++y) {
     387        3427 :                 if (!in_seq.contains(*y)) {
     388        3227 :                     k = lcs_pos(*y);
     389             :                     assert(k > 0);
     390        3227 :                     copy(ymids.begin() + (k-1) * nchunks, ymids.begin() + k * nchunks,
     391        3227 :                             ymids.begin() + k * nchunks);
     392        3227 :                     ++y;
     393        3227 :                     break;
     394             :                 }
     395             :             }
     396       65905 :             for ( ; y != pMatches->end(); ++y) {
     397       62593 :                 if (*y > seq[k-1]) {
     398             :                     assert(*y < seq[k]);
     399             :                     // Optimization: this is a common case:
     400             :                     //  next match is just replacing previous match.
     401       36525 :                     in_seq.erase(seq[k]);
     402       36525 :                     seq[k] = *y;
     403       36525 :                     in_seq.insert(*y);
     404       26068 :                 } else if (!in_seq.contains(*y)) {
     405        5752 :                     k = lcs_pos(*y);
     406             :                     assert(k > 0);
     407        5752 :                     copy(ymids.begin() + (k-1) * nchunks, ymids.begin() + k * nchunks,
     408       11504 :                             ymids.begin() + k * nchunks);
     409             :                 }
     410             :             }
     411             :         }
     412             :     }
     413             : 
     414         421 :     seps.clear();
     415         421 :     seps.resize(nchunks + 1);
     416             : 
     417         564 :     seps[0] = flip ? make_pair(yoff, xoff) : make_pair(xoff, yoff);
     418         421 :     IntVector::iterator ymid = ymids.begin() + lcs * nchunks;
     419        1884 :     for (int n = 0; n < nchunks - 1; n++) {
     420        1463 :         x1 = xoff + (numer + (xlim - xoff) * n) / nchunks;
     421        1463 :         y1 = ymid[n] + 1;
     422        1982 :         seps[n+1] = flip ? make_pair(y1, x1) : make_pair(x1, y1);
     423             :     }
     424         564 :     seps[nchunks] = flip ? make_pair(ylim, xlim) : make_pair(xlim, ylim);
     425         842 :     return lcs;
     426             : }
     427             : 
     428             : template <typename T>
     429        8979 : int DiffEngine<T>::lcs_pos (int ypos) {
     430        8979 :     int end = lcs;
     431        8979 :     if (end == 0 || ypos > seq[end]) {
     432        2564 :         seq[++lcs] = ypos;
     433        2564 :         in_seq.insert(ypos);
     434        2564 :         return lcs;
     435             :     }
     436             : 
     437        6415 :     int beg = 1;
     438       39247 :     while (beg < end) {
     439       32832 :         int mid = (beg + end) / 2;
     440       32832 :         if ( ypos > seq[mid] )
     441       19011 :             beg = mid + 1;
     442             :         else
     443       13821 :             end = mid;
     444             :     }
     445             : 
     446             :     assert(ypos != seq[end]);
     447             : 
     448        6415 :     in_seq.erase(seq[end]);
     449        6415 :     seq[end] = ypos;
     450        6415 :     in_seq.insert(ypos);
     451        6415 :     return end;
     452             : }
     453             : 
     454             : /* Find LCS of two sequences.
     455             :  *
     456             :  * The results are recorded in the vectors {x,y}changed[], by
     457             :  * storing a 1 in the element for each line that is an insertion
     458             :  * or deletion (ie. is not in the LCS).
     459             :  *
     460             :  * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
     461             :  *
     462             :  * Note that XLIM, YLIM are exclusive bounds.
     463             :  * All line numbers are origin-0 and discarded lines are not counted.
     464             :  */
     465             : template <typename T>
     466        2452 : void DiffEngine<T>::compareseq (int xoff, int xlim, int yoff, int ylim) {
     467             :     using std::pair;
     468             : 
     469        4904 :     IntPairVector seps;
     470             :     int lcs;
     471             : 
     472             :     // Slide down the bottom initial diagonal.
     473        4203 :     while (xoff < xlim && yoff < ylim && *xv[xoff] == *yv[yoff]) {
     474        1751 :         ++xoff;
     475        1751 :         ++yoff;
     476             :     }
     477             : 
     478             :     // Slide up the top initial diagonal.
     479        3733 :     while (xlim > xoff && ylim > yoff && *xv[xlim - 1] == *yv[ylim - 1]) {
     480        1281 :         --xlim;
     481        1281 :         --ylim;
     482             :     }
     483             : 
     484        2452 :     if (xoff == xlim || yoff == ylim)
     485        2031 :         lcs = 0;
     486             :     else {
     487             :         // This is ad hoc but seems to work well.
     488             :         //nchunks = sqrt(min(xlim - xoff, ylim - yoff) / 2.5);
     489             :         //nchunks = max(2,min(8,(int)nchunks));
     490         421 :         int nchunks = std::min(MAX_CHUNKS-1, std::min(xlim - xoff, ylim - yoff)) + 1;
     491         421 :         lcs = diag(xoff, xlim, yoff, ylim, nchunks, seps);
     492             :     }
     493             : 
     494        2452 :     if (lcs == 0) {
     495             :         // X and Y sequences have no common subsequence:
     496             :         // mark all changed.
     497        4556 :         while (yoff < ylim)
     498        2464 :             ychanged[yind[yoff++]] = true;
     499        3548 :         while (xoff < xlim)
     500        1456 :             xchanged[xind[xoff++]] = true;
     501             :     } else {
     502             :         // Use the partitions to split this problem into subproblems.
     503         360 :         IntPairVector::iterator pt1, pt2;
     504         360 :         pt1 = pt2 = seps.begin();
     505        2117 :         while (++pt2 != seps.end()) {
     506        1757 :             compareseq (pt1->first, pt2->first, pt1->second, pt2->second);
     507        1757 :             pt1 = pt2;
     508             :         }
     509             :     }
     510        2452 : }
     511             : 
     512             : /* Adjust inserts/deletes of identical lines to join changes
     513             :  * as much as possible.
     514             :  *
     515             :  * We do something when a run of changed lines include a
     516             :  * line at one end and has an excluded, identical line at the other.
     517             :  * We are free to choose which identical line is included.
     518             :  * `compareseq' usually chooses the one at the beginning,
     519             :  * but usually it is cleaner to consider the following identical line
     520             :  * to be the "change".
     521             :  *
     522             :  * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
     523             :  */
     524             : template <typename T>
     525        1390 : void DiffEngine<T>::shift_boundaries (const ValueVector & lines, BoolVector & changed,
     526             :         const BoolVector & other_changed)
     527             : {
     528        1390 :     int i = 0;
     529        1390 :     int j = 0;
     530             : 
     531        1390 :     int len = (int)lines.size();
     532        1390 :     int other_len = (int)other_changed.size();
     533             : 
     534        3110 :     while (1) {
     535             :         /*
     536             :          * Scan forwards to find beginning of another run of changes.
     537             :          * Also keep track of the corresponding point in the other file.
     538             :          *
     539             :          * Throughout this code, i and j are adjusted together so that
     540             :          * the first i elements of changed and the first j elements
     541             :          * of other_changed both contain the same number of zeros
     542             :          * (unchanged lines).
     543             :          * Furthermore, j is always kept so that j == other_len or
     544             :          * other_changed[j] == false.
     545             :          */
     546       13010 :         while (j < other_len && other_changed[j])
     547        8510 :             j++;
     548             : 
     549       40876 :         while (i < len && ! changed[i]) {
     550       36376 :             i++; j++;
     551       48822 :             while (j < other_len && other_changed[j])
     552       12446 :                 j++;
     553             :         }
     554             : 
     555        4500 :         if (i == len)
     556        1390 :             break;
     557             : 
     558        3110 :         int start = i, runlength, corresponding;
     559             : 
     560             :         // Find the end of this run of changes.
     561       20931 :         while (++i < len && changed[i])
     562       17821 :             continue;
     563             : 
     564          21 :         do {
     565             :             /*
     566             :              * Record the length of this run of changes, so that
     567             :              * we can later determine whether the run has grown.
     568             :              */
     569        3131 :             runlength = i - start;
     570             : 
     571             :             /*
     572             :              * Move the changed region back, so long as the
     573             :              * previous unchanged line matches the last changed one.
     574             :              * This merges with previous changed regions.
     575             :              */
     576        3287 :             while (start > 0 && lines[start - 1] == lines[i - 1]) {
     577         156 :                 changed[--start] = true;
     578         156 :                 changed[--i] = false;
     579         173 :                 while (start > 0 && changed[start - 1])
     580          17 :                     start--;
     581         389 :                 while (other_changed[--j])
     582         233 :                     continue;
     583             :             }
     584             : 
     585             :             /*
     586             :              * Set CORRESPONDING to the end of the changed run, at the last
     587             :              * point where it corresponds to a changed run in the other file.
     588             :              * CORRESPONDING == LEN means no such point has been found.
     589             :              */
     590        3131 :             corresponding = j < other_len ? i : len;
     591             : 
     592             :             /*
     593             :              * Move the changed region forward, so long as the
     594             :              * first changed line matches the following unchanged one.
     595             :              * This merges with following changed regions.
     596             :              * Do this second, so that if there are no merges,
     597             :              * the changed region is moved forward as far as possible.
     598             :              */
     599        3328 :             while (i < len && lines[start] == lines[i]) {
     600         197 :                 changed[start++] = false;
     601         197 :                 changed[i++] = true;
     602         209 :                 while (i < len && changed[i])
     603          12 :                     i++;
     604             : 
     605         197 :                 j++;
     606         197 :                 if (j < other_len && other_changed[j]) {
     607          42 :                     corresponding = i;
     608         262 :                     while (j < other_len && other_changed[j])
     609         220 :                         j++;
     610             :                 }
     611             :             }
     612        3131 :         } while (runlength != i - start);
     613             : 
     614             :         /*
     615             :          * If possible, move the fully-merged run of changes
     616             :          * back to a corresponding run in the other file.
     617             :          */
     618        3243 :         while (corresponding < i) {
     619         133 :             changed[--start] = 1;
     620         133 :             changed[--i] = 0;
     621         133 :             while (other_changed[--j])
     622           0 :                 continue;
     623             :         }
     624             :     }
     625        1390 : }
     626             : //-----------------------------------------------------------------------------
     627             : // Diff implementation
     628             : //-----------------------------------------------------------------------------
     629             : 
     630             : template<typename T>
     631         698 : Diff<T>::Diff(const DiffConfig& config, const ValueVector & from_lines, const ValueVector & to_lines)
     632             : {
     633        1396 :     DiffEngine<T> engine(config);
     634         698 :     engine.diff(from_lines, to_lines, *this);
     635         698 : }
     636             : 
     637             : } // namespace wikidiff2
     638             : 
     639             : #endif

Generated by: LCOV version 1.13