LCOV - code coverage report
Current view: top level - src/lib - LineDiffProcessor.cpp (source / functions) Hit Total Coverage
Test: mediawiki/php/wikidiff2 test coverage report Lines: 66 71 93.0 %
Date: 2023-07-04 10:20:16 Functions: 5 5 100.0 %

          Line data    Source code
       1             : #include "LineDiffProcessor.h"
       2             : 
       3             : namespace wikidiff2 {
       4             : 
       5             : LineDiffProcessor::PointerVector LineDiffProcessor::empty; // Shared empty vector; detectChanges() passes it as the absent side of add/del ops
       6             : 
       7             : /**
       8             :  * Get diff stats from the cache.
                     :  *
                     :  * Forwards to wordDiffCache.getConcatDiffStats(), passing the first element
                     :  * and the element count of each half-open range.
                     :  *
                     :  * Note: *from and *to are dereferenced, so callers are expected to pass
                     :  * non-empty ranges.
                     :  *
                     :  * @param from The start of the LHS range
                     :  * @param fromEnd The end (not inclusive) of the LHS range
                     :  * @param to The start of the RHS range
                     :  * @param toEnd The end (not inclusive) of the RHS range
                     :  * @return A const reference to the stats object returned by the cache
       9             :  */
      10         108 : const WordDiffStats & LineDiffProcessor::getConcatDiffStats(
      11             :     PointerVectorIterator from, PointerVectorIterator fromEnd,
      12             :     PointerVectorIterator to, PointerVectorIterator toEnd)
      13             : {
      14             :     // TODO: assert that lines are actually consecutive in memory
      15         108 :     return wordDiffCache.getConcatDiffStats(
      16         216 :         *from, fromEnd - from, // first LHS element and number of LHS elements
      17         108 :         *to, toEnd - to); // first RHS element and number of RHS elements
      18             : }
      19             : 
      20             : /**
      21             :  * Go through the changed lines. Detect splits. If the lines are dissimilar,
      22             :  * convert to delete+add.
                     :  *
                     :  * Walks diffOp.from and diffOp.to in parallel and appends the resulting
                     :  * operations to result. Runs of consecutive similar line pairs are merged
                     :  * into a single change op; whatever remains on the longer side after the
                     :  * loop is appended as one del or add op.
                     :  *
                     :  * @param result The diff that receives the generated operations
                     :  * @param diffOp The operation whose from/to lines are examined (process()
                     :  *   only passes change ops)
      23             :  */
      24          64 : void LineDiffProcessor::detectChanges(StringDiff & result, StringDiffOp & diffOp)
      25             : {
      26          64 :     PointerVectorIterator pDel = diffOp.from.begin(),
      27          64 :         pDelEnd = diffOp.from.end(),
      28          64 :         pAdd = diffOp.to.begin(),
      29          64 :         pAddEnd = diffOp.to.end();
      30             :     // split: latest getSplit() result; savedSize: similar line pairs not yet emitted
      31          64 :     SplitInfo split{0, 0};
      32          64 :     int savedSize = 0;
      33             :     // Emit the last savedSize line pairs (ending just before pDel/pAdd) as one change op
      34         108 :     auto flushSaved = [&]() {
      35         108 :         if (savedSize) {
      36          60 :             result.add_edit(StringDiffOp(StringDiffOp::change,
      37         120 :                 PointerVector(pDel - savedSize, pDel),
      38         120 :                 PointerVector(pAdd - savedSize, pAdd))); // assumes each saved pair advanced pAdd by exactly one line -- TODO confirm
      39          60 :             savedSize = 0;
      40             :         }
      41         172 :     };
      42             :     // Pair up lines until either side runs out; pDel moves one line, pAdd moves split.size lines
      43         172 :     for (; pAdd != pAddEnd && pDel != pDelEnd; pDel++, pAdd += split.size) {
      44         108 :         split = getSplit(pDel, pDelEnd, pAdd, pAddEnd);
      45         108 :         if (split.size > 1) {
      46             :             // Add the split as a separate change: one LHS line against split.size RHS lines
      47           0 :             flushSaved();
      48           0 :             result.add_edit(StringDiffOp(StringDiffOp::change,
      49           0 :                 PointerVector(pDel, pDel + 1),
      50           0 :                 PointerVector(pAdd, pAdd + split.size)));
      51         108 :         } else if (split.similarity > config.changeThreshold) {
      52             :             // Save regular change for aggregation; flushSaved() emits the run later
      53          64 :             savedSize++;
      54             :         } else {
      55             :             // Convert dissimilar change to delete + add.
                     :             // NOTE(review): the add op is appended before the del op here -- confirm the order is intended
      56          44 :             flushSaved();
      57          44 :             result.add_edit(StringDiffOp(StringDiffOp::add,
      58          88 :                 empty, PointerVector(pAdd, pAdd + 1)));
      59          44 :             result.add_edit(StringDiffOp(StringDiffOp::del,
      60          88 :                 PointerVector(pDel, pDel + 1), empty));
      61             :             // Set split.size = 1 for the loop increment, so pAdd advances by exactly one line
      62          44 :             split.size = 1;
      63             :         }
      64             :     }
      65          64 :     flushSaved(); // emit any similar pairs still pending when the loop ends
      66             : 
      67             :     // Handle the trailing part which doesn't match due to unequal length
      68          64 :     if (pDel != pDelEnd) {
      69          14 :         result.add_edit(StringDiffOp(StringDiffOp::del,
      70          28 :             PointerVector(pDel, pDelEnd), empty));
      71          50 :     } else if (pAdd != pAddEnd) {
      72           9 :         result.add_edit(StringDiffOp(StringDiffOp::add,
      73          18 :             empty, PointerVector(pAdd, pAddEnd)));
      74             :     }
      75          64 : }
      76             : 
      77             : /**
      78             :  * Determine whether there is a line split at the start of the given LHS and
      79             :  * RHS half-open ranges.
      80             :  *
                     :  * The first LHS line is compared against the first 1, 2, ... RHS lines, and
                     :  * the candidate with the strictly highest similarity is kept.
                     :  *
      81             :  * @param pDel The start of the LHS range
      82             :  * @param pDelEnd The end (not inclusive) of the LHS range
      83             :  * @param pAdd The start of the RHS range
      84             :  * @param pAddEnd The end (not inclusive) of the RHS range
                     :  * @return A SplitInfo holding the chosen number of RHS lines and its
                     :  *   similarity. If the best candidate spans more than one line but its
                     :  *   similarity is below config.finalSplitThreshold, the result is size 1
                     :  *   with the single-line similarity. If the loop never runs, the result
                     :  *   is size 0 with similarity -1.
      85             :  */
      86         108 : LineDiffProcessor::SplitInfo LineDiffProcessor::getSplit(
      87             :     PointerVectorIterator pDel, PointerVectorIterator pDelEnd,
      88             :     PointerVectorIterator pAdd, PointerVectorIterator pAddEnd)
      89             : {
      90         108 :     int splitSize = 0;
      91         108 :     int bestSplitSize = 0;
      92         108 :     double bestSimilarity = -1;
      93         108 :     double singleSimilarity = -1;
      94         180 :     while (pAdd + splitSize < pAddEnd && splitSize < config.maxSplitSize) { // grow until the RHS range or config.maxSplitSize is exhausted
      95         108 :         splitSize++;
      96             :         const WordDiffStats & ds = getConcatDiffStats(
      97         108 :                 pDel, pDel + 1, pAdd, pAdd + splitSize); // one LHS line against the first splitSize RHS lines
      98         108 :         double similarity = ds.charSimilarity;
      99         108 :         if (splitSize == 1) {
     100         108 :             singleSimilarity = similarity; // raw one-line similarity, taken before the bailout override below
     101             :         }
     102         108 :         if (ds.bailout && splitSize == 1) {
     103             :             // Treat bailout with splitSize=1 as similar
     104           3 :             similarity = 1.0;
     105             :         }
     106         108 :         if (similarity > bestSimilarity) { // strict comparison: on a tie the smaller split size is kept
     107         108 :             bestSimilarity = similarity;
     108         108 :             bestSplitSize = splitSize;
     109             :         }
     110         108 :         if (ds.bailout || similarity <= config.initialSplitThreshold) { // stop growing on bailout or when similarity is at or below the initial threshold
     111             :             break;
     112             :         }
     113             :     }
     114         108 :     if (bestSplitSize > 1 && bestSimilarity < config.finalSplitThreshold) {
     115             :         // If a split was not detected, reduce the split size to 1 and return
     116             :         // the similarity for single line comparison
     117           0 :         return SplitInfo{1, singleSimilarity};
     118             :     }
     119         108 :     return SplitInfo{bestSplitSize, bestSimilarity};
     120             : }
     121             : /**
                     :  * Post-process a line diff in place.
                     :  *
                     :  * Each change op is expanded via detectChanges(); every other op is copied
                     :  * through unchanged. The rebuilt diff is then swapped into lineDiff.
                     :  *
                     :  * @param lineDiff The diff to process; replaced with the result on return
                     :  */
     122          47 : void LineDiffProcessor::process(StringDiff & lineDiff) {
     123          94 :     StringDiff result;
     124          47 :     auto n = lineDiff.size();
     125             : 
     126         350 :     for (size_t i = 0; i < n; i++) {
     127         303 :         StringDiffOp & diffOp = lineDiff[i];
     128         303 :         if (diffOp.op == StringDiffOp::change) {
     129          64 :             detectChanges(result, diffOp); // may append several ops to result
     130             :         } else {
     131         239 :             result.add_edit(diffOp); // non-change ops pass through as-is
     132             :         }
     133             :     }
     134          47 :     lineDiff.swap(result); // hand the rebuilt diff back to the caller
     135          47 : }
     136             : 
     137             : } // namespace wikidiff2

Generated by: LCOV version 1.13