Line data Source code
1 : #include "LineDiffProcessor.h"
2 :
3 : namespace wikidiff2 {
4 :
5 : LineDiffProcessor::PointerVector LineDiffProcessor::empty;
6 :
7 : /**
8 : * Get diff stats from the cache.
9 : */
10 108 : const WordDiffStats & LineDiffProcessor::getConcatDiffStats(
11 : PointerVectorIterator from, PointerVectorIterator fromEnd,
12 : PointerVectorIterator to, PointerVectorIterator toEnd)
13 : {
14 : // TODO: assert that lines are actually consecutive in memory
15 108 : return wordDiffCache.getConcatDiffStats(
16 216 : *from, fromEnd - from,
17 108 : *to, toEnd - to);
18 : }
19 :
20 : /**
21 : * Go through the changed lines. Detect splits. If the lines are dissimilar,
22 : * convert to delete+add.
23 : */
24 64 : void LineDiffProcessor::detectChanges(StringDiff & result, StringDiffOp & diffOp)
25 : {
26 64 : PointerVectorIterator pDel = diffOp.from.begin(),
27 64 : pDelEnd = diffOp.from.end(),
28 64 : pAdd = diffOp.to.begin(),
29 64 : pAddEnd = diffOp.to.end();
30 :
31 64 : SplitInfo split{0, 0};
32 64 : int savedSize = 0;
33 :
34 108 : auto flushSaved = [&]() {
35 108 : if (savedSize) {
36 60 : result.add_edit(StringDiffOp(StringDiffOp::change,
37 120 : PointerVector(pDel - savedSize, pDel),
38 120 : PointerVector(pAdd - savedSize, pAdd)));
39 60 : savedSize = 0;
40 : }
41 172 : };
42 :
43 172 : for (; pAdd != pAddEnd && pDel != pDelEnd; pDel++, pAdd += split.size) {
44 108 : split = getSplit(pDel, pDelEnd, pAdd, pAddEnd);
45 108 : if (split.size > 1) {
46 : // Add the split as a separate change
47 0 : flushSaved();
48 0 : result.add_edit(StringDiffOp(StringDiffOp::change,
49 0 : PointerVector(pDel, pDel + 1),
50 0 : PointerVector(pAdd, pAdd + split.size)));
51 108 : } else if (split.similarity > config.changeThreshold) {
52 : // Save regular change for aggregation
53 64 : savedSize++;
54 : } else {
55 : // Convert dissimilar change to delete + add
56 44 : flushSaved();
57 44 : result.add_edit(StringDiffOp(StringDiffOp::add,
58 88 : empty, PointerVector(pAdd, pAdd + 1)));
59 44 : result.add_edit(StringDiffOp(StringDiffOp::del,
60 88 : PointerVector(pDel, pDel + 1), empty));
61 : // Set split.size = 1 for the loop increment
62 44 : split.size = 1;
63 : }
64 : }
65 64 : flushSaved();
66 :
67 : // Handle the trailing part which doesn't match due to unequal length
68 64 : if (pDel != pDelEnd) {
69 14 : result.add_edit(StringDiffOp(StringDiffOp::del,
70 28 : PointerVector(pDel, pDelEnd), empty));
71 50 : } else if (pAdd != pAddEnd) {
72 9 : result.add_edit(StringDiffOp(StringDiffOp::add,
73 18 : empty, PointerVector(pAdd, pAddEnd)));
74 : }
75 64 : }
76 :
77 : /**
78 : * Determine whether there is a line split at the start of the given LHS and
79 : * RHS half-open ranges.
80 : *
81 : * @param pDel The start of the LHS range
82 : * @param pDelEnd The end (not inclusive) of the LHS range
83 : * @param pAdd The start of the RHS range
84 : * @param pAddEnd The end (not inclusive) of the RHS range
85 : */
86 108 : LineDiffProcessor::SplitInfo LineDiffProcessor::getSplit(
87 : PointerVectorIterator pDel, PointerVectorIterator pDelEnd,
88 : PointerVectorIterator pAdd, PointerVectorIterator pAddEnd)
89 : {
90 108 : int splitSize = 0;
91 108 : int bestSplitSize = 0;
92 108 : double bestSimilarity = -1;
93 108 : double singleSimilarity = -1;
94 180 : while (pAdd + splitSize < pAddEnd && splitSize < config.maxSplitSize) {
95 108 : splitSize++;
96 : const WordDiffStats & ds = getConcatDiffStats(
97 108 : pDel, pDel + 1, pAdd, pAdd + splitSize);
98 108 : double similarity = ds.charSimilarity;
99 108 : if (splitSize == 1) {
100 108 : singleSimilarity = similarity;
101 : }
102 108 : if (ds.bailout && splitSize == 1) {
103 : // Treat bailout with splitSize=1 as similar
104 3 : similarity = 1.0;
105 : }
106 108 : if (similarity > bestSimilarity) {
107 108 : bestSimilarity = similarity;
108 108 : bestSplitSize = splitSize;
109 : }
110 108 : if (ds.bailout || similarity <= config.initialSplitThreshold) {
111 : break;
112 : }
113 : }
114 108 : if (bestSplitSize > 1 && bestSimilarity < config.finalSplitThreshold) {
115 : // If a split was not detected, reduce the split size to 1 and return
116 : // the similarity for single line comparison
117 0 : return SplitInfo{1, singleSimilarity};
118 : }
119 108 : return SplitInfo{bestSplitSize, bestSimilarity};
120 : }
121 :
122 47 : void LineDiffProcessor::process(StringDiff & lineDiff) {
123 94 : StringDiff result;
124 47 : auto n = lineDiff.size();
125 :
126 350 : for (size_t i = 0; i < n; i++) {
127 303 : StringDiffOp & diffOp = lineDiff[i];
128 303 : if (diffOp.op == StringDiffOp::change) {
129 64 : detectChanges(result, diffOp);
130 : } else {
131 239 : result.add_edit(diffOp);
132 : }
133 : }
134 47 : lineDiff.swap(result);
135 47 : }
136 :
137 : } // namespace wikidiff2
|