Line data Source code
1 : #ifndef WIKIDIFF2_H
2 : #define WIKIDIFF2_H
3 :
4 : #include "wd2_allocator.h"
5 : #include "Formatter.h"
6 : #include "DiffEngine.h"
7 : #include "Word.h"
8 : #include "LineDiffProcessor.h"
9 : #include <string>
10 : #include <vector>
11 : #include <set>
12 : #include <list>
13 : #include <sstream>
14 :
15 : // uncomment this for inline HTML debug output related to moved lines
16 : //#define DEBUG_MOVED_LINES
17 :
18 : namespace wikidiff2 {
19 :
20 : class Wikidiff2 {
21 : public:
22 : typedef std::basic_string<char, std::char_traits<char>, WD2_ALLOCATOR<char> > String;
23 : typedef std::vector<String, WD2_ALLOCATOR<String> > StringVector;
24 : typedef std::list<Formatter*, WD2_ALLOCATOR<Formatter*> > FormatterPtrList;
25 :
26 : typedef Diff<String> StringDiff;
27 : typedef Diff<Word> WordDiff;
28 :
29 : /**
30 : * Options used to configure the class, passed to the constructor
31 : */
32 : struct Config {
33 : /**
34 : * The number of copied lines shown before and after each change
35 : */
36 : int64_t numContextLines;
37 :
38 : /**
39 : * If the similarity metric between lines exceeds this value the
40 : * line will be shown as a change with a word diff. If not, it will
41 : * be shown as a delete and add. Between 0 and 1.
42 : */
43 : double changeThreshold;
44 :
45 : /**
46 : * If the similarity metric between lines exceeds this value, the
47 : * pair may be considered as a move candidate. Between 0 and 1.
48 : */
49 : double movedLineThreshold;
50 :
51 : /**
52 : * When the number of added and deleted lines in a diff is greater
53 : * than this limit, no attempt to detect moved lines will be made.
54 : */
55 : int64_t maxMovedLines;
56 :
57 : /**
58 : * When comparing two lines for changes within the line, a word-level
59 : * diff will be done unless the product of the LHS word count and
60 : * the RHS word count exceeds this limit.
61 : */
62 : int64_t maxWordLevelDiffComplexity;
63 :
64 : /**
65 : * The maximum number of RHS lines which can be compared with
66 : * one LHS line.
67 : */
68 : int64_t maxSplitSize;
69 :
70 : /**
71 : * The minimum similarity which must be maintained during a split
72 : * detection search. The split size increases until either the
73 : * similarity between the LHS and the multiple RHS lines becomes
74 : * less than initialSplitThreshold, or maxSplitSize is reached.
75 : */
76 : double initialSplitThreshold;
77 :
78 : /**
79 : * The minimum similarity between one LHS line and multiple RHS
80 : * lines which must be achieved to format the block as a split.
81 : */
82 : double finalSplitThreshold;
83 : };
84 :
85 : Wikidiff2(const Config& config_);
86 :
87 : void execute(const String & text1, const String & text2);
88 :
89 : void addFormatter(Formatter & formatter);
90 :
91 : private:
92 : Config config;
93 : DiffConfig lineDiffConfig;
94 : DiffConfig wordDiffConfig;
95 : WordDiffCache wordDiffCache;
96 : LineDiffProcessor::Config ldpConfig;
97 : FormatterPtrList formatters;
98 : LineDiffProcessor lineDiffProcessor;
99 :
100 : struct DiffMapEntry
101 : {
102 : WordDiffStats ds;
103 : int opIndexFrom, opLineFrom, opIndexTo, opLineTo;
104 : bool lhsDisplayed = false, rhsDisplayed = false;
105 :
106 : DiffMapEntry(const WordDiffStats & diffStats,
107 : int opIndexFrom_, int opLineFrom_, int opIndexTo_, int opLineTo_);
108 : };
109 : // PhpAllocator can't be specialized for std::pair, so we're using the standard allocator.
110 : typedef std::map<uint64_t, std::shared_ptr<struct Wikidiff2::DiffMapEntry> > DiffMap;
111 : DiffMap diffMap;
112 :
113 : class AllowPrintMovedLineDiff {
114 : bool detectMovedLines = true; // will be set to false when too many 'add' or 'delete' ops appear in diff.
115 : bool detectMovedLinesValid = false; // whether detectMovedLines is valid.
116 : public:
117 : bool operator() (const StringDiff & linediff, int maxMovedLines); // calculates & caches comparison count
118 : } allowPrintMovedLineDiff;
119 :
120 : void printDiff(const StringDiff & linediff);
121 :
122 : void explodeLines(const String & text, StringVector &lines);
123 :
124 : std::shared_ptr<DiffMapEntry> getDiffMapEntry(
125 : const String * text1, const String * text2,
126 : int opIndexFrom, int opLineFrom,
127 : int opIndexTo, int opLineTo);
128 :
129 : bool printMovedLineDiff(const StringDiff & linediff, int opIndex, int opLine,
130 : int leftLine, int rightLine, int offsetFrom, int offsetTo);
131 :
132 : void printAdd(const String & line, int leftLine, int rightLine, int offsetFrom, int offsetTo);
133 : void printDelete(const String & line, int leftLine, int rightLine, int offsetFrom, int offsetTo);
134 :
135 : void printWordDiff(
136 : const WordDiff & diff,
137 : int leftLine, int rightLine,
138 : int offsetFrom, int offsetTo,
139 : bool printLeft = true, bool printRight = true,
140 : const String & srcAnchor = "", const String & dstAnchor = "",
141 : bool moveDirectionDownwards = false);
142 :
143 : void printWordDiffFromStrings(
144 : const String * text1, const String * text2,
145 : int leftLine, int rightLine,
146 : int offsetFrom, int offsetTo,
147 : bool printLeft = true, bool printRight = true,
148 : const String & srcAnchor = "", const String & dstAnchor = "",
149 : bool moveDirectionDownwards = false);
150 :
151 : void printConcatDiff(
152 : const String * lines1, int numLines1,
153 : const String * lines2, int numLines2,
154 : int leftLine, int rightLine,
155 : int offsetFrom, int offsetTo);
156 :
157 : void printFileHeader();
158 : void printFileFooter();
159 : void printBlockHeader(int leftLine, int rightLine);
160 : void printContext(const String & input, int leftLine, int rightLine, int offsetFrom, int offsetTo);
161 : };
162 :
163 801 : inline Wikidiff2::DiffMapEntry::DiffMapEntry(const WordDiffStats & diffStats,
164 : int opIndexFrom_, int opLineFrom_,
165 : int opIndexTo_, int opLineTo_
166 801 : ):
167 : ds(diffStats),
168 801 : opIndexFrom(opIndexFrom_), opLineFrom(opLineFrom_), opIndexTo(opIndexTo_), opLineTo(opLineTo_)
169 : {
170 801 : }
171 :
172 288 : inline bool Wikidiff2::AllowPrintMovedLineDiff::operator () (const StringDiff & linediff, int maxMovedLines)
173 : {
174 288 : if(!detectMovedLinesValid) {
175 : // count the number of added or removed lines which could have been moved.
176 33 : int adds = 0, deletes = 0;
177 416 : for(int i = 0; i < linediff.size(); ++i) {
178 383 : if(linediff[i].op == DiffOp<String>::add)
179 84 : adds += linediff[i].to.size();
180 383 : if(linediff[i].op == DiffOp<String>::del)
181 118 : deletes += linediff[i].from.size();
182 : // number of comparisons is (number of additions) x (number of deletions).
183 : // if count is too large, don't try detecting moved lines.
184 383 : if(adds+deletes > maxMovedLines) {
185 0 : detectMovedLines = false;
186 0 : break;
187 : }
188 : }
189 33 : detectMovedLinesValid = true;
190 : }
191 288 : return detectMovedLines;
192 : }
193 :
194 : } // namespace wikidiff2
195 :
196 : #endif
|