Line data Source code
1 : #ifndef WORDDIFFCACHE_H
2 : #define WORDDIFFCACHE_H
3 :
4 : #include "DiffEngine.h"
5 : #include "WordDiffStats.h"
6 : #include "TextUtil.h"
7 :
8 : #include <map>
9 : #include <vector>
10 : #include <memory>
11 : #include <limits>
12 :
13 : namespace wikidiff2 {
14 :
15 : class WordDiffCache {
16 : public:
17 : typedef std::basic_string<char, std::char_traits<char>, WD2_ALLOCATOR<char> > String;
18 : typedef Diff<Word> WordDiff;
19 : typedef std::shared_ptr<WordDiff> WordDiffPtr;
20 : typedef std::vector<Word, WD2_ALLOCATOR<Word> > WordVector;
21 : typedef std::vector<String, WD2_ALLOCATOR<String> > StringVector;
22 : typedef std::vector<const String*, WD2_ALLOCATOR<const String*> > PointerVector;
23 : typedef PointerVector::iterator PointerVectorIterator;
24 :
25 47 : WordDiffCache(const DiffConfig & config_)
26 47 : : diffConfig(config_), textUtil(TextUtil::getInstance())
27 47 : {}
28 :
29 : /**
30 : * Get a diff comparing one or more lines with one or more other lines,
31 : * by concatenating the lines.
32 : *
33 : * @param from The first line on the left-hand side. This pointer must
34 : * be within the line vectors registered with setLines().
35 : * @param fromSize The number of lines on the left-hand side.
36 : * @param to The first line on the right-hand side. This pointer must
37 : * be within the line vectors registered with setLines().
38 : * @param toSize The number of lines on the right-hand side.
39 : */
40 : WordDiffPtr getConcatDiff(const String * from, size_t fromSize,
41 : const String * to, size_t toSize);
42 :
43 : /**
44 : * Get a diff comparing a single line with another single line.
45 : * The addresses of the input strings must be in the vectors
46 : * registered with setLines().
47 : */
48 : WordDiffPtr getDiff(const String * from, const String * to);
49 :
50 : /**
51 : * Get diff stats for a single line comparison. The addresses of the
52 : * input strings must be in the vectors registered with setLines().
53 : */
54 : const WordDiffStats & getDiffStats(const String * from, const String * to);
55 :
56 : /**
57 : * Get diff stats for a multi-line comparison.
58 : *
59 : * @param from The first line on the left-hand side. This pointer must
60 : * be within the line vectors registered with setLines().
61 : * @param fromSize The number of lines on the left-hand side.
62 : * @param to The first line on the right-hand side. This pointer must
63 : * be within the line vectors registered with setLines().
64 : * @param toSize The number of lines on the right-hand side.
65 : */
66 : const WordDiffStats & getConcatDiffStats(const String * from, size_t fromSize,
67 : const String * to, size_t toSize);
68 : /**
69 : * Register line vector pointers so that we can interpret String pointers
70 : * as array offsets. The vectors must not be destroyed or resized (or have
71 : * other things done to them that invalidate their iterators) unless the
72 : * caller is done with accessing WordDiffPtr objects and the cache is
73 : * destroyed.
74 : */
75 : void setLines(const StringVector * lines0, const StringVector * lines1);
76 :
77 : /**
78 : * Write some statistics about hit ratios to stderr.
79 : */
80 : void dumpDebugReport();
81 :
82 : private:
83 : /** The key class used to find diffs in the cache */
84 : struct DiffCacheKey {
85 : int from;
86 : int fromSize;
87 : int to;
88 : int toSize;
89 :
90 1677 : DiffCacheKey(size_t from_, size_t fromSize_, size_t to_, size_t toSize_)
91 1677 : : from(sizetToInt(from_)),
92 3354 : fromSize(sizetToInt(fromSize_)),
93 3354 : to(sizetToInt(to_)),
94 1677 : toSize(sizetToInt(toSize_))
95 1677 : {}
96 :
97 : /** For std::map */
98 : bool operator<(const DiffCacheKey & other) const;
99 : };
100 :
101 : /** The key class used to find exploded word vectors in the cache */
102 : struct WordsCacheKey {
103 : int line;
104 : int size;
105 :
106 1302 : WordsCacheKey(size_t line_, size_t size_)
107 1302 : : line(sizetToInt(line_)), size(sizetToInt(size_))
108 1302 : {}
109 :
110 : /** For std::map */
111 : bool operator<(const WordsCacheKey & other) const;
112 : };
113 :
114 : typedef std::map<WordsCacheKey, WordVector, std::less<WordsCacheKey>,
115 : WD2_ALLOCATOR<std::pair<const WordsCacheKey, WordVector> > > WordsCache;
116 :
117 : typedef std::map<DiffCacheKey, WordDiffPtr, std::less<DiffCacheKey>,
118 : WD2_ALLOCATOR<std::pair<const DiffCacheKey, WordDiffPtr> > > DiffCache;
119 :
120 : typedef std::map<DiffCacheKey, WordDiffStats, std::less<DiffCacheKey>,
121 : WD2_ALLOCATOR<std::pair<const DiffCacheKey, WordDiffStats>>> StatsCache;
122 :
123 : static String newlineStorage;
124 : static Word newline;
125 :
126 : DiffConfig diffConfig;
127 : WordsCache wordsCache;
128 : WordVector tempWords;
129 : DiffCache diffCache;
130 : StatsCache statsCache;
131 : TextUtil & textUtil;
132 :
133 : /** The registered line vectors, used to convert pointers to integer offsets */
134 : const StringVector* linesVecPtrs[2];
135 :
136 : struct {
137 : /** The number of hits on the word diff cache */
138 : int diffHits = 0;
139 : /** The number of requests to the word diff cache */
140 : int diffTotal = 0;
141 : /** The number of hits on the WordDiffStats cache */
142 : int statHits = 0;
143 : /** The number of requests to the WordDiffStats cache */
144 : int statTotal = 0;
145 : /** The number of hits on the exploded word vector cache */
146 : int wordHits = 0;
147 : /** The number of requests to the exploded word vector cache */
148 : int wordTotal = 0;
149 : /** The number of hits on the multi-line word vector cache */
150 : int concatWordHits = 0;
151 : /** The number of requests to the multi-line word vector cache */
152 : int concatWordTotal = 0;
153 : } hitStats;
154 :
155 : const WordVector & explodeWords(const String * line);
156 : const WordVector & getConcatWords(const String * lines, size_t numLines);
157 : size_t getKey(const String * str);
158 :
159 9312 : static int sizetToInt(size_t x) {
160 9312 : if (x > std::numeric_limits<int>().max()) {
161 0 : throwOutOfRange();
162 : }
163 9312 : return (int)x;
164 : }
165 :
166 : static void throwOutOfRange();
167 :
168 : };
169 :
170 : } // namespace wikidiff2
171 : #endif // WORDDIFFCACHE_H
|