LCOV - code coverage report
Current view: top level - src/lib - WordDiffCache.cpp (source / functions) Hit Total Coverage
Test: mediawiki/php/wikidiff2 test coverage report Lines: 72 104 69.2 %
Date: 2023-07-04 10:20:16 Functions: 10 12 83.3 %

          Line data    Source code
       1             : #include "WordDiffCache.h"
       2             : #include "WordDiffSegmenter.h"
       3             : 
       4             : namespace wikidiff2 {
       5             : 
       6             : WordDiffCache::String WordDiffCache::newlineStorage = "\n";
       7             : Word WordDiffCache::newline(newlineStorage.begin(),
       8             :     newlineStorage.begin() + 1, newlineStorage.begin() + 1);
       9             : 
      10         170 : WordDiffCache::WordDiffPtr WordDiffCache::getDiff(const String * from, const String * to)
      11             : {
      12         170 :     return getConcatDiff(from, 1, to, 1);
      13             : }
      14             : 
      15         768 : WordDiffCache::WordDiffPtr WordDiffCache::getConcatDiff(
      16             :     const String * fromStart, size_t fromSize,
      17             :     const String * toStart, size_t toSize)
      18             : {
      19             :     DiffCacheKey key(
      20             :         getKey(fromStart), fromSize,
      21         768 :         getKey(toStart), toSize);
      22         768 :     DiffCache::iterator it = diffCache.find(key);
      23         768 :     if (it == diffCache.end()) {
      24         651 :         const WordVector & words1 = getConcatWords(fromStart, fromSize);
      25         651 :         const WordVector & words2 = getConcatWords(toStart, toSize);
      26           0 :         WordDiffPtr wordDiffPtr = std::allocate_shared<WordDiff>(WD2_ALLOCATOR<WordDiff>(),
      27         651 :             diffConfig, words1, words2);
      28             : 
      29         651 :         if (fromSize > 1 || toSize > 1) {
      30           0 :             WordDiffSegmenter::segment(*wordDiffPtr);
      31             :         }
      32             : 
      33         651 :         auto result = diffCache.insert(std::make_pair(key, wordDiffPtr));
      34         651 :         it = result.first;
      35             :     } else {
      36         117 :         hitStats.diffHits++;
      37             :     }
      38         768 :     hitStats.diffTotal++;
      39        1536 :     return it->second;
      40             : }
      41             : 
      42             : /**
      43             :  * Get an integer key from a String address.
      44             :  *
      45             :  * Maybe a weird concept, but not much worse than what we were doing in Diff
      46             :  * already. A hashtable would be slow since String doesn't memoize hashes.
      47             :  * We don't accept any pointer, just one that is inside a previously
      48             :  * registered vector.
      49             :  *
      50             :  * Theoretically we could track line numbers right through DiffEngine/Diff
      51             :  * instead of reconstructing the line numbers from the pointers.
      52             :  */
      53        4656 : size_t WordDiffCache::getKey(const String * str)
      54             : {
      55        4656 :     size_t r = 0;
      56        6984 :     for (size_t i = 0; i < 2; i++) {
      57        6984 :         const StringVector & lines = *(linesVecPtrs[i]);
      58        6984 :         size_t n = lines.size();
      59        6984 :         if (n && str >= &lines[0] && str <= &lines[n - 1]) {
      60        4656 :             return r + str - &lines[0];
      61             :         }
      62        2328 :         r += n;
      63             :     }
      64           0 :     throw std::invalid_argument("WordDiffCache::getKey: unregistered string pointer");
      65             : }
      66             : 
      67         801 : const WordDiffStats & WordDiffCache::getDiffStats(const String * from, const String * to)
      68             : {
      69         801 :     return getConcatDiffStats(from, 1, to, 1);
      70             : }
      71             : 
      72         909 : const WordDiffStats & WordDiffCache::getConcatDiffStats(
      73             :     const String * fromStart, size_t fromSize,
      74             :     const String * toStart, size_t toSize)
      75             : {
      76             :     DiffCacheKey key(
      77             :         getKey(fromStart), fromSize,
      78         909 :         getKey(toStart), toSize);
      79         909 :     StatsCache::iterator it = statsCache.find(key);
      80         909 :     if (it == statsCache.end()) {
      81         598 :         WordDiffPtr diff = getConcatDiff(fromStart, fromSize, toStart, toSize);
      82         598 :         auto result = statsCache.insert(std::make_pair(key, WordDiffStats(*diff)));
      83         598 :         it = result.first;
      84             :     } else {
      85         311 :         hitStats.statHits++;
      86             :     }
      87         909 :     hitStats.statTotal++;
      88         909 :     return it->second;
      89             : }
      90             : 
      91          94 : void WordDiffCache::setLines(const StringVector * lines0, const StringVector * lines1)
      92             : {
      93          94 :     linesVecPtrs[0] = lines0;
      94          94 :     linesVecPtrs[1] = lines1;
      95          94 :     wordsCache.clear();
      96          94 :     diffCache.clear();
      97          94 :     statsCache.clear();
      98          94 : }
      99             : 
     100             : /**
     101             :  * Get a vector of words for a line.
     102             :  */
     103        1302 : const WordDiffCache::WordVector & WordDiffCache::explodeWords(const String * line)
     104             : {
     105        1302 :     WordsCacheKey key(getKey(line), 1);
     106        1302 :     auto it = wordsCache.find(key);
     107        1302 :     hitStats.wordTotal++;
     108        1302 :     if (it != wordsCache.end()) {
     109         933 :         hitStats.wordHits++;
     110         933 :         return it->second;
     111             :     }
     112         369 :     textUtil.explodeWords(*line, tempWords);
     113         369 :     auto result = wordsCache.insert(WordsCache::value_type(key, WordVector()));
     114         369 :     result.first->second.swap(tempWords);
     115         369 :     return result.first->second;
     116             : }
     117             : 
     118             : /**
     119             :  * Get a word vector for one or more lines. If more than one line is requested,
     120             :  * the lines will be concatenated with a newline separator.
     121             :  *
     122             :  * TODO: Unfortunately the concatenated word vector must stay in memory as long
     123             :  * as the associated diffs stay in memory, because the diffs contain pointers
     124             :  * to the words. An improvement would be to have DiffEngine store integer
     125             :  * offsets instead of pointers. This would allow concatenated word vectors to
     126             :  * be temporary, reducing memory usage.
     127             :  */
     128        1302 : const WordDiffCache::WordVector & WordDiffCache::getConcatWords(
     129             :     const String * lines, size_t numLines)
     130             : {
     131        1302 :     if (numLines == 1) {
     132        1302 :         return explodeWords(lines);
     133             :     }
     134             : 
     135           0 :     WordsCacheKey key(getKey(lines), numLines);
     136           0 :     auto it = wordsCache.find(key);
     137           0 :     hitStats.concatWordTotal++;
     138           0 :     if (it != wordsCache.end()) {
     139           0 :         hitStats.concatWordHits++;
     140           0 :         return it->second;
     141             :     }
     142             : 
     143           0 :     WordVector concatWords;
     144           0 :     size_t numWords = 0;
     145           0 :     for (size_t i = 0; i < numLines; i++) {
     146           0 :         numWords += explodeWords(lines + i).size() + 1;
     147             :     }
     148           0 :     concatWords.reserve(numWords);
     149             : 
     150           0 :     for (size_t i = 0; i < numLines; i++) {
     151           0 :         const WordVector & words = explodeWords(lines + i);
     152           0 :         if (i > 0) {
     153           0 :             concatWords.push_back(newline);
     154             :         }
     155           0 :         for (auto it = words.begin(); it != words.end(); it++) {
     156           0 :             concatWords.push_back(*it);
     157             :         }
     158             :     }
     159           0 :     auto result = wordsCache.insert(std::make_pair(key, WordVector()));
     160           0 :     result.first->second.swap(concatWords);
     161           0 :     return result.first->second;
     162             : }
     163             : 
     164           0 : void WordDiffCache::dumpDebugReport()
     165             : {
     166           0 :     auto h = hitStats;
     167             :     using std::endl;
     168           0 :     std::cerr << "Diff cache: " << h.diffHits << " / " << h.diffTotal << endl
     169           0 :         << "Stat cache " << h.statHits << " / " << h.statTotal << endl
     170           0 :         << "Word cache " << h.wordHits << " / " << h.wordTotal << endl
     171           0 :         << "Concatenated line word cache " << h.concatWordHits << " / " << h.concatWordTotal << endl;
     172           0 : }
     173             : 
     174           0 : void WordDiffCache::throwOutOfRange()
     175             : {
     176           0 :     throw std::out_of_range("Numeric value out of range");
     177             : }
     178             : 
     179       16841 : bool WordDiffCache::DiffCacheKey::operator<(const DiffCacheKey & other) const
     180             : {
     181       16841 :     if (from < other.from) return true;
     182       11003 :     if (from > other.from) return false;
     183        6222 :     if (fromSize < other.fromSize) return true;
     184        6222 :     if (fromSize > other.fromSize) return false;
     185        6222 :     if (to < other.to) return true;
     186        3325 :     if (to > other.to) return false;
     187         856 :     if (toSize < other.toSize) return true;
     188         856 :     if (toSize > other.toSize) return false;
     189         856 :     return false;
     190             : }
     191             : 
     192        7648 : bool WordDiffCache::WordsCacheKey::operator<(const WordsCacheKey & other) const
     193             : {
     194        7648 :     if (line < other.line) return true;
     195        4022 :     if (line > other.line) return false;
     196        1866 :     if (size < other.size) return true;
     197        1866 :     if (size > other.size) return false;
     198        1866 :     return false;
     199             : }
     200             : 
     201             : } // namespace wikidiff2

Generated by: LCOV version 1.13