View Javadoc
1   package org.wikimedia.search.extra.analysis.homoglyph;
2   
3   import static java.util.stream.Collectors.toList;
4   
5   import java.util.Comparator;
6   import java.util.List;
7   import java.util.regex.Pattern;
8   
9   import com.google.common.annotations.VisibleForTesting;
10  
11  import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
12  
13  
14  public class TranslationTable {
15      private static final Comparator<GlyphPair> SORT_BY_LENGTH = Comparator.comparingInt(gp -> gp.getOriginal().length());
16      private static final Comparator<GlyphPair> SORT_BY_NATURAL_ORDER = Comparator.comparing(GlyphPair::getOriginal);
17  
18      private final List<GlyphPair> scriptOneToScriptTwo;
19      private final List<GlyphPair> scriptTwoToScriptOne;
20      private final Pattern script1Reg;
21      private final Pattern script2Reg;
22  
23      public TranslationTable(Pattern script1Reg, Pattern script2Reg, List<GlyphPair> homoglyphPairs) {
24          this.script1Reg = script1Reg;
25          this.script2Reg = script2Reg;
26          scriptOneToScriptTwo = scriptOneToScriptTwoList(homoglyphPairs);
27          scriptTwoToScriptOne = scriptTwoToScriptOneList(homoglyphPairs);
28      }
29  
30      @VisibleForTesting
31      @SuppressFBWarnings(value = "OCP_OVERLY_CONCRETE_PARAMETER", justification = "glyph order is semantically important")
32      public final List<GlyphPair> scriptOneToScriptTwoList(List<GlyphPair> homoglyphPairs) {
33          return homoglyphPairs.stream()
34                  .sorted(SORT_BY_LENGTH.reversed().thenComparing(SORT_BY_NATURAL_ORDER))
35                  .collect(toList());
36      }
37  
38      @VisibleForTesting
39      @SuppressFBWarnings(value = "OCP_OVERLY_CONCRETE_PARAMETER", justification = "glyph order is semantically important")
40      public final List<GlyphPair> scriptTwoToScriptOneList(List<GlyphPair> homoglyphPairs) {
41          return homoglyphPairs.stream()
42                  .map(GlyphPair::swap)
43                  .sorted(SORT_BY_LENGTH.reversed().thenComparing(SORT_BY_NATURAL_ORDER))
44                  .collect((toList()));
45      }
46  
47      public void replaceScriptOne(StringBuilder scriptOne) {
48          translate(scriptOne, scriptOneToScriptTwo);
49      }
50  
51      private void translate(StringBuilder scriptToTranslate, List<GlyphPair> scriptList) {
52          scriptList.forEach(pair -> {
53              int found = scriptToTranslate.indexOf(pair.getOriginal());
54              while (found >= 0) {
55                  scriptToTranslate.replace(found, found + pair.getOriginal().length(), pair.getMirror());
56                  found = scriptToTranslate.indexOf(pair.getOriginal(), found + pair.getMirror().length());
57              }
58          });
59      }
60  
61      public void replaceScriptTwo(StringBuilder scriptTwo) {
62          translate(scriptTwo, scriptTwoToScriptOne);
63      }
64  
65      public Pattern getScript1Reg() {
66          return script1Reg;
67      }
68  
69      public Pattern getScript2Reg() {
70          return script2Reg;
71      }
72  }