package org.wikimedia.search.extra.analysis.homoglyph;

import static java.util.stream.Collectors.toList;

import java.util.Comparator;
import java.util.List;
import java.util.regex.Pattern;

import com.google.common.annotations.VisibleForTesting;

import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;


/**
 * Holds two ordered lists of {@link GlyphPair}s, one per translation direction, together with
 * the two {@link Pattern}s supplied at construction time.
 *
 * <p>Both lists are sorted so that pairs with the longest {@code original} come first; pairs
 * whose originals have equal length are ordered by the natural order of the original. The
 * lists are applied in that order when rewriting a {@link StringBuilder} in place.
 */
public class TranslationTable {
    private static final Comparator<GlyphPair> SORT_BY_LENGTH = Comparator.comparingInt(gp -> gp.getOriginal().length());
    private static final Comparator<GlyphPair> SORT_BY_NATURAL_ORDER = Comparator.comparing(GlyphPair::getOriginal);
    /** Longest original first, ties broken by the natural order of the original. */
    private static final Comparator<GlyphPair> LONGEST_FIRST = SORT_BY_LENGTH.reversed().thenComparing(SORT_BY_NATURAL_ORDER);

    private final List<GlyphPair> scriptOneToScriptTwo;
    private final List<GlyphPair> scriptTwoToScriptOne;
    private final Pattern script1Reg;
    private final Pattern script2Reg;

    /**
     * Builds both directional lookup lists from the same set of pairs.
     *
     * @param script1Reg pattern stored and returned by {@link #getScript1Reg()}
     * @param script2Reg pattern stored and returned by {@link #getScript2Reg()}
     * @param homoglyphPairs pairs used as-is for the script-one direction and swapped for the
     *                       script-two direction
     */
    public TranslationTable(Pattern script1Reg, Pattern script2Reg, List<GlyphPair> homoglyphPairs) {
        this.script1Reg = script1Reg;
        this.script2Reg = script2Reg;
        scriptOneToScriptTwo = scriptOneToScriptTwoList(homoglyphPairs);
        scriptTwoToScriptOne = scriptTwoToScriptOneList(homoglyphPairs);
    }

    /**
     * Returns a new list containing the given pairs sorted longest original first, then by the
     * natural order of the original.
     *
     * @param homoglyphPairs pairs to sort; the input list is not modified
     * @return a newly collected, sorted list
     */
    @VisibleForTesting
    @SuppressFBWarnings(value = "OCP_OVERLY_CONCRETE_PARAMETER", justification = "glyph order is semantically important")
    public final List<GlyphPair> scriptOneToScriptTwoList(List<GlyphPair> homoglyphPairs) {
        return homoglyphPairs.stream()
                .sorted(LONGEST_FIRST)
                .collect(toList());
    }

    /**
     * Returns a new list containing the result of {@link GlyphPair#swap} for each given pair,
     * sorted longest original first, then by the natural order of the original. Sorting happens
     * after the swap, so it is based on the swapped pair's original.
     *
     * @param homoglyphPairs pairs to swap and sort; the input list is not modified
     * @return a newly collected, sorted list of swapped pairs
     */
    @VisibleForTesting
    @SuppressFBWarnings(value = "OCP_OVERLY_CONCRETE_PARAMETER", justification = "glyph order is semantically important")
    public final List<GlyphPair> scriptTwoToScriptOneList(List<GlyphPair> homoglyphPairs) {
        return homoglyphPairs.stream()
                .map(GlyphPair::swap)
                .sorted(LONGEST_FIRST)
                .collect(toList());
    }

    /**
     * Rewrites the buffer in place using the script-one-to-script-two list.
     *
     * @param scriptOne buffer to modify
     */
    public void replaceScriptOne(StringBuilder scriptOne) {
        translate(scriptOne, scriptOneToScriptTwo);
    }

    /**
     * Applies each pair in list order, replacing every occurrence of the pair's original with its
     * mirror directly in the buffer.
     *
     * <p>Pairs whose original is the empty string are skipped: an empty search string matches at
     * every index, so the replacement loop below would otherwise never terminate.
     *
     * @param scriptToTranslate buffer to modify in place
     * @param scriptList pairs to apply, in order
     */
    private void translate(StringBuilder scriptToTranslate, List<GlyphPair> scriptList) {
        for (GlyphPair pair : scriptList) {
            String original = pair.getOriginal();
            if (original.isEmpty()) {
                // indexOf("") always succeeds; replacing "nothing" would loop forever.
                continue;
            }
            String mirror = pair.getMirror();
            int found = scriptToTranslate.indexOf(original);
            while (found >= 0) {
                scriptToTranslate.replace(found, found + original.length(), mirror);
                // Resume after the text just inserted so the replacement itself is not rescanned.
                found = scriptToTranslate.indexOf(original, found + mirror.length());
            }
        }
    }

    /**
     * Rewrites the buffer in place using the script-two-to-script-one list.
     *
     * @param scriptTwo buffer to modify
     */
    public void replaceScriptTwo(StringBuilder scriptTwo) {
        translate(scriptTwo, scriptTwoToScriptOne);
    }

    /**
     * @return the first pattern passed to the constructor
     */
    public Pattern getScript1Reg() {
        return script1Reg;
    }

    /**
     * @return the second pattern passed to the constructor
     */
    public Pattern getScript2Reg() {
        return script2Reg;
    }
}