View Javadoc
1   package org.wikimedia.search.extra.analysis.homoglyph;
2   
3   import static java.util.Arrays.asList;
4   import static java.util.Collections.unmodifiableList;
5   import static java.util.regex.Pattern.compile;
6   import static org.wikimedia.search.extra.analysis.homoglyph.GlyphPair.gp;
7   
8   import java.util.List;
9   import java.util.regex.Pattern;
10  
11  public final class TranslationTableDictionaries {
12      public static final List<GlyphPair> LATIN_TO_CYRILLIC = unmodifiableList(asList(
13              gp("a", "а"),
14              gp("A", "А"),
15              gp("ă", "ӑ"),
16              gp("Ă", "Ӑ"),
17              gp("ä", "ӓ"),
18              gp("Ä", "Ӓ"),
19              gp("æ", "ӕ"),
20              gp("Æ", "Ӕ"),
21              gp("B", "В"),
22              gp("c", "с"),
23              gp("C", "С"),
24              gp("ç", "ҫ"),
25              gp("Ç", "Ҫ"),
26              gp("e", "е"),
27              gp("E", "Е"),
28              gp("è", "ѐ"),
29              gp("È", "Ѐ"),
30              gp("ë", "ё"),
31              gp("Ë", "Ё"),
32              gp("ĕ", "ӗ"),
33              gp("Ĕ", "Ӗ"),
34              gp("ə", "ә"),
35              gp("Ə", "Ә"),
36              gp("H", "Н"),
37              gp("i", "і"),
38              gp("I", "І"),
39              gp("ï", "ї"),
40              gp("Ï", "Ї"),
41              gp("j", "ј"),
42              gp("J", "Ј"),
43              gp("k", "к"),
44              gp("K", "К"),
45              gp("M", "М"),
46              gp("o", "о"),
47              gp("O", "О"),
48              gp("ö", "ӧ"),
49              gp("Ö", "Ӧ"),
50              gp("p", "р"),
51              gp("P", "Р"),
52              gp("Q", "Ԛ"),
53              gp("s", "ѕ"),
54              gp("S", "Ѕ"),
55              gp("T", "Т"),
56              gp("W", "Ԝ"),
57              gp("x", "х"),
58              gp("X", "Х"),
59              gp("y", "у"),
60              gp("Y", "У"),
61              gp("ȳ", "ӯ"),
62              gp("ÿ", "ӱ"),
63              gp("á", "а́"),
64              gp("é", "е́"),
65              gp("í", "і́"),
66              gp("ó", "о́"),
67              gp("ý", "у́"),
68              gp("ħ", "ћ"),
69              gp("ɜ", "з")
70      ));
71  
72      public static final Pattern LATIN_REG = compile("\\p{IsLatin}");
73      public static final Pattern CYR_REG = compile("\\p{IsCyrillic}");
74  
75      private TranslationTableDictionaries() {
76      }
77  }