1 package org.wikimedia.search.extra.analysis.homoglyph; 2 3 import static java.util.Arrays.asList; 4 import static java.util.Collections.unmodifiableList; 5 import static java.util.regex.Pattern.compile; 6 import static org.wikimedia.search.extra.analysis.homoglyph.GlyphPair.gp; 7 8 import java.util.List; 9 import java.util.regex.Pattern; 10 11 public final class TranslationTableDictionaries { 12 public static final List<GlyphPair> LATIN_TO_CYRILLIC = unmodifiableList(asList( 13 gp("a", "а"), 14 gp("A", "А"), 15 gp("ă", "ӑ"), 16 gp("Ă", "Ӑ"), 17 gp("ä", "ӓ"), 18 gp("Ä", "Ӓ"), 19 gp("æ", "ӕ"), 20 gp("Æ", "Ӕ"), 21 gp("B", "В"), 22 gp("c", "с"), 23 gp("C", "С"), 24 gp("ç", "ҫ"), 25 gp("Ç", "Ҫ"), 26 gp("e", "е"), 27 gp("E", "Е"), 28 gp("è", "ѐ"), 29 gp("È", "Ѐ"), 30 gp("ë", "ё"), 31 gp("Ë", "Ё"), 32 gp("ĕ", "ӗ"), 33 gp("Ĕ", "Ӗ"), 34 gp("ə", "ә"), 35 gp("Ə", "Ә"), 36 gp("H", "Н"), 37 gp("i", "і"), 38 gp("I", "І"), 39 gp("ï", "ї"), 40 gp("Ï", "Ї"), 41 gp("j", "ј"), 42 gp("J", "Ј"), 43 gp("k", "к"), 44 gp("K", "К"), 45 gp("M", "М"), 46 gp("o", "о"), 47 gp("O", "О"), 48 gp("ö", "ӧ"), 49 gp("Ö", "Ӧ"), 50 gp("p", "р"), 51 gp("P", "Р"), 52 gp("Q", "Ԛ"), 53 gp("s", "ѕ"), 54 gp("S", "Ѕ"), 55 gp("T", "Т"), 56 gp("W", "Ԝ"), 57 gp("x", "х"), 58 gp("X", "Х"), 59 gp("y", "у"), 60 gp("Y", "У"), 61 gp("ȳ", "ӯ"), 62 gp("ÿ", "ӱ"), 63 gp("á", "а́"), 64 gp("é", "е́"), 65 gp("í", "і́"), 66 gp("ó", "о́"), 67 gp("ý", "у́"), 68 gp("ħ", "ћ"), 69 gp("ɜ", "з") 70 )); 71 72 public static final Pattern LATIN_REG = compile("\\p{IsLatin}"); 73 public static final Pattern CYR_REG = compile("\\p{IsCyrillic}"); 74 75 private TranslationTableDictionaries() { 76 } 77 }