package org.wikimedia.search.extra.analysis.khmer;

import static org.hamcrest.CoreMatchers.equalTo;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.IndexSettingsModule;
import org.hamcrest.MatcherAssert;
import org.junit.Test;

/**
 * Checks that the analysis components registered by {@link ExtraAnalysisKhmerPlugin}
 * can be wired into an index through JSON analysis settings and that the resulting
 * analyzer produces the expected tokens for a sample Khmer sentence.
 */
public class KhmerCharFilterESTest extends ESTestCase {
    /** Analyzers built from the analysis resource under test; set by {@link #assertAnalyzerAvailable}. */
    private IndexAnalyzers indexAnalyzers;

    /**
     * Verifies the {@code khmer_prebuilt} analyzer declared in {@code prebuilt.json}.
     */
    @Test
    public void testPrebuilt() throws IOException {
        assertAnalyzerAvailable("khmer_prebuilt", "prebuilt.json");
    }

    /**
     * Builds the index analyzers described by {@code analysisResource} with the Khmer
     * plugin installed and checks the output of {@code analyzerName} on a fixed sample.
     *
     * @param analyzerName     name of the analyzer to look up in the built index analyzers
     * @param analysisResource classpath resource (resolved relative to this class) holding
     *                         the index analysis settings
     * @throws IOException if the settings cannot be loaded or the token stream fails
     */
    private void assertAnalyzerAvailable(String analyzerName, String analysisResource) throws IOException {
        Settings indexSettings = settings(Version.CURRENT)
            .loadFromStream(analysisResource, this.getClass().getResourceAsStream(analysisResource), false)
            .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
            .build();
        IndexSettings indexProps = IndexSettingsModule.newIndexSettings("test", indexSettings);
        Settings settings = Settings.builder()
            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
            .build();
        indexAnalyzers = createTestAnalysis(indexProps, settings, new ExtraAnalysisKhmerPlugin()).indexAnalyzers;
        // The input carries doubled/misordered marks and trailing punctuation;
        // the second literal is the token text the analyzer is expected to emit.
        match(analyzerName, "វិគីីភីឌាភាសាខ្្មែរ សូមសា្វគមន៍!", "វិគីភីឌាភាសាខ្មែរ សូមស្វាគមន៍");
    }

    /**
     * Runs {@code source} through the named analyzer and asserts that its tokens,
     * joined by single spaces, equal {@code target}.
     *
     * @param analyzerName name of the analyzer to look up
     * @param source       text to analyze
     * @param target       expected space-separated token output
     * @throws IOException if the token stream fails
     */
    private void match(String analyzerName, String source, String target) throws IOException {
        // Fail with a readable message instead of a bare NullPointerException
        // when the analyzer was not registered.
        assertNotNull("analyzer [" + analyzerName + "] is not available", indexAnalyzers.get(analyzerName));
        Analyzer analyzer = indexAnalyzers.get(analyzerName).analyzer();

        StringBuilder sb = new StringBuilder();
        // Follow the full TokenStream consumer workflow (reset, incrementToken*,
        // end, close); try-with-resources guarantees close() even on failure.
        try (TokenStream stream = analyzer.tokenStream("_all", source)) {
            CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                sb.append(termAtt.toString()).append(" ");
            }
            stream.end();
        }

        // Hamcrest convention: actual value first, expected value inside the matcher,
        // so that failure messages label the two sides correctly.
        MatcherAssert.assertThat(sb.toString().trim(), equalTo(target));
    }

}