View Javadoc
1   package org.wikimedia.search.extra.analysis.khmer;
2   
3   import static org.hamcrest.CoreMatchers.equalTo;
4   
5   import java.io.IOException;
6   
7   import org.apache.lucene.analysis.Analyzer;
8   import org.apache.lucene.analysis.TokenStream;
9   import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
10  import org.elasticsearch.Version;
11  import org.elasticsearch.cluster.metadata.IndexMetadata;
12  import org.elasticsearch.common.settings.Settings;
13  import org.elasticsearch.env.Environment;
14  import org.elasticsearch.index.IndexSettings;
15  import org.elasticsearch.index.analysis.IndexAnalyzers;
16  import org.elasticsearch.test.ESTestCase;
17  import org.elasticsearch.test.IndexSettingsModule;
18  import org.hamcrest.MatcherAssert;
19  import org.junit.Test;
20  
21  public class KhmerCharFilterESTest extends ESTestCase {
22      private IndexAnalyzers indexAnalyzers;
23  
24      @Test
25      public void testPrebuilt() throws IOException {
26          assertAnalyzerAvailable("khmer_prebuilt", "prebuilt.json");
27      }
28  
29      private void assertAnalyzerAvailable(String analyzerName, String analysisResource) throws IOException {
30          Settings indexSettings = settings(Version.CURRENT)
31                  .loadFromStream(analysisResource, this.getClass().getResourceAsStream(analysisResource), false)
32                  .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
33                  .build();
34          IndexSettings indexProps = IndexSettingsModule.newIndexSettings("test", indexSettings);
35          Settings settings = Settings.builder()
36                  .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
37                  .build();
38          indexAnalyzers = createTestAnalysis(indexProps, settings, new ExtraAnalysisKhmerPlugin()).indexAnalyzers;
39          match(analyzerName, "វិគីីភីឌាភាសាខ្្មែរ សូមសា្វគមន៍!", "វិគីភីឌាភាសាខ្មែរ សូមស្វាគមន៍");
40      }
41  
42      private void match(String analyzerName, String source, String target) throws IOException {
43          Analyzer analyzer = indexAnalyzers.get(analyzerName).analyzer();
44  
45          TokenStream stream = analyzer.tokenStream("_all", source);
46  
47          stream.reset();
48          CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
49  
50          StringBuilder sb = new StringBuilder();
51          while (stream.incrementToken()) {
52              sb.append(termAtt.toString()).append(" ");
53          }
54  
55          MatcherAssert.assertThat(target, equalTo(sb.toString().trim()));
56      }
57  
58  }