View Javadoc
1   package org.wikimedia.search.extra.analysis.turkish;
2   
3   import static org.hamcrest.CoreMatchers.equalTo;
4   
5   import java.io.IOException;
6   
7   import org.apache.lucene.analysis.Analyzer;
8   import org.apache.lucene.analysis.TokenStream;
9   import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
10  import org.elasticsearch.Version;
11  import org.elasticsearch.cluster.metadata.IndexMetadata;
12  import org.elasticsearch.common.settings.Settings;
13  import org.elasticsearch.env.Environment;
14  import org.elasticsearch.index.IndexSettings;
15  import org.elasticsearch.index.analysis.IndexAnalyzers;
16  import org.elasticsearch.test.ESTestCase;
17  import org.elasticsearch.test.IndexSettingsModule;
18  import org.hamcrest.MatcherAssert;
19  import org.junit.Test;
20  
21  public class BetterApostropheFilterESTest extends ESTestCase {
22      private IndexAnalyzers indexAnalyzers;
23  
24      @Test
25      public void testPrebuilt() throws IOException {
26          assertAnalyzerAvailable("turkish_prebuilt", "prebuilt.json");
27      }
28  
29      @Test
30      public void testRedefined() throws IOException {
31          assertAnalyzerAvailable("turkish_redefined", "redefined.json");
32      }
33  
34      private void assertAnalyzerAvailable(String analyzerName, String analysisResource) throws IOException {
35          Settings indexSettings = settings(Version.CURRENT)
36                  .loadFromStream(analysisResource, this.getClass().getResourceAsStream(analysisResource), false)
37                  .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
38                  .build();
39          IndexSettings indexProps = IndexSettingsModule.newIndexSettings("test", indexSettings);
40          Settings settings = Settings.builder()
41                  .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
42                  .build();
43          indexAnalyzers = createTestAnalysis(indexProps, settings, new ExtraAnalysisTurkishPlugin()).indexAnalyzers;
44          match(analyzerName,
45              "Vikipedi'nin ana sunucuları ABD'nin Florida Eyaleti'ndeki Tampa kentinde",
46              "Vikipedi ana sunucuları ABD Florida Eyaleti Tampa kentinde");
47      }
48  
49      private void match(String analyzerName, String source, String target) throws IOException {
50          Analyzer analyzer = indexAnalyzers.get(analyzerName).analyzer();
51  
52          TokenStream stream = analyzer.tokenStream("_all", source);
53          stream.reset();
54          CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
55  
56          StringBuilder sb = new StringBuilder();
57          while (stream.incrementToken()) {
58              sb.append(termAtt.toString()).append(" ");
59          }
60  
61          MatcherAssert.assertThat(target, equalTo(sb.toString().trim()));
62      }
63  }