package org.wikimedia.search.extra.analysis.turkish;

import static org.hamcrest.CoreMatchers.equalTo;

import java.io.IOException;
import java.io.InputStream;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.IndexSettingsModule;
import org.hamcrest.MatcherAssert;
import org.junit.Test;

/**
 * Checks that analyzers declared in the JSON analysis resources next to this class can be
 * built with {@link ExtraAnalysisTurkishPlugin} installed, and that they turn a sample
 * sentence into the expected sequence of terms.
 */
public class BetterApostropheFilterESTest extends ESTestCase {
    /** Analyzers built by the most recent call to {@link #assertAnalyzerAvailable}. */
    private IndexAnalyzers indexAnalyzers;

    /** Exercises the {@code turkish_prebuilt} analyzer configured by {@code prebuilt.json}. */
    @Test
    public void testPrebuilt() throws IOException {
        assertAnalyzerAvailable("turkish_prebuilt", "prebuilt.json");
    }

    /** Exercises the {@code turkish_redefined} analyzer configured by {@code redefined.json}. */
    @Test
    public void testRedefined() throws IOException {
        assertAnalyzerAvailable("turkish_redefined", "redefined.json");
    }

    /**
     * Builds the index analyzers from the given analysis resource and verifies that the named
     * analyzer produces the expected terms for a fixed sample sentence.
     *
     * @param analyzerName     name of the analyzer to look up in the built analysis
     * @param analysisResource class-relative resource holding the index analysis settings
     * @throws IOException if the resource cannot be read or analysis fails
     */
    private void assertAnalyzerAvailable(String analyzerName, String analysisResource) throws IOException {
        Settings indexSettings;
        // Close the resource stream ourselves instead of relying on the settings loader to do it.
        try (InputStream resource = this.getClass().getResourceAsStream(analysisResource)) {
            // Fail with a readable message rather than an obscure error deep inside the loader.
            assertNotNull("analysis resource not found on classpath: " + analysisResource, resource);
            indexSettings = settings(Version.CURRENT)
                    .loadFromStream(analysisResource, resource, false)
                    .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
                    .build();
        }
        IndexSettings indexProps = IndexSettingsModule.newIndexSettings("test", indexSettings);
        Settings settings = Settings.builder()
                .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
                .build();
        indexAnalyzers = createTestAnalysis(indexProps, settings, new ExtraAnalysisTurkishPlugin()).indexAnalyzers;
        match(analyzerName,
                "Vikipedi'nin ana sunucuları ABD'nin Florida Eyaleti'ndeki Tampa kentinde",
                "Vikipedi ana sunucuları ABD Florida Eyaleti Tampa kentinde");
    }

    /**
     * Analyzes {@code source} with the named analyzer and asserts that the emitted terms,
     * joined by single spaces, equal {@code target}.
     *
     * @param analyzerName name of the analyzer in {@link #indexAnalyzers}
     * @param source       text to analyze
     * @param target       expected space-separated terms
     * @throws IOException if the token stream fails
     */
    private void match(String analyzerName, String source, String target) throws IOException {
        Analyzer analyzer = indexAnalyzers.get(analyzerName).analyzer();

        StringBuilder sb = new StringBuilder();
        // Follow the full TokenStream consumer workflow: reset, incrementToken*, end, close.
        try (TokenStream stream = analyzer.tokenStream("_all", source)) {
            CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                sb.append(termAtt.toString()).append(" ");
            }
            stream.end();
        }

        // Actual value first, expected inside the matcher, so failure messages read correctly.
        MatcherAssert.assertThat(sb.toString().trim(), equalTo(target));
    }
}