package org.wikimedia.search.extra;

import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.el.GreekLowerCaseFilter;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.ga.IrishLowerCaseFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilter;
import org.apache.lucene.analysis.ngram.NGramTokenizer;
import org.apache.lucene.analysis.pattern.PatternReplaceFilter;
import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
import org.elasticsearch.index.analysis.Analysis;
import org.elasticsearch.index.analysis.AnalyzerProvider;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
import org.elasticsearch.indices.analysis.AnalysisModule;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.test.ESIntegTestCase.ClusterScope;

/**
 * Base class for the plugin's integration tests.
 *
 * <p>Runs against a suite-scoped test cluster (no transport clients) whose nodes load
 * {@link ExtraCorePlugin} together with {@link MockPlugin}.
 */
@ClusterScope(scope = ESIntegTestCase.Scope.SUITE, transportClientRatio = 0.0)
public class AbstractPluginIntegrationTest extends ESIntegTestCase {

    /**
     * Lists the plugins installed on every node of the test cluster.
     *
     * @return an unmodifiable list holding {@link ExtraCorePlugin} and {@link MockPlugin}
     */
    @Override
    protected Collection<Class<? extends Plugin>> nodePlugins() {
        return Collections.<Class<? extends Plugin>>unmodifiableList(
                Arrays.asList(ExtraCorePlugin.class, MockPlugin.class));
    }

    /**
     * Test-only analysis plugin that registers a small set of Lucene-backed analysis
     * components under well-known names: the {@code lowercase}, {@code pattern_replace} and
     * {@code keyword_repeat} token filters, the {@code whitespace} and {@code nGram}
     * tokenizers, and the {@code english} analyzer.
     *
     * <p>NOTE(review): presumably these stand in for components that the test cluster does
     * not load by itself; confirm against the test classpath.
     */
    public static class MockPlugin extends Plugin implements AnalysisPlugin {

        /**
         * Registers the mock token filters.
         *
         * @return an unmodifiable map from filter name to its provider
         */
        @Override
        public Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
            Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> map = new HashMap<>();

            map.put("lowercase", (isettings, env, name, settings) -> new TokenFilterFactory() {
                @Override
                public String name() {
                    return name;
                }

                @Override
                public TokenStream create(TokenStream tokenStream) {
                    String lang = settings.get("language");
                    // "language" is optional. Switching on a null String throws
                    // NullPointerException, so handle the unset case explicitly and fall
                    // back to the generic filter, the same as the default branch below.
                    if (lang == null) {
                        return new LowerCaseFilter(tokenStream);
                    }
                    switch (lang) {
                        case "greek":
                            return new GreekLowerCaseFilter(tokenStream);
                        case "irish":
                            return new IrishLowerCaseFilter(tokenStream);
                        case "turkish":
                            return new TurkishLowerCaseFilter(tokenStream);
                        default:
                            return new LowerCaseFilter(tokenStream);
                    }
                }
            });

            map.put("pattern_replace", (isettings, env, name, settings) -> new TokenFilterFactory() {
                @Override
                public String name() {
                    return name;
                }

                @Override
                public TokenStream create(TokenStream tokenStream) {
                    // Reads the "pattern" and "replacement" settings; the final "true"
                    // asks the filter to replace every match rather than only the first.
                    Pattern p = Pattern.compile(settings.get("pattern"));
                    String repl = settings.get("replacement");
                    return new PatternReplaceFilter(tokenStream, p, repl, true);
                }
            });

            map.put("keyword_repeat", (isettings, env, name, settings) -> new TokenFilterFactory() {
                @Override
                public String name() {
                    return name;
                }

                @Override
                public TokenStream create(TokenStream tokenStream) {
                    return new KeywordRepeatFilter(tokenStream);
                }
            });

            return Collections.unmodifiableMap(map);
        }

        /**
         * Registers the mock tokenizers.
         *
         * <p>The {@code nGram} tokenizer reads {@code min_gram} and {@code max_gram} from its
         * settings, both defaulting to 3.
         *
         * @return an unmodifiable map from tokenizer name to its provider
         */
        @Override
        public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
            Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> map = new HashMap<>();

            map.put("whitespace", (isettings, env, name, settings) ->
                    TokenizerFactory.newFactory("whitespace", WhitespaceTokenizer::new));

            map.put("nGram", (isettings, env, name, settings) ->
                    TokenizerFactory.newFactory("nGram", () ->
                            new NGramTokenizer(
                                    settings.getAsInt("min_gram", 3),
                                    settings.getAsInt("max_gram", 3))));

            return Collections.unmodifiableMap(map);
        }

        /**
         * Registers the {@code english} analyzer, backed by Lucene's {@link EnglishAnalyzer}.
         *
         * <p>Stop words are parsed from the settings with the analyzer's default stop set as
         * fallback; stem exclusions are parsed from the settings with an empty set as fallback.
         *
         * @return a single-entry map for the {@code english} analyzer provider
         */
        @Override
        public Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
            return Collections.singletonMap("english",
                    (isettings, env, name, settings) ->
                            new AbstractIndexAnalyzerProvider<Analyzer>(isettings, name, settings) {
                                @Override
                                public Analyzer get() {
                                    return new EnglishAnalyzer(
                                            Analysis.parseStopWords(env, settings, EnglishAnalyzer.getDefaultStopSet()),
                                            Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
                                }
                            }
            );
        }
    }
}