// View Javadoc
1   package org.wikimedia.search.extra;
2   
3   import java.util.Arrays;
4   import java.util.Collection;
5   import java.util.Collections;
6   import java.util.HashMap;
7   import java.util.Map;
8   import java.util.regex.Pattern;
9   
10  import org.apache.lucene.analysis.Analyzer;
11  import org.apache.lucene.analysis.CharArraySet;
12  import org.apache.lucene.analysis.LowerCaseFilter;
13  import org.apache.lucene.analysis.TokenStream;
14  import org.apache.lucene.analysis.core.WhitespaceTokenizer;
15  import org.apache.lucene.analysis.el.GreekLowerCaseFilter;
16  import org.apache.lucene.analysis.en.EnglishAnalyzer;
17  import org.apache.lucene.analysis.ga.IrishLowerCaseFilter;
18  import org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilter;
19  import org.apache.lucene.analysis.ngram.NGramTokenizer;
20  import org.apache.lucene.analysis.pattern.PatternReplaceFilter;
21  import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
22  import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
23  import org.elasticsearch.index.analysis.Analysis;
24  import org.elasticsearch.index.analysis.AnalyzerProvider;
25  import org.elasticsearch.index.analysis.TokenFilterFactory;
26  import org.elasticsearch.index.analysis.TokenizerFactory;
27  import org.elasticsearch.indices.analysis.AnalysisModule;
28  import org.elasticsearch.plugins.AnalysisPlugin;
29  import org.elasticsearch.plugins.Plugin;
30  import org.elasticsearch.test.ESIntegTestCase;
31  import org.elasticsearch.test.ESIntegTestCase.ClusterScope;
32  
33  @ClusterScope(scope = ESIntegTestCase.Scope.SUITE, transportClientRatio = 0.0)
34  public class AbstractPluginIntegrationTest extends ESIntegTestCase {
35      @Override
36      protected Collection<Class<? extends Plugin>> nodePlugins() {
37          return Collections.<Class<? extends Plugin>>unmodifiableList(Arrays.asList(ExtraCorePlugin.class, MockPlugin.class));
38      }
39  
40      public static class MockPlugin extends Plugin implements AnalysisPlugin {
41          @Override
42          public Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
43              Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> map = new HashMap<>();
44              map.put("lowercase", (isettings, env, name, settings) -> new TokenFilterFactory() {
45                  @Override
46                  public String name() {
47                      return name;
48                  }
49  
50                  @Override
51                  public TokenStream create(TokenStream tokenStream) {
52                      String lang = settings.get("language");
53                      switch (lang) {
54                          case "greek":
55                              return new GreekLowerCaseFilter(tokenStream);
56                          case "irish":
57                              return new IrishLowerCaseFilter(tokenStream);
58                          case "turkish":
59                              return new TurkishLowerCaseFilter(tokenStream);
60                          default:
61                              return new LowerCaseFilter(tokenStream);
62                      }
63                  }
64              });
65              map.put("pattern_replace", (isettings, env, name, settings) -> new TokenFilterFactory() {
66                  @Override
67                  public String name() {
68                      return name;
69                  }
70  
71                  @Override
72                  public TokenStream create(TokenStream tokenStream) {
73                      Pattern p = Pattern.compile(settings.get("pattern"));
74                      String repl = settings.get("replacement");
75                      return new PatternReplaceFilter(tokenStream, p, repl, true);
76                  }
77              });
78              map.put("keyword_repeat", (isettings, env, name, settings) -> new TokenFilterFactory() {
79                  @Override
80                  public String name() {
81                      return name;
82                  }
83  
84                  @Override
85                  public TokenStream create(TokenStream tokenStream) {
86                      return new KeywordRepeatFilter(tokenStream);
87                  }
88              });
89              return Collections.unmodifiableMap(map);
90          }
91  
92          @Override
93          public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
94              Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> map = new HashMap<>();
95              map.put("whitespace", (isettings, env, name, settings) -> TokenizerFactory.newFactory("whitespace", WhitespaceTokenizer::new));
96              map.put("nGram", (isettings, env, name, settings) ->
97                      TokenizerFactory.newFactory("nGram", () ->
98                              new NGramTokenizer(settings.getAsInt("min_gram", 3), settings.getAsInt("max_gram", 3))));
99  
100             return Collections.unmodifiableMap(map);
101         }
102 
103         @Override
104         public Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
105             return Collections.singletonMap("english",
106                 (isettings, env, name, settings) -> new AbstractIndexAnalyzerProvider<Analyzer>(isettings, name, settings) {
107                     @Override
108                     public Analyzer get() {
109                         return new EnglishAnalyzer(
110                             Analysis.parseStopWords(env, settings, EnglishAnalyzer.getDefaultStopSet()),
111                             Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
112                     }
113                 }
114             );
115         }
116     }
117 }