View Javadoc
1   package org.wikimedia.search.extra.analysis.textify;
2   
3   import static org.elasticsearch.plugins.AnalysisPlugin.requiresAnalysisSettings;
4   import static java.util.Collections.singletonList;
5   import static java.util.Collections.singletonMap;
6   
7   import java.util.Arrays;
8   import java.util.List;
9   import java.util.Map;
10  
11  import org.elasticsearch.index.analysis.CharFilterFactory;
12  import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
13  import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
14  import org.elasticsearch.index.analysis.TokenFilterFactory;
15  import org.elasticsearch.index.analysis.TokenizerFactory;
16  import org.elasticsearch.index.analysis.IcuTokenizerFactory;
17  import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
18  import org.elasticsearch.plugins.AnalysisPlugin;
19  import org.elasticsearch.plugins.Plugin;
20  
21  /**
22   * Setup the Elasticsearch plugin.
23   */
24  public class ExtraAnalysisTextifyPlugin extends Plugin implements AnalysisPlugin {
25  
26      @Override
27      public Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() {
28          return singletonMap("limited_mapping",
29              requiresAnalysisSettings(LimitedMappingCharFilterFactory::new));
30      }
31  
32      @Override
33      public List<PreConfiguredCharFilter> getPreConfiguredCharFilters() {
34          return Arrays.asList(
35              PreConfiguredCharFilter.singleton("acronym_fixer", true, AcronymFixerCharFilter::new),
36              PreConfiguredCharFilter.singleton("camelCase_splitter", true, CamelCaseCharFilter::new)
37          );
38      }
39  
40      // Create a local copy of icu_tokenizer so icu_token_repair can access the ScriptAttribute
41      @Override
42      public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
43          return singletonMap("textify_icu_tokenizer", IcuTokenizerFactory::new);
44      }
45  
46      @Override
47      public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
48          return singletonMap("icu_token_repair", requiresAnalysisSettings(ICUTokenRepairFilterFactory::new));
49      }
50  
51      @Override
52      public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
53          return singletonList(PreConfiguredTokenFilter.singleton("icu_token_repair", true,
54              ICUTokenRepairFilter::new));
55      }
56  }