1 package org.wikimedia.search.extra.analysis.textify;
2
3 import static org.elasticsearch.plugins.AnalysisPlugin.requiresAnalysisSettings;
4 import static java.util.Collections.singletonList;
5 import static java.util.Collections.singletonMap;
6
7 import java.util.Arrays;
8 import java.util.List;
9 import java.util.Map;
10
11 import org.elasticsearch.index.analysis.CharFilterFactory;
12 import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
13 import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
14 import org.elasticsearch.index.analysis.TokenFilterFactory;
15 import org.elasticsearch.index.analysis.TokenizerFactory;
16 import org.elasticsearch.index.analysis.IcuTokenizerFactory;
17 import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
18 import org.elasticsearch.plugins.AnalysisPlugin;
19 import org.elasticsearch.plugins.Plugin;
20
21
22
23
24 public class ExtraAnalysisTextifyPlugin extends Plugin implements AnalysisPlugin {
25
26 @Override
27 public Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() {
28 return singletonMap("limited_mapping",
29 requiresAnalysisSettings(LimitedMappingCharFilterFactory::new));
30 }
31
32 @Override
33 public List<PreConfiguredCharFilter> getPreConfiguredCharFilters() {
34 return Arrays.asList(
35 PreConfiguredCharFilter.singleton("acronym_fixer", true, AcronymFixerCharFilter::new),
36 PreConfiguredCharFilter.singleton("camelCase_splitter", true, CamelCaseCharFilter::new)
37 );
38 }
39
40
41 @Override
42 public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
43 return singletonMap("textify_icu_tokenizer", IcuTokenizerFactory::new);
44 }
45
46 @Override
47 public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
48 return singletonMap("icu_token_repair", requiresAnalysisSettings(ICUTokenRepairFilterFactory::new));
49 }
50
51 @Override
52 public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
53 return singletonList(PreConfiguredTokenFilter.singleton("icu_token_repair", true,
54 ICUTokenRepairFilter::new));
55 }
56 }