View Javadoc
1   package org.wikimedia.search.extra.analysis.filters;
2   
3   import org.apache.lucene.analysis.TokenStream;
4   import org.elasticsearch.common.settings.Settings;
5   import org.elasticsearch.common.settings.SettingsException;
6   import org.elasticsearch.env.Environment;
7   import org.elasticsearch.index.IndexSettings;
8   import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
9   
10  
11  /**
12   * Factories for the term_frequency token filters.
13   */
14  public class TermFreqTokenFilterFactory extends AbstractTokenFilterFactory {
15      private final char splitChar;
16      private final int maxTF;
17  
18      public TermFreqTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
19          super(indexSettings, name, settings);
20          maxTF = settings.getAsInt("max_tf", 1000);
21          if (maxTF <= 0) {
22              throw new SettingsException("[max_tf] must be strictly positive");
23          }
24          String tmp = settings.get("split_char", "|");
25          if (tmp.length() == 1) {
26              splitChar = tmp.charAt(0);
27          } else {
28              throw new SettingsException("[split_char] expects a single char");
29          }
30      }
31  
32      @Override
33      public TokenStream create(TokenStream tokenStream) {
34          return new TermFreqTokenFilter(tokenStream, splitChar, maxTF);
35      }
36  }