View Javadoc
1   package org.wikimedia.search.extra.analysis.slovak;
2   
import java.io.IOException;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;

import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
10  
11  /*
12   * Light Stemmer for Slovak.
13   *
14   * Input is expected to be in lowercase, but with diacritical marks
15   */
16  @SuppressFBWarnings(value = "EQ_DOESNT_OVERRIDE_EQUALS", justification = "Standard pattern for token filters.")
17  public final class SlovakStemmerFilter extends TokenFilter {
18  
19      private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
20      private static final SlovakStemmer STEMMER = new SlovakStemmer();
21  
22      public SlovakStemmerFilter(TokenStream input) {
23          super(input);
24      }
25  
26      @Override
27      public boolean incrementToken() throws IOException {
28          if (input.incrementToken()) {
29              int newlen = STEMMER.stem(termAtt.buffer(), termAtt.length());
30              termAtt.setLength(newlen);
31              return true;
32          }
33          return false;
34      }
35  
36  }