View Javadoc
1   package org.wikimedia.search.extra.analysis.turkish;
2   
3   import java.io.IOException;
4   
5   import org.apache.lucene.analysis.TokenFilter;
6   import org.apache.lucene.analysis.TokenStream;
7   import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
8   
9   import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
10  
11  /*
12   * Better Apostrophe handling for Turkish.
13   *
14   * Input is expected to be in lowercase, but with diacritical marks
15   */
16  @SuppressFBWarnings(value = "EQ_DOESNT_OVERRIDE_EQUALS", justification = "Standard pattern for token filters.")
17  public final class BetterApostropheFilter extends TokenFilter {
18  
19      private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
20      private static final BetterApostrophe APOSTROPHE = new BetterApostrophe();
21  
22      public BetterApostropheFilter(TokenStream input) {
23          super(input);
24      }
25  
26      @Override
27      public boolean incrementToken() throws IOException {
28          if (input.incrementToken()) {
29              CharSequence converted = APOSTROPHE.apos(termAtt);
30              if (converted != termAtt) {
31                  termAtt.setEmpty().append(converted);
32              }
33              return true;
34          }
35          return false;
36      }
37  
38  }