1 package org.wikimedia.search.extra.analysis.filters;
2
3 import java.io.IOException;
4
5 import javax.annotation.Nullable;
6
7 import org.apache.lucene.analysis.TokenFilter;
8 import org.apache.lucene.analysis.TokenStream;
9 import org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilter;
10 import org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilter;
11 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
12 import org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl;
13 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
14 import org.apache.lucene.analysis.util.TokenFilterFactory;
15
16 import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
17
18
19
20
21
22
23
24
25
26 @SuppressFBWarnings(
27 value = "EQ_DOESNT_OVERRIDE_EQUALS",
28 justification = "equals() as defined in org.apache.lucene.util.AttributeSource seems strong enough.")
29 public class PreserveOriginalFilter extends TokenFilter {
30 private final CharTermAttribute cattr;
31 private final PositionIncrementAttribute posIncr;
32 private final OriginalTermAttribute original;
33 @Nullable private State preserve;
34
35
36
37
38
39
40
41 public PreserveOriginalFilter(TokenStream input) {
42 super(input);
43 cattr = getAttribute(CharTermAttribute.class);
44 posIncr = addAttribute(PositionIncrementAttribute.class);
45 original = getAttribute(OriginalTermAttribute.class);
46 if (original == null) {
47 throw new IllegalArgumentException("PreserveOriginalFilter must be used with a PreserveOriginalFilter.Recorder fitler in the same analysis chain.");
48 }
49 }
50
51
52
53
54
55
56
57 public PreserveOriginalFilter(TokenStream input, TokenFilterFactory wrapped) {
58 this(wrapped.create(new Recorder(input)));
59 }
60
61 @Override
62 public final boolean incrementToken() throws IOException {
63 if (preserve != null) {
64 restoreState(preserve);
65 cattr.copyBuffer(original.buffer(), 0, original.length());
66 posIncr.setPositionIncrement(0);
67 preserve = null;
68 return true;
69 }
70
71 if (input.incrementToken()) {
72 if (!original.equals(cattr)) {
73 preserve = captureState();
74 }
75 return true;
76 } else {
77 return false;
78 }
79 }
80
81
82
83
84 public static class Recorder extends TokenFilter {
85 private final OriginalTermAttribute original = this.addAttribute(OriginalTermAttribute.class);
86 private final CharTermAttribute cattr = this.addAttribute(CharTermAttribute.class);
87 public Recorder(TokenStream input) {
88 super(input);
89 }
90
91
92
93
94 @Override
95 public final boolean incrementToken() throws IOException {
96 if (input.incrementToken()) {
97 original.copyBuffer(cattr.buffer(), 0, cattr.length());
98 return true;
99 }
100 return false;
101 }
102 }
103
104
105
106
107
108
109 public interface OriginalTermAttribute extends CharTermAttribute {}
110
111
112
113
114
115
116
117
118
119 public static class OriginalTermAttributeImpl extends CharTermAttributeImpl implements OriginalTermAttribute {}
120 }