1 package org.wikimedia.search.extra.analysis.filters;
2
3 import java.io.IOException;
4 import java.util.ArrayList;
5 import java.util.Iterator;
6 import java.util.List;
7
8 import org.apache.lucene.analysis.Analyzer;
9 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
10 import org.apache.lucene.analysis.TokenStream;
11 import org.apache.lucene.analysis.Tokenizer;
12 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
13 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
14 import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;
15 import org.elasticsearch.common.collect.Tuple;
16
17 public class TermFreqTokenFilterTest extends BaseTokenStreamTestCase {
18 public void testSimple() throws IOException {
19
20
21 String input = " Q|1 Q2|2 Q3 Q4| Q4|A Q5|0 Q10|10000000";
22 try (Analyzer analyzer = newAnalyzer()) {
23 TokenStream ts = analyzer.tokenStream("", input);
24 assertTokenStreamContents(ts,
25 new String[]{"Q", "Q2", "Q3", "Q4|", "Q4|A", "Q5", "Q10"},
26 new int[]{1, 5, 10, 13, 17, 22, 27, 27, 27},
27 new int[]{4, 9, 12, 16, 21, 26, 39, 39, 39},
28 null,
29 new int[]{1, 1, 1, 1, 1, 1, 1},
30 null,
31 39,
32 null,
33 true);
34
35 }
36 }
37
38 public void testAttr() throws IOException {
39 String input = " Q|1 Q2|2 Q3 Q4| Q4|A Q5|0 Q10|10000000";
40 List<Tuple<String, Integer>> expects = new ArrayList<>();
41 expects.add(new Tuple<>("Q", 1));
42 expects.add(new Tuple<>("Q2", 2));
43 expects.add(new Tuple<>("Q3", 1));
44 expects.add(new Tuple<>("Q4|", 1));
45 expects.add(new Tuple<>("Q4|A", 1));
46 expects.add(new Tuple<>("Q5", 1));
47 expects.add(new Tuple<>("Q10", 3));
48 try (Analyzer analyzer = newAnalyzer()) {
49 TokenStream ts = analyzer.tokenStream("", input);
50 CharTermAttribute cattr = ts.getAttribute(CharTermAttribute.class);
51 TermFrequencyAttribute fattr = ts.getAttribute(TermFrequencyAttribute.class);
52 Iterator<Tuple<String, Integer>> ite = expects.iterator();
53 ts.reset();
54 while (ite.hasNext()) {
55 assertTrue(ts.incrementToken());
56 Tuple<String, Integer> tuple = ite.next();
57 assertEquals(tuple.v1(), cattr.toString());
58 assertEquals((int) tuple.v2(), fattr.getTermFrequency());
59 }
60 assertFalse(ts.incrementToken());
61 }
62 }
63
64 private Analyzer newAnalyzer() {
65 return new Analyzer() {
66 @Override
67 protected TokenStreamComponents createComponents(String fieldName) {
68 Tokenizer tok = new WhitespaceTokenizer();
69 TokenStream ts = new TermFreqTokenFilter(tok, '|', 3);
70 return new TokenStreamComponents(tok, ts);
71 }
72 };
73 }
74 }