View Javadoc
1   package org.wikimedia.search.extra.analysis.filters;
2   
3   import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
4   import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
5   import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertOrderedSearchHits;
6   
7   import java.io.IOException;
8   import java.util.HashMap;
9   import java.util.Map;
10  import java.util.concurrent.ExecutionException;
11  
12  import org.elasticsearch.action.index.IndexRequestBuilder;
13  import org.elasticsearch.action.search.SearchResponse;
14  import org.elasticsearch.common.xcontent.XContentBuilder;
15  import org.elasticsearch.index.query.QueryBuilders;
16  import org.junit.Before;
17  import org.junit.Test;
18  import org.wikimedia.search.extra.AbstractPluginIntegrationTest;
19  
20  public class TermFreqTokenFilterIntegrationTest extends AbstractPluginIntegrationTest {
21      @Before
22      public void init() throws IOException, InterruptedException, ExecutionException {
23          XContentBuilder settings = jsonBuilder()
24                  .startObject()
25                  .field("number_of_shards", 1)
26                  .startObject("analysis")
27                  .startObject("filter")
28                  .startObject("term_freq_test")
29                  .field("type", "term_freq")
30                  .field("split_char", "=")
31                  .field("max_tf", 3)
32                  .endObject()
33                  .endObject()
34                  .startObject("analyzer")
35                  .startObject("term_freq")
36                  .field("tokenizer", "whitespace")
37                  .array("filter", "term_freq")
38                  .endObject()
39                  .startObject("term_freq_test")
40                  .field("tokenizer", "whitespace")
41                  .array("filter", "term_freq_test")
42                  .endObject()
43                  .endObject()
44                  .endObject()
45                  .endObject();
46  
47          XContentBuilder mapping = jsonBuilder()
48                  .startObject()
49                  .startObject("test")
50                  .startObject("properties")
51                  .startObject("test")
52                  .field("type", "text")
53                  .field("analyzer", "term_freq")
54                  .field("index_options", "freqs")
55                  .field("similarity", "BM25")
56                  .endObject()
57                  .startObject("another")
58                  .field("type", "text")
59                  .field("analyzer", "term_freq_test")
60                  .field("index_options", "freqs")
61                  .field("similarity", "BM25")
62                  .endObject()
63                  .endObject()
64                  .endObject()
65                  .endObject();
66  
67          assertAcked(prepareCreate("test").addMapping("test", mapping).setSettings(settings));
68          ensureGreen();
69          indexRandom(false, doc("docA", "Q1|2 Q2|10", new String[]{"Q1=1", "Q2=10"}));
70          indexRandom(false, doc("docB", "Q1|10 Q2|2", new String[]{"Q1=2", "Q2=3"}));
71          refresh();
72      }
73  
74      @Test
75      public void testSimple() {
76          SearchResponse sr = client().prepareSearch("test")
77                  .setQuery(QueryBuilders.matchQuery("test", "Q1"))
78                  .get();
79          assertOrderedSearchHits(sr, "docB", "docA");
80  
81          // make sure that tf has been properly set by using BM25 which will use tf in its ranking formula
82          sr = client().prepareSearch("test")
83                  .setQuery(QueryBuilders.matchQuery("test", "Q2")).get();
84          assertOrderedSearchHits(sr, "docA", "docB");
85  
86          sr = client().prepareSearch("test")
87                  .setQuery(QueryBuilders.boolQuery()
88                          .must(QueryBuilders.matchQuery("another", "Q1"))
89                          .must(QueryBuilders.matchQuery("another", "Q2")))
90                  .get();
91          assertOrderedSearchHits(sr, "docB", "docA");
92      }
93  
94      private IndexRequestBuilder doc(String id, String test, String[] another) {
95          Map<String, Object> doc = new HashMap<>();
96          doc.put("test", test);
97          doc.put("another", another);
98          return client().prepareIndex("test", "test", id).setSource(doc);
99      }
100 }