View Javadoc
1   package org.wikimedia.search.extra.analysis.filters;
2   
3   import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
4   import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
5   import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoSearchHits;
6   import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertOrderedSearchHits;
7   import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchHits;
8   
9   import java.io.IOException;
10  import java.util.concurrent.ExecutionException;
11  
12  import org.elasticsearch.action.index.IndexRequestBuilder;
13  import org.elasticsearch.action.search.SearchResponse;
14  import org.elasticsearch.common.xcontent.XContentBuilder;
15  import org.elasticsearch.index.query.QueryBuilders;
16  import org.junit.Before;
17  import org.junit.Test;
18  import org.wikimedia.search.extra.AbstractPluginIntegrationTest;
19  
20  public class PreserverOriginalIntegrationTest extends AbstractPluginIntegrationTest {
21      @Before
22      public void init() throws IOException, InterruptedException, ExecutionException {
23          XContentBuilder settings = jsonBuilder()
24                  .startObject()
25                  .field("number_of_shards", 1)
26                  .startObject("analysis")
27                  .startObject("analyzer")
28                  .startObject("preserve")
29                  .field("tokenizer", "whitespace")
30                  .array("filter", "preserve_original_recorder", "lowercase", "preserve_original")
31                  .endObject()
32                  .endObject()
33                  .endObject()
34                  .endObject();
35  
36          XContentBuilder mapping = jsonBuilder()
37                  .startObject()
38                  .startObject("test")
39                  .startObject("properties")
40                  .startObject("test")
41                  .field("type", "text")
42                  .field("analyzer", "preserve")
43                  .field("similarity", "BM25")
44                  .endObject()
45                  .endObject()
46                  .endObject()
47                  .endObject();
48  
49          assertAcked(prepareCreate("test").addMapping("test", mapping).setSettings(settings));
50          ensureGreen();
51          indexRandom(false, doc("all_lower", "hello world"));
52          indexRandom(false, doc("mixed", "Hello World with more text"));
53          refresh();
54      }
55  
56      @Test
57      public void testSimpleMatchPrefersExact() {
58          SearchResponse sr = client().prepareSearch("test")
59                  .setQuery(QueryBuilders.matchQuery("test", "hello"))
60                  .get();
61          assertOrderedSearchHits(sr, "all_lower", "mixed");
62  
63          // Prefers exact over folded
64          sr = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("test", "Hello")).get();
65          assertOrderedSearchHits(sr, "mixed", "all_lower");
66      }
67  
68      public void testTermPositions() {
69          SearchResponse sr = client().prepareSearch("test").setQuery(QueryBuilders.matchPhraseQuery("test", "hello world")).get();
70          assertSearchHits(sr, "all_lower", "mixed");
71  
72          // Just to make sure that positions are kept
73          // We use the plain whitespace so it will only match to original terms.
74          // We can't really test that phrase prefers original terms here, this is
75          // probably because the phrase scorer uses the phrase freq and does not
76          // really care about the term freq.
77          sr = client().prepareSearch("test")
78                  .setQuery(QueryBuilders.matchPhraseQuery("test", "Hello World").analyzer("whitespace"))
79                  .get();
80          assertOrderedSearchHits(sr, "mixed");
81  
82          sr = client().prepareSearch("test")
83                  .setQuery(QueryBuilders.matchPhraseQuery("test", "Hello hello"))
84                  .get();
85          assertNoSearchHits(sr);
86      }
87  
88      private IndexRequestBuilder doc(String id, String fieldValue) {
89          return client().prepareIndex("test", "test", id).setSource("test", fieldValue);
90      }
91  }