View Javadoc
1   package org.wikimedia.search.extra.levenshtein;
2   
3   import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
4   import static org.elasticsearch.index.query.QueryBuilders.functionScoreQuery;
5   import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
6   import static org.elasticsearch.index.query.QueryBuilders.termQuery;
7   import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
8   import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertFailures;
9   import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertOrderedSearchHits;
10  
11  import java.io.IOException;
12  
13  import org.elasticsearch.ElasticsearchException;
14  import org.elasticsearch.action.search.SearchResponse;
15  import org.elasticsearch.common.lucene.search.function.CombineFunction;
16  import org.elasticsearch.rest.RestStatus;
17  import org.hamcrest.Matchers;
18  import org.junit.Test;
19  import org.wikimedia.search.extra.AbstractPluginIntegrationTest;
20  
21  public class LevenshteinDistanceScoreIntegrationTest extends AbstractPluginIntegrationTest {
22      @Test
23      public void testLevenshteinScore() throws ElasticsearchException, IOException {
24          assertAcked(prepareCreate("test").addMapping(
25                  "type1",
26                  jsonBuilder().startObject().startObject("type1").startObject("properties")
27                          .startObject("content").field("type", "text")
28                              .field("store", false)
29                              .field("copy_to", "content_stored").endObject()
30                          .startObject("content_stored")
31                              .field("type", "text").field("store", true).endObject()
32                          .endObject().endObject().endObject()).get());
33  
34          client().prepareIndex("test", "type1", "1").setSource("content", "Haste makes waste").get();
35          client().prepareIndex("test", "type1", "2").setSource("content", "A stitch in time saves nine").get();
36          client().prepareIndex("test", "type1", "3").setSource("content", "Ignorance is bliss").get();
37          client().prepareIndex("test", "type1", "4").setSource("content", "Paste makes waste").get();
38          client().prepareIndex("test", "type1", "5").setSource("content", "A stitch in time saves nine essay").get();
39          client().prepareIndex("test", "type1", "6").setSource("content", "Ignorance is strength").get();
40          client().prepareIndex("test", "type1", "7").setSource().get();
41  
42          refresh();
43  
44          // test with data loaded from _source
45          assertions("content");
46          // Test with data loaded from stored values
47          assertions("content_stored");
48  
49          assertFailures(client().prepareSearch("test").setExplain(randomBoolean())
50                  .setQuery(functionScoreQuery(termQuery("content", "ignorance"),
51                              new LevenshteinDistanceScoreBuilder("blah", "Ignorance is strength"))
52                          .boostMode(CombineFunction.REPLACE)), RestStatus.BAD_REQUEST,
53                          Matchers.containsString("Unable to load field type for field blah"));
54  
55          assertFailures(client().prepareSearch("test").setExplain(randomBoolean())
56                  .setQuery(functionScoreQuery(matchAllQuery(),
57                              new LevenshteinDistanceScoreBuilder("content", "Ignorance is strength"))
58                          .boostMode(CombineFunction.REPLACE)), RestStatus.INTERNAL_SERVER_ERROR,
59                          Matchers.containsString("content is null"));
60  
61          assertOrderedSearchHits(client().prepareSearch("test").setExplain(randomBoolean())
62                  .setQuery(functionScoreQuery(matchAllQuery(),
63                              new LevenshteinDistanceScoreBuilder("content", "Ignorance is strength")
64                              .missing(""))
65                          .boostMode(CombineFunction.REPLACE)).setSize(2).get(), new String[]{"6", "3"});
66      }
67  
68      private void assertions(String field) {
69          SearchResponse response = client().prepareSearch("test").setExplain(randomBoolean())
70                  .setQuery(functionScoreQuery(termQuery("content", "makes"),
71                          new LevenshteinDistanceScoreBuilder(field, "Haste makes waste"))
72                          .boostMode(CombineFunction.REPLACE)).get();
73          assertOrderedSearchHits(response, "1", "4");
74  
75          response = client().prepareSearch("test").setExplain(randomBoolean())
76                  .setQuery(functionScoreQuery(termQuery("content", "makes"),
77                          new LevenshteinDistanceScoreBuilder(field, "Paste makes waste"))
78                          .boostMode(CombineFunction.REPLACE)).get();
79          assertOrderedSearchHits(response, "4", "1");
80  
81          response = client().prepareSearch("test").setExplain(randomBoolean())
82                  .setQuery(functionScoreQuery(termQuery("content", "stitch"),
83                          new LevenshteinDistanceScoreBuilder(field, "A stitch in time saves nine"))
84                          .boostMode(CombineFunction.REPLACE)).get();
85          assertOrderedSearchHits(response, "2", "5");
86  
87          response = client().prepareSearch("test").setExplain(randomBoolean())
88                  .setQuery(functionScoreQuery(termQuery("content", "stitch"),
89                          new LevenshteinDistanceScoreBuilder(field, "A stitch in time saves nine essay"))
90                          .boostMode(CombineFunction.REPLACE)).get();
91          assertOrderedSearchHits(response, "5", "2");
92  
93          response = client().prepareSearch("test").setExplain(randomBoolean())
94                  .setQuery(functionScoreQuery(termQuery("content", "ignorance"),
95                          new LevenshteinDistanceScoreBuilder(field, "Ignorance is bliss"))
96                          .boostMode(CombineFunction.REPLACE)).get();
97          assertOrderedSearchHits(response, "3", "6");
98  
99          response = client().prepareSearch("test").setExplain(randomBoolean())
100                 .setQuery(functionScoreQuery(termQuery("content", "ignorance"),
101                         new LevenshteinDistanceScoreBuilder(field, "Ignorance is strength"))
102                         .boostMode(CombineFunction.REPLACE)).get();
103         assertOrderedSearchHits(response, "6", "3");
104     }
105 }