1 package org.wikimedia.search.extra.levenshtein;
2
3 import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
4 import static org.elasticsearch.index.query.QueryBuilders.functionScoreQuery;
5 import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
6 import static org.elasticsearch.index.query.QueryBuilders.termQuery;
7 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
8 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertFailures;
9 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertOrderedSearchHits;
10
11 import java.io.IOException;
12
13 import org.elasticsearch.ElasticsearchException;
14 import org.elasticsearch.action.search.SearchResponse;
15 import org.elasticsearch.common.lucene.search.function.CombineFunction;
16 import org.elasticsearch.rest.RestStatus;
17 import org.hamcrest.Matchers;
18 import org.junit.Test;
19 import org.wikimedia.search.extra.AbstractPluginIntegrationTest;
20
21 public class LevenshteinDistanceScoreIntegrationTest extends AbstractPluginIntegrationTest {
22 @Test
23 public void testLevenshteinScore() throws ElasticsearchException, IOException {
24 assertAcked(prepareCreate("test").addMapping(
25 "type1",
26 jsonBuilder().startObject().startObject("type1").startObject("properties")
27 .startObject("content").field("type", "text")
28 .field("store", false)
29 .field("copy_to", "content_stored").endObject()
30 .startObject("content_stored")
31 .field("type", "text").field("store", true).endObject()
32 .endObject().endObject().endObject()).get());
33
34 client().prepareIndex("test", "type1", "1").setSource("content", "Haste makes waste").get();
35 client().prepareIndex("test", "type1", "2").setSource("content", "A stitch in time saves nine").get();
36 client().prepareIndex("test", "type1", "3").setSource("content", "Ignorance is bliss").get();
37 client().prepareIndex("test", "type1", "4").setSource("content", "Paste makes waste").get();
38 client().prepareIndex("test", "type1", "5").setSource("content", "A stitch in time saves nine essay").get();
39 client().prepareIndex("test", "type1", "6").setSource("content", "Ignorance is strength").get();
40 client().prepareIndex("test", "type1", "7").setSource().get();
41
42 refresh();
43
44
45 assertions("content");
46
47 assertions("content_stored");
48
49 assertFailures(client().prepareSearch("test").setExplain(randomBoolean())
50 .setQuery(functionScoreQuery(termQuery("content", "ignorance"),
51 new LevenshteinDistanceScoreBuilder("blah", "Ignorance is strength"))
52 .boostMode(CombineFunction.REPLACE)), RestStatus.BAD_REQUEST,
53 Matchers.containsString("Unable to load field type for field blah"));
54
55 assertFailures(client().prepareSearch("test").setExplain(randomBoolean())
56 .setQuery(functionScoreQuery(matchAllQuery(),
57 new LevenshteinDistanceScoreBuilder("content", "Ignorance is strength"))
58 .boostMode(CombineFunction.REPLACE)), RestStatus.INTERNAL_SERVER_ERROR,
59 Matchers.containsString("content is null"));
60
61 assertOrderedSearchHits(client().prepareSearch("test").setExplain(randomBoolean())
62 .setQuery(functionScoreQuery(matchAllQuery(),
63 new LevenshteinDistanceScoreBuilder("content", "Ignorance is strength")
64 .missing(""))
65 .boostMode(CombineFunction.REPLACE)).setSize(2).get(), new String[]{"6", "3"});
66 }
67
68 private void assertions(String field) {
69 SearchResponse response = client().prepareSearch("test").setExplain(randomBoolean())
70 .setQuery(functionScoreQuery(termQuery("content", "makes"),
71 new LevenshteinDistanceScoreBuilder(field, "Haste makes waste"))
72 .boostMode(CombineFunction.REPLACE)).get();
73 assertOrderedSearchHits(response, "1", "4");
74
75 response = client().prepareSearch("test").setExplain(randomBoolean())
76 .setQuery(functionScoreQuery(termQuery("content", "makes"),
77 new LevenshteinDistanceScoreBuilder(field, "Paste makes waste"))
78 .boostMode(CombineFunction.REPLACE)).get();
79 assertOrderedSearchHits(response, "4", "1");
80
81 response = client().prepareSearch("test").setExplain(randomBoolean())
82 .setQuery(functionScoreQuery(termQuery("content", "stitch"),
83 new LevenshteinDistanceScoreBuilder(field, "A stitch in time saves nine"))
84 .boostMode(CombineFunction.REPLACE)).get();
85 assertOrderedSearchHits(response, "2", "5");
86
87 response = client().prepareSearch("test").setExplain(randomBoolean())
88 .setQuery(functionScoreQuery(termQuery("content", "stitch"),
89 new LevenshteinDistanceScoreBuilder(field, "A stitch in time saves nine essay"))
90 .boostMode(CombineFunction.REPLACE)).get();
91 assertOrderedSearchHits(response, "5", "2");
92
93 response = client().prepareSearch("test").setExplain(randomBoolean())
94 .setQuery(functionScoreQuery(termQuery("content", "ignorance"),
95 new LevenshteinDistanceScoreBuilder(field, "Ignorance is bliss"))
96 .boostMode(CombineFunction.REPLACE)).get();
97 assertOrderedSearchHits(response, "3", "6");
98
99 response = client().prepareSearch("test").setExplain(randomBoolean())
100 .setQuery(functionScoreQuery(termQuery("content", "ignorance"),
101 new LevenshteinDistanceScoreBuilder(field, "Ignorance is strength"))
102 .boostMode(CombineFunction.REPLACE)).get();
103 assertOrderedSearchHits(response, "6", "3");
104 }
105 }