package org.wikimedia.search.extra.analysis.textify;

import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchHits;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ExecutionException;

import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.analysis.TokenizerFactory;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.indices.analysis.AnalysisModule;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.test.ESIntegTestCase.ClusterScope;
import org.junit.Before;
import org.junit.Test;

/**
 * Integration test for the {@code limited_mapping} char filter: an analyzer that
 * normalizes backtick and curly single quotes to a straight apostrophe should make
 * curly- and straight-quoted text match each other at search time.
 */
@ClusterScope(scope = ESIntegTestCase.Scope.SUITE, transportClientRatio = 0.0)
public class LimitedMappingIntegrationTest extends ESIntegTestCase {

    /**
     * Creates the "test" index with a {@code limited_mapping} char filter
     * ("apos_mini") that maps ` ‘ ’ to ', wires it into a whitespace-tokenized
     * analyzer, and indexes one doc with curly quotes and one with straight quotes.
     */
    @Before
    public void init() throws IOException, InterruptedException, ExecutionException {
        XContentBuilder indexSettings = jsonBuilder()
            .startObject()
                .field("number_of_shards", 1)
                .startObject("analysis")
                    .startObject("char_filter")
                        .startObject("apos_mini")
                            .field("type", "limited_mapping")
                            .array("mappings", "`=>'", "‘=>'", "\u2019=>'")
                        .endObject()
                    .endObject()
                    .startObject("analyzer")
                        .startObject("ltdmap")
                            .field("tokenizer", "whitespace")
                            .array("char_filter", "apos_mini")
                        .endObject()
                    .endObject()
                .endObject()
            .endObject();

        XContentBuilder typeMapping = jsonBuilder()
            .startObject()
                .startObject("test")
                    .startObject("properties")
                        .startObject("test")
                            .field("type", "text")
                            .field("analyzer", "ltdmap")
                            .field("similarity", "BM25")
                        .endObject()
                    .endObject()
                .endObject()
            .endObject();

        assertAcked(prepareCreate("test").addMapping("test", typeMapping).setSettings(indexSettings));
        ensureGreen();
        indexRandom(false, doc("curly", "hello w‘orl’d"));
        indexRandom(false, doc("straight", "hello w'orl'd"));
        refresh();
    }

    /**
     * Both quote styles in the query should hit both documents, since the char
     * filter normalizes quotes on both the index and search sides.
     */
    @Test
    public void testCurlyAndStraightQuotes() {
        assertSearchHits(searchFor("w‘orl’d"), "curly", "straight");
        assertSearchHits(searchFor("w'orl'd"), "curly", "straight");
    }

    /** Runs a match query for {@code queryText} against the "test" field. */
    private SearchResponse searchFor(String queryText) {
        return client().prepareSearch("test")
            .setQuery(QueryBuilders.matchQuery("test", queryText))
            .get();
    }

    /** Builds an index request for a doc with the given id and "test" field text. */
    private IndexRequestBuilder doc(String id, String text) {
        return client().prepareIndex("test", "test", id).setSource("test", text);
    }

    @Override
    protected Collection<Class<? extends Plugin>> nodePlugins() {
        return Collections.unmodifiableList(
            Arrays.<Class<? extends Plugin>>asList(ExtraAnalysisTextifyPlugin.class, MockPlugin.class));
    }

    /**
     * Registers a plain Lucene whitespace tokenizer under the name "whitespace",
     * since the integration-test cluster does not load the analysis-common module
     * that normally provides it.
     */
    public static class MockPlugin extends Plugin implements AnalysisPlugin {
        @Override
        public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
            return Collections.singletonMap("whitespace",
                (indexSettings, environment, tokenizerName, tokenizerSettings) ->
                    TokenizerFactory.newFactory("whitespace", WhitespaceTokenizer::new));
        }
    }

}