1 package org.wikimedia.search.extra.regex;
2
3 import static org.hamcrest.CoreMatchers.instanceOf;
4
5 import java.io.IOException;
6 import java.util.Arrays;
7 import java.util.Collection;
8 import java.util.Locale;
9
10 import org.apache.lucene.index.IndexReader;
11 import org.apache.lucene.index.memory.MemoryIndex;
12 import org.apache.lucene.search.Query;
13 import org.elasticsearch.common.compress.CompressedXContent;
14 import org.elasticsearch.index.mapper.MapperService;
15 import org.elasticsearch.index.query.QueryBuilder;
16 import org.elasticsearch.index.query.QueryShardContext;
17 import org.elasticsearch.index.query.Rewriteable;
18 import org.elasticsearch.plugins.Plugin;
19 import org.elasticsearch.test.AbstractQueryTestCase;
20 import org.elasticsearch.test.TestGeoShapeFieldMapperPlugin;
21 import org.wikimedia.search.extra.ExtraCorePlugin;
22 import org.wikimedia.search.extra.util.FieldValues;
23
24 public class SourceRegexBuilderESTest extends AbstractQueryTestCase<SourceRegexQueryBuilder> {
25 protected Collection<Class<? extends Plugin>> getPlugins() {
26 return Arrays.asList(ExtraCorePlugin.class, TestGeoShapeFieldMapperPlugin.class);
27 }
28 private static final String MY_FIELD = "regex_field";
29 private static final String MY_FIELD_NGRAM = "regex_field_ngram";
30
31 @Override
32 protected void initializeAdditionalMappings(MapperService mapperService) throws IOException {
33 mapperService.merge("_doc",
34 new CompressedXContent("{\"properties\":{" +
35 "\"" + MY_FIELD + "\":{\"type\":\"text\" }," +
36 "\"" + MY_FIELD_NGRAM + "\":{\"type\":\"text\" }" +
37 "}}"),
38 MapperService.MergeReason.MAPPING_UPDATE);
39 }
40
41 @Override
42 protected SourceRegexQueryBuilder doCreateTestQueryBuilder() {
43 SourceRegexQueryBuilder builder = new SourceRegexQueryBuilder(MY_FIELD, "ramdom[reg]ex");
44 randomlyDo(
45 () -> builder.caseSensitive(randomBoolean()),
46 () -> builder.gramSize(randomIntBetween(2, 4)),
47 () -> builder.loadFromSource(randomBoolean()),
48 () -> builder.loadFromSource(randomBoolean()),
49 () -> builder.settings().maxNgramClauses(randomIntBetween(1, 1000)),
50 () -> builder.settings().rejectUnaccelerated(randomBoolean()),
51 () -> builder.settings().locale(randomFrom(Locale.FRENCH, Locale.ENGLISH, new Locale("el"), new Locale("ga"), new Locale("tr"))),
52 () -> builder.settings().caseSensitive(randomBoolean()),
53 () -> builder.settings().maxDeterminizedStates(randomIntBetween(1, 10000)),
54 () -> builder.settings().maxNgramsExtracted(randomIntBetween(1, 200)),
55 () -> builder.settings().maxExpand(randomIntBetween(1, 200)),
56 () -> builder.settings().maxStatesTraced(randomIntBetween(100, 10000)));
57 return builder;
58 }
59
60 private void randomlyDo(Runnable...r) {
61 Arrays.stream(r)
62 .filter((x) -> randomBoolean())
63 .forEach(Runnable::run);
64 }
65
66 @Override
67 protected void doAssertLuceneQuery(SourceRegexQueryBuilder queryBuilder, Query query, QueryShardContext context) throws IOException {
68 assertThat(query, instanceOf(SourceRegexQuery.class));
69 SourceRegexQuery rquery = (SourceRegexQuery) query;
70 assertEquals(queryBuilder.field(), rquery.getFieldPath());
71 assertEquals(queryBuilder.ngramField(), rquery.getNgramFieldPath());
72 if (queryBuilder.loadFromSource()) {
73 assertSame(FieldValues.loadFromSource(), rquery.getLoader());
74 } else {
75 assertSame(FieldValues.loadFromStoredField(), rquery.getLoader());
76 }
77
78 assertEquals(queryBuilder.settings(), rquery.getSettings());
79 if (!queryBuilder.settings().caseSensitive()
80 && !rquery.getSettings().locale().getLanguage().equals("ga")
81 && !rquery.getSettings().locale().getLanguage().equals("tr")) {
82 assertThat(rquery.getRechecker(), instanceOf(SourceRegexQuery.NonBacktrackingOnTheFlyCaseConvertingRechecker.class));
83 } else {
84 assertThat(rquery.getRechecker(), instanceOf(SourceRegexQuery.NonBacktrackingRechecker.class));
85 }
86 }
87
88 public void testParseDocExample() throws IOException {
89 String json = "{\"source_regex\": {\n" +
90 " \"field\": \"" + MY_FIELD + "\",\n" +
91 " \"regex\": \"regex[a-z]\",\n" +
92 " \"load_from_source\" : true,\n" +
93 " \"ngram_field\" : \"" + MY_FIELD_NGRAM + "\",\n" +
94 " \"gram_size\" : 3,\n" +
95 " \"max_expand\" : 5,\n" +
96 " \"max_states_traced\" : 10001,\n" +
97 " \"max_determinized_states\" : 20001,\n" +
98 " \"max_ngrams_extracted\" : 101,\n" +
99 " \"locale\" : \"fr\",\n" +
100 " \"reject_unaccelerated\" : true,\n" +
101 " \"max_ngram_clauses\" : 1001\n" +
102 "}}";
103 QueryBuilder builder = parseQuery(json);
104 assertThat(builder, instanceOf(SourceRegexQueryBuilder.class));
105 SourceRegexQueryBuilder parsed = (SourceRegexQueryBuilder) builder;
106 SourceRegexQueryBuilder expected = new SourceRegexQueryBuilder(MY_FIELD, "regex[a-z]");
107 expected.loadFromSource(true);
108 expected.ngramField(MY_FIELD_NGRAM);
109 expected.gramSize(3);
110 expected.settings().maxExpand(5);
111 expected.maxStatesTraced(10001);
112 expected.maxDeterminizedStates(20001);
113 expected.settings().maxNgramsExtracted(101);
114 expected.locale(Locale.FRENCH);
115 expected.rejectUnaccelerated(true);
116 expected.settings().maxNgramClauses(1001);
117 assertEquals(expected, parsed);
118 }
119
120 @Override
121 protected Query rewrite(Query query) throws IOException {
122
123 return query;
124 }
125
126 public void testLuceneRewrite() throws IOException {
127 SourceRegexQueryBuilder builder = new SourceRegexQueryBuilder(MY_FIELD, "ab[0-2]");
128 builder.settings().rejectUnaccelerated(false);
129 Query rewritten = buildAndRewrite(builder);
130 assertThat(rewritten, instanceOf(UnacceleratedSourceRegexQuery.class));
131
132 builder.settings().rejectUnaccelerated(true);
133 expectThrows(UnableToAccelerateRegexException.class, () -> buildAndRewrite(builder));
134
135 builder.ngramField(MY_FIELD_NGRAM);
136 rewritten = buildAndRewrite(builder);
137 assertThat(rewritten, instanceOf(AcceleratedSourceRegexQuery.class));
138
139 builder.settings().maxExpand(2);
140 expectThrows(UnableToAccelerateRegexException.class, () -> buildAndRewrite(builder));
141
142
143 }
144
145 private Query buildAndRewrite(SourceRegexQueryBuilder query) throws IOException {
146 IndexReader ir = new MemoryIndex().createSearcher().getIndexReader();
147 QueryShardContext context = createShardContext();
148 QueryBuilder rewritten = Rewriteable.rewrite(query, context);
149 Query lquery = rewritten.toQuery(context);
150 return lquery.rewrite(ir);
151 }
152
153 }