View Javadoc
1   package org.wikimedia.search.extra.regex;
2   
3   import static org.hamcrest.CoreMatchers.instanceOf;
4   
5   import java.io.IOException;
6   import java.util.Arrays;
7   import java.util.Collection;
8   import java.util.Locale;
9   
10  import org.apache.lucene.index.IndexReader;
11  import org.apache.lucene.index.memory.MemoryIndex;
12  import org.apache.lucene.search.Query;
13  import org.elasticsearch.common.compress.CompressedXContent;
14  import org.elasticsearch.index.mapper.MapperService;
15  import org.elasticsearch.index.query.QueryBuilder;
16  import org.elasticsearch.index.query.QueryShardContext;
17  import org.elasticsearch.index.query.Rewriteable;
18  import org.elasticsearch.plugins.Plugin;
19  import org.elasticsearch.test.AbstractQueryTestCase;
20  import org.elasticsearch.test.TestGeoShapeFieldMapperPlugin;
21  import org.wikimedia.search.extra.ExtraCorePlugin;
22  import org.wikimedia.search.extra.util.FieldValues;
23  
24  public class SourceRegexBuilderESTest extends AbstractQueryTestCase<SourceRegexQueryBuilder> {
25      protected Collection<Class<? extends Plugin>> getPlugins() {
26          return Arrays.asList(ExtraCorePlugin.class, TestGeoShapeFieldMapperPlugin.class);
27      }
28      private static final String MY_FIELD = "regex_field";
29      private static final String MY_FIELD_NGRAM = "regex_field_ngram";
30  
31      @Override
32      protected void initializeAdditionalMappings(MapperService mapperService) throws IOException {
33          mapperService.merge("_doc",
34                  new CompressedXContent("{\"properties\":{" +
35                          "\"" + MY_FIELD + "\":{\"type\":\"text\" }," +
36                          "\"" + MY_FIELD_NGRAM + "\":{\"type\":\"text\" }" +
37                          "}}"),
38                  MapperService.MergeReason.MAPPING_UPDATE);
39      }
40  
41      @Override
42      protected SourceRegexQueryBuilder doCreateTestQueryBuilder() {
43          SourceRegexQueryBuilder builder = new SourceRegexQueryBuilder(MY_FIELD, "ramdom[reg]ex");
44          randomlyDo(
45              () -> builder.caseSensitive(randomBoolean()),
46              () -> builder.gramSize(randomIntBetween(2, 4)),
47              () -> builder.loadFromSource(randomBoolean()),
48              () -> builder.loadFromSource(randomBoolean()),
49              () -> builder.settings().maxNgramClauses(randomIntBetween(1, 1000)),
50              () -> builder.settings().rejectUnaccelerated(randomBoolean()),
51              () -> builder.settings().locale(randomFrom(Locale.FRENCH, Locale.ENGLISH, new Locale("el"), new Locale("ga"), new Locale("tr"))),
52              () -> builder.settings().caseSensitive(randomBoolean()),
53              () -> builder.settings().maxDeterminizedStates(randomIntBetween(1, 10000)),
54              () -> builder.settings().maxNgramsExtracted(randomIntBetween(1, 200)),
55              () -> builder.settings().maxExpand(randomIntBetween(1, 200)),
56              () -> builder.settings().maxStatesTraced(randomIntBetween(100, 10000)));
57          return builder;
58      }
59  
60      private void randomlyDo(Runnable...r) {
61          Arrays.stream(r)
62              .filter((x) -> randomBoolean())
63              .forEach(Runnable::run);
64      }
65  
66      @Override
67      protected void doAssertLuceneQuery(SourceRegexQueryBuilder queryBuilder, Query query, QueryShardContext context) throws IOException {
68          assertThat(query, instanceOf(SourceRegexQuery.class));
69          SourceRegexQuery rquery = (SourceRegexQuery) query;
70          assertEquals(queryBuilder.field(), rquery.getFieldPath());
71          assertEquals(queryBuilder.ngramField(), rquery.getNgramFieldPath());
72          if (queryBuilder.loadFromSource()) {
73              assertSame(FieldValues.loadFromSource(), rquery.getLoader());
74          } else {
75              assertSame(FieldValues.loadFromStoredField(), rquery.getLoader());
76          }
77  
78          assertEquals(queryBuilder.settings(), rquery.getSettings());
79          if (!queryBuilder.settings().caseSensitive()
80                  && !rquery.getSettings().locale().getLanguage().equals("ga")
81                  && !rquery.getSettings().locale().getLanguage().equals("tr")) {
82              assertThat(rquery.getRechecker(), instanceOf(SourceRegexQuery.NonBacktrackingOnTheFlyCaseConvertingRechecker.class));
83          } else {
84              assertThat(rquery.getRechecker(), instanceOf(SourceRegexQuery.NonBacktrackingRechecker.class));
85          }
86      }
87  
88      public void testParseDocExample() throws IOException {
89          String json = "{\"source_regex\": {\n" +
90                  "   \"field\": \"" + MY_FIELD + "\",\n" +
91                  "   \"regex\": \"regex[a-z]\",\n" +
92                  "   \"load_from_source\" : true,\n" +
93                  "   \"ngram_field\" : \"" + MY_FIELD_NGRAM + "\",\n" +
94                  "   \"gram_size\" : 3,\n" +
95                  "   \"max_expand\" : 5,\n" +
96                  "   \"max_states_traced\" : 10001,\n" +
97                  "   \"max_determinized_states\" : 20001,\n" +
98                  "   \"max_ngrams_extracted\" : 101,\n" +
99                  "   \"locale\" : \"fr\",\n" +
100                 "   \"reject_unaccelerated\" : true,\n" +
101                 "   \"max_ngram_clauses\" : 1001\n" +
102                 "}}";
103         QueryBuilder builder = parseQuery(json);
104         assertThat(builder, instanceOf(SourceRegexQueryBuilder.class));
105         SourceRegexQueryBuilder parsed = (SourceRegexQueryBuilder) builder;
106         SourceRegexQueryBuilder expected = new SourceRegexQueryBuilder(MY_FIELD, "regex[a-z]");
107         expected.loadFromSource(true);
108         expected.ngramField(MY_FIELD_NGRAM);
109         expected.gramSize(3);
110         expected.settings().maxExpand(5);
111         expected.maxStatesTraced(10001);
112         expected.maxDeterminizedStates(20001);
113         expected.settings().maxNgramsExtracted(101);
114         expected.locale(Locale.FRENCH);
115         expected.rejectUnaccelerated(true);
116         expected.settings().maxNgramClauses(1001);
117         assertEquals(expected, parsed);
118     }
119 
120     @Override
121     protected Query rewrite(Query query) throws IOException {
122         // Do not rewrite, rewriting deserves its own subtest
123         return query;
124     }
125 
126     public void testLuceneRewrite() throws IOException {
127         SourceRegexQueryBuilder builder = new SourceRegexQueryBuilder(MY_FIELD, "ab[0-2]");
128         builder.settings().rejectUnaccelerated(false);
129         Query rewritten = buildAndRewrite(builder);
130         assertThat(rewritten, instanceOf(UnacceleratedSourceRegexQuery.class));
131 
132         builder.settings().rejectUnaccelerated(true);
133         expectThrows(UnableToAccelerateRegexException.class, () -> buildAndRewrite(builder));
134 
135         builder.ngramField(MY_FIELD_NGRAM);
136         rewritten = buildAndRewrite(builder);
137         assertThat(rewritten, instanceOf(AcceleratedSourceRegexQuery.class));
138 
139         builder.settings().maxExpand(2);
140         expectThrows(UnableToAccelerateRegexException.class, () -> buildAndRewrite(builder));
141 
142         // TODO: move more tests from SourceRegexQueryIntegrationTests here
143     }
144 
145     private Query buildAndRewrite(SourceRegexQueryBuilder query) throws IOException {
146         IndexReader ir = new MemoryIndex().createSearcher().getIndexReader();
147         QueryShardContext context = createShardContext();
148         QueryBuilder rewritten = Rewriteable.rewrite(query, context);
149         Query lquery = rewritten.toQuery(context);
150         return lquery.rewrite(ir);
151     }
152 
153 }