View Javadoc
1   /*
2    * Licensed to Elasticsearch under one or more contributor
3    * license agreements. See the NOTICE file distributed with
4    * this work for additional information regarding copyright
5    * ownership. Elasticsearch licenses this file to you under
6    * the Apache License, Version 2.0 (the "License"); you may
7    * not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *    http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  
20  package org.wikimedia.search.extra.fuzzylike;
21  
22  import java.io.IOException;
23  import java.util.ArrayList;
24  import java.util.Arrays;
25  import java.util.Collections;
26  import java.util.HashSet;
27  import java.util.List;
28  import java.util.Objects;
29  import java.util.Set;
30  import java.util.stream.Collectors;
31  import java.util.stream.Stream;
32  
33  import javax.annotation.Nullable;
34  
35  import org.apache.lucene.analysis.Analyzer;
36  import org.apache.lucene.search.Query;
37  import org.elasticsearch.common.ParseField;
38  import org.elasticsearch.common.ParsingException;
39  import org.elasticsearch.common.io.stream.StreamInput;
40  import org.elasticsearch.common.io.stream.StreamOutput;
41  import org.elasticsearch.common.unit.Fuzziness;
42  import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
43  import org.elasticsearch.common.xcontent.XContentBuilder;
44  import org.elasticsearch.common.xcontent.XContentParser;
45  import org.elasticsearch.index.mapper.TextFieldMapper;
46  import org.elasticsearch.index.query.AbstractQueryBuilder;
47  import org.elasticsearch.index.query.QueryShardContext;
48  import org.elasticsearch.index.query.QueryShardException;
49  
50  import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
51  import lombok.Getter;
52  import lombok.Setter;
53  import lombok.experimental.Accessors;
54  
55  /**
56   * @deprecated this query was too costly and has been removed
57   */
58  @Deprecated
59  @Accessors(fluent = true, chain = true)
60  @Getter @Setter
61  @SuppressFBWarnings(value = "EI_EXPOSE_REP", justification = "We don't care about exposing representation in a builder")
62  public class FuzzyLikeThisQueryBuilder extends AbstractQueryBuilder<FuzzyLikeThisQueryBuilder> {
63      public static final ParseField NAME = new ParseField("fuzzy_like_this", "flt", "fuzzyLikeThis");
64  
65      public static final ParseField FIELDS = new ParseField("fields");
66      public static final ParseField LIKE_TEXT = new ParseField("like_text", "likeText");
67      public static final ParseField PREFIX_LENGTH = new ParseField("prefix_length", "likeText");
68      public static final ParseField MAX_QUERY_TERMS = new ParseField("max_query_terms", "maxQueryTerms");
69      public static final ParseField IGNORE_TF = new ParseField("ignore_tf", "ignoreTF");
70      public static final ParseField ANALYZER = new ParseField("analyzer");
71      public static final ParseField FAIL_ON_UNSUPPORTED_FIELD = new ParseField("fail_on_unsupported_field", "failOnUnsupportedField");
72  
73      public static final ParseField FUZZINESS = Fuzziness.FIELD.withDeprecation("min_similarity");
74  
75      private static final int DEFAULT_PREFIX_LENGTH = 0;
76      private static final Fuzziness DEFAULT_FUZZINESS = Fuzziness.TWO;
77      private static final boolean DEFAULT_IGNORETF = false;
78      private static final boolean DEFAULT_FAIL_ON_UNSUPPORTED_FIELD = false;
79      private static final int DEFAULT_MAX_QUERY_TERMS = 25;
80  
81      private static final Set<String> SUPPORTED_TYPES = new HashSet<>(Collections.singletonList(
82              TextFieldMapper.CONTENT_TYPE
83      ));
84  
85      @Nullable private final String[] fields;
86      private final String likeText;
87      private Fuzziness fuzziness = DEFAULT_FUZZINESS;
88      private int prefixLength = DEFAULT_PREFIX_LENGTH;
89      private int maxQueryTerms = DEFAULT_MAX_QUERY_TERMS;
90      private boolean ignoreTF = DEFAULT_IGNORETF;
91      @Nullable private String analyzer;
92      private boolean failOnUnsupportedField = DEFAULT_FAIL_ON_UNSUPPORTED_FIELD;
93  
94      public FuzzyLikeThisQueryBuilder(@Nullable String[] fields, String likeText) {
95          this.fields = fields;
96          this.likeText = Objects.requireNonNull(likeText);
97      }
98  
99      public FuzzyLikeThisQueryBuilder(String likeText) {
100         this(null, likeText);
101     }
102 
103     public FuzzyLikeThisQueryBuilder(StreamInput in) throws IOException {
104         super(in);
105         fields = in.readOptionalStringArray();
106         likeText = in.readString();
107         fuzziness = new Fuzziness(in);
108         prefixLength = in.readVInt();
109         maxQueryTerms = in.readVInt();
110         ignoreTF = in.readBoolean();
111         analyzer = in.readOptionalString();
112         failOnUnsupportedField = in.readBoolean();
113     }
114 
115     @Override
116     protected void doWriteTo(StreamOutput out) throws IOException {
117         out.writeOptionalStringArray(fields);
118         out.writeString(likeText);
119         fuzziness.writeTo(out);
120         out.writeVInt(prefixLength);
121         out.writeVInt(maxQueryTerms);
122         out.writeBoolean(ignoreTF);
123         out.writeOptionalString(analyzer);
124         out.writeBoolean(failOnUnsupportedField);
125     }
126 
127     public FuzzyLikeThisQueryBuilder fuzziness(Fuzziness fuzziness) {
128         this.fuzziness = Objects.requireNonNull(fuzziness);
129         return this;
130     }
131 
132     public FuzzyLikeThisQueryBuilder prefixLength(int prefixLength) {
133         this.prefixLength = prefixLength;
134         return this;
135     }
136 
137     public FuzzyLikeThisQueryBuilder maxQueryTerms(int maxQueryTerms) {
138         this.maxQueryTerms = maxQueryTerms;
139         return this;
140     }
141 
142     public FuzzyLikeThisQueryBuilder ignoreTF(boolean ignoreTF) {
143         this.ignoreTF = ignoreTF;
144         return this;
145     }
146 
147     /**
148      * The analyzer that will be used to analyze the text. Defaults to the analyzer associated with the fied.
149      */
150     public FuzzyLikeThisQueryBuilder analyzer(@Nullable String analyzer) {
151         this.analyzer = analyzer;
152         return this;
153     }
154 
155     /**
156      * Whether to fail or return no result when this query is run against a field which is not supported such as binary/numeric fields.
157      */
158     public FuzzyLikeThisQueryBuilder failOnUnsupportedField(boolean fail) {
159         failOnUnsupportedField = fail;
160         return this;
161     }
162 
163     @Override
164     protected void doXContent(XContentBuilder builder, Params params) throws IOException {
165         builder.startObject(NAME.getPreferredName());
166         if (fields != null) {
167             builder.startArray(FIELDS.getPreferredName());
168             for (String field : fields) {
169                 builder.value(field);
170             }
171             builder.endArray();
172         }
173         builder.field(LIKE_TEXT.getPreferredName(), likeText);
174         if (maxQueryTerms != DEFAULT_MAX_QUERY_TERMS) {
175             builder.field(MAX_QUERY_TERMS.getPreferredName(), maxQueryTerms);
176         }
177         if (!fuzziness.equals(DEFAULT_FUZZINESS)) {
178             fuzziness.toXContent(builder, params);
179         }
180         if (prefixLength != DEFAULT_PREFIX_LENGTH) {
181             builder.field(PREFIX_LENGTH.getPreferredName(), prefixLength);
182         }
183         if (ignoreTF != DEFAULT_IGNORETF) {
184             builder.field(IGNORE_TF.getPreferredName(), ignoreTF);
185         }
186         if (analyzer != null) {
187             builder.field(ANALYZER.getPreferredName(), analyzer);
188         }
189         if (failOnUnsupportedField != DEFAULT_FAIL_ON_UNSUPPORTED_FIELD) {
190             builder.field(FAIL_ON_UNSUPPORTED_FIELD.getPreferredName(), failOnUnsupportedField);
191         }
192         builder.endObject();
193     }
194 
195     @Override
196     public String getWriteableName() {
197         return NAME.getPreferredName();
198     }
199 
200     @SuppressWarnings("CyclomaticComplexity")
201     public static FuzzyLikeThisQueryBuilder fromXContent(XContentParser parser) throws IOException {
202         int maxNumTerms = DEFAULT_MAX_QUERY_TERMS;
203         List<String> fields = null;
204         String likeText = null;
205         Fuzziness fuzziness = DEFAULT_FUZZINESS;
206         int prefixLength = DEFAULT_PREFIX_LENGTH;
207         boolean ignoreTF = DEFAULT_IGNORETF;
208         String analyzer = null;
209         boolean failOnUnsupportedField = DEFAULT_FAIL_ON_UNSUPPORTED_FIELD;
210 
211         XContentParser.Token token;
212         String currentFieldName = null;
213         while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
214             if (token == XContentParser.Token.FIELD_NAME) {
215                 currentFieldName = parser.currentName();
216             } else if (token.isValue()) {
217                 if (LIKE_TEXT.match(currentFieldName, LoggingDeprecationHandler.INSTANCE)) {
218                     likeText = parser.text();
219                 } else if (MAX_QUERY_TERMS.match(currentFieldName, LoggingDeprecationHandler.INSTANCE)) {
220                     maxNumTerms = parser.intValue();
221                 } else if (IGNORE_TF.match(currentFieldName, LoggingDeprecationHandler.INSTANCE)) {
222                     ignoreTF = parser.booleanValue();
223                 } else if (FUZZINESS.match(currentFieldName, LoggingDeprecationHandler.INSTANCE)) {
224                     fuzziness = Fuzziness.parse(parser);
225                 } else if (PREFIX_LENGTH.match(currentFieldName, LoggingDeprecationHandler.INSTANCE)) {
226                     prefixLength = parser.intValue();
227                 } else if (ANALYZER.match(currentFieldName, LoggingDeprecationHandler.INSTANCE)) {
228                     analyzer = parser.text();
229                 } else if (FAIL_ON_UNSUPPORTED_FIELD.match(currentFieldName, LoggingDeprecationHandler.INSTANCE)) {
230                     failOnUnsupportedField = parser.booleanValue();
231                 } else {
232                     throw new ParsingException(parser.getTokenLocation(), "[flt] query does not support [" + currentFieldName + "]");
233                 }
234             } else if (token == XContentParser.Token.START_ARRAY) {
235                 if (FIELDS.match(currentFieldName, LoggingDeprecationHandler.INSTANCE)) {
236                     fields = new ArrayList<>();
237                     while (parser.nextToken() != XContentParser.Token.END_ARRAY) {
238                         fields.add(parser.text());
239                     }
240                     if (fields.isEmpty()) {
241                         throw new ParsingException(parser.getTokenLocation(), "fuzzy_like_this requires 'fields' to be non-empty");
242                     }
243                 } else {
244                     throw new ParsingException(parser.getTokenLocation(), "[flt] query does not support [" + currentFieldName + "]");
245                 }
246             }
247         }
248 
249         if (likeText == null) {
250             throw new ParsingException(parser.getTokenLocation(), "fuzzy_like_this requires 'like_text' to be specified");
251         }
252 
253         String[] fs = fields != null ? fields.toArray(new String[0]) : null;
254 
255         FuzzyLikeThisQueryBuilder builder = new FuzzyLikeThisQueryBuilder(fs, likeText);
256 
257         builder.analyzer(analyzer)
258             .fuzziness(fuzziness)
259             .ignoreTF(ignoreTF)
260             .maxQueryTerms(maxNumTerms)
261             .prefixLength(prefixLength)
262             .failOnUnsupportedField(failOnUnsupportedField);
263 
264         return builder;
265     }
266 
267     @Override
268     protected Query doToQuery(QueryShardContext context) {
269         final List<String> fields;
270         if (this.fields == null) {
271             fields = context.defaultFields();
272         } else {
273             fields = Arrays.stream(this.fields)
274                         .filter(x -> context.fieldMapper(x) != null)
275                         .filter(x -> context.fieldMapper(x).isSearchable())
276                         .filter(x -> SUPPORTED_TYPES.contains(context.fieldMapper(x).typeName()))
277                         .map(x -> context.fieldMapper(x).name())
278                         .collect(Collectors.toList());
279             if (fields.isEmpty()) {
280                 throw new QueryShardException(context, "fuzzy_like_this all provided fields are unknown or not tonized");
281             }
282 
283             if (failOnUnsupportedField && fields.size() != this.fields.length) {
284                 List<String> unsupportedFields = Stream.of(this.fields)
285                         .filter(x -> !fields.contains(x))
286                         .collect(Collectors.toList());
287                 throw new QueryShardException(context, "fuzzy_like_this some fields are either unknown/untokenized/non-text: {}", unsupportedFields);
288             }
289         }
290 
291         final Analyzer analyzer;
292         if (this.analyzer == null) {
293             analyzer = context.getMapperService().searchAnalyzer();
294         } else {
295             analyzer = context.getMapperService().getIndexAnalyzers().get(this.analyzer);
296         }
297 
298         FuzzyLikeThisQuery query = new FuzzyLikeThisQuery(maxQueryTerms, analyzer);
299         int maxDist = fuzziness.asDistance();
300         for (String field : fields) {
301             query.addTerms(likeText, field, maxDist, prefixLength);
302         }
303         query.setIgnoreTF(ignoreTF);
304         return query;
305     }
306 
307     @Override
308     protected boolean doEquals(FuzzyLikeThisQueryBuilder other) {
309         return Objects.equals(this.analyzer, other.analyzer)
310             && Arrays.equals(this.fields, other.fields)
311             && Objects.equals(this.failOnUnsupportedField, other.failOnUnsupportedField)
312             && Objects.equals(this.fuzziness, other.fuzziness)
313             && Objects.equals(this.ignoreTF, other.ignoreTF)
314             && Objects.equals(this.likeText, other.likeText)
315             && Objects.equals(this.maxQueryTerms, other.maxQueryTerms)
316             && Objects.equals(this.prefixLength, other.prefixLength);
317     }
318 
319     @Override
320     protected int doHashCode() {
321         return Objects.hash(analyzer, fields, failOnUnsupportedField, fuzziness,
322                 ignoreTF, likeText, maxQueryTerms, prefixLength);
323     }
324 }