1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.wikimedia.search.extra.fuzzylike;
21
22 import java.io.IOException;
23 import java.util.ArrayList;
24 import java.util.Arrays;
25 import java.util.Collections;
26 import java.util.HashSet;
27 import java.util.List;
28 import java.util.Objects;
29 import java.util.Set;
30 import java.util.stream.Collectors;
31 import java.util.stream.Stream;
32
33 import javax.annotation.Nullable;
34
35 import org.apache.lucene.analysis.Analyzer;
36 import org.apache.lucene.search.Query;
37 import org.elasticsearch.common.ParseField;
38 import org.elasticsearch.common.ParsingException;
39 import org.elasticsearch.common.io.stream.StreamInput;
40 import org.elasticsearch.common.io.stream.StreamOutput;
41 import org.elasticsearch.common.unit.Fuzziness;
42 import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
43 import org.elasticsearch.common.xcontent.XContentBuilder;
44 import org.elasticsearch.common.xcontent.XContentParser;
45 import org.elasticsearch.index.mapper.TextFieldMapper;
46 import org.elasticsearch.index.query.AbstractQueryBuilder;
47 import org.elasticsearch.index.query.QueryShardContext;
48 import org.elasticsearch.index.query.QueryShardException;
49
50 import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
51 import lombok.Getter;
52 import lombok.Setter;
53 import lombok.experimental.Accessors;
54
55
56
57
58 @Deprecated
59 @Accessors(fluent = true, chain = true)
60 @Getter @Setter
61 @SuppressFBWarnings(value = "EI_EXPOSE_REP", justification = "We don't care about exposing representation in a builder")
62 public class FuzzyLikeThisQueryBuilder extends AbstractQueryBuilder<FuzzyLikeThisQueryBuilder> {
63 public static final ParseField NAME = new ParseField("fuzzy_like_this", "flt", "fuzzyLikeThis");
64
65 public static final ParseField FIELDS = new ParseField("fields");
66 public static final ParseField LIKE_TEXT = new ParseField("like_text", "likeText");
67 public static final ParseField PREFIX_LENGTH = new ParseField("prefix_length", "likeText");
68 public static final ParseField MAX_QUERY_TERMS = new ParseField("max_query_terms", "maxQueryTerms");
69 public static final ParseField IGNORE_TF = new ParseField("ignore_tf", "ignoreTF");
70 public static final ParseField ANALYZER = new ParseField("analyzer");
71 public static final ParseField FAIL_ON_UNSUPPORTED_FIELD = new ParseField("fail_on_unsupported_field", "failOnUnsupportedField");
72
73 public static final ParseField FUZZINESS = Fuzziness.FIELD.withDeprecation("min_similarity");
74
75 private static final int DEFAULT_PREFIX_LENGTH = 0;
76 private static final Fuzziness DEFAULT_FUZZINESS = Fuzziness.TWO;
77 private static final boolean DEFAULT_IGNORETF = false;
78 private static final boolean DEFAULT_FAIL_ON_UNSUPPORTED_FIELD = false;
79 private static final int DEFAULT_MAX_QUERY_TERMS = 25;
80
81 private static final Set<String> SUPPORTED_TYPES = new HashSet<>(Collections.singletonList(
82 TextFieldMapper.CONTENT_TYPE
83 ));
84
85 @Nullable private final String[] fields;
86 private final String likeText;
87 private Fuzziness fuzziness = DEFAULT_FUZZINESS;
88 private int prefixLength = DEFAULT_PREFIX_LENGTH;
89 private int maxQueryTerms = DEFAULT_MAX_QUERY_TERMS;
90 private boolean ignoreTF = DEFAULT_IGNORETF;
91 @Nullable private String analyzer;
92 private boolean failOnUnsupportedField = DEFAULT_FAIL_ON_UNSUPPORTED_FIELD;
93
94 public FuzzyLikeThisQueryBuilder(@Nullable String[] fields, String likeText) {
95 this.fields = fields;
96 this.likeText = Objects.requireNonNull(likeText);
97 }
98
99 public FuzzyLikeThisQueryBuilder(String likeText) {
100 this(null, likeText);
101 }
102
103 public FuzzyLikeThisQueryBuilder(StreamInput in) throws IOException {
104 super(in);
105 fields = in.readOptionalStringArray();
106 likeText = in.readString();
107 fuzziness = new Fuzziness(in);
108 prefixLength = in.readVInt();
109 maxQueryTerms = in.readVInt();
110 ignoreTF = in.readBoolean();
111 analyzer = in.readOptionalString();
112 failOnUnsupportedField = in.readBoolean();
113 }
114
115 @Override
116 protected void doWriteTo(StreamOutput out) throws IOException {
117 out.writeOptionalStringArray(fields);
118 out.writeString(likeText);
119 fuzziness.writeTo(out);
120 out.writeVInt(prefixLength);
121 out.writeVInt(maxQueryTerms);
122 out.writeBoolean(ignoreTF);
123 out.writeOptionalString(analyzer);
124 out.writeBoolean(failOnUnsupportedField);
125 }
126
127 public FuzzyLikeThisQueryBuilder fuzziness(Fuzziness fuzziness) {
128 this.fuzziness = Objects.requireNonNull(fuzziness);
129 return this;
130 }
131
132 public FuzzyLikeThisQueryBuilder prefixLength(int prefixLength) {
133 this.prefixLength = prefixLength;
134 return this;
135 }
136
137 public FuzzyLikeThisQueryBuilder maxQueryTerms(int maxQueryTerms) {
138 this.maxQueryTerms = maxQueryTerms;
139 return this;
140 }
141
142 public FuzzyLikeThisQueryBuilder ignoreTF(boolean ignoreTF) {
143 this.ignoreTF = ignoreTF;
144 return this;
145 }
146
147
148
149
150 public FuzzyLikeThisQueryBuilder analyzer(@Nullable String analyzer) {
151 this.analyzer = analyzer;
152 return this;
153 }
154
155
156
157
158 public FuzzyLikeThisQueryBuilder failOnUnsupportedField(boolean fail) {
159 failOnUnsupportedField = fail;
160 return this;
161 }
162
163 @Override
164 protected void doXContent(XContentBuilder builder, Params params) throws IOException {
165 builder.startObject(NAME.getPreferredName());
166 if (fields != null) {
167 builder.startArray(FIELDS.getPreferredName());
168 for (String field : fields) {
169 builder.value(field);
170 }
171 builder.endArray();
172 }
173 builder.field(LIKE_TEXT.getPreferredName(), likeText);
174 if (maxQueryTerms != DEFAULT_MAX_QUERY_TERMS) {
175 builder.field(MAX_QUERY_TERMS.getPreferredName(), maxQueryTerms);
176 }
177 if (!fuzziness.equals(DEFAULT_FUZZINESS)) {
178 fuzziness.toXContent(builder, params);
179 }
180 if (prefixLength != DEFAULT_PREFIX_LENGTH) {
181 builder.field(PREFIX_LENGTH.getPreferredName(), prefixLength);
182 }
183 if (ignoreTF != DEFAULT_IGNORETF) {
184 builder.field(IGNORE_TF.getPreferredName(), ignoreTF);
185 }
186 if (analyzer != null) {
187 builder.field(ANALYZER.getPreferredName(), analyzer);
188 }
189 if (failOnUnsupportedField != DEFAULT_FAIL_ON_UNSUPPORTED_FIELD) {
190 builder.field(FAIL_ON_UNSUPPORTED_FIELD.getPreferredName(), failOnUnsupportedField);
191 }
192 builder.endObject();
193 }
194
195 @Override
196 public String getWriteableName() {
197 return NAME.getPreferredName();
198 }
199
200 @SuppressWarnings("CyclomaticComplexity")
201 public static FuzzyLikeThisQueryBuilder fromXContent(XContentParser parser) throws IOException {
202 int maxNumTerms = DEFAULT_MAX_QUERY_TERMS;
203 List<String> fields = null;
204 String likeText = null;
205 Fuzziness fuzziness = DEFAULT_FUZZINESS;
206 int prefixLength = DEFAULT_PREFIX_LENGTH;
207 boolean ignoreTF = DEFAULT_IGNORETF;
208 String analyzer = null;
209 boolean failOnUnsupportedField = DEFAULT_FAIL_ON_UNSUPPORTED_FIELD;
210
211 XContentParser.Token token;
212 String currentFieldName = null;
213 while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
214 if (token == XContentParser.Token.FIELD_NAME) {
215 currentFieldName = parser.currentName();
216 } else if (token.isValue()) {
217 if (LIKE_TEXT.match(currentFieldName, LoggingDeprecationHandler.INSTANCE)) {
218 likeText = parser.text();
219 } else if (MAX_QUERY_TERMS.match(currentFieldName, LoggingDeprecationHandler.INSTANCE)) {
220 maxNumTerms = parser.intValue();
221 } else if (IGNORE_TF.match(currentFieldName, LoggingDeprecationHandler.INSTANCE)) {
222 ignoreTF = parser.booleanValue();
223 } else if (FUZZINESS.match(currentFieldName, LoggingDeprecationHandler.INSTANCE)) {
224 fuzziness = Fuzziness.parse(parser);
225 } else if (PREFIX_LENGTH.match(currentFieldName, LoggingDeprecationHandler.INSTANCE)) {
226 prefixLength = parser.intValue();
227 } else if (ANALYZER.match(currentFieldName, LoggingDeprecationHandler.INSTANCE)) {
228 analyzer = parser.text();
229 } else if (FAIL_ON_UNSUPPORTED_FIELD.match(currentFieldName, LoggingDeprecationHandler.INSTANCE)) {
230 failOnUnsupportedField = parser.booleanValue();
231 } else {
232 throw new ParsingException(parser.getTokenLocation(), "[flt] query does not support [" + currentFieldName + "]");
233 }
234 } else if (token == XContentParser.Token.START_ARRAY) {
235 if (FIELDS.match(currentFieldName, LoggingDeprecationHandler.INSTANCE)) {
236 fields = new ArrayList<>();
237 while (parser.nextToken() != XContentParser.Token.END_ARRAY) {
238 fields.add(parser.text());
239 }
240 if (fields.isEmpty()) {
241 throw new ParsingException(parser.getTokenLocation(), "fuzzy_like_this requires 'fields' to be non-empty");
242 }
243 } else {
244 throw new ParsingException(parser.getTokenLocation(), "[flt] query does not support [" + currentFieldName + "]");
245 }
246 }
247 }
248
249 if (likeText == null) {
250 throw new ParsingException(parser.getTokenLocation(), "fuzzy_like_this requires 'like_text' to be specified");
251 }
252
253 String[] fs = fields != null ? fields.toArray(new String[0]) : null;
254
255 FuzzyLikeThisQueryBuilder builder = new FuzzyLikeThisQueryBuilder(fs, likeText);
256
257 builder.analyzer(analyzer)
258 .fuzziness(fuzziness)
259 .ignoreTF(ignoreTF)
260 .maxQueryTerms(maxNumTerms)
261 .prefixLength(prefixLength)
262 .failOnUnsupportedField(failOnUnsupportedField);
263
264 return builder;
265 }
266
267 @Override
268 protected Query doToQuery(QueryShardContext context) {
269 final List<String> fields;
270 if (this.fields == null) {
271 fields = context.defaultFields();
272 } else {
273 fields = Arrays.stream(this.fields)
274 .filter(x -> context.fieldMapper(x) != null)
275 .filter(x -> context.fieldMapper(x).isSearchable())
276 .filter(x -> SUPPORTED_TYPES.contains(context.fieldMapper(x).typeName()))
277 .map(x -> context.fieldMapper(x).name())
278 .collect(Collectors.toList());
279 if (fields.isEmpty()) {
280 throw new QueryShardException(context, "fuzzy_like_this all provided fields are unknown or not tonized");
281 }
282
283 if (failOnUnsupportedField && fields.size() != this.fields.length) {
284 List<String> unsupportedFields = Stream.of(this.fields)
285 .filter(x -> !fields.contains(x))
286 .collect(Collectors.toList());
287 throw new QueryShardException(context, "fuzzy_like_this some fields are either unknown/untokenized/non-text: {}", unsupportedFields);
288 }
289 }
290
291 final Analyzer analyzer;
292 if (this.analyzer == null) {
293 analyzer = context.getMapperService().searchAnalyzer();
294 } else {
295 analyzer = context.getMapperService().getIndexAnalyzers().get(this.analyzer);
296 }
297
298 FuzzyLikeThisQuery query = new FuzzyLikeThisQuery(maxQueryTerms, analyzer);
299 int maxDist = fuzziness.asDistance();
300 for (String field : fields) {
301 query.addTerms(likeText, field, maxDist, prefixLength);
302 }
303 query.setIgnoreTF(ignoreTF);
304 return query;
305 }
306
307 @Override
308 protected boolean doEquals(FuzzyLikeThisQueryBuilder other) {
309 return Objects.equals(this.analyzer, other.analyzer)
310 && Arrays.equals(this.fields, other.fields)
311 && Objects.equals(this.failOnUnsupportedField, other.failOnUnsupportedField)
312 && Objects.equals(this.fuzziness, other.fuzziness)
313 && Objects.equals(this.ignoreTF, other.ignoreTF)
314 && Objects.equals(this.likeText, other.likeText)
315 && Objects.equals(this.maxQueryTerms, other.maxQueryTerms)
316 && Objects.equals(this.prefixLength, other.prefixLength);
317 }
318
319 @Override
320 protected int doHashCode() {
321 return Objects.hash(analyzer, fields, failOnUnsupportedField, fuzziness,
322 ignoreTF, likeText, maxQueryTerms, prefixLength);
323 }
324 }