View Javadoc
1   package org.wikimedia.search.extra.levenshtein;
2   
3   import java.util.Objects;
4   
5   import javax.annotation.Nullable;
6   
7   import org.apache.lucene.index.LeafReaderContext;
8   import org.apache.lucene.search.Explanation;
9   import org.apache.lucene.search.spell.LevenshteinDistance;
10  import org.elasticsearch.ElasticsearchException;
11  import org.elasticsearch.common.lucene.search.function.CombineFunction;
12  import org.elasticsearch.common.lucene.search.function.LeafScoreFunction;
13  import org.elasticsearch.common.lucene.search.function.ScoreFunction;
14  import org.elasticsearch.index.mapper.MappedFieldType;
15  import org.elasticsearch.search.lookup.FieldLookup;
16  import org.elasticsearch.search.lookup.LeafSearchLookup;
17  import org.elasticsearch.search.lookup.SearchLookup;
18  
19  /**
20   * Function score based on levenshtein distance.
21   * This function is slow because it loads string field data for <b>each</b> doc.
22   * Permits to replace the inline groovy script :
23   * <pre>
24   * return new LevensteinDistance().getDistance(srctxt, _source['content'])
25   * </pre>
26   * used by the Translate extension.
27   */
28  public class LevenshteinDistanceScore extends ScoreFunction {
29      private final MappedFieldType fieldType;
30      private final String value;
31      private final SearchLookup lookup;
32      @Nullable private final String missing;
33      private final LevenshteinDistance levenshtein = new LevenshteinDistance();
34  
35      public LevenshteinDistanceScore(SearchLookup lookup, MappedFieldType fieldType, String value, @Nullable String missing) {
36          super(CombineFunction.REPLACE);
37          this.fieldType = fieldType;
38          this.value = value;
39          this.lookup = lookup;
40          this.missing = missing;
41      }
42  
43      /**
44       * NOTE: Very slow.
45       *
46       * Loads field data from stored fields or source if not stored
47       * @return the field data
48       * @throws ElasticsearchException if the data is not found or if it's not a string.
49       */
50      private String loadValue(LeafSearchLookup leafLookup) {
51          Object value = null;
52          if (!fieldType.isStored()) {
53              value = leafLookup.source().get(fieldType.name());
54          } else {
55              FieldLookup fl = (FieldLookup) leafLookup.fields().get(fieldType.name());
56              if (fl != null) {
57                  value = fl.getValue();
58              }
59          }
60          if (value == null) {
61              if (missing == null) {
62                  throw new ElasticsearchException(fieldType.name() + " is null");
63              } else {
64                  return missing;
65              }
66          }
67          if (!(value instanceof String)) {
68              throw new ElasticsearchException("Expected String for " + fieldType.name() + ", got " + value.getClass().getName() + " instead");
69          }
70          return (String) value;
71      }
72  
73      @Override
74      public LeafScoreFunction getLeafScoreFunction(final LeafReaderContext ctx) {
75          final LeafSearchLookup leafLookup = lookup.getLeafSearchLookup(ctx);
76          return new LeafScoreFunction() {
77              @Override
78              public double score(int docId, float subQueryScore) {
79                  leafLookup.setDocument(docId);
80                  String fieldValue = loadValue(leafLookup);
81                  return levenshtein.getDistance(value, fieldValue);
82              }
83  
84              @Override
85              public Explanation explainScore(int docId, Explanation subQueryScore) {
86                  double score = score(docId, subQueryScore.getValue().floatValue());
87                  String explanation = "LevenshteinDistanceScore";
88                  explanation += " with parameters:\n text:" + value;
89                  explanation += "\n field value : " + loadValue(leafLookup);
90  
91                  Explanation scoreExp = Explanation.match(subQueryScore.getValue(), "_score: ", subQueryScore);
92                  return Explanation.match((float) score, explanation, scoreExp);
93              }
94          };
95      }
96  
97      @Override
98      public boolean needsScores() {
99          return false;
100     }
101 
102     @Override
103     protected boolean doEquals(ScoreFunction other) {
104         // class equality is checked in super.equals();
105         LevenshteinDistanceScore o = (LevenshteinDistanceScore) other;
106         return Objects.equals(fieldType, o.fieldType) &&
107                 Objects.equals(this.value, o.value) &&
108                 Objects.equals(this.missing, o.missing);
109 
110     }
111 
112     public MappedFieldType getFieldType() {
113         return fieldType;
114     }
115 
116     public String getValue() {
117         return value;
118     }
119 
120     @Nullable
121     public String getMissing() {
122         return missing;
123     }
124 
125     @Override
126     protected int doHashCode() {
127         return Objects.hash(fieldType, value, missing);
128     }
129 }