Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
97.70% |
85 / 87 |
|
75.00% |
6 / 8 |
CRAP | |
0.00% |
0 / 1 |
CustomMatchFeature | |
97.70% |
85 / 87 |
|
75.00% |
6 / 8 |
23 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getKeywords | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
doApply | |
93.75% |
15 / 16 |
|
0.00% |
0 / 1 |
3.00 | |||
applyFunctionScore | |
92.86% |
13 / 14 |
|
0.00% |
0 / 1 |
3.00 | |||
combineQueries | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
2 | |||
parseValue | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
parseSearchString | |
100.00% |
21 / 21 |
|
100.00% |
1 / 1 |
7 | |||
generateParameterizedQueries | |
100.00% |
19 / 19 |
|
100.00% |
1 / 1 |
4 |
1 | <?php |
2 | |
3 | namespace Wikibase\MediaInfo\Search\Feature; |
4 | |
5 | use CirrusSearch\Query\SimpleKeywordFeature; |
6 | use CirrusSearch\Search\SearchContext; |
7 | use CirrusSearch\WarningCollector; |
8 | use Elastica\Query\AbstractQuery; |
9 | use Elastica\Query\BoolQuery; |
10 | use Elastica\Query\FunctionScore; |
11 | use Elastica\Query\MatchQuery; |
12 | use Elastica\Script\Script; |
13 | use RuntimeException; |
14 | |
15 | /** |
16 | * Handles the search keyword 'custommatch:' |
17 | * |
18 | * Allows the user to search using custom-configured Match queries. The user supplies a name for |
19 | * the custom profile plus a search term, and a Bool query is created comprising Matches on the |
20 | * fields specified in the config for the custom profile |
21 | * |
22 | * The custom profiles are specified in config like this |
23 | * $wgCirrusSearchCustomMatchFeature = [ |
24 | * string $profileName => [ |
25 | * 'fields' => [ |
26 | * string $fieldName => [ |
27 | * [ 'prefix' => string $prefix, 'boost' => float $boost ], |
28 | * ... |
29 | * ], |
30 | * ... |
31 | * ], |
32 | * ] |
33 | * ... |
34 | * ]; |
35 | * |
36 | * So for example if we search using `custommatch:depicts_or_linked_from=Q999` and the config |
37 | * looks like this: |
38 | * [ |
39 | * 'depicts_or_linked_from' => [ |
40 | * 'fields' => [ |
41 | * 'statement_keywords' => [ |
42 | * [ 'prefix' => 'P180=', 'boost' => 10 ], |
43 | * ], |
44 | * 'weighted_tags' => [ |
45 | * [ 'prefix' => 'image.linked.from.wikidata.p18/', 'boost' => 9 ], |
46 | * [ 'prefix' => 'image.linked.from.wikidata.p373/', 'boost' => 8 ], |
47 | * ] |
48 | * ] |
49 | * ] |
50 | * ]; |
51 | * |
52 | * Then the resulting query will look like this: |
53 | * { |
54 | * "query": { |
55 | * "bool": { |
56 | * "should": [ |
57 | * { |
58 | * "match": { |
59 | * "statement_keywords": { |
60 | * "query": "P180=Q999", |
61 | * "boost": 10 |
62 | * } |
63 | * } |
64 | * }, |
65 | * { |
66 | * "match": { |
67 | * "weighted_tags": { |
68 | * "query": "image.linked.from.wikidata.p18\/Q999", |
69 | * "boost": 9 |
70 | * } |
71 | * } |
72 | * }, |
73 | * { |
74 | * "match": { |
75 | * "weighted_tags": { |
76 | * "query": "image.linked.from.wikidata.p373\/Q999", |
77 | * "boost": 8 |
78 | * } |
79 | * } |
80 | * }
81 | * ] |
82 | * } |
83 | * } |
84 | * } |
85 | * |
86 | * A profile can also have a function score associated with it. Configuration looks like this: |
87 | * string $profileName => [ |
88 | * 'fields' => [ ... ], |
89 | * 'functionScore' => [ |
90 | * 'scriptCode' => string $code, |
91 | * 'params' => [], |
92 | * ] |
93 | * ] |
94 | * |
95 | * In our previous example if we have the following config for the profile 'depicts_or_linked_from': |
96 | * [ |
97 | * 'depicts_or_linked_from' => [ |
98 | * 'fields' => [ (as above) ], |
99 | * 'functionScore' => [ |
100 | * 'scriptCode' => '100 / ( 1 + exp( -1 * ( _score + intercept ) ) )', |
101 | * 'params' => [ 'intercept' => -1.11111 ] |
102 | * ] |
103 | * ] |
104 | * ]; |
105 | * |
106 | * ... then the resulting query will look like this: |
107 | * { |
108 | * "function_score": { |
109 | * "query": { (as above) }, |
110 | * "functions": [ |
111 | * { |
112 | * "script_score": { |
113 | * "script": { |
114 | * "source": "100 \/ ( 1 + exp( -1 * ( _score + intercept ) ) )", |
115 | * "params": { |
116 | * "intercept": -1.11111 |
117 | * }, |
118 | * "lang": "expression" |
119 | * } |
120 | * } |
121 | * } |
122 | * ] |
123 | * } |
124 | * } |
125 | * |
126 | * @uses CirrusSearch |
127 | * @see https://phabricator.wikimedia.org/T296309 |
128 | */ |
class CustomMatchFeature extends SimpleKeywordFeature {

	/**
	 * Custom match profiles, keyed by profile name. Each profile holds a 'fields' array and,
	 * optionally, a 'functionScore' array.
	 *
	 * @var array
	 */
	private $featureConfig;

	/**
	 * @param array $featureConfig Custom match profiles, keyed by profile name
	 */
	public function __construct( array $featureConfig ) {
		$this->featureConfig = $featureConfig;
	}

	/**
	 * @return string[]
	 */
	protected function getKeywords() {
		return [ 'custommatch' ];
	}

	/**
	 * @param SearchContext $context
	 * @param string $key The keyword
	 * @param string $value The value attached to the keyword with quotes stripped
	 * @param string $quotedValue The original value in the search string, including quotes if used
	 * @param bool $negated Is the search negated? Not used to generate the returned AbstractQuery,
	 *  that will be negated as necessary. Used for any other building/context necessary.
	 * @return array Two element array, first an AbstractQuery or null to apply to the
	 *  query. Second a boolean indicating if the quotedValue should be kept in the search
	 *  string.
	 */
	protected function doApply( SearchContext $context, $key, $value, $quotedValue, $negated ) {
		// The search context doubles as the warning collector for parsing problems.
		$queries = $this->parseValue( $key, $value, $quotedValue, '', '', $context );
		if ( count( $queries ) === 0 ) {
			// No queries were produced (invalid value, unknown profile, or a profile
			// without any fields), so nothing can match.
			$context->setResultsPossible( false );
			return [ null, false ];
		}

		$query = $this->combineQueries( $queries );
		if ( $negated ) {
			// Return the query instead of adding it to the context ourselves.
			return [ $query, false ];
		}

		$context->addNonTextQuery( $query );
		return [ null, false ];
	}

	/**
	 * Wraps the query in a FunctionScore if the profile configures a 'functionScore' with a
	 * 'scriptCode'; otherwise returns the query unchanged.
	 *
	 * @param string $profileName
	 * @param AbstractQuery $query
	 * @return AbstractQuery
	 */
	private function applyFunctionScore( string $profileName, AbstractQuery $query
	): AbstractQuery {
		$config = $this->featureConfig[$profileName]['functionScore'] ?? null;
		// isset() is false both when there is no function score config at all and when
		// the config does not carry any script code.
		if ( !isset( $config['scriptCode'] ) ) {
			return $query;
		}

		return ( new FunctionScore() )
			->setQuery( $query )
			->addScriptScoreFunction(
				new Script(
					$config['scriptCode'],
					$config['params'] ?? [],
					'expression'
				)
			);
	}

	/**
	 * Combines parameterized queries into a single query containing MatchQuery objects
	 *
	 * Every parameterized query becomes a "should" clause of a BoolQuery of which at least
	 * one clause must match. The result is then passed through applyFunctionScore().
	 *
	 * @param array[] $queries queries to combine. See generateParameterizedQueries() for fields.
	 * @return AbstractQuery
	 */
	private function combineQueries( array $queries ): AbstractQuery {
		$profileName = '';
		$return = new BoolQuery();
		$return->setMinimumShouldMatch( 1 );
		foreach ( $queries as $query ) {
			$return->addShould( new MatchQuery(
				$query['field'],
				[ 'query' => $query['string'], 'boost' => $query['boost'] ]
			) );
			// The profile name of the last query is the one used for the function score.
			$profileName = $query['profileName'];
		}
		return $this->applyFunctionScore( $profileName, $return );
	}

	/**
	 * @param string $key
	 * @param string $value
	 * @param string $quotedValue
	 * @param string $valueDelimiter
	 * @param string $suffix
	 * @param WarningCollector $warningCollector
	 * @return array Empty if the value could not be parsed, otherwise [
	 *  [
	 *      'field' => document field to run the query against,
	 *      'string' => string to search for,
	 *      'boost' => the boost for the query,
	 *      'profileName' => name of the custom profile the query was generated from
	 *  ],
	 *  ...
	 * ]
	 */
	public function parseValue(
		$key,
		$value,
		$quotedValue,
		$valueDelimiter,
		$suffix,
		WarningCollector $warningCollector
	) {
		$parsedSearchString = $this->parseSearchString( $value, $key, $warningCollector );
		if ( $parsedSearchString === null ) {
			return [];
		}
		return $this->generateParameterizedQueries(
			$parsedSearchString['profileName'], $parsedSearchString['searchTerm'] );
	}

	/**
	 * We expect the search string to be in the form <profile name>=<search term>. This function
	 * checks the format and if it's ok returns an array with the profile name and search
	 * terms separated
	 *
	 * A warning is added to the collector (if one is supplied) when the format is invalid or
	 * when the profile name is not present in the config.
	 *
	 * @param string $searchString
	 * @param string $keyword
	 * @param WarningCollector|null $warningCollector
	 * @return array|null [ 'profileName' => string, 'searchTerm' => string ], or null if the
	 *  search string is invalid or the profile is unknown
	 * @throws RuntimeException if the profile exists but has no 'fields' array
	 */
	private function parseSearchString( string $searchString, string $keyword = '',
		?WarningCollector $warningCollector = null
	): ?array {
		if ( !preg_match( '/^(\w+)=(.+)$/i', $searchString, $matches ) ) {
			if ( $warningCollector !== null ) {
				$warningCollector->addWarning(
					'wikibasemediainfo-custommatch-feature-invalid-term',
					$keyword
				);
			}
			return null;
		}

		$profileName = $matches[1];
		$searchTerm = $matches[2];
		if ( !isset( $this->featureConfig[ $profileName ] ) ) {
			if ( $warningCollector !== null ) {
				$warningCollector->addWarning( 'wikibasemediainfo-custommatch-feature-no-profile',
					$profileName );
			}
			return null;
		}

		// A known profile without a usable 'fields' array is a configuration error rather
		// than a user error, so it is not reported as a warning.
		if ( !isset( $this->featureConfig[ $profileName ][ 'fields' ] ) ||
			!is_array( $this->featureConfig[ $profileName ][ 'fields' ] )
		) {
			throw new RuntimeException( 'The CustomMatch cirrussearch feature is misconfigured' );
		}

		return [
			'profileName' => $profileName,
			'searchTerm' => $searchTerm,
		];
	}

	/**
	 * Expands the 'fields' config of a profile into one parameterized query per match
	 *
	 * Two shapes of field config are handled:
	 * - string $fieldName => array of rows: each row produces a query against $fieldName,
	 *   with the row's 'prefix' (default '') prepended to the search term and the row's
	 *   'boost' (default 1)
	 * - any key => non-array value: the value itself is used as the field name, with the
	 *   unmodified search term and a boost of 1
	 *
	 * @param string $profileName Name of a profile with a 'fields' array in the config
	 * @param string $searchTerm
	 * @return array[] Each element has the keys 'field', 'string', 'boost' and 'profileName'
	 */
	private function generateParameterizedQueries( string $profileName, string $searchTerm
	): array {
		$queries = [];
		foreach ( $this->featureConfig[ $profileName ][ 'fields' ] as $field => $config ) {
			if ( is_array( $config ) ) {
				foreach ( $config as $configRow ) {
					$queries[] = [
						'field' => $field,
						'string' => ( $configRow['prefix'] ?? '' ) . $searchTerm,
						'boost' => $configRow['boost'] ?? 1,
						'profileName' => $profileName,
					];
				}
			} else {
				$queries[] = [
					'field' => $config,
					'string' => $searchTerm,
					'boost' => 1,
					'profileName' => $profileName,
				];
			}
		}
		return $queries;
	}
}