Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
97.70% covered (success)
97.70%
85 / 87
75.00% covered (warning)
75.00%
6 / 8
CRAP
0.00% covered (danger)
0.00%
0 / 1
CustomMatchFeature
97.70% covered (success)
97.70%
85 / 87
75.00% covered (warning)
75.00%
6 / 8
23
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getKeywords
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 doApply
93.75% covered (success)
93.75%
15 / 16
0.00% covered (danger)
0.00%
0 / 1
3.00
 applyFunctionScore
92.86% covered (success)
92.86%
13 / 14
0.00% covered (danger)
0.00%
0 / 1
3.00
 combineQueries
100.00% covered (success)
100.00%
10 / 10
100.00% covered (success)
100.00%
1 / 1
2
 parseValue
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
2
 parseSearchString
100.00% covered (success)
100.00%
21 / 21
100.00% covered (success)
100.00%
1 / 1
7
 generateParameterizedQueries
100.00% covered (success)
100.00%
19 / 19
100.00% covered (success)
100.00%
1 / 1
4
1<?php
2
3namespace Wikibase\MediaInfo\Search\Feature;
4
5use CirrusSearch\Query\SimpleKeywordFeature;
6use CirrusSearch\Search\SearchContext;
7use CirrusSearch\WarningCollector;
8use Elastica\Query\AbstractQuery;
9use Elastica\Query\BoolQuery;
10use Elastica\Query\FunctionScore;
11use Elastica\Query\MatchQuery;
12use Elastica\Script\Script;
13use RuntimeException;
14
/**
 * Handles the search keyword 'custommatch:'
 *
 * Allows the user to search using custom-configured Match queries. The user supplies a name for
 * the custom profile plus a search term, and a Bool query is created comprising Matches on the
 * fields specified in the config for the custom profile
 *
 * The custom profiles are specified in config like this
 *     $wgCirrusSearchCustomMatchFeature = [
 *         string $profileName => [
 *             'fields' => [
 *                 string $fieldName => [
 *                     [ 'prefix' => string $prefix, 'boost' => float $boost ],
 *                     ...
 *                 ],
 *                 ...
 *             ],
 *         ]
 *         ...
 *     ];
 *
 * 'prefix' defaults to an empty string and 'boost' defaults to 1 when omitted. An entry of
 * 'fields' may also be a plain field name (i.e. a non-array value) instead of
 * `$fieldName => [ ... ]`; that field is then matched against the bare search term with a
 * boost of 1.
 *
 * So for example if we search using `custommatch:depicts_or_linked_from=Q999` and the config
 * looks like this:
 *     [
 *         'depicts_or_linked_from' => [
 *             'fields' => [
 *                 'statement_keywords' => [
 *                     [ 'prefix' => 'P180=', 'boost' => 10 ],
 *                 ],
 *                 'weighted_tags' => [
 *                     [ 'prefix' => 'image.linked.from.wikidata.p18/', 'boost' => 9 ],
 *                     [ 'prefix' => 'image.linked.from.wikidata.p373/', 'boost' => 8 ],
 *                 ]
 *             ]
 *         ]
 *     ];
 *
 * Then the resulting query will look like this:
 *     {
 *         "query": {
 *             "bool": {
 *                 "minimum_should_match": 1,
 *                 "should": [
 *                     {
 *                         "match": {
 *                             "statement_keywords": {
 *                                 "query": "P180=Q999",
 *                                 "boost": 10
 *                             }
 *                         }
 *                     },
 *                     {
 *                         "match": {
 *                             "weighted_tags": {
 *                                 "query": "image.linked.from.wikidata.p18\/Q999",
 *                                 "boost": 9
 *                             }
 *                         }
 *                     },
 *                     {
 *                         "match": {
 *                             "weighted_tags": {
 *                                 "query": "image.linked.from.wikidata.p373\/Q999",
 *                                 "boost": 8
 *                             }
 *                         }
 *                     }
 *                 ]
 *             }
 *         }
 *     }
 *
 * A profile can also have a function score associated with it. Configuration looks like this:
 *     string $profileName => [
 *         'fields' => [ ... ],
 *         'functionScore' => [
 *             'scriptCode' => string $code,
 *             'params' => [],
 *         ]
 *     ]
 *
 * In our previous example if we have the following config for the profile 'depicts_or_linked_from':
 *     [
 *         'depicts_or_linked_from' => [
 *             'fields' => [ (as above) ],
 *             'functionScore' => [
 *                 'scriptCode' => '100 / ( 1 + exp( -1 * ( _score + intercept ) ) )',
 *                 'params' => [ 'intercept' => -1.11111 ]
 *             ]
 *         ]
 *     ];
 *
 * ... then the resulting query will look like this:
 *     {
 *         "function_score": {
 *             "query": { (as above) },
 *             "functions": [
 *                 {
 *                     "script_score": {
 *                         "script": {
 *                             "source": "100 \/ ( 1 + exp( -1 * ( _score + intercept ) ) )",
 *                             "params": {
 *                                 "intercept": -1.11111
 *                             },
 *                             "lang": "expression"
 *                         }
 *                     }
 *                 }
 *             ]
 *         }
 *     }
 *
 * @uses CirrusSearch
 * @see https://phabricator.wikimedia.org/T296309
 */
class CustomMatchFeature extends SimpleKeywordFeature {

    /** @var array Custom match profiles, keyed by profile name (see the class documentation) */
    private $featureConfig;

    /**
     * @param array $featureConfig Custom match profiles, keyed by profile name
     */
    public function __construct( array $featureConfig ) {
        $this->featureConfig = $featureConfig;
    }

    /**
     * @return string[]
     */
    protected function getKeywords() {
        return [ 'custommatch' ];
    }

    /**
     * Builds the query for a `custommatch:<profile name>=<search term>` keyword.
     *
     * If the value cannot be turned into any query (bad format, unknown profile, or a profile
     * without fields) the context is flagged as having no possible results.
     *
     * @param SearchContext $context
     * @param string $key The keyword
     * @param string $value The value attached to the keyword with quotes stripped
     * @param string $quotedValue The original value in the search string, including quotes if used
     * @param bool $negated Is the search negated? Not used to generate the returned AbstractQuery,
     *  that will be negated as necessary. Used for any other building/context necessary.
     * @return array Two element array, first an AbstractQuery or null to apply to the
     *  query. Second a boolean indicating if the quotedValue should be kept in the search
     *  string.
     * @throws RuntimeException if the requested profile has no usable 'fields' array
     */
    protected function doApply( SearchContext $context, $key, $value, $quotedValue, $negated ) {
        // The search context is also what collects any warnings raised while parsing.
        $queries = $this->parseValue( $key, $value, $quotedValue, '', '', $context );
        if ( $queries === [] ) {
            $context->setResultsPossible( false );
            return [ null, false ];
        }

        $query = $this->combineQueries( $queries );
        if ( $negated ) {
            // Hand the query back; negating it is left to the caller.
            return [ $query, false ];
        }

        // Non-negated queries are registered on the context directly, so nothing is handed back.
        $context->addNonTextQuery( $query );
        return [ null, false ];
    }

    /**
     * Wraps query in a FunctionScore if the profile configures one
     *
     * The query is returned unchanged when the profile has no 'functionScore' config or that
     * config has no 'scriptCode'.
     *
     * @param string $profileName
     * @param AbstractQuery $query
     * @return AbstractQuery
     */
    private function applyFunctionScore( string $profileName, AbstractQuery $query
    ): AbstractQuery {
        $config = $this->featureConfig[$profileName]['functionScore'] ?? null;
        // isset() is also false when $config itself is null, so this covers both a missing
        // 'functionScore' and a missing 'scriptCode'.
        if ( !isset( $config['scriptCode'] ) ) {
            return $query;
        }

        $script = new Script(
            $config['scriptCode'],
            $config['params'] ?? [],
            'expression'
        );

        return ( new FunctionScore() )
            ->setQuery( $query )
            ->addScriptScoreFunction( $script );
    }

    /**
     * Combines parameterized queries into single query containing MatchQuery objects
     *
     * Every parameterized query becomes a "should" clause of a BoolQuery of which at least one
     * must match. The result is then passed through applyFunctionScore().
     *
     * @param array[] $queries queries to combine. See generateParameterizedQueries() for fields.
     * @return AbstractQuery
     */
    private function combineQueries( array $queries ): AbstractQuery {
        $bool = new BoolQuery();
        $bool->setMinimumShouldMatch( 1 );

        // Each row carries its profile name; the one from the last row is used for the
        // function score lookup. Stays empty (matching no profile) if there are no rows.
        $profileName = '';
        foreach ( $queries as $query ) {
            $bool->addShould( new MatchQuery(
                $query['field'],
                [ 'query' => $query['string'], 'boost' => $query['boost'] ]
            ) );
            $profileName = $query['profileName'];
        }

        return $this->applyFunctionScore( $profileName, $bool );
    }

    /**
     * Parses a `<profile name>=<search term>` value into parameterized queries.
     *
     * @param string $key
     * @param string $value
     * @param string $quotedValue Not used by this implementation
     * @param string $valueDelimiter Not used by this implementation
     * @param string $suffix Not used by this implementation
     * @param WarningCollector $warningCollector
     * @return array Empty if the value could not be parsed or names an unknown profile,
     *  otherwise [
     *         [
     *             'field' => document field to run the query against,
     *             'string' => string to search for,
     *             'boost' => the boost for the query,
     *             'profileName' => name of the profile the query was generated from
     *         ],
     *         ...
     *     ]
     * @throws RuntimeException if the requested profile has no usable 'fields' array
     */
    public function parseValue(
        $key,
        $value,
        $quotedValue,
        $valueDelimiter,
        $suffix,
        WarningCollector $warningCollector
    ) {
        $parsed = $this->parseSearchString( $value, $key, $warningCollector );
        if ( $parsed === null ) {
            return [];
        }

        return $this->generateParameterizedQueries(
            $parsed['profileName'],
            $parsed['searchTerm']
        );
    }

    /**
     * We expect the search string to be in the form <profile name>=<search term>. This function
     * checks the format and if it's ok returns an array with the profile name and search
     * terms separated
     *
     * A warning is added to the collector (when one is given) if the format is wrong or the
     * profile is not configured.
     *
     * @param string $searchString
     * @param string $keyword
     * @param WarningCollector|null $warningCollector
     * @return array|null [ 'profileName' => string, 'searchTerm' => string ], or null if the
     *  search string is malformed or the profile is unknown
     * @throws RuntimeException if the profile exists but has no 'fields' array
     */
    private function parseSearchString(
        string $searchString,
        string $keyword = '',
        ?WarningCollector $warningCollector = null
    ): ?array {
        $matches = [];
        if ( !preg_match( '/^(\w+)=(.+)$/i', $searchString, $matches ) ) {
            if ( $warningCollector !== null ) {
                $warningCollector->addWarning(
                    'wikibasemediainfo-custommatch-feature-invalid-term',
                    $keyword
                );
            }
            return null;
        }

        $profileName = $matches[1];
        $searchTerm = $matches[2];

        if ( !isset( $this->featureConfig[ $profileName ] ) ) {
            if ( $warningCollector !== null ) {
                $warningCollector->addWarning(
                    'wikibasemediainfo-custommatch-feature-no-profile',
                    $profileName
                );
            }
            return null;
        }

        // A profile without a 'fields' array is a configuration error rather than a user
        // error, hence an exception instead of a warning.
        $fields = $this->featureConfig[ $profileName ][ 'fields' ] ?? null;
        if ( !is_array( $fields ) ) {
            throw new RuntimeException( 'The CustomMatch cirrussearch feature is misconfigured' );
        }

        return [
            'profileName' => $profileName,
            'searchTerm' => $searchTerm,
        ];
    }

    /**
     * Expands the 'fields' config of a profile into one parameterized query per config row.
     *
     * Expects the profile to exist and to have an array of 'fields'; parseSearchString()
     * checks both.
     *
     * @param string $profileName
     * @param string $searchTerm
     * @return array[] [
     *         [
     *             'field' => document field to run the query against,
     *             'string' => string to search for (configured prefix + search term),
     *             'boost' => the boost for the query,
     *             'profileName' => name of the profile the query was generated from
     *         ],
     *         ...
     *     ]
     */
    private function generateParameterizedQueries( string $profileName, string $searchTerm
    ): array {
        $queries = [];
        foreach ( $this->featureConfig[ $profileName ][ 'fields' ] as $field => $config ) {
            if ( !is_array( $config ) ) {
                // Short form: the value itself is the field name, no prefix, default boost.
                $queries[] = [
                    'field' => $config,
                    'string' => $searchTerm,
                    'boost' => 1,
                    'profileName' => $profileName,
                ];
                continue;
            }

            foreach ( $config as $configRow ) {
                $queries[] = [
                    'field' => $field,
                    'string' => ( $configRow['prefix'] ?? '' ) . $searchTerm,
                    'boost' => $configRow['boost'] ?? 1,
                    'profileName' => $profileName,
                ];
            }
        }
        return $queries;
    }
}