Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 72
0.00% covered (danger)
0.00%
0 / 3
CRAP
0.00% covered (danger)
0.00%
0 / 1
WordsQueryNodeHandler
0.00% covered (danger)
0.00%
0 / 72
0.00% covered (danger)
0.00%
0 / 3
72
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 30
0.00% covered (danger)
0.00%
0 / 1
6
 transform
0.00% covered (danger)
0.00%
0 / 36
0.00% covered (danger)
0.00%
0 / 1
30
 getTermScoringFieldQueryBuilder
0.00% covered (danger)
0.00%
0 / 6
0.00% covered (danger)
0.00%
0 / 1
2
1<?php
2
3namespace Wikibase\MediaInfo\Search\ASTQueryBuilder;
4
5use CirrusSearch\Parser\AST\WordsQueryNode;
6use Elastica\Query\AbstractQuery;
7use Elastica\Query\BoolQuery;
8use Elastica\Query\DisMax;
9use Elastica\Query\MatchQuery;
10use Elastica\Query\MultiMatch;
11
/**
 * Builds the Elastica query for a words (free-text) node of the parsed
 * search query.
 *
 * The resulting query scores documents on the original search words and on
 * any supplied synonyms (each through its own set of per-field match
 * clauses), and additionally accepts matches produced by the Wikibase
 * entities handler.
 */
class WordsQueryNodeHandler implements ParsedNodeHandlerInterface {
    /** @var WordsQueryNode */
    private $node;

    /** @var WikibaseEntitiesHandler */
    private $entitiesHandler;

    /** @var float[] */
    private $decays;

    /**
     * Field iterators keyed by term: one for the original search words and
     * one per synonym.
     *
     * @var FieldIterator[]
     */
    private $termScoringFieldIterators = [];

    /**
     * @param WordsQueryNode $node Node whose words are being searched for
     * @param WikibaseEntitiesHandler $entitiesHandler Handler whose query is
     *  added as an alternative "should" clause in transform()
     * @param array $languages Languages handed to the FieldIterator of the
     *  original search words
     * @param array $synonyms Map of synonym term => score; the score is
     *  multiplied into every field boost for that synonym
     * @param array $synonymsLanguages Map of synonym term => languages handed
     *  to that synonym's FieldIterator (defaults to none when missing)
     * @param array $stemmingSettings Passed through to every FieldIterator
     * @param array $boosts Map of field name => boost; only fields listed in
     *  FieldIterator::LANGUAGE_AGNOSTIC_FIELDS or
     *  FieldIterator::LANGUAGE_AWARE_FIELDS are used
     * @param array $decays Passed through to every FieldIterator; the
     *  'synonyms' entry (if any) is also used as dis_max tie breaker
     */
    public function __construct(
        WordsQueryNode $node,
        WikibaseEntitiesHandler $entitiesHandler,
        array $languages,
        array $synonyms,
        array $synonymsLanguages,
        array $stemmingSettings,
        array $boosts,
        array $decays
    ) {
        $this->node = $node;
        $this->entitiesHandler = $entitiesHandler;
        $this->decays = $decays;

        // restrict the boosts to the fields FieldIterator declares
        $fulltextBoosts = array_intersect_key(
            $boosts,
            array_flip(
                array_merge( FieldIterator::LANGUAGE_AGNOSTIC_FIELDS,
                    FieldIterator::LANGUAGE_AWARE_FIELDS )
            )
        );
        $fields = array_keys( $fulltextBoosts );
        $words = (string)$node->getWords();

        $this->termScoringFieldIterators[$words] = new FieldIterator(
            $this->getTermScoringFieldQueryBuilder( $words ),
            $fields,
            $languages,
            $stemmingSettings,
            $fulltextBoosts,
            $decays
        );

        // create iterators for all synonyms, where the scores are applied to the boost
        foreach ( $synonyms as $term => $score ) {
            // PHP turns numeric-string array keys (e.g. "2020") into integers;
            // the query builders below expect the term as text
            $term = (string)$term;

            // a synonym identical to the search words must not replace the
            // (full-boost) iterator that was registered for the words above
            if ( $term === $words ) {
                continue;
            }

            $this->termScoringFieldIterators[$term] = new FieldIterator(
                $this->getTermScoringFieldQueryBuilder( $term ),
                $fields,
                $synonymsLanguages[$term] ?? [],
                $stemmingSettings,
                array_map( static function ( $boost ) use ( $score ) {
                    return $boost * $score;
                }, $fulltextBoosts ),
                $decays
            );
        }
    }

    /**
     * Assembles the complete query for this node.
     *
     * @return AbstractQuery Bool query of which at least one "should" clause
     *  must match: either the words/synonyms dis_max, or the query built by
     *  the entities handler
     */
    public function transform(): AbstractQuery {
        $words = (string)$this->node->getWords();

        // we (may) have multiple terms to match (the original search term,
        // but also synonyms), which we'll wrap in a dis_max to ensure that
        // the scores don't spiral out of control and grow too large with
        // too many synonyms
        // that said, if/when a document matches multiple synonyms, that's
        // a fairly strong indication that it's a pretty good match for the
        // subject, so we'll add a tiebreaker to allow some additional boost
        // (though these additional matches won't be worth as much)
        $termsDisMax = new DisMax();
        $termsDisMax->setTieBreaker( $this->decays['synonyms'] ?? 0 );

        // search term: filter on all words being present, score per field
        $termsDisMax->addQuery(
            $this->buildScoringQuery(
                ( new MultiMatch() )
                    ->setQuery( $words )
                    ->setFields( [ 'all', 'all.plain' ] )
                    ->setOperator( MultiMatch::OPERATOR_AND ),
                $words
            )
        );

        // synonyms for search term
        // this is very similar to the original search term above, except
        // that we'll be more strict in the filter & expect a phrase match
        // they'll be wrapped inside another dis_max group to make sure
        // that only the single best synonym can contribute to the score,
        // because synonyms are often minor variations of similar text
        // and could lead to massively inflated text matches in such case
        $synonyms = array_diff( array_keys( $this->termScoringFieldIterators ), [ $words ] );
        if ( $synonyms !== [] ) {
            $synonymsDisMax = new DisMax();
            foreach ( $synonyms as $synonym ) {
                // array keys of numeric terms come back as integers
                $synonym = (string)$synonym;
                $synonymsDisMax->addQuery(
                    $this->buildScoringQuery(
                        ( new MultiMatch() )
                            ->setQuery( $synonym )
                            ->setFields( [ 'all' ] )
                            // needs to be exact (phrase) match to avoid, as much as
                            // possible, false positives
                            ->setType( 'phrase' ),
                        $synonym
                    )
                );
            }
            $termsDisMax->addQuery( $synonymsDisMax );
        }

        $query = new BoolQuery();
        $query->setMinimumShouldMatch( 1 );
        // search term + synonyms
        $query->addShould( $termsDisMax );
        // wikibase entities
        $query->addShould( $this->entitiesHandler->transform() );

        return $query;
    }

    /**
     * Wraps a filter and the per-field scoring clauses of one term in a
     * bool query; the scoring clauses are optional (minimum_should_match 0),
     * so only the filter decides whether a document matches.
     *
     * @param AbstractQuery $filter Clause that decides whether a document matches
     * @param string $term Term whose field iterator supplies the scoring clauses
     * @return BoolQuery
     */
    private function buildScoringQuery( AbstractQuery $filter, $term ): BoolQuery {
        $query = new BoolQuery();
        $query->setMinimumShouldMatch( 0 );
        $query->addFilter( $filter );
        foreach ( $this->termScoringFieldIterators[$term] as $fieldQuery ) {
            $query->addShould( $fieldQuery );
        }

        return $query;
    }

    /**
     * Creates a builder that produces, for any given field and boost, a
     * match query for the given term.
     *
     * @param string $term
     * @return FieldQueryBuilderInterface
     */
    private function getTermScoringFieldQueryBuilder( $term ): FieldQueryBuilderInterface {
        return new class( $term ) implements FieldQueryBuilderInterface {
            /** @var string */
            private $term;

            /**
             * @param string $term Term every generated query matches on
             */
            public function __construct( $term ) {
                $this->term = $term;
            }

            /**
             * @param string $field Field to match the term against
             * @param float $boost Boost applied to that field's match
             * @return AbstractQuery
             */
            public function getQuery( $field, $boost ): AbstractQuery {
                return ( new MatchQuery() )
                    ->setFieldQuery( $field, $this->term )
                    ->setFieldBoost( $field, $boost );
            }
        };
    }
}