Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
98.39% |
61 / 62 |
|
83.33% |
5 / 6 |
CRAP | |
0.00% |
0 / 1 |
LexemeFullTextQueryBuilder | |
98.39% |
61 / 62 |
|
83.33% |
5 / 6 |
11 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
newFromGlobals | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
build | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
3 | |||
buildDegraded | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
buildEntitySearchQuery | |
100.00% |
37 / 37 |
|
100.00% |
1 / 1 |
2 | |||
buildSimpleAllFilter | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
3 |
1 | <?php |
2 | |
3 | namespace Wikibase\Lexeme\Search\Elastic; |
4 | |
5 | use CirrusSearch\Query\FullTextQueryBuilder; |
6 | use CirrusSearch\Search\SearchContext; |
7 | use Elastica\Query\BoolQuery; |
8 | use Elastica\Query\DisMax; |
9 | use Elastica\Query\MatchQuery; |
10 | use Elastica\Query\Term; |
11 | use MediaWiki\Language\Language; |
12 | use Wikibase\DataModel\Entity\EntityIdParser; |
13 | use Wikibase\Lib\Store\FallbackLabelDescriptionLookupFactory; |
14 | use Wikibase\Repo\WikibaseRepo; |
15 | use Wikibase\Search\Elastic\EntitySearchUtils; |
16 | |
/**
 * Builder for Lexeme fulltext queries.
 *
 * When the search context reports that results are possible and no special
 * keyword was used, the main query is replaced by a lexeme-specific query
 * (see buildEntitySearchQuery()). In every case the results type of the
 * context is switched to LexemeFulltextResult.
 */
class LexemeFullTextQueryBuilder implements FullTextQueryBuilder {
	/**
	 * Default profile name for lexemes
	 */
	public const LEXEME_DEFAULT_PROFILE = 'lexeme_fulltext';
	/**
	 * Lexeme fulltext search context name
	 */
	public const CONTEXT_LEXEME_FULLTEXT = 'wikibase_lexeme_fulltext';
	/**
	 * Syntax marker registered on the search context (via addSyntaxUsed())
	 * whenever the lexeme fulltext query is built.
	 */
	public const LEXEME_FULL_TEXT_MARKER = 'lexeme_full_text';

	/**
	 * Profile settings. This class reads the keys 'exact', 'folded',
	 * 'form-discount' and 'any', plus the optional 'tie-breaker'.
	 *
	 * @var array
	 */
	private $settings;
	/**
	 * @var EntityIdParser
	 */
	private $entityIdParser;
	/**
	 * @var Language User's display language
	 */
	private $userLanguage;
	/**
	 * @var FallbackLabelDescriptionLookupFactory
	 */
	private $lookupFactory;

	/**
	 * @param array $settings Settings from EntitySearchProfiles.php
	 * @param FallbackLabelDescriptionLookupFactory $lookupFactory
	 * @param EntityIdParser $entityIdParser
	 * @param Language $userLanguage User's display language
	 */
	public function __construct(
		array $settings,
		FallbackLabelDescriptionLookupFactory $lookupFactory,
		EntityIdParser $entityIdParser,
		Language $userLanguage
	) {
		$this->settings = $settings;
		$this->entityIdParser = $entityIdParser;
		$this->userLanguage = $userLanguage;
		$this->lookupFactory = $lookupFactory;
	}

	/**
	 * Create fulltext builder from global environment.
	 *
	 * The lookup factory, entity ID parser and user language are taken from
	 * the WikibaseRepo service accessors.
	 *
	 * @param array $settings Configuration from config file
	 * @return LexemeFullTextQueryBuilder
	 */
	public static function newFromGlobals( array $settings ) {
		return new static(
			$settings,
			WikibaseRepo::getFallbackLabelDescriptionLookupFactory(),
			WikibaseRepo::getEntityIdParser(),
			WikibaseRepo::getUserLanguage()
		);
	}

	/**
	 * Set up the search context for a lexeme fulltext search.
	 *
	 * The lexeme-specific main query is only installed when results are
	 * still possible and no special keyword was used; the results type is
	 * replaced unconditionally.
	 *
	 * @param SearchContext $searchContext
	 * @param string $term term to search
	 */
	public function build( SearchContext $searchContext, $term ) {
		if ( $searchContext->areResultsPossible() && !$searchContext->isSpecialKeywordUsed() ) {
			// We use entity search query if we did not find any advanced syntax
			// and the base builder did not reject the query
			$this->buildEntitySearchQuery( $searchContext, $term );
		}
		// if we did find advanced query, we keep the old setup but change the result type
		// FIXME: make it dispatch by content model
		$searchContext->setResultsType( new LexemeFulltextResult(
			$this->entityIdParser,
			$this->userLanguage,
			$this->lookupFactory
		) );
	}

	/**
	 * Degraded query building is not implemented for lexemes.
	 *
	 * @param SearchContext $searchContext Unused
	 * @return bool Always false
	 */
	public function buildDegraded( SearchContext $searchContext ) {
		// Not doing anything for now
		return false;
	}

	/**
	 * Build a fulltext query for Wikibase entity.
	 *
	 * Sets the profile context, records the lexeme fulltext syntax marker
	 * and installs the resulting bool query as the context's main query.
	 *
	 * @param SearchContext $searchContext
	 * @param string $term Search term
	 */
	protected function buildEntitySearchQuery( SearchContext $searchContext, $term ) {
		$searchContext->setProfileContext( self::CONTEXT_LEXEME_FULLTEXT );
		$searchContext->addSyntaxUsed( self::LEXEME_FULL_TEXT_MARKER, 10 );
		/*
		 * Structure of the query built here (no namespace filter is added
		 * by this method):
		 * - Bool with:
		 *   OR (should with 1 minimum) of:
		 *     title.keyword = normalized QUERY
		 *     lexeme_forms.id = normalized QUERY
		 *     fulltext match query
		 *
		 * Fulltext match query is a Bool with:
		 *    Filter of:
		 *      at least one of: all, all.plain matching (operator AND)
		 *    OR (should) of:
		 *      DISMAX query of: {lemma|lexeme_forms.representation}.near_match[_folded]
		 *    OR (should) of:
		 *      all
		 *      all.plain
		 *      (operator OR, boosted by the 'any' setting)
		 */

		$profile = $this->settings;
		// $fields is collecting all the fields for dismax query to be used in
		// scoring match. Form representations are weighted by 'form-discount'.
		$fields = [
			[ "lemma.near_match", $profile['exact'] ],
			[ "lemma.near_match_folded", $profile['folded'] ],
			[
				"lexeme_forms.representation.near_match",
				$profile['exact'] * $profile['form-discount'],
			],
			[
				"lexeme_forms.representation.near_match_folded",
				$profile['folded'] * $profile['form-discount'],
			],
		];

		// Both ID lookups use the same normalized value, so compute it once.
		$normalizedId = EntitySearchUtils::normalizeId( $term, $this->entityIdParser );

		$titleMatch = new Term( [
			'title.keyword' => $normalizedId,
		] );
		// lexeme_forms.id is a lowercase_keyword so use Match to apply the analyzer
		$formIdMatch = new MatchQuery( 'lexeme_forms.id', $normalizedId );

		// Main query filter
		$filterQuery = $this->buildSimpleAllFilter( $term );

		// Near match ones, they use constant score
		$nearMatchQuery = new DisMax();
		$nearMatchQuery->setTieBreaker( $profile['tie-breaker'] ?? 0 );
		foreach ( $fields as [ $fieldName, $fieldBoost ] ) {
			$nearMatchQuery->addQuery(
				EntitySearchUtils::makeConstScoreQuery( $fieldName, $fieldBoost, $term )
			);
		}

		// Tokenized ones
		$tokenizedQuery = $this->buildSimpleAllFilter( $term, 'OR', $profile['any'] );

		// Main labels/desc query
		$fullTextQuery = new BoolQuery();
		$fullTextQuery->addFilter( $filterQuery );
		$fullTextQuery->addShould( $nearMatchQuery );
		$fullTextQuery->addShould( $tokenizedQuery );

		// Main query
		$query = new BoolQuery();

		// Match either labels or exact match to title
		$query->addShould( $titleMatch );
		$query->addShould( $formIdMatch );
		$query->addShould( $fullTextQuery );
		$query->setMinimumShouldMatch( 1 );

		$searchContext->setMainQuery( $query );
	}

	/**
	 * Builds a bool query with one match clause per field for the fields
	 * 'all' and 'all.plain', each added as a "should" clause.
	 *
	 * With the default operator 'AND' every term of the query has to match
	 * within a field; callers may pass 'OR' to match any term instead.
	 *
	 * @param string $query Text to match
	 * @param string $operator Operator applied to each match clause
	 * @param float|int|null $boost Boost for each match clause; a falsy value
	 *  (null or 0) means no boost is set on the clause
	 * @return BoolQuery
	 */
	private function buildSimpleAllFilter( $query, $operator = 'AND', $boost = null ) {
		$filter = new BoolQuery();
		// FIXME: We can't use solely the stem field here
		// - Depending on languages it may lack stopwords,
		// A dedicated field used for filtering would be nice
		foreach ( [ 'all', 'all.plain' ] as $field ) {
			$m = new MatchQuery();
			$m->setFieldQuery( $field, $query );
			$m->setFieldOperator( $field, $operator );
			if ( $boost ) {
				$m->setFieldBoost( $field, $boost );
			}
			$filter->addShould( $m );
		}
		return $filter;
	}

}