Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
50.48% |
105 / 208 |
|
9.09% |
1 / 11 |
CRAP | |
0.00% |
0 / 1 |
Hooks | |
50.48% |
105 / 208 |
|
9.09% |
1 / 11 |
203.24 | |
0.00% |
0 / 1 |
onSetupAfterCache | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
12 | |||
onWikibaseRepoEntityTypes | |
42.86% |
3 / 7 |
|
0.00% |
0 / 1 |
2.75 | |||
onCirrusSearchAnalysisConfig | |
0.00% |
0 / 33 |
|
0.00% |
0 / 1 |
20 | |||
onCirrusSearchProfileService | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
42 | |||
registerArrayProfile | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
registerSearchProfiles | |
82.56% |
71 / 86 |
|
0.00% |
0 / 1 |
4.08 | |||
onCirrusSearchAddQueryFeatures | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
6 | |||
amendSearchResults | |
90.32% |
28 / 31 |
|
0.00% |
0 / 1 |
9.07 | |||
onApiOpenSearchSuggest | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
onSpecialPageInitList | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
2 | |||
getWBCSConfig | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | |
3 | namespace Wikibase\Search\Elastic; |
4 | |
5 | use CirrusSearch\Maintenance\AnalysisConfigBuilder; |
6 | use CirrusSearch\Parser\BasicQueryClassifier; |
7 | use CirrusSearch\Profile\ArrayProfileRepository; |
8 | use CirrusSearch\Profile\SearchProfileRepositoryTransformer; |
9 | use CirrusSearch\Profile\SearchProfileService; |
10 | use Language; |
11 | use MediaWiki\Config\ConfigException; |
12 | use MediaWiki\MediaWikiServices; |
13 | use RequestContext; |
14 | use Wikibase\DataModel\Entity\EntityIdParsingException; |
15 | use Wikibase\Repo\WikibaseRepo; |
16 | use Wikibase\Search\Elastic\Fields\StatementsField; |
17 | use Wikibase\Search\Elastic\Query\HasDataForLangFeature; |
18 | use Wikibase\Search\Elastic\Query\HasLicenseFeature; |
19 | use Wikibase\Search\Elastic\Query\HasWbStatementFeature; |
20 | use Wikibase\Search\Elastic\Query\InLabelFeature; |
21 | use Wikibase\Search\Elastic\Query\WbStatementQuantityFeature; |
22 | use Wikimedia\Assert\Assert; |
23 | |
24 | /** |
25 | * Hooks for Wikibase search. |
26 | */ |
27 | class Hooks { |
28 | private const LANGUAGE_SELECTOR_PREFIX = "language_selector_prefix"; |
29 | |
/**
 * Setup hook.
 *
 * When the current request carries a 'useCirrus' value, it is converted with
 * wfStringToBool() and stored in the wgWBCSUseCirrus global. Afterwards, if the
 * WikibaseCirrusSearch configuration reports itself as enabled, the index field
 * limit in $wgCirrusSearchExtraIndexSettings is raised.
 */
public static function onSetupAfterCache() {
	$override = RequestContext::getMain()->getRequest()->getVal( 'useCirrus' );
	if ( $override !== null ) {
		$GLOBALS['wgWBCSUseCirrus'] = wfStringToBool( $override );
	}

	// The config is read only after the request override above has been applied.
	if ( !self::getWBCSConfig()->enabled() ) {
		return;
	}

	// Raise the maximum number of mapped fields so that the
	// labels/descriptions fields fit in.
	$GLOBALS['wgCirrusSearchExtraIndexSettings']['index.mapping.total_fields.limit'] = 5000;
}
47 | |
/**
 * Merges the search-related definitions into the entity type definitions.
 *
 * Does nothing unless the WikibaseCirrusSearch configuration is enabled.
 * The definitions loaded from WikibaseSearch.entitytypes.php are passed as the
 * first argument to wfArrayPlus2d(), the incoming definitions as the second.
 *
 * @see WikibaseSearch.entitytypes.php
 *
 * @param array[] &$entityTypeDefinitions
 */
public static function onWikibaseRepoEntityTypes( array &$entityTypeDefinitions ) {
	if ( self::getWBCSConfig()->enabled() ) {
		$searchDefinitions = require __DIR__ . '/../WikibaseSearch.entitytypes.php';
		$entityTypeDefinitions = wfArrayPlus2d( $searchDefinitions, $entityTypeDefinitions );
	}
}
65 | |
/**
 * Add Wikibase-specific ElasticSearch analyzer configurations.
 *
 * Skipped entirely when MW_PHPUNIT_TEST is defined, when the
 * WikibaseCirrusSearch configuration is not enabled, or when the hook is
 * re-entered while its own ConfigBuilder is running.
 *
 * @param array &$config Analysis configuration, amended in place
 * @param AnalysisConfigBuilder $builder Builder handed on to the Wikibase ConfigBuilder
 */
public static function onCirrusSearchAnalysisConfig( &$config, AnalysisConfigBuilder $builder ) {
	if ( defined( 'MW_PHPUNIT_TEST' ) ) {
		return;
	}
	$wbcsConfig = self::getWBCSConfig();
	if ( !$wbcsConfig->enabled() ) {
		return;
	}
	// Re-entrancy flag; explicitly initialised so the first check never reads null.
	static $inHook = false;
	if ( $inHook ) {
		// Do not call this hook repeatedly, since ConfigBuilder calls AnalysisConfigBuilder
		// FIXME: this is not a very nice hack, but we need it because we want AnalysisConfigBuilder
		// to call the hook, since other extensions may make relevant changes to config.
		// We just don't want to run this specific hook again, but Mediawiki API does not have
		// the means to exclude one hook temporarily.
		return;
	}

	// Analyzer for splitting statements and extracting properties:
	// P31=Q1234 => P31
	$config['analyzer']['extract_wb_property'] = [
		'type' => 'custom',
		'tokenizer' => 'split_wb_statements',
		'filter' => [ 'first_token' ],
	];
	$config['tokenizer']['split_wb_statements'] = [
		'type' => 'pattern',
		'pattern' => StatementsField::STATEMENT_SEPARATOR,
	];
	$config['filter']['first_token'] = [
		'type' => 'limit',
		'max_token_count' => 1
	];

	// Analyzer for extracting quantity data and storing it in a term frequency field
	$config['analyzer']['extract_wb_quantity'] = [
		'type' => 'custom',
		'tokenizer' => 'keyword',
		'filter' => [ 'term_freq' ],
	];

	// Language analyzers for descriptions.
	// Reuse the config object fetched above instead of looking it up a second time.
	$wbBuilder = new ConfigBuilder(
		WikibaseRepo::getTermsLanguages()->getLanguages(),
		$wbcsConfig,
		$builder
	);
	$inHook = true;
	try {
		$wbBuilder->buildConfig( $config );
	} finally {
		// Always clear the flag, even if building the config throws.
		$inHook = false;
	}
}
124 | |
/**
 * Register our cirrus profiles using WikibaseRepo.
 *
 * Collects, per profile context, the namespaces of all local entity types that
 * take part in fulltext search and passes them to registerSearchProfiles().
 * Outside of PHPUnit runs (MW_PHPUNIT_TEST not defined) nothing is registered
 * unless the WikibaseCirrusSearch configuration is enabled.
 *
 * @param SearchProfileService $service
 */
public static function onCirrusSearchProfileService( SearchProfileService $service ) {
	$config = self::getWBCSConfig();
	if ( !defined( 'MW_PHPUNIT_TEST' ) && !$config->enabled() ) {
		return;
	}

	$namespacesForContexts = [];
	$entityNsLookup = WikibaseRepo::getEntityNamespaceLookup();
	$localEntityTypes = WikibaseRepo::getLocalEntityTypes();
	foreach ( WikibaseRepo::getFulltextSearchTypes() as $type => $profileContext ) {
		// Strict comparison avoids PHP's loose-equality surprises; the cast undoes
		// the int conversion PHP applies to numeric-looking array keys.
		if ( !in_array( (string)$type, $localEntityTypes, true ) ) {
			// Do not enable profiles for entity types that are not local
			// e.g. when using MediaInfo items and properties are not managed by this wiki
			// and thus should not enable specific profiles for them.
			continue;
		}
		$namespace = $entityNsLookup->getEntityNamespace( $type );
		if ( $namespace === null ) {
			// No namespace is known for this entity type, so there is nothing to route.
			continue;
		}
		$namespacesForContexts[$profileContext][] = $namespace;
	}

	self::registerSearchProfiles( $service, $config, $namespacesForContexts );
}
155 | |
/**
 * Registers the profiles stored under a config variable as an array repository.
 *
 * Nothing is registered when the variable holds a falsy value. The repository
 * is always registered under the name 'wikibase_config'.
 *
 * @param string $profileName Name of the variable (in config context) that contains profiles
 * @param string $repoType Cirrus repo type
 * @param SearchProfileService $service
 * @param WikibaseSearchConfig $entitySearchConfig Config object
 */
private static function registerArrayProfile(
	$profileName,
	$repoType,
	SearchProfileService $service,
	WikibaseSearchConfig $entitySearchConfig
) {
	$configuredProfiles = $entitySearchConfig->get( $profileName );
	if ( !$configuredProfiles ) {
		return;
	}

	$service->registerArrayRepository( $repoType, 'wikibase_config', $configuredProfiles );
}
174 | |
/**
 * Register cirrus profiles.
 * (Visible for testing purposes)
 *
 * Registers, in this order: the base repositories shipped in the config/
 * directory, the repositories taken from the wikibase config, the default
 * profiles and overrides for the prefix, fulltext and language selector
 * contexts, and finally one fulltext search route per profile context.
 *
 * @param SearchProfileService $service
 * @param WikibaseSearchConfig $entitySearchConfig
 * @param int[][] $namespacesForContexts list of namespaces indexed by profile context name
 * @see SearchProfileService
 * @see WikibaseRepo::getFulltextSearchTypes()
 * @throws ConfigException
 */
public static function registerSearchProfiles(
	SearchProfileService $service,
	WikibaseSearchConfig $entitySearchConfig,
	array $namespacesForContexts
) {
	// Short local names for the profile types and contexts used repeatedly below.
	$rescoreType = SearchProfileService::RESCORE;
	$chainsType = SearchProfileService::RESCORE_FUNCTION_CHAINS;
	$fulltextBuilderType = SearchProfileService::FT_QUERY_BUILDER;
	$prefixBuilderType = EntitySearchElastic::WIKIBASE_PREFIX_QUERY_BUILDER;
	$prefixContext = EntitySearchElastic::CONTEXT_WIKIBASE_PREFIX;
	$fulltextContext = EntitySearchElastic::CONTEXT_WIKIBASE_FULLTEXT;
	$selectorContext = self::LANGUAGE_SELECTOR_PREFIX;

	$statementBoost = $entitySearchConfig->get( 'StatementBoost' );
	// Replacement map handed to the transformer for both base and configured function chains.
	$boostReplacement = [ EntitySearchElastic::STMT_BOOST_PROFILE_REPL => $statementBoost ];

	// Base profiles available on all wikibase installs.
	$service->registerFileRepository(
		$rescoreType,
		'wikibase_base',
		__DIR__ . '/config/ElasticSearchRescoreProfiles.php'
	);
	$baseChains = ArrayProfileRepository::fromFile(
		$chainsType,
		'wikibase_base',
		__DIR__ . '/config/ElasticSearchRescoreFunctions.php'
	);
	$service->registerRepository(
		new SearchProfileRepositoryTransformer( $baseChains, $boostReplacement )
	);
	$service->registerFileRepository(
		$prefixBuilderType,
		'wikibase_base',
		__DIR__ . '/config/EntityPrefixSearchProfiles.php'
	);
	$service->registerFileRepository(
		$fulltextBuilderType,
		'wikibase_base',
		__DIR__ . '/config/EntitySearchProfiles.php'
	);

	// Custom profiles provided in the wikibase config.
	self::registerArrayProfile( 'RescoreProfiles', $rescoreType, $service, $entitySearchConfig );

	// Custom function chains, if any are configured.
	$configuredChains = $entitySearchConfig->get( 'RescoreFunctionChains' );
	if ( $configuredChains ) {
		$customChains = ArrayProfileRepository::fromArray(
			$chainsType,
			'wikibase_config',
			$configuredChains
		);
		$service->registerRepository(
			new SearchProfileRepositoryTransformer( $customChains, $boostReplacement )
		);
	}

	self::registerArrayProfile(
		'PrefixSearchProfiles', $prefixBuilderType, $service, $entitySearchConfig
	);
	self::registerArrayProfile(
		'FulltextSearchProfiles', $fulltextBuilderType, $service, $entitySearchConfig
	);

	// Entity autocomplete search: default rescore profile, overridable through
	// the cirrusRescoreProfile URI parameter.
	$prefixRescoreDefault = $entitySearchConfig->get(
		'DefaultPrefixRescoreProfile',
		EntitySearchElastic::DEFAULT_RESCORE_PROFILE
	);
	$service->registerDefaultProfile( $rescoreType, $prefixContext, $prefixRescoreDefault );
	$service->registerUriParamOverride( $rescoreType, $prefixContext, 'cirrusRescoreProfile' );

	// Entity autocomplete search: default query builder profile.
	$prefixBuilderDefault = $entitySearchConfig->get(
		'PrefixSearchProfile',
		EntitySearchElastic::DEFAULT_QUERY_BUILDER_PROFILE
	);
	$service->registerDefaultProfile( $prefixBuilderType, $prefixContext, $prefixBuilderDefault );
	$service->registerUriParamOverride( $prefixBuilderType, $prefixContext, 'cirrusWBProfile' );

	// Fulltext search: default query builder profile.
	$fulltextBuilderDefault = $entitySearchConfig->get(
		'FulltextSearchProfile',
		EntitySearchElastic::DEFAULT_FULL_TEXT_QUERY_BUILDER_PROFILE
	);
	$service->registerDefaultProfile(
		$fulltextBuilderType, $fulltextContext, $fulltextBuilderDefault
	);
	$service->registerUriParamOverride( $fulltextBuilderType, $fulltextContext, 'cirrusWBProfile' );

	// Fulltext search: default rescore profile, overridable through
	// the cirrusRescoreProfile URI parameter.
	$fulltextRescoreDefault = $entitySearchConfig->get(
		'DefaultFulltextRescoreProfile',
		EntitySearchElastic::DEFAULT_RESCORE_PROFILE
	);
	$service->registerDefaultProfile( $rescoreType, $fulltextContext, $fulltextRescoreDefault );
	$service->registerUriParamOverride( $rescoreType, $fulltextContext, 'cirrusRescoreProfile' );

	// Separate search context for the language selector in Special:NewLexeme.
	$service->registerDefaultProfile(
		$rescoreType, $selectorContext, EntitySearchElastic::DEFAULT_RESCORE_PROFILE
	);
	$service->registerConfigOverride(
		$rescoreType, $selectorContext, $entitySearchConfig, 'LanguageSelectorRescoreProfile'
	);
	$service->registerUriParamOverride( $rescoreType, $selectorContext, 'cirrusRescoreProfile' );
	$service->registerDefaultProfile(
		$prefixBuilderType, $selectorContext, EntitySearchElastic::DEFAULT_QUERY_BUILDER_PROFILE
	);
	$service->registerConfigOverride(
		$prefixBuilderType,
		$selectorContext,
		$entitySearchConfig,
		'LanguageSelectorPrefixSearchProfile'
	);
	$service->registerUriParamOverride( $prefixBuilderType, $selectorContext, 'cirrusWBProfile' );

	$selectorChains = $entitySearchConfig->get( 'LanguageSelectorRescoreFunctionChains' );
	if ( $selectorChains ) {
		// The language selector chains use their own statement boost setting.
		$selectorBoost = $entitySearchConfig->get( 'LanguageSelectorStatementBoost' );
		$selectorRepository = ArrayProfileRepository::fromArray(
			$chainsType,
			'wikibase_config_language_selector',
			$selectorChains
		);
		$service->registerRepository( new SearchProfileRepositoryTransformer(
			$selectorRepository,
			[ EntitySearchElastic::STMT_BOOST_PROFILE_REPL => $selectorBoost ]
		) );
	}

	// Declare the "search routes" for the wikibase fulltext search types.
	// $namespacesForContexts is a "reversed view" of WikibaseRepo::getFulltextSearchTypes():
	// it maps a profile context (e.g. EntitySearchElastic::CONTEXT_WIKIBASE_FULLTEXT) to its
	// namespaces, telling cirrus to use the components registered above for them.
	// Wikibase owns these namespaces, so the routes are scored at 1.0, which discards any
	// other routes and eventually fails if another extension tries to own our namespace.
	// Only simple bag of words queries are accepted for now; this will change once the
	// query builders manipulate the parsed query.
	foreach ( $namespacesForContexts as $context => $contextNamespaces ) {
		Assert::precondition(
			is_string( $context ),
			'$namespacesForContexts keys must be strings and refer to the profile context to use'
		);
		// The wikibase builders only support simple queries for now.
		$acceptedQueryClasses = [ BasicQueryClassifier::SIMPLE_BAG_OF_WORDS ];
		$service->registerFTSearchQueryRoute(
			$context,
			1.0,
			$contextNamespaces,
			$acceptedQueryClasses
		);
	}
}
313 | |
/**
 * Appends the wikibase-specific cirrus search query features.
 *
 * Leaves $extraFeatures untouched unless the WikibaseCirrusSearch
 * configuration is enabled.
 *
 * @param \CirrusSearch\SearchConfig $config (not used, required by hook)
 * @param array &$extraFeatures
 */
public static function onCirrusSearchAddQueryFeatures( $config, array &$extraFeatures ) {
	$wbcsConfig = self::getWBCSConfig();
	if ( !$wbcsConfig->enabled() ) {
		return;
	}

	// Statement based features.
	$extraFeatures[] = new HasWbStatementFeature();
	$extraFeatures[] = new WbStatementQuantityFeature();

	// License feature, driven by the license map configured for this extension.
	$extraFeatures[] = new HasLicenseFeature(
		HasLicenseFeature::getConfiguredLicenseMap( $wbcsConfig )
	);

	// Features that depend on the list of term languages.
	$termLanguageCodes = WikibaseRepo::getTermsLanguages()->getLanguages();
	$fallbackChainFactory = WikibaseRepo::getLanguageFallbackChainFactory();
	$extraFeatures[] = new InLabelFeature( $fallbackChainFactory, $termLanguageCodes );
	$extraFeatures[] = new HasDataForLangFeature( $termLanguageCodes );
}
336 | |
/**
 * Fills the 'extract' of search results that point to entities.
 *
 * A result is amended when its 'title' lies in an entity namespace, the title
 * text parses as an entity ID and a label is found for that entity. Its
 * 'extract' is then set to the tag-stripped HTML produced by the entity link
 * formatter for that label. All other results are left unchanged.
 *
 * @param Language $lang Language used for the label lookup and the link formatter
 * @param array &$results Search results, amended in place
 */
public static function amendSearchResults( Language $lang, array &$results ) {
	$idParser = WikibaseRepo::getEntityIdParser();
	$entityIds = [];
	$namespaceLookup = WikibaseRepo::getEntityNamespaceLookup();

	// First pass: collect the entity IDs so the lookup below can be created for all of them.
	foreach ( $results as &$result ) {
		if ( empty( $result['title'] ) ||
			!$namespaceLookup->isEntityNamespace( $result['title']->getNamespace() ) ) {
			continue;
		}
		try {
			$title = $result['title']->getText();
			$entityId = $idParser->parse( $title );
			$entityIds[] = $entityId;
			// Temporary marker, removed again in the second pass.
			$result['entityId'] = $entityId;
		} catch ( EntityIdParsingException $e ) {
			// Title is not a valid entity ID: leave this result as it is.
			continue;
		}
	}
	// Break the reference so later use of $result cannot alter the last element.
	unset( $result );

	if ( !$entityIds ) {
		return;
	}
	$lookup = WikibaseRepo::getFallbackLabelDescriptionLookupFactory()
		->newLabelDescriptionLookup( $lang, $entityIds );
	$formatterFactory = WikibaseRepo::getEntityLinkFormatterFactory();

	// Second pass: replace the extract of every marked result that has a label.
	foreach ( $results as &$result ) {
		if ( empty( $result['entityId'] ) ) {
			continue;
		}
		$entityId = $result['entityId'];
		unset( $result['entityId'] );
		$label = $lookup->getLabel( $entityId );
		if ( !$label ) {
			continue;
		}
		$linkFormatter = $formatterFactory->getLinkFormatter( $entityId->getEntityType(), $lang );
		$result['extract'] = strip_tags( $linkFormatter->getHtml( $entityId, [
			'value' => $label->getText(),
			'language' => $label->getActualLanguageCode(),
		] ) );
	}
	// Same as above: never leave a dangling reference into $results.
	unset( $result );
}
384 | |
/**
 * Passes the suggestions to amendSearchResults() using the user language.
 *
 * Nothing happens when the WikibaseCirrusSearch configuration is not enabled
 * or when there are no results.
 *
 * @param array &$results
 */
public static function onApiOpenSearchSuggest( &$results ) {
	// The config check comes first; the result check only runs when search is enabled.
	if ( !self::getWBCSConfig()->enabled() || !$results ) {
		return;
	}

	$userLanguage = WikibaseRepo::getUserLanguage();
	self::amendSearchResults( $userLanguage, $results );
}
401 | |
/**
 * Register special pages.
 *
 * Each page is registered as a [ class name, method name ] pair pointing at
 * SpecialEntitiesWithoutPageFactory.
 *
 * @param array &$list
 */
public static function onSpecialPageInitList( &$list ) {
	// Special page name => name of the factory method registered for it.
	$factoryMethods = [
		'EntitiesWithoutLabel' => 'newSpecialEntitiesWithoutLabel',
		'EntitiesWithoutDescription' => 'newSpecialEntitiesWithoutDescription',
	];

	foreach ( $factoryMethods as $pageName => $factoryMethod ) {
		$list[$pageName] = [ SpecialEntitiesWithoutPageFactory::class, $factoryMethod ];
	}
}
418 | |
/**
 * Fetches the 'WikibaseCirrusSearch' config from the MediaWiki config factory.
 *
 * @return WikibaseSearchConfig
 * @suppress PhanTypeMismatchReturnSuperType
 */
private static function getWBCSConfig(): WikibaseSearchConfig {
	$configFactory = MediaWikiServices::getInstance()->getConfigFactory();

	return $configFactory->makeConfig( 'WikibaseCirrusSearch' );
}
428 | |
429 | } |