Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
58.56% |
106 / 181 |
|
0.00% |
0 / 5 |
CRAP | |
0.00% |
0 / 1 |
CirrusSearchHooksHandler | |
58.56% |
106 / 181 |
|
0.00% |
0 / 5 |
41.05 | |
0.00% |
0 / 1 |
onCirrusSearchAnalysisConfig | |
0.00% |
0 / 33 |
|
0.00% |
0 / 1 |
20 | |||
onCirrusSearchProfileService | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
42 | |||
registerArrayProfile | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
registerSearchProfiles | |
87.60% |
106 / 121 |
|
0.00% |
0 / 1 |
4.03 | |||
onCirrusSearchAddQueryFeatures | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
6 |
1 | <?php |
2 | |
3 | declare( strict_types = 1 ); |
4 | |
5 | namespace Wikibase\Search\Elastic\Hooks; |
6 | |
7 | use CirrusSearch\Hooks\CirrusSearchAddQueryFeaturesHook; |
8 | use CirrusSearch\Hooks\CirrusSearchAnalysisConfigHook; |
9 | use CirrusSearch\Hooks\CirrusSearchProfileServiceHook; |
10 | use CirrusSearch\Maintenance\AnalysisConfigBuilder; |
11 | use CirrusSearch\Parser\BasicQueryClassifier; |
12 | use CirrusSearch\Profile\ArrayProfileRepository; |
13 | use CirrusSearch\Profile\SearchProfileRepositoryTransformer; |
14 | use CirrusSearch\Profile\SearchProfileService; |
15 | use CirrusSearch\SearchConfig; |
16 | use MediaWiki\Config\ConfigException; |
17 | use Wikibase\Repo\WikibaseRepo; |
18 | use Wikibase\Search\Elastic\ConfigBuilder; |
19 | use Wikibase\Search\Elastic\EntitySearchElastic; |
20 | use Wikibase\Search\Elastic\Fields\StatementsField; |
21 | use Wikibase\Search\Elastic\Query\HasDataForLangFeature; |
22 | use Wikibase\Search\Elastic\Query\HasLicenseFeature; |
23 | use Wikibase\Search\Elastic\Query\HasWbStatementFeature; |
24 | use Wikibase\Search\Elastic\Query\InLabelFeature; |
25 | use Wikibase\Search\Elastic\Query\WbStatementQuantityFeature; |
26 | use Wikibase\Search\Elastic\WikibaseSearchConfig; |
27 | use Wikimedia\Assert\Assert; |
28 | |
29 | /** |
30 | * Hooks for Wikibase search. |
31 | */ |
32 | class CirrusSearchHooksHandler implements |
33 | CirrusSearchAnalysisConfigHook, |
34 | CirrusSearchProfileServiceHook, |
35 | CirrusSearchAddQueryFeaturesHook |
36 | { |
37 | |
38 | private const LANGUAGE_SELECTOR_PREFIX = "language_selector_prefix"; |
39 | |
/**
 * Hook handler contributing the Wikibase-specific ElasticSearch analysis settings.
 *
 * Registers the statement-property and quantity analyzers directly, then hands
 * $config to ConfigBuilder so it can add the analyzers for the term languages.
 * Returns without touching $config when MW_PHPUNIT_TEST is defined, when the
 * WikibaseCirrusSearch config reports that it is not enabled, or when this
 * handler is re-entered while ConfigBuilder is still running.
 *
 * @param array &$config Analysis configuration, modified in place
 * @param AnalysisConfigBuilder $builder Builder passed on to ConfigBuilder
 */
public function onCirrusSearchAnalysisConfig( array &$config, AnalysisConfigBuilder $builder ): void {
	// Re-entrancy marker; keeps its value between invocations of this method.
	static $alreadyBuilding;

	if ( defined( 'MW_PHPUNIT_TEST' ) ) {
		return;
	}

	$searchConfig = CirrusSearchConfiguration::getWBCSConfig();
	if ( !$searchConfig->enabled() ) {
		return;
	}

	if ( $alreadyBuilding ) {
		// ConfigBuilder calls AnalysisConfigBuilder, which fires this hook again.
		// FIXME: this is a hack. We do want AnalysisConfigBuilder to run the hook,
		// because other extensions may make relevant changes to the config; we only
		// want to skip this particular handler, and MediaWiki offers no way to
		// temporarily exclude a single hook handler.
		return;
	}

	// Splits a statement list and keeps only the property of each statement:
	// P31=Q1234 => P31
	$config['analyzer']['extract_wb_property'] = [
		'type' => 'custom',
		'tokenizer' => 'split_wb_statements',
		'filter' => [ 'first_token' ],
	];
	$config['tokenizer']['split_wb_statements'] = [
		'type' => 'pattern',
		'pattern' => StatementsField::STATEMENT_SEPARATOR,
	];
	$config['filter']['first_token'] = [
		'type' => 'limit',
		'max_token_count' => 1,
	];

	// Extracts quantity data and stores it in a term frequency field
	$config['analyzer']['extract_wb_quantity'] = [
		'type' => 'custom',
		'tokenizer' => 'keyword',
		'filter' => [ 'term_freq' ],
	];

	// Language analyzers for descriptions
	$termLanguages = WikibaseRepo::getTermsLanguages()->getLanguages();
	$languageConfigBuilder = new ConfigBuilder( $termLanguages, $searchConfig, $builder );

	$alreadyBuilding = true;
	try {
		$languageConfigBuilder->buildConfig( $config );
	} finally {
		// Always clear the marker, even if buildConfig() throws.
		$alreadyBuilding = false;
	}
}
98 | |
/**
 * Hook handler registering the Wikibase search profiles with CirrusSearch.
 *
 * Builds a "profile context => list of namespaces" map from
 * WikibaseRepo::getFulltextSearchTypes(), keeping only entity types that are
 * local and that have a namespace, and passes it to registerSearchProfiles().
 *
 * @param SearchProfileService $service Service the profiles are registered on
 */
public function onCirrusSearchProfileService( SearchProfileService $service ): void {
	$config = CirrusSearchConfiguration::getWBCSConfig();
	// When MW_PHPUNIT_TEST is defined the enabled flag is not consulted at all.
	if ( !defined( 'MW_PHPUNIT_TEST' ) && !$config->enabled() ) {
		return;
	}

	$namespacesForContexts = [];
	$entityNsLookup = WikibaseRepo::getEntityNamespaceLookup();
	$localEntityTypes = WikibaseRepo::getLocalEntityTypes();
	foreach ( WikibaseRepo::getFulltextSearchTypes() as $type => $profileContext ) {
		// Strict comparison: entity type identifiers must match exactly, without
		// PHP's loose type juggling.
		if ( !in_array( $type, $localEntityTypes, true ) ) {
			// Do not enable profiles for entity types that are not local,
			// e.g. when using MediaInfo, items and properties are not managed by
			// this wiki and thus should not get specific profiles.
			continue;
		}
		$namespace = $entityNsLookup->getEntityNamespace( $type );
		if ( $namespace === null ) {
			// No namespace for this entity type, so there is nothing to route.
			continue;
		}
		$namespacesForContexts[$profileContext][] = $namespace;
	}

	self::registerSearchProfiles( $service, $config, $namespacesForContexts );
}
129 | |
/**
 * Register the profiles stored in one config variable as an array repository.
 *
 * Reads $profileName from the config and, when the value is truthy, registers it
 * on the service under the repository name 'wikibase_config'. A missing or empty
 * value results in no registration.
 *
 * @param string $profileName Name of the variable (in config context) that contains profiles
 * @param string $repoType Cirrus repo type
 * @param SearchProfileService $service
 * @param WikibaseSearchConfig $entitySearchConfig Config object
 */
private static function registerArrayProfile(
	string $profileName,
	string $repoType,
	SearchProfileService $service,
	WikibaseSearchConfig $entitySearchConfig
): void {
	$profile = $entitySearchConfig->get( $profileName );
	if ( $profile ) {
		$service->registerArrayRepository( $repoType, 'wikibase_config', $profile );
	}
}
148 | |
/**
 * Register all cirrus profiles, defaults, overrides and fulltext routes.
 * (Visible for testing purposes)
 *
 * Registers, in order: the base profile repositories shipped in ../config, the
 * custom profiles found in the wikibase config, the default profiles and
 * overrides for the prefix, in-label, fulltext and language-selector contexts,
 * and finally one fulltext query route per entry of $namespacesForContexts.
 *
 * @param SearchProfileService $service
 * @param WikibaseSearchConfig $entitySearchConfig
 * @param int[][] $namespacesForContexts list of namespaces indexed by profile context name
 * @see SearchProfileService
 * @see WikibaseRepo::getFulltextSearchTypes()
 * @throws ConfigException
 */
public static function registerSearchProfiles(
	SearchProfileService $service,
	WikibaseSearchConfig $entitySearchConfig,
	array $namespacesForContexts
) {
	$statementBoost = $entitySearchConfig->get( 'StatementBoost' );

	// Short aliases for the values repeated throughout the registrations below.
	$configDir = __DIR__ . '/../config';
	$baseRepo = 'wikibase_base';
	$langReplacements = [ '{lang}' => 'language' ];
	$rescoreUriParam = 'cirrusRescoreProfile';
	$builderUriParam = 'cirrusWBProfile';

	$rescoreType = SearchProfileService::RESCORE;
	$chainsType = SearchProfileService::RESCORE_FUNCTION_CHAINS;
	$fulltextBuilderType = SearchProfileService::FT_QUERY_BUILDER;
	$prefixBuilderType = EntitySearchElastic::WIKIBASE_PREFIX_QUERY_BUILDER;
	$inLabelBuilderType = EntitySearchElastic::WIKIBASE_IN_LABEL_QUERY_BUILDER;

	$prefixContext = EntitySearchElastic::CONTEXT_WIKIBASE_PREFIX;
	$inLabelContext = EntitySearchElastic::CONTEXT_WIKIBASE_IN_LABEL;
	$fulltextContext = EntitySearchElastic::CONTEXT_WIKIBASE_FULLTEXT;
	$selectorContext = self::LANGUAGE_SELECTOR_PREFIX;

	// --- Base profiles available on all wikibase installs ---
	$service->registerFileRepository(
		$rescoreType,
		$baseRepo,
		$configDir . '/ElasticSearchRescoreProfiles.php'
	);
	$baseChains = ArrayProfileRepository::fromFile(
		$chainsType,
		$baseRepo,
		$configDir . '/ElasticSearchRescoreFunctions.php'
	);
	$service->registerRepository( new SearchProfileRepositoryTransformer(
		$baseChains,
		[ EntitySearchElastic::STMT_BOOST_PROFILE_REPL => $statementBoost ]
	) );
	$service->registerFileRepository(
		$prefixBuilderType,
		$baseRepo,
		$configDir . '/EntityPrefixSearchProfiles.php'
	);
	$service->registerFileRepository(
		$inLabelBuilderType,
		$baseRepo,
		$configDir . '/EntityInLabelSearchProfiles.php'
	);
	$service->registerFileRepository(
		$fulltextBuilderType,
		$baseRepo,
		$configDir . '/EntitySearchProfiles.php'
	);

	// --- Custom profiles provided in the wikibase config ---
	self::registerArrayProfile( 'RescoreProfiles', $rescoreType, $service, $entitySearchConfig );

	// Function chains from the config, with the statement boost substituted in
	$customChains = $entitySearchConfig->get( 'RescoreFunctionChains' );
	if ( $customChains ) {
		$customChainsRepo = ArrayProfileRepository::fromArray(
			$chainsType,
			'wikibase_config',
			$customChains
		);
		$service->registerRepository( new SearchProfileRepositoryTransformer(
			$customChainsRepo,
			[ EntitySearchElastic::STMT_BOOST_PROFILE_REPL => $statementBoost ]
		) );
	}

	self::registerArrayProfile( 'PrefixSearchProfiles', $prefixBuilderType, $service, $entitySearchConfig );
	self::registerArrayProfile( 'FulltextSearchProfiles', $fulltextBuilderType, $service, $entitySearchConfig );
	self::registerArrayProfile( 'InLabelSearchProfiles', $inLabelBuilderType, $service, $entitySearchConfig );

	// --- Entity autocomplete (prefix) search: rescore profile ---
	$prefixRescore = $entitySearchConfig->get(
		'DefaultPrefixRescoreProfile',
		EntitySearchElastic::DEFAULT_RESCORE_PROFILE
	);
	$service->registerDefaultProfile( $rescoreType, $prefixContext, $prefixRescore );
	// If a variation of the default profile with the requested language code appended
	// is available, use that language specific profile instead of the default one.
	$service->registerContextualOverride(
		$rescoreType,
		$prefixContext,
		$prefixRescore . '-{lang}',
		$langReplacements
	);
	// Allow overriding the profile with the URI param cirrusRescoreProfile
	$service->registerUriParamOverride( $rescoreType, $prefixContext, $rescoreUriParam );

	// --- Entity autocomplete (prefix) search: query builder profile ---
	$prefixBuilder = $entitySearchConfig->get(
		'PrefixSearchProfile',
		EntitySearchElastic::DEFAULT_QUERY_BUILDER_PROFILE
	);
	$service->registerDefaultProfile( $prefixBuilderType, $prefixContext, $prefixBuilder );
	$service->registerContextualOverride(
		$prefixBuilderType,
		$prefixContext,
		$prefixBuilder . '-{lang}',
		$langReplacements
	);
	$service->registerUriParamOverride( $prefixBuilderType, $prefixContext, $builderUriParam );

	// --- Entity search by label: rescore profile ---
	$inLabelRescore = 'wikibase_in_label';
	$service->registerDefaultProfile( $rescoreType, $inLabelContext, $inLabelRescore );
	$service->registerConfigOverride(
		$rescoreType,
		$inLabelContext,
		$entitySearchConfig,
		'DefaultInLabelRescoreProfile'
	);
	// Same language specific variation lookup as for the prefix context
	$service->registerContextualOverride(
		$rescoreType,
		$inLabelContext,
		$inLabelRescore . '-{lang}',
		$langReplacements
	);
	// Allow overriding the profile with the URI param cirrusRescoreProfile
	$service->registerUriParamOverride( $rescoreType, $inLabelContext, $rescoreUriParam );

	// --- Entity search by label: query builder profile ---
	$inLabelBuilder = 'default';
	$service->registerConfigOverride(
		$inLabelBuilderType,
		$inLabelContext,
		$entitySearchConfig,
		'InLabelSearchProfile'
	);
	$service->registerDefaultProfile( $inLabelBuilderType, $inLabelContext, $inLabelBuilder );
	$service->registerContextualOverride(
		$inLabelBuilderType,
		$inLabelContext,
		$inLabelBuilder . '-{lang}',
		$langReplacements
	);
	$service->registerUriParamOverride( $inLabelBuilderType, $inLabelContext, $builderUriParam );

	// --- Fulltext search: query builder profile ---
	$fulltextBuilder = $entitySearchConfig->get(
		'FulltextSearchProfile',
		EntitySearchElastic::DEFAULT_FULL_TEXT_QUERY_BUILDER_PROFILE
	);
	$service->registerDefaultProfile( $fulltextBuilderType, $fulltextContext, $fulltextBuilder );
	$service->registerUriParamOverride( $fulltextBuilderType, $fulltextContext, $builderUriParam );

	// --- Fulltext search: rescore profile ---
	$fulltextRescore = $entitySearchConfig->get(
		'DefaultFulltextRescoreProfile',
		EntitySearchElastic::DEFAULT_RESCORE_PROFILE
	);
	$service->registerDefaultProfile( $rescoreType, $fulltextContext, $fulltextRescore );
	// Allow overriding the profile with the URI param cirrusRescoreProfile
	$service->registerUriParamOverride( $rescoreType, $fulltextContext, $rescoreUriParam );

	// --- Search context for the language selector in Special:NewLexeme ---
	$service->registerDefaultProfile(
		$rescoreType,
		$selectorContext,
		EntitySearchElastic::DEFAULT_RESCORE_PROFILE
	);
	$service->registerConfigOverride(
		$rescoreType,
		$selectorContext,
		$entitySearchConfig,
		'LanguageSelectorRescoreProfile'
	);
	$service->registerUriParamOverride( $rescoreType, $selectorContext, $rescoreUriParam );
	$service->registerDefaultProfile(
		$prefixBuilderType,
		$selectorContext,
		EntitySearchElastic::DEFAULT_QUERY_BUILDER_PROFILE
	);
	$service->registerConfigOverride(
		$prefixBuilderType,
		$selectorContext,
		$entitySearchConfig,
		'LanguageSelectorPrefixSearchProfile'
	);
	$service->registerUriParamOverride( $prefixBuilderType, $selectorContext, $builderUriParam );

	$selectorChains = $entitySearchConfig->get( 'LanguageSelectorRescoreFunctionChains' );
	if ( $selectorChains ) {
		// The language selector chains use their own statement boost setting.
		$selectorBoost = $entitySearchConfig->get( 'LanguageSelectorStatementBoost' );
		$selectorChainsRepo = ArrayProfileRepository::fromArray(
			$chainsType,
			'wikibase_config_language_selector',
			$selectorChains
		);
		$service->registerRepository( new SearchProfileRepositoryTransformer(
			$selectorChainsRepo,
			[ EntitySearchElastic::STMT_BOOST_PROFILE_REPL => $selectorBoost ]
		) );
	}

	// --- "Search routes" for wikibase full text search types ---
	// $namespacesForContexts is a "reversed view" of WikibaseRepo::getFulltextSearchTypes():
	// it maps a profile context (e.g. EntitySearchElastic::CONTEXT_WIKIBASE_FULLTEXT) to
	// namespaces and tells cirrus to use the components declared above for them.
	// Since wikibase owns these namespaces the routes are scored at 1.0, which discards
	// any other routes and eventually fails if another extension tries to own our namespace.
	// For now only simple bag of words queries are accepted; this will change once query
	// builders manipulate the parsed query.
	foreach ( $namespacesForContexts as $contextName => $contextNamespaces ) {
		Assert::precondition( is_string( $contextName ),
			'$namespacesForContexts keys must be strings and refer to the profile context to use' );
		$service->registerFTSearchQueryRoute(
			$contextName,
			1.0,
			$contextNamespaces,
			// The wikibase builders only support simple queries for now
			[ BasicQueryClassifier::SIMPLE_BAG_OF_WORDS ]
		);
	}
}
331 | |
/**
 * Hook handler appending the wikibase query features to the cirrus feature list.
 *
 * When the WikibaseCirrusSearch config is enabled, appends (in this order)
 * HasWbStatementFeature, WbStatementQuantityFeature, HasLicenseFeature,
 * InLabelFeature and HasDataForLangFeature to $extraFeatures. Otherwise
 * $extraFeatures is left untouched.
 *
 * @param SearchConfig $config (not used, required by hook)
 * @param array &$extraFeatures List of features, extended in place
 */
public function onCirrusSearchAddQueryFeatures( SearchConfig $config, array &$extraFeatures ): void {
	$wbcsConfig = CirrusSearchConfiguration::getWBCSConfig();

	if ( $wbcsConfig->enabled() ) {
		// Statement based features
		$extraFeatures[] = new HasWbStatementFeature();
		$extraFeatures[] = new WbStatementQuantityFeature();

		// License feature, driven by the license map read from the config
		$extraFeatures[] = new HasLicenseFeature(
			HasLicenseFeature::getConfiguredLicenseMap( $wbcsConfig )
		);

		// Term language based features share the same list of language codes
		$termLanguageCodes = WikibaseRepo::getTermsLanguages()->getLanguages();
		$fallbackChainFactory = WikibaseRepo::getLanguageFallbackChainFactory();
		$extraFeatures[] = new InLabelFeature( $fallbackChainFactory, $termLanguageCodes );
		$extraFeatures[] = new HasDataForLangFeature( $termLanguageCodes );
	}
}
354 | |
355 | } |