Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
58.56% |
106 / 181 |
|
0.00% |
0 / 5 |
CRAP | |
0.00% |
0 / 1 |
| CirrusSearchHooksHandler | |
58.56% |
106 / 181 |
|
0.00% |
0 / 5 |
41.05 | |
0.00% |
0 / 1 |
| onCirrusSearchAnalysisConfig | |
0.00% |
0 / 33 |
|
0.00% |
0 / 1 |
20 | |||
| onCirrusSearchProfileService | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
42 | |||
| registerArrayProfile | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
| registerSearchProfiles | |
87.60% |
106 / 121 |
|
0.00% |
0 / 1 |
4.03 | |||
| onCirrusSearchAddQueryFeatures | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
6 | |||
| 1 | <?php |
| 2 | |
| 3 | declare( strict_types = 1 ); |
| 4 | |
| 5 | namespace Wikibase\Search\Elastic\Hooks; |
| 6 | |
| 7 | use CirrusSearch\Hooks\CirrusSearchAddQueryFeaturesHook; |
| 8 | use CirrusSearch\Hooks\CirrusSearchAnalysisConfigHook; |
| 9 | use CirrusSearch\Hooks\CirrusSearchProfileServiceHook; |
| 10 | use CirrusSearch\Maintenance\AnalysisConfigBuilder; |
| 11 | use CirrusSearch\Parser\BasicQueryClassifier; |
| 12 | use CirrusSearch\Profile\ArrayProfileRepository; |
| 13 | use CirrusSearch\Profile\SearchProfileRepositoryTransformer; |
| 14 | use CirrusSearch\Profile\SearchProfileService; |
| 15 | use CirrusSearch\SearchConfig; |
| 16 | use MediaWiki\Config\ConfigException; |
| 17 | use Wikibase\Repo\WikibaseRepo; |
| 18 | use Wikibase\Search\Elastic\ConfigBuilder; |
| 19 | use Wikibase\Search\Elastic\EntitySearchElastic; |
| 20 | use Wikibase\Search\Elastic\Fields\StatementsField; |
| 21 | use Wikibase\Search\Elastic\Query\HasDataForLangFeature; |
| 22 | use Wikibase\Search\Elastic\Query\HasLicenseFeature; |
| 23 | use Wikibase\Search\Elastic\Query\HasWbStatementFeature; |
| 24 | use Wikibase\Search\Elastic\Query\InLabelFeature; |
| 25 | use Wikibase\Search\Elastic\Query\WbStatementQuantityFeature; |
| 26 | use Wikibase\Search\Elastic\WikibaseSearchConfig; |
| 27 | use Wikimedia\Assert\Assert; |
| 28 | |
| 29 | /** |
| 30 | * Hooks for Wikibase search. |
| 31 | */ |
| 32 | class CirrusSearchHooksHandler implements |
| 33 | CirrusSearchAnalysisConfigHook, |
| 34 | CirrusSearchProfileServiceHook, |
| 35 | CirrusSearchAddQueryFeaturesHook |
| 36 | { |
| 37 | |
| 38 | private const LANGUAGE_SELECTOR_PREFIX = "language_selector_prefix"; |
| 39 | |
/**
 * Contribute the Wikibase-specific ElasticSearch analysis settings.
 *
 * The handler returns without touching $config when MW_PHPUNIT_TEST is defined,
 * when the WBCS config reports that it is not enabled, or when it is entered
 * again while its own ConfigBuilder::buildConfig() call is still running.
 *
 * @param array &$config Analysis configuration, modified in place
 * @param AnalysisConfigBuilder $builder Passed on to ConfigBuilder
 */
public function onCirrusSearchAnalysisConfig( array &$config, AnalysisConfigBuilder $builder ): void {
	// Truthy only while the buildConfig() call at the bottom is in progress.
	static $alreadyRunning;

	if ( defined( 'MW_PHPUNIT_TEST' ) ) {
		return;
	}

	$searchConfig = CirrusSearchConfiguration::getWBCSConfig();
	if ( !$searchConfig->enabled() ) {
		return;
	}

	if ( $alreadyRunning ) {
		// ConfigBuilder calls AnalysisConfigBuilder, which fires this hook again.
		// FIXME: not a very nice hack, but AnalysisConfigBuilder has to run the hook
		// so that other extensions can make their changes to the config. Only this
		// specific handler must be skipped, and the MediaWiki API offers no way to
		// exclude a single hook temporarily.
		return;
	}

	// Analyzer that splits a statement and keeps only the property:
	// P31=Q1234 => P31
	$propertyAnalyzer = [
		'type' => 'custom',
		'tokenizer' => 'split_wb_statements',
		'filter' => [ 'first_token' ],
	];
	$config['analyzer']['extract_wb_property'] = $propertyAnalyzer;

	$statementTokenizer = [
		'type' => 'pattern',
		'pattern' => StatementsField::STATEMENT_SEPARATOR,
	];
	$config['tokenizer']['split_wb_statements'] = $statementTokenizer;

	$firstTokenFilter = [
		'type' => 'limit',
		'max_token_count' => 1
	];
	$config['filter']['first_token'] = $firstTokenFilter;

	// Analyzer that extracts quantity data and stores it in a term frequency field
	$quantityAnalyzer = [
		'type' => 'custom',
		'tokenizer' => 'keyword',
		'filter' => [ 'term_freq' ],
	];
	$config['analyzer']['extract_wb_quantity'] = $quantityAnalyzer;

	// Language analyzers for descriptions
	$termLanguages = WikibaseRepo::getTermsLanguages()->getLanguages();
	$descriptionBuilder = new ConfigBuilder( $termLanguages, $searchConfig, $builder );

	$alreadyRunning = true;
	try {
		$descriptionBuilder->buildConfig( $config );
	} finally {
		// Always reset, so a failed build does not disable the handler for later calls.
		$alreadyRunning = false;
	}
}
| 98 | |
/**
 * Register our cirrus profiles using WikibaseRepo.
 *
 * Collects, per profile context, the namespaces of all local entity types that
 * take part in fulltext search and hands them to registerSearchProfiles().
 * Outside of PHPUnit runs nothing is registered when the WBCS config reports
 * that it is not enabled.
 *
 * @param SearchProfileService $service
 */
public function onCirrusSearchProfileService( SearchProfileService $service ): void {
	$config = CirrusSearchConfiguration::getWBCSConfig();
	if ( !defined( 'MW_PHPUNIT_TEST' ) && !$config->enabled() ) {
		return;
	}

	$namespacesForContexts = [];
	$entityNsLookup = WikibaseRepo::getEntityNamespaceLookup();
	$localEntityTypes = WikibaseRepo::getLocalEntityTypes();
	foreach ( WikibaseRepo::getFulltextSearchTypes() as $type => $profileContext ) {
		// Strict comparison: entity type names must match exactly, without
		// PHP's loose type juggling.
		if ( !in_array( $type, $localEntityTypes, true ) ) {
			// Do not enable profiles for entity types that are not local
			// e.g. when using MediaInfo items and properties are not managed by this wiki
			// and thus should not enable specific profiles for them.
			continue;
		}
		$namespace = $entityNsLookup->getEntityNamespace( $type );
		if ( $namespace === null ) {
			// No namespace for this entity type, so there is nothing to route.
			continue;
		}
		$namespacesForContexts[$profileContext][] = $namespace;
	}

	self::registerSearchProfiles( $service, $config, $namespacesForContexts );
}
| 129 | |
/**
 * Register config variable containing search profiles.
 *
 * Reads $profileName from the config and, when the value is truthy, registers
 * it with the service as an array repository named 'wikibase_config'.
 * An empty or missing value registers nothing.
 *
 * @param string $profileName Name of the variable (in config context) that contains profiles
 * @param string $repoType Cirrus repo type
 * @param SearchProfileService $service
 * @param WikibaseSearchConfig $entitySearchConfig Config object
 */
private static function registerArrayProfile(
	string $profileName,
	string $repoType,
	SearchProfileService $service,
	WikibaseSearchConfig $entitySearchConfig
): void {
	$profile = $entitySearchConfig->get( $profileName );
	if ( $profile ) {
		$service->registerArrayRepository( $repoType, 'wikibase_config', $profile );
	}
}
| 148 | |
/**
 * Register cirrus profiles.
 * (Visible for testing purposes)
 *
 * Registers, in this order: the base profile repositories shipped in the
 * config directory, the custom repositories found in the wikibase config,
 * the default profiles and overrides for the prefix, in-label, fulltext and
 * language selector contexts, and finally the fulltext query routes for the
 * given namespaces.
 *
 * @param SearchProfileService $service
 * @param WikibaseSearchConfig $entitySearchConfig
 * @param int[][] $namespacesForContexts list of namespaces indexed by profile context name
 * @see SearchProfileService
 * @see WikibaseRepo::getFulltextSearchTypes()
 * @throws ConfigException
 */
public static function registerSearchProfiles(
	SearchProfileService $service,
	WikibaseSearchConfig $entitySearchConfig,
	array $namespacesForContexts
) {
	$statementBoost = $entitySearchConfig->get( 'StatementBoost' );

	// Short local names for the profile types and contexts used throughout.
	$rescoreType = SearchProfileService::RESCORE;
	$functionChainsType = SearchProfileService::RESCORE_FUNCTION_CHAINS;
	$fulltextBuilderType = SearchProfileService::FT_QUERY_BUILDER;
	$prefixBuilderType = EntitySearchElastic::WIKIBASE_PREFIX_QUERY_BUILDER;
	$inLabelBuilderType = EntitySearchElastic::WIKIBASE_IN_LABEL_QUERY_BUILDER;
	$prefixContext = EntitySearchElastic::CONTEXT_WIKIBASE_PREFIX;
	$inLabelContext = EntitySearchElastic::CONTEXT_WIKIBASE_IN_LABEL;
	$fulltextContext = EntitySearchElastic::CONTEXT_WIKIBASE_FULLTEXT;
	$selectorContext = self::LANGUAGE_SELECTOR_PREFIX;
	$boostPlaceholder = EntitySearchElastic::STMT_BOOST_PROFILE_REPL;
	$languageSubstitution = [ '{lang}' => 'language' ];

	// Base profiles available on every wikibase install
	$service->registerFileRepository(
		$rescoreType,
		'wikibase_base',
		__DIR__ . '/../config/ElasticSearchRescoreProfiles.php'
	);
	$baseFunctionChains = ArrayProfileRepository::fromFile(
		$functionChainsType,
		'wikibase_base',
		__DIR__ . '/../config/ElasticSearchRescoreFunctions.php'
	);
	$service->registerRepository( new SearchProfileRepositoryTransformer(
		$baseFunctionChains,
		[ $boostPlaceholder => $statementBoost ]
	) );
	$service->registerFileRepository(
		$prefixBuilderType,
		'wikibase_base',
		__DIR__ . '/../config/EntityPrefixSearchProfiles.php'
	);
	$service->registerFileRepository(
		$inLabelBuilderType,
		'wikibase_base',
		__DIR__ . '/../config/EntityInLabelSearchProfiles.php'
	);
	$service->registerFileRepository(
		$fulltextBuilderType,
		'wikibase_base',
		__DIR__ . '/../config/EntitySearchProfiles.php'
	);

	// Custom profiles provided in the wikibase config
	self::registerArrayProfile( 'RescoreProfiles', $rescoreType, $service, $entitySearchConfig );

	// Custom function chains
	$configuredChains = $entitySearchConfig->get( 'RescoreFunctionChains' );
	if ( $configuredChains ) {
		$customFunctionChains = ArrayProfileRepository::fromArray(
			$functionChainsType,
			'wikibase_config',
			$configuredChains
		);
		$service->registerRepository( new SearchProfileRepositoryTransformer(
			$customFunctionChains,
			[ $boostPlaceholder => $statementBoost ]
		) );
	}

	self::registerArrayProfile( 'PrefixSearchProfiles', $prefixBuilderType, $service, $entitySearchConfig );
	self::registerArrayProfile( 'FulltextSearchProfiles', $fulltextBuilderType, $service, $entitySearchConfig );
	self::registerArrayProfile( 'InLabelSearchProfiles', $inLabelBuilderType, $service, $entitySearchConfig );

	// Entity autocomplete search: default rescore profile
	$prefixRescore = $entitySearchConfig->get(
		'DefaultPrefixRescoreProfile',
		EntitySearchElastic::DEFAULT_RESCORE_PROFILE
	);
	$service->registerDefaultProfile( $rescoreType, $prefixContext, $prefixRescore );
	// If a variant of the default profile with the requested language code appended
	// exists, that language specific profile is used instead of the default one.
	$service->registerContextualOverride(
		$rescoreType,
		$prefixContext,
		$prefixRescore . '-{lang}',
		$languageSubstitution
	);
	// The profile can also be overridden through the URI param cirrusRescoreProfile
	$service->registerUriParamOverride( $rescoreType, $prefixContext, 'cirrusRescoreProfile' );

	// Entity autocomplete search: default query builder profile
	$prefixBuilder = $entitySearchConfig->get(
		'PrefixSearchProfile',
		EntitySearchElastic::DEFAULT_QUERY_BUILDER_PROFILE
	);
	$service->registerDefaultProfile( $prefixBuilderType, $prefixContext, $prefixBuilder );
	$service->registerContextualOverride(
		$prefixBuilderType,
		$prefixContext,
		$prefixBuilder . '-{lang}',
		$languageSubstitution
	);
	$service->registerUriParamOverride( $prefixBuilderType, $prefixContext, 'cirrusWBProfile' );

	// Entity search by label: default rescore profile
	$inLabelRescore = 'wikibase_in_label';
	$service->registerDefaultProfile( $rescoreType, $inLabelContext, $inLabelRescore );
	$service->registerConfigOverride(
		$rescoreType,
		$inLabelContext,
		$entitySearchConfig,
		'DefaultInLabelRescoreProfile'
	);
	// If a variant of the default profile with the requested language code appended
	// exists, that language specific profile is used instead of the default one.
	$service->registerContextualOverride(
		$rescoreType,
		$inLabelContext,
		$inLabelRescore . '-{lang}',
		$languageSubstitution
	);
	// The profile can also be overridden through the URI param cirrusRescoreProfile
	$service->registerUriParamOverride( $rescoreType, $inLabelContext, 'cirrusRescoreProfile' );

	// Entity search by label: default query builder profile
	$inLabelBuilder = 'default';
	$service->registerConfigOverride(
		$inLabelBuilderType,
		$inLabelContext,
		$entitySearchConfig,
		'InLabelSearchProfile'
	);
	$service->registerDefaultProfile( $inLabelBuilderType, $inLabelContext, $inLabelBuilder );
	$service->registerContextualOverride(
		$inLabelBuilderType,
		$inLabelContext,
		$inLabelBuilder . '-{lang}',
		$languageSubstitution
	);
	$service->registerUriParamOverride( $inLabelBuilderType, $inLabelContext, 'cirrusWBProfile' );

	// Fulltext search: query builder profile
	$fulltextBuilder = $entitySearchConfig->get(
		'FulltextSearchProfile',
		EntitySearchElastic::DEFAULT_FULL_TEXT_QUERY_BUILDER_PROFILE
	);
	$service->registerDefaultProfile( $fulltextBuilderType, $fulltextContext, $fulltextBuilder );
	$service->registerUriParamOverride( $fulltextBuilderType, $fulltextContext, 'cirrusWBProfile' );

	// Fulltext search: default rescore profile
	$fulltextRescore = $entitySearchConfig->get(
		'DefaultFulltextRescoreProfile',
		EntitySearchElastic::DEFAULT_RESCORE_PROFILE
	);
	$service->registerDefaultProfile( $rescoreType, $fulltextContext, $fulltextRescore );
	// The profile can also be overridden through the URI param cirrusRescoreProfile
	$service->registerUriParamOverride( $rescoreType, $fulltextContext, 'cirrusRescoreProfile' );

	// Separate search context for the language selector in Special:NewLexeme
	$service->registerDefaultProfile(
		$rescoreType,
		$selectorContext,
		EntitySearchElastic::DEFAULT_RESCORE_PROFILE
	);
	$service->registerConfigOverride(
		$rescoreType,
		$selectorContext,
		$entitySearchConfig,
		'LanguageSelectorRescoreProfile'
	);
	$service->registerUriParamOverride( $rescoreType, $selectorContext, 'cirrusRescoreProfile' );
	$service->registerDefaultProfile(
		$prefixBuilderType,
		$selectorContext,
		EntitySearchElastic::DEFAULT_QUERY_BUILDER_PROFILE
	);
	$service->registerConfigOverride(
		$prefixBuilderType,
		$selectorContext,
		$entitySearchConfig,
		'LanguageSelectorPrefixSearchProfile'
	);
	$service->registerUriParamOverride( $prefixBuilderType, $selectorContext, 'cirrusWBProfile' );

	$selectorChains = $entitySearchConfig->get( 'LanguageSelectorRescoreFunctionChains' );
	if ( $selectorChains ) {
		// The boost setting is only read when language selector chains are configured.
		$selectorBoosts = $entitySearchConfig->get( 'LanguageSelectorStatementBoost' );
		$selectorFunctionChains = ArrayProfileRepository::fromArray(
			$functionChainsType,
			'wikibase_config_language_selector',
			$selectorChains
		);
		$service->registerRepository( new SearchProfileRepositoryTransformer(
			$selectorFunctionChains,
			[ $boostPlaceholder => $selectorBoosts ]
		) );
	}

	// Declare the "search routes" for the wikibase full text search types.
	// The routes come from $namespacesForContexts, which is a "reversed view"
	// of WikibaseRepo::getFulltextSearchTypes(): it maps namespaces to a profile
	// context (e.g. EntitySearchElastic::CONTEXT_WIKIBASE_FULLTEXT) and tells cirrus
	// to use the components declared in the SearchProfileService above.
	// Since wikibase owns these namespaces the routes are scored at 1.0, which
	// discards any other routes and eventually fails if another extension tries
	// to own our namespace.
	// For now only simple bag of words queries are accepted; this will change
	// once query builders manipulate the parsed query.
	foreach ( $namespacesForContexts as $contextName => $contextNamespaces ) {
		Assert::precondition(
			is_string( $contextName ),
			'$namespacesForContexts keys must be strings and refer to the profile context to use'
		);
		// The wikibase builders only support simple queries for now
		$acceptedQueryClasses = [ BasicQueryClassifier::SIMPLE_BAG_OF_WORDS ];
		$service->registerFTSearchQueryRoute(
			$contextName,
			1.0,
			$contextNamespaces,
			$acceptedQueryClasses
		);
	}
}
| 331 | |
/**
 * Append the wikibase query features to the list of extra cirrus search features.
 *
 * Nothing is appended when the WBCS config reports that it is not enabled.
 *
 * @param \CirrusSearch\SearchConfig $config (not used, required by hook)
 * @param array &$extraFeatures List the features are appended to
 */
public function onCirrusSearchAddQueryFeatures( SearchConfig $config, array &$extraFeatures ): void {
	$wbcsConfig = CirrusSearchConfiguration::getWBCSConfig();
	if ( !$wbcsConfig->enabled() ) {
		return;
	}

	// Statement based features
	$extraFeatures[] = new HasWbStatementFeature();
	$extraFeatures[] = new WbStatementQuantityFeature();

	// License feature, driven by the license map taken from the WBCS config
	$licenses = HasLicenseFeature::getConfiguredLicenseMap( $wbcsConfig );
	$extraFeatures[] = new HasLicenseFeature( $licenses );

	// Features that work on the term languages
	$termLanguages = WikibaseRepo::getTermsLanguages()->getLanguages();
	$fallbackChainFactory = WikibaseRepo::getLanguageFallbackChainFactory();
	$extraFeatures[] = new InLabelFeature( $fallbackChainFactory, $termLanguages );

	$extraFeatures[] = new HasDataForLangFeature( $termLanguages );
}
| 354 | |
| 355 | } |