Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
58.56% covered (warning)
58.56%
106 / 181
0.00% covered (danger)
0.00%
0 / 5
CRAP
0.00% covered (danger)
0.00%
0 / 1
CirrusSearchHooksHandler
58.56% covered (warning)
58.56%
106 / 181
0.00% covered (danger)
0.00%
0 / 5
41.05
0.00% covered (danger)
0.00%
0 / 1
 onCirrusSearchAnalysisConfig
0.00% covered (danger)
0.00%
0 / 33
0.00% covered (danger)
0.00%
0 / 1
20
 onCirrusSearchProfileService
0.00% covered (danger)
0.00%
0 / 14
0.00% covered (danger)
0.00%
0 / 1
42
 registerArrayProfile
0.00% covered (danger)
0.00%
0 / 3
0.00% covered (danger)
0.00%
0 / 1
6
 registerSearchProfiles
87.60% covered (warning)
87.60%
106 / 121
0.00% covered (danger)
0.00%
0 / 1
4.03
 onCirrusSearchAddQueryFeatures
0.00% covered (danger)
0.00%
0 / 10
0.00% covered (danger)
0.00%
0 / 1
6
1<?php
2
3declare( strict_types = 1 );
4
5namespace Wikibase\Search\Elastic\Hooks;
6
7use CirrusSearch\Hooks\CirrusSearchAddQueryFeaturesHook;
8use CirrusSearch\Hooks\CirrusSearchAnalysisConfigHook;
9use CirrusSearch\Hooks\CirrusSearchProfileServiceHook;
10use CirrusSearch\Maintenance\AnalysisConfigBuilder;
11use CirrusSearch\Parser\BasicQueryClassifier;
12use CirrusSearch\Profile\ArrayProfileRepository;
13use CirrusSearch\Profile\SearchProfileRepositoryTransformer;
14use CirrusSearch\Profile\SearchProfileService;
15use CirrusSearch\SearchConfig;
16use MediaWiki\Config\ConfigException;
17use Wikibase\Repo\WikibaseRepo;
18use Wikibase\Search\Elastic\ConfigBuilder;
19use Wikibase\Search\Elastic\EntitySearchElastic;
20use Wikibase\Search\Elastic\Fields\StatementsField;
21use Wikibase\Search\Elastic\Query\HasDataForLangFeature;
22use Wikibase\Search\Elastic\Query\HasLicenseFeature;
23use Wikibase\Search\Elastic\Query\HasWbStatementFeature;
24use Wikibase\Search\Elastic\Query\InLabelFeature;
25use Wikibase\Search\Elastic\Query\WbStatementQuantityFeature;
26use Wikibase\Search\Elastic\WikibaseSearchConfig;
27use Wikimedia\Assert\Assert;
28
29/**
30 * Hooks for Wikibase search.
31 */
32class CirrusSearchHooksHandler implements
33    CirrusSearchAnalysisConfigHook,
34    CirrusSearchProfileServiceHook,
35    CirrusSearchAddQueryFeaturesHook
36{
37
38    private const LANGUAGE_SELECTOR_PREFIX = "language_selector_prefix";
39
40    /**
41     * Add Wikibase-specific ElasticSearch analyzer configurations.
42     * @param array &$config
43     * @param AnalysisConfigBuilder $builder
44     */
45    public function onCirrusSearchAnalysisConfig( array &$config, AnalysisConfigBuilder $builder ): void {
46        if ( defined( 'MW_PHPUNIT_TEST' ) ) {
47            return;
48        }
49        $wbcsConfig = CirrusSearchConfiguration::getWBCSConfig();
50        if ( !$wbcsConfig->enabled() ) {
51            return;
52        }
53        static $inHook;
54        if ( $inHook ) {
55            // Do not call this hook repeatedly, since ConfigBuilder calls AnalysisConfigBuilder
56            // FIXME: this is not a very nice hack, but we need it because we want AnalysisConfigBuilder
57            // to call the hook, since other extensions may make relevant changes to config.
58            // We just don't want to run this specific hook again, but Mediawiki API does not have
59            // the means to exclude one hook temporarily.
60            return;
61        }
62
63        // Analyzer for splitting statements and extracting properties:
64        // P31=Q1234 => P31
65        $config['analyzer']['extract_wb_property'] = [
66            'type' => 'custom',
67            'tokenizer' => 'split_wb_statements',
68            'filter' => [ 'first_token' ],
69        ];
70        $config['tokenizer']['split_wb_statements'] = [
71            'type' => 'pattern',
72            'pattern' => StatementsField::STATEMENT_SEPARATOR,
73        ];
74        $config['filter']['first_token'] = [
75            'type' => 'limit',
76            'max_token_count' => 1
77        ];
78
79        // Analyzer for extracting quantity data and storing it in a term frequency field
80        $config['analyzer']['extract_wb_quantity'] = [
81            'type' => 'custom',
82            'tokenizer' => 'keyword',
83            'filter' => [ 'term_freq' ],
84        ];
85
86        // Language analyzers for descriptions
87        $wbBuilder = new ConfigBuilder( WikibaseRepo::getTermsLanguages()->getLanguages(),
88            $wbcsConfig,
89            $builder
90        );
91        $inHook = true;
92        try {
93            $wbBuilder->buildConfig( $config );
94        } finally {
95            $inHook = false;
96        }
97    }
98
99    /**
100     * Register our cirrus profiles using WikibaseRepo.
101     *
102     * @param SearchProfileService $service
103     */
104    public function onCirrusSearchProfileService( SearchProfileService $service ): void {
105        $config = CirrusSearchConfiguration::getWBCSConfig();
106        if ( !defined( 'MW_PHPUNIT_TEST' ) && !$config->enabled() ) {
107            return;
108        }
109
110        $namespacesForContexts = [];
111        $entityNsLookup = WikibaseRepo::getEntityNamespaceLookup();
112        $localEntityTypes = WikibaseRepo::getLocalEntityTypes();
113        foreach ( WikibaseRepo::getFulltextSearchTypes() as $type => $profileContext ) {
114            if ( !in_array( $type, $localEntityTypes ) ) {
115                // Do not enable profiles for entity types that are not local
116                // e.g. when using MediaInfo items and properties are not managed by this wiki
117                // and thus should not enable specific profiles for them.
118                continue;
119            }
120            $namespace = $entityNsLookup->getEntityNamespace( $type );
121            if ( $namespace === null ) {
122                continue;
123            }
124            $namespacesForContexts[$profileContext][] = $namespace;
125        }
126
127        self::registerSearchProfiles( $service, $config, $namespacesForContexts );
128    }
129
130    /**
131     * Register config variable containing search profiles.
132     * @param string $profileName Name of the variable (in config context) that contains profiles
133     * @param string $repoType Cirrus repo type
134     * @param SearchProfileService $service
135     * @param WikibaseSearchConfig $entitySearchConfig Config object
136     */
137    private static function registerArrayProfile(
138        $profileName,
139        $repoType,
140        SearchProfileService $service,
141        WikibaseSearchConfig $entitySearchConfig
142    ) {
143        $profile = $entitySearchConfig->get( $profileName );
144        if ( $profile ) {
145            $service->registerArrayRepository( $repoType, 'wikibase_config', $profile );
146        }
147    }
148
149    /**
150     * Register cirrus profiles.
151     * (Visible for testing purposes)
152     * @param SearchProfileService $service
153     * @param WikibaseSearchConfig $entitySearchConfig
154     * @param int[][] $namespacesForContexts list of namespaces indexed by profile context name
155     * @see SearchProfileService
156     * @see WikibaseRepo::getFulltextSearchTypes()
157     * @throws ConfigException
158     */
159    public static function registerSearchProfiles(
160        SearchProfileService $service,
161        WikibaseSearchConfig $entitySearchConfig,
162        array $namespacesForContexts
163    ) {
164        $stmtBoost = $entitySearchConfig->get( 'StatementBoost' );
165        // register base profiles available on all wikibase installs
166        $service->registerFileRepository( SearchProfileService::RESCORE,
167            'wikibase_base', __DIR__ . '/../config/ElasticSearchRescoreProfiles.php' );
168        $service->registerRepository( new SearchProfileRepositoryTransformer(
169            ArrayProfileRepository::fromFile(
170                SearchProfileService::RESCORE_FUNCTION_CHAINS,
171                'wikibase_base',
172                __DIR__ . '/../config/ElasticSearchRescoreFunctions.php' ),
173            [ EntitySearchElastic::STMT_BOOST_PROFILE_REPL => $stmtBoost ]
174        ) );
175        $service->registerFileRepository( EntitySearchElastic::WIKIBASE_PREFIX_QUERY_BUILDER,
176            'wikibase_base', __DIR__ . '/../config/EntityPrefixSearchProfiles.php' );
177        $service->registerFileRepository( EntitySearchElastic::WIKIBASE_IN_LABEL_QUERY_BUILDER,
178            'wikibase_base', __DIR__ . '/../config/EntityInLabelSearchProfiles.php' );
179        $service->registerFileRepository( SearchProfileService::FT_QUERY_BUILDER,
180            'wikibase_base', __DIR__ . '/../config/EntitySearchProfiles.php' );
181
182        // register custom profiles provided in the wikibase config
183        self::registerArrayProfile( 'RescoreProfiles', SearchProfileService::RESCORE,
184            $service, $entitySearchConfig );
185        // Register function chains
186        $chains = $entitySearchConfig->get( 'RescoreFunctionChains' );
187        if ( $chains ) {
188            $service->registerRepository( new SearchProfileRepositoryTransformer(
189                ArrayProfileRepository::fromArray(
190                    SearchProfileService::RESCORE_FUNCTION_CHAINS,
191                    'wikibase_config',
192                    $chains ),
193                [ EntitySearchElastic::STMT_BOOST_PROFILE_REPL => $stmtBoost ]
194            ) );
195        }
196
197        self::registerArrayProfile( 'PrefixSearchProfiles',
198            EntitySearchElastic::WIKIBASE_PREFIX_QUERY_BUILDER,
199            $service, $entitySearchConfig );
200        self::registerArrayProfile( 'FulltextSearchProfiles',
201            SearchProfileService::FT_QUERY_BUILDER,
202            $service, $entitySearchConfig );
203        self::registerArrayProfile( 'InLabelSearchProfiles',
204            EntitySearchElastic::WIKIBASE_IN_LABEL_QUERY_BUILDER,
205            $service, $entitySearchConfig );
206
207        // Determine the default rescore profile to use for entity autocomplete search
208        $defaultRescore = $entitySearchConfig->get( 'DefaultPrefixRescoreProfile',
209            EntitySearchElastic::DEFAULT_RESCORE_PROFILE );
210        $service->registerDefaultProfile( SearchProfileService::RESCORE,
211            EntitySearchElastic::CONTEXT_WIKIBASE_PREFIX, $defaultRescore );
212        // Check for a variation of the default profile with the requested language code appended. If available
213        // use the language specific profile instead of the default profile.
214        $service->registerContextualOverride( SearchProfileService::RESCORE,
215            EntitySearchElastic::CONTEXT_WIKIBASE_PREFIX, "{$defaultRescore}-{lang}", [ '{lang}' => 'language' ] );
216        // add the possibility to override the profile by setting the URI param cirrusRescoreProfile
217        $service->registerUriParamOverride( SearchProfileService::RESCORE,
218            EntitySearchElastic::CONTEXT_WIKIBASE_PREFIX, 'cirrusRescoreProfile' );
219
220        // Determine the default query builder profile to use for entity autocomplete search
221        $defaultQB = $entitySearchConfig->get( 'PrefixSearchProfile',
222            EntitySearchElastic::DEFAULT_QUERY_BUILDER_PROFILE );
223
224        $service->registerDefaultProfile( EntitySearchElastic::WIKIBASE_PREFIX_QUERY_BUILDER,
225            EntitySearchElastic::CONTEXT_WIKIBASE_PREFIX, $defaultQB );
226        $service->registerContextualOverride( EntitySearchElastic::WIKIBASE_PREFIX_QUERY_BUILDER,
227            EntitySearchElastic::CONTEXT_WIKIBASE_PREFIX, "{$defaultQB}-{lang}", [ '{lang}' => 'language' ] );
228        $service->registerUriParamOverride( EntitySearchElastic::WIKIBASE_PREFIX_QUERY_BUILDER,
229            EntitySearchElastic::CONTEXT_WIKIBASE_PREFIX, 'cirrusWBProfile' );
230
231        // Determine the default rescore profile to use for entity search by label
232        $defaultInLabelRescore = 'wikibase_in_label';
233        $service->registerDefaultProfile( SearchProfileService::RESCORE,
234            EntitySearchElastic::CONTEXT_WIKIBASE_IN_LABEL, $defaultInLabelRescore );
235        $service->registerConfigOverride(
236            SearchProfileService::RESCORE,
237            EntitySearchElastic::CONTEXT_WIKIBASE_IN_LABEL,
238            $entitySearchConfig,
239            'DefaultInLabelRescoreProfile'
240        );
241        // Check for a variation of the default profile with the requested language code appended. If available
242        // use the language specific profile instead of the default profile.
243        $service->registerContextualOverride( SearchProfileService::RESCORE,
244            EntitySearchElastic::CONTEXT_WIKIBASE_IN_LABEL, "{$defaultInLabelRescore}-{lang}", [ '{lang}' => 'language' ] );
245        // add the possibility to override the profile by setting the URI param cirrusRescoreProfile
246        $service->registerUriParamOverride( SearchProfileService::RESCORE,
247            EntitySearchElastic::CONTEXT_WIKIBASE_IN_LABEL, 'cirrusRescoreProfile' );
248
249        // Determine the default query builder profile to use for entity search by label
250        $defaultInLabelQB = 'default';
251        $service->registerConfigOverride(
252            EntitySearchElastic::WIKIBASE_IN_LABEL_QUERY_BUILDER,
253            EntitySearchElastic::CONTEXT_WIKIBASE_IN_LABEL,
254            $entitySearchConfig,
255            'InLabelSearchProfile'
256        );
257        $service->registerDefaultProfile( EntitySearchElastic::WIKIBASE_IN_LABEL_QUERY_BUILDER,
258            EntitySearchElastic::CONTEXT_WIKIBASE_IN_LABEL, $defaultInLabelQB );
259        $service->registerContextualOverride( EntitySearchElastic::WIKIBASE_IN_LABEL_QUERY_BUILDER,
260            EntitySearchElastic::CONTEXT_WIKIBASE_IN_LABEL, "{$defaultInLabelQB}-{lang}", [ '{lang}' => 'language' ] );
261        $service->registerUriParamOverride( EntitySearchElastic::WIKIBASE_IN_LABEL_QUERY_BUILDER,
262            EntitySearchElastic::CONTEXT_WIKIBASE_IN_LABEL, 'cirrusWBProfile' );
263
264        // Determine query builder profile for fulltext search
265        $defaultFQB = $entitySearchConfig->get( 'FulltextSearchProfile',
266            EntitySearchElastic::DEFAULT_FULL_TEXT_QUERY_BUILDER_PROFILE );
267
268        $service->registerDefaultProfile( SearchProfileService::FT_QUERY_BUILDER,
269            EntitySearchElastic::CONTEXT_WIKIBASE_FULLTEXT, $defaultFQB );
270        $service->registerUriParamOverride( SearchProfileService::FT_QUERY_BUILDER,
271            EntitySearchElastic::CONTEXT_WIKIBASE_FULLTEXT, 'cirrusWBProfile' );
272
273        // Determine the default rescore profile to use for fulltext search
274        $defaultFTRescore = $entitySearchConfig->get( 'DefaultFulltextRescoreProfile',
275            EntitySearchElastic::DEFAULT_RESCORE_PROFILE );
276
277        $service->registerDefaultProfile( SearchProfileService::RESCORE,
278            EntitySearchElastic::CONTEXT_WIKIBASE_FULLTEXT, $defaultFTRescore );
279        // add the possibility to override the profile by setting the URI param cirrusRescoreProfile
280        $service->registerUriParamOverride( SearchProfileService::RESCORE,
281            EntitySearchElastic::CONTEXT_WIKIBASE_FULLTEXT, 'cirrusRescoreProfile' );
282
283        // create a new search context for the language selector in the Special:NewLexeme
284        $service->registerDefaultProfile( SearchProfileService::RESCORE, self::LANGUAGE_SELECTOR_PREFIX,
285            EntitySearchElastic::DEFAULT_RESCORE_PROFILE );
286        $service->registerConfigOverride( SearchProfileService::RESCORE, self::LANGUAGE_SELECTOR_PREFIX,
287            $entitySearchConfig, 'LanguageSelectorRescoreProfile' );
288        $service->registerUriParamOverride( SearchProfileService::RESCORE,
289            self::LANGUAGE_SELECTOR_PREFIX, 'cirrusRescoreProfile' );
290        $service->registerDefaultProfile( EntitySearchElastic::WIKIBASE_PREFIX_QUERY_BUILDER, self::LANGUAGE_SELECTOR_PREFIX,
291            EntitySearchElastic::DEFAULT_QUERY_BUILDER_PROFILE );
292        $service->registerConfigOverride( EntitySearchElastic::WIKIBASE_PREFIX_QUERY_BUILDER, self::LANGUAGE_SELECTOR_PREFIX,
293            $entitySearchConfig, 'LanguageSelectorPrefixSearchProfile' );
294        $service->registerUriParamOverride( EntitySearchElastic::WIKIBASE_PREFIX_QUERY_BUILDER,
295            self::LANGUAGE_SELECTOR_PREFIX, 'cirrusWBProfile' );
296        $languageSelectorChains = $entitySearchConfig->get( 'LanguageSelectorRescoreFunctionChains' );
297
298        if ( $languageSelectorChains ) {
299            $languageSelectorBoosts = $entitySearchConfig->get( 'LanguageSelectorStatementBoost' );
300            $service->registerRepository( new SearchProfileRepositoryTransformer(
301                ArrayProfileRepository::fromArray(
302                    SearchProfileService::RESCORE_FUNCTION_CHAINS,
303                    'wikibase_config_language_selector',
304                    $languageSelectorChains ),
305                [ EntitySearchElastic::STMT_BOOST_PROFILE_REPL => $languageSelectorBoosts ]
306            ) );
307        }
308        // Declare "search routes" for wikibase full text search types
309        // Source of the routes is $namespacesForContexts which is a "reversed view"
310        // of WikibaseRepo::getFulltextSearchTypes().
311        // It maps the namespaces to a profile context (e.g. EntitySearchElastic::CONTEXT_WIKIBASE_FULLTEXT)
312        // and will tell cirrus to use the various components we declare in the SearchProfileService
313        // above.
314        // In this case since wikibase owns these namespaces we score the routes at 1.0 which discards
315        // any other routes and eventually fails if another extension
316        // tries to own our namespace.
317        // For now we only accept simple bag of words queries but this will change in the future
318        // when query builders will manipulate the parsed query.
319        foreach ( $namespacesForContexts as $profileContext => $namespaces ) {
320            Assert::precondition( is_string( $profileContext ),
321                '$namespacesForContexts keys must be strings and refer to the profile context to use' );
322            $service->registerFTSearchQueryRoute(
323                $profileContext,
324                1.0,
325                $namespaces,
326                // The wikibase builders only supports simple queries for now
327                [ BasicQueryClassifier::SIMPLE_BAG_OF_WORDS ]
328            );
329        }
330    }
331
332    /**
333     * Add extra cirrus search query features for wikibase
334     *
335     * @param \CirrusSearch\SearchConfig $config (not used, required by hook)
336     * @param array &$extraFeatures
337     */
338    public function onCirrusSearchAddQueryFeatures( SearchConfig $config, array &$extraFeatures ): void {
339        $searchConfig = CirrusSearchConfiguration::getWBCSConfig();
340        if ( !$searchConfig->enabled() ) {
341            return;
342        }
343        $extraFeatures[] = new HasWbStatementFeature();
344        $extraFeatures[] = new WbStatementQuantityFeature();
345
346        $licenseMapping = HasLicenseFeature::getConfiguredLicenseMap( $searchConfig );
347        $extraFeatures[] = new HasLicenseFeature( $licenseMapping );
348
349        $languageCodes = WikibaseRepo::getTermsLanguages()->getLanguages();
350        $extraFeatures[] = new InLabelFeature( WikibaseRepo::getLanguageFallbackChainFactory(), $languageCodes );
351
352        $extraFeatures[] = new HasDataForLangFeature( $languageCodes );
353    }
354
355}