Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
50.48% covered (warning)
50.48%
105 / 208
9.09% covered (danger)
9.09%
1 / 11
CRAP
0.00% covered (danger)
0.00%
0 / 1
Hooks
50.48% covered (warning)
50.48%
105 / 208
9.09% covered (danger)
9.09%
1 / 11
203.24
0.00% covered (danger)
0.00%
0 / 1
 onSetupAfterCache
0.00% covered (danger)
0.00%
0 / 7
0.00% covered (danger)
0.00%
0 / 1
12
 onWikibaseRepoEntityTypes
42.86% covered (danger)
42.86%
3 / 7
0.00% covered (danger)
0.00%
0 / 1
2.75
 onCirrusSearchAnalysisConfig
0.00% covered (danger)
0.00%
0 / 33
0.00% covered (danger)
0.00%
0 / 1
20
 onCirrusSearchProfileService
0.00% covered (danger)
0.00%
0 / 14
0.00% covered (danger)
0.00%
0 / 1
42
 registerArrayProfile
0.00% covered (danger)
0.00%
0 / 3
0.00% covered (danger)
0.00%
0 / 1
6
 registerSearchProfiles
82.56% covered (warning)
82.56%
71 / 86
0.00% covered (danger)
0.00%
0 / 1
4.08
 onCirrusSearchAddQueryFeatures
0.00% covered (danger)
0.00%
0 / 10
0.00% covered (danger)
0.00%
0 / 1
6
 amendSearchResults
90.32% covered (success)
90.32%
28 / 31
0.00% covered (danger)
0.00%
0 / 1
9.07
 onApiOpenSearchSuggest
0.00% covered (danger)
0.00%
0 / 6
0.00% covered (danger)
0.00%
0 / 1
12
 onSpecialPageInitList
0.00% covered (danger)
0.00%
0 / 8
0.00% covered (danger)
0.00%
0 / 1
2
 getWBCSConfig
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
1<?php
2
3namespace Wikibase\Search\Elastic;
4
5use CirrusSearch\Maintenance\AnalysisConfigBuilder;
6use CirrusSearch\Parser\BasicQueryClassifier;
7use CirrusSearch\Profile\ArrayProfileRepository;
8use CirrusSearch\Profile\SearchProfileRepositoryTransformer;
9use CirrusSearch\Profile\SearchProfileService;
10use Language;
11use MediaWiki\Config\ConfigException;
12use MediaWiki\MediaWikiServices;
13use RequestContext;
14use Wikibase\DataModel\Entity\EntityIdParsingException;
15use Wikibase\Repo\WikibaseRepo;
16use Wikibase\Search\Elastic\Fields\StatementsField;
17use Wikibase\Search\Elastic\Query\HasDataForLangFeature;
18use Wikibase\Search\Elastic\Query\HasLicenseFeature;
19use Wikibase\Search\Elastic\Query\HasWbStatementFeature;
20use Wikibase\Search\Elastic\Query\InLabelFeature;
21use Wikibase\Search\Elastic\Query\WbStatementQuantityFeature;
22use Wikimedia\Assert\Assert;
23
24/**
25 * Hooks for Wikibase search.
26 */
27class Hooks {
28    private const LANGUAGE_SELECTOR_PREFIX = "language_selector_prefix";
29
30    /**
31     * Setup hook.
32     * Enables/disables CirrusSearch depending on request settings.
33     */
34    public static function onSetupAfterCache() {
35        $request = RequestContext::getMain()->getRequest();
36        $useCirrus = $request->getVal( 'useCirrus' );
37        if ( $useCirrus !== null ) {
38            $GLOBALS['wgWBCSUseCirrus'] = wfStringToBool( $useCirrus );
39        }
40        $config = self::getWBCSConfig();
41        if ( $config->enabled() ) {
42            global $wgCirrusSearchExtraIndexSettings;
43            // Bump max fields so that labels/descriptions fields fit in.
44            $wgCirrusSearchExtraIndexSettings['index.mapping.total_fields.limit'] = 5000;
45        }
46    }
47
48    /**
49     * Adds the definitions relevant for Search to entity types definitions.
50     *
51     * @see WikibaseSearch.entitytypes.php
52     *
53     * @param array[] &$entityTypeDefinitions
54     */
55    public static function onWikibaseRepoEntityTypes( array &$entityTypeDefinitions ) {
56        $wbcsConfig = self::getWBCSConfig();
57        if ( !$wbcsConfig->enabled() ) {
58            return;
59        }
60        $entityTypeDefinitions = wfArrayPlus2d(
61            require __DIR__ . '/../WikibaseSearch.entitytypes.php',
62            $entityTypeDefinitions
63        );
64    }
65
66    /**
67     * Add Wikibase-specific ElasticSearch analyzer configurations.
68     * @param array &$config
69     * @param AnalysisConfigBuilder $builder
70     */
71    public static function onCirrusSearchAnalysisConfig( &$config, AnalysisConfigBuilder $builder ) {
72        if ( defined( 'MW_PHPUNIT_TEST' ) ) {
73            return;
74        }
75        $wbcsConfig = self::getWBCSConfig();
76        if ( !$wbcsConfig->enabled() ) {
77            return;
78        }
79        static $inHook;
80        if ( $inHook ) {
81            // Do not call this hook repeatedly, since ConfigBuilder calls AnalysisConfigBuilder
82            // FIXME: this is not a very nice hack, but we need it because we want AnalysisConfigBuilder
83            // to call the hook, since other extensions may make relevant changes to config.
84            // We just don't want to run this specific hook again, but Mediawiki API does not have
85            // the means to exclude one hook temporarily.
86            return;
87        }
88
89        // Analyzer for splitting statements and extracting properties:
90        // P31=Q1234 => P31
91        $config['analyzer']['extract_wb_property'] = [
92            'type' => 'custom',
93            'tokenizer' => 'split_wb_statements',
94            'filter' => [ 'first_token' ],
95        ];
96        $config['tokenizer']['split_wb_statements'] = [
97            'type' => 'pattern',
98            'pattern' => StatementsField::STATEMENT_SEPARATOR,
99        ];
100        $config['filter']['first_token'] = [
101            'type' => 'limit',
102            'max_token_count' => 1
103        ];
104
105        // Analyzer for extracting quantity data and storing it in a term frequency field
106        $config['analyzer']['extract_wb_quantity'] = [
107            'type' => 'custom',
108            'tokenizer' => 'keyword',
109            'filter' => [ 'term_freq' ],
110        ];
111
112        // Language analyzers for descriptions
113        $wbBuilder = new ConfigBuilder( WikibaseRepo::getTermsLanguages()->getLanguages(),
114            self::getWBCSConfig(),
115            $builder
116        );
117        $inHook = true;
118        try {
119            $wbBuilder->buildConfig( $config );
120        } finally {
121            $inHook = false;
122        }
123    }
124
125    /**
126     * Register our cirrus profiles using WikibaseRepo.
127     *
128     * @param SearchProfileService $service
129     */
130    public static function onCirrusSearchProfileService( SearchProfileService $service ) {
131        $config = self::getWBCSConfig();
132        if ( !defined( 'MW_PHPUNIT_TEST' ) && !$config->enabled() ) {
133            return;
134        }
135
136        $namespacesForContexts = [];
137        $entityNsLookup = WikibaseRepo::getEntityNamespaceLookup();
138        $localEntityTypes = WikibaseRepo::getLocalEntityTypes();
139        foreach ( WikibaseRepo::getFulltextSearchTypes() as $type => $profileContext ) {
140            if ( !in_array( $type, $localEntityTypes ) ) {
141                // Do not enable profiles for entity types that are not local
142                // e.g. when using MediaInfo items and properties are not managed by this wiki
143                // and thus should not enable specific profiles for them.
144                continue;
145            }
146            $namespace = $entityNsLookup->getEntityNamespace( $type );
147            if ( $namespace === null ) {
148                continue;
149            }
150            $namespacesForContexts[$profileContext][] = $namespace;
151        }
152
153        self::registerSearchProfiles( $service, $config, $namespacesForContexts );
154    }
155
156    /**
157     * Register config variable containing search profiles.
158     * @param string $profileName Name of the variable (in config context) that contains profiles
159     * @param string $repoType Cirrus repo type
160     * @param SearchProfileService $service
161     * @param WikibaseSearchConfig $entitySearchConfig Config object
162     */
163    private static function registerArrayProfile(
164        $profileName,
165        $repoType,
166        SearchProfileService $service,
167        WikibaseSearchConfig $entitySearchConfig
168    ) {
169        $profile = $entitySearchConfig->get( $profileName );
170        if ( $profile ) {
171            $service->registerArrayRepository( $repoType, 'wikibase_config', $profile );
172        }
173    }
174
175    /**
176     * Register cirrus profiles.
177     * (Visible for testing purposes)
178     * @param SearchProfileService $service
179     * @param WikibaseSearchConfig $entitySearchConfig
180     * @param int[][] $namespacesForContexts list of namespaces indexed by profile context name
181     * @see SearchProfileService
182     * @see WikibaseRepo::getFulltextSearchTypes()
183     * @throws ConfigException
184     */
185    public static function registerSearchProfiles(
186        SearchProfileService $service,
187        WikibaseSearchConfig $entitySearchConfig,
188        array $namespacesForContexts
189    ) {
190        $stmtBoost = $entitySearchConfig->get( 'StatementBoost' );
191        // register base profiles available on all wikibase installs
192        $service->registerFileRepository( SearchProfileService::RESCORE,
193            'wikibase_base', __DIR__ . '/config/ElasticSearchRescoreProfiles.php' );
194        $service->registerRepository( new SearchProfileRepositoryTransformer(
195            ArrayProfileRepository::fromFile(
196                SearchProfileService::RESCORE_FUNCTION_CHAINS,
197                'wikibase_base',
198                __DIR__ . '/config/ElasticSearchRescoreFunctions.php' ),
199            [ EntitySearchElastic::STMT_BOOST_PROFILE_REPL => $stmtBoost ]
200        ) );
201        $service->registerFileRepository( EntitySearchElastic::WIKIBASE_PREFIX_QUERY_BUILDER,
202            'wikibase_base', __DIR__ . '/config/EntityPrefixSearchProfiles.php' );
203        $service->registerFileRepository( SearchProfileService::FT_QUERY_BUILDER,
204            'wikibase_base', __DIR__ . '/config/EntitySearchProfiles.php' );
205
206        // register custom profiles provided in the wikibase config
207        self::registerArrayProfile( 'RescoreProfiles', SearchProfileService::RESCORE,
208            $service, $entitySearchConfig );
209        // Register function chains
210        $chains = $entitySearchConfig->get( 'RescoreFunctionChains' );
211        if ( $chains ) {
212            $service->registerRepository( new SearchProfileRepositoryTransformer(
213                ArrayProfileRepository::fromArray(
214                    SearchProfileService::RESCORE_FUNCTION_CHAINS,
215                    'wikibase_config',
216                    $chains ),
217                [ EntitySearchElastic::STMT_BOOST_PROFILE_REPL => $stmtBoost ]
218            ) );
219        }
220
221        self::registerArrayProfile( 'PrefixSearchProfiles',
222            EntitySearchElastic::WIKIBASE_PREFIX_QUERY_BUILDER,
223            $service, $entitySearchConfig );
224        self::registerArrayProfile( 'FulltextSearchProfiles',
225            SearchProfileService::FT_QUERY_BUILDER,
226            $service, $entitySearchConfig );
227
228        // Determine the default rescore profile to use for entity autocomplete search
229        $defaultRescore = $entitySearchConfig->get( 'DefaultPrefixRescoreProfile',
230            EntitySearchElastic::DEFAULT_RESCORE_PROFILE );
231        $service->registerDefaultProfile( SearchProfileService::RESCORE,
232            EntitySearchElastic::CONTEXT_WIKIBASE_PREFIX, $defaultRescore );
233        // add the possibility to override the profile by setting the URI param cirrusRescoreProfile
234        $service->registerUriParamOverride( SearchProfileService::RESCORE,
235            EntitySearchElastic::CONTEXT_WIKIBASE_PREFIX, 'cirrusRescoreProfile' );
236
237        // Determine the default query builder profile to use for entity autocomplete search
238        $defaultQB = $entitySearchConfig->get( 'PrefixSearchProfile',
239            EntitySearchElastic::DEFAULT_QUERY_BUILDER_PROFILE );
240
241        $service->registerDefaultProfile( EntitySearchElastic::WIKIBASE_PREFIX_QUERY_BUILDER,
242            EntitySearchElastic::CONTEXT_WIKIBASE_PREFIX, $defaultQB );
243        $service->registerUriParamOverride( EntitySearchElastic::WIKIBASE_PREFIX_QUERY_BUILDER,
244            EntitySearchElastic::CONTEXT_WIKIBASE_PREFIX, 'cirrusWBProfile' );
245
246        // Determine query builder profile for fulltext search
247        $defaultFQB = $entitySearchConfig->get( 'FulltextSearchProfile',
248            EntitySearchElastic::DEFAULT_FULL_TEXT_QUERY_BUILDER_PROFILE );
249
250        $service->registerDefaultProfile( SearchProfileService::FT_QUERY_BUILDER,
251            EntitySearchElastic::CONTEXT_WIKIBASE_FULLTEXT, $defaultFQB );
252        $service->registerUriParamOverride( SearchProfileService::FT_QUERY_BUILDER,
253            EntitySearchElastic::CONTEXT_WIKIBASE_FULLTEXT, 'cirrusWBProfile' );
254
255        // Determine the default rescore profile to use for fulltext search
256        $defaultFTRescore = $entitySearchConfig->get( 'DefaultFulltextRescoreProfile',
257            EntitySearchElastic::DEFAULT_RESCORE_PROFILE );
258
259        $service->registerDefaultProfile( SearchProfileService::RESCORE,
260            EntitySearchElastic::CONTEXT_WIKIBASE_FULLTEXT, $defaultFTRescore );
261        // add the possibility to override the profile by setting the URI param cirrusRescoreProfile
262        $service->registerUriParamOverride( SearchProfileService::RESCORE,
263            EntitySearchElastic::CONTEXT_WIKIBASE_FULLTEXT, 'cirrusRescoreProfile' );
264
265        // create a new search context for the language selector in the Special:NewLexeme
266        $service->registerDefaultProfile( SearchProfileService::RESCORE, self::LANGUAGE_SELECTOR_PREFIX,
267            EntitySearchElastic::DEFAULT_RESCORE_PROFILE );
268        $service->registerConfigOverride( SearchProfileService::RESCORE, self::LANGUAGE_SELECTOR_PREFIX,
269            $entitySearchConfig, 'LanguageSelectorRescoreProfile' );
270        $service->registerUriParamOverride( SearchProfileService::RESCORE,
271            self::LANGUAGE_SELECTOR_PREFIX, 'cirrusRescoreProfile' );
272        $service->registerDefaultProfile( EntitySearchElastic::WIKIBASE_PREFIX_QUERY_BUILDER, self::LANGUAGE_SELECTOR_PREFIX,
273            EntitySearchElastic::DEFAULT_QUERY_BUILDER_PROFILE );
274        $service->registerConfigOverride( EntitySearchElastic::WIKIBASE_PREFIX_QUERY_BUILDER, self::LANGUAGE_SELECTOR_PREFIX,
275            $entitySearchConfig, 'LanguageSelectorPrefixSearchProfile' );
276        $service->registerUriParamOverride( EntitySearchElastic::WIKIBASE_PREFIX_QUERY_BUILDER,
277            self::LANGUAGE_SELECTOR_PREFIX, 'cirrusWBProfile' );
278        $languageSelectorChains = $entitySearchConfig->get( 'LanguageSelectorRescoreFunctionChains' );
279
280        if ( $languageSelectorChains ) {
281            $languageSelectorBoosts = $entitySearchConfig->get( 'LanguageSelectorStatementBoost' );
282            $service->registerRepository( new SearchProfileRepositoryTransformer(
283                ArrayProfileRepository::fromArray(
284                    SearchProfileService::RESCORE_FUNCTION_CHAINS,
285                    'wikibase_config_language_selector',
286                    $languageSelectorChains ),
287                [ EntitySearchElastic::STMT_BOOST_PROFILE_REPL => $languageSelectorBoosts ]
288            ) );
289        }
290        // Declare "search routes" for wikibase full text search types
291        // Source of the routes is $namespacesForContexts which is a "reversed view"
292        // of WikibaseRepo::getFulltextSearchTypes().
293        // It maps the namespaces to a profile context (e.g. EntitySearchElastic::CONTEXT_WIKIBASE_FULLTEXT)
294        // and will tell cirrus to use the various components we declare in the SearchProfileService
295        // above.
296        // In this case since wikibase owns these namespaces we score the routes at 1.0 which discards
297        // any other routes and eventually fails if another extension
298        // tries to own our namespace.
299        // For now we only accept simple bag of words queries but this will change in the future
300        // when query builders will manipulate the parsed query.
301        foreach ( $namespacesForContexts as $profileContext => $namespaces ) {
302            Assert::precondition( is_string( $profileContext ),
303                '$namespacesForContexts keys must be strings and refer to the profile context to use' );
304            $service->registerFTSearchQueryRoute(
305                $profileContext,
306                1.0,
307                $namespaces,
308                // The wikibase builders only supports simple queries for now
309                [ BasicQueryClassifier::SIMPLE_BAG_OF_WORDS ]
310            );
311        }
312    }
313
314    /**
315     * Add extra cirrus search query features for wikibase
316     *
317     * @param \CirrusSearch\SearchConfig $config (not used, required by hook)
318     * @param array &$extraFeatures
319     */
320    public static function onCirrusSearchAddQueryFeatures( $config, array &$extraFeatures ) {
321        $searchConfig = self::getWBCSConfig();
322        if ( !$searchConfig->enabled() ) {
323            return;
324        }
325        $extraFeatures[] = new HasWbStatementFeature();
326        $extraFeatures[] = new WbStatementQuantityFeature();
327
328        $licenseMapping = HasLicenseFeature::getConfiguredLicenseMap( $searchConfig );
329        $extraFeatures[] = new HasLicenseFeature( $licenseMapping );
330
331        $languageCodes = WikibaseRepo::getTermsLanguages()->getLanguages();
332        $extraFeatures[] = new InLabelFeature( WikibaseRepo::getLanguageFallbackChainFactory(), $languageCodes );
333
334        $extraFeatures[] = new HasDataForLangFeature( $languageCodes );
335    }
336
337    /**
338     * Will instantiate descriptions for search results.
339     * @param Language $lang
340     * @param array &$results
341     */
342    public static function amendSearchResults( Language $lang, array &$results ) {
343        $idParser = WikibaseRepo::getEntityIdParser();
344        $entityIds = [];
345        $namespaceLookup = WikibaseRepo::getEntityNamespaceLookup();
346
347        foreach ( $results as &$result ) {
348            if ( empty( $result['title'] ) ||
349                !$namespaceLookup->isEntityNamespace( $result['title']->getNamespace() ) ) {
350                continue;
351            }
352            try {
353                $title = $result['title']->getText();
354                $entityId = $idParser->parse( $title );
355                $entityIds[] = $entityId;
356                $result['entityId'] = $entityId;
357            } catch ( EntityIdParsingException $e ) {
358                continue;
359            }
360        }
361        if ( !$entityIds ) {
362            return;
363        }
364        $lookup = WikibaseRepo::getFallbackLabelDescriptionLookupFactory()
365            ->newLabelDescriptionLookup( $lang, $entityIds );
366        $formatterFactory = WikibaseRepo::getEntityLinkFormatterFactory();
367        foreach ( $results as &$result ) {
368            if ( empty( $result['entityId'] ) ) {
369                continue;
370            }
371            $entityId = $result['entityId'];
372            unset( $result['entityId'] );
373            $label = $lookup->getLabel( $entityId );
374            if ( !$label ) {
375                continue;
376            }
377            $linkFormatter = $formatterFactory->getLinkFormatter( $entityId->getEntityType(), $lang );
378            $result['extract'] = strip_tags( $linkFormatter->getHtml( $entityId, [
379                'value' => $label->getText(),
380                'language' => $label->getActualLanguageCode(),
381            ] ) );
382        }
383    }
384
385    /**
386     * Will instantiate descriptions for search results.
387     * @param array &$results
388     */
389    public static function onApiOpenSearchSuggest( &$results ) {
390        $wbcsConfig = self::getWBCSConfig();
391        if ( !$wbcsConfig->enabled() ) {
392            return;
393        }
394
395        if ( !$results ) {
396            return;
397        }
398
399        self::amendSearchResults( WikibaseRepo::getUserLanguage(), $results );
400    }
401
402    /**
403     * Register special pages.
404     *
405     * @param array &$list
406     */
407    public static function onSpecialPageInitList( &$list ) {
408        $list['EntitiesWithoutLabel'] = [
409            SpecialEntitiesWithoutPageFactory::class,
410            'newSpecialEntitiesWithoutLabel'
411        ];
412
413        $list['EntitiesWithoutDescription'] = [
414            SpecialEntitiesWithoutPageFactory::class,
415            'newSpecialEntitiesWithoutDescription'
416        ];
417    }
418
419    /**
420     * @return WikibaseSearchConfig
421     * @suppress PhanTypeMismatchReturnSuperType
422     */
423    private static function getWBCSConfig(): WikibaseSearchConfig {
424        return MediaWikiServices::getInstance()
425            ->getConfigFactory()
426            ->makeConfig( 'WikibaseCirrusSearch' );
427    }
428
429}