Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
84.91% covered (warning)
84.91%
45 / 53
70.00% covered (warning)
70.00%
7 / 10
CRAP
0.00% covered (danger)
0.00%
0 / 1
WeightedTagsHooks
84.91% covered (warning)
84.91%
45 / 53
70.00% covered (warning)
70.00%
7 / 10
15.77
0.00% covered (danger)
0.00%
0 / 1
 onCirrusSearchSimilarityConfig
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
 configureWeightedTagsSimilarity
100.00% covered (success)
100.00%
9 / 9
100.00% covered (success)
100.00%
1 / 1
2
 onSearchIndexFields
0.00% covered (danger)
0.00%
0 / 4
0.00% covered (danger)
0.00%
0 / 1
6
 configureWeightedTagsFieldMapping
100.00% covered (success)
100.00%
9 / 9
100.00% covered (success)
100.00%
1 / 1
2
 onCirrusSearchAnalysisConfig
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
 onCirrusSearchAddQueryFeatures
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
2
 configureWeightedTagsFieldAnalysis
100.00% covered (success)
100.00%
15 / 15
100.00% covered (success)
100.00%
1 / 1
2
 canBuild
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 canUse
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 maxScore
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
1<?php
2
3namespace CirrusSearch\Wikimedia;
4
5use CirrusSearch\CirrusSearch;
6use CirrusSearch\Maintenance\AnalysisConfigBuilder;
7use CirrusSearch\Query\ArticleTopicFeature;
8use CirrusSearch\Query\HasRecommendationFeature;
9use CirrusSearch\SearchConfig;
10use MediaWiki\Config\Config;
11use MediaWiki\MediaWikiServices;
12use MediaWiki\Search\Hook\SearchIndexFieldsHook;
13use SearchEngine;
14
/**
 * Functionality related to the (Wikimedia-specific) weighted_tags search feature.
 *
 * The hook handlers below register the similarity, the field mapping, the index
 * analyzer and the query keywords that rely on the weighted_tags field. They are
 * driven by the 'weighted_tags' entry of the CirrusSearchWMFExtraFeatures setting:
 *  - 'build': when truthy, the similarity, field mapping and analysis are configured
 *  - 'use': when truthy, the search keywords are made available
 *  - 'max_score': integer used as divisor in the similarity script and as max_tf
 *    of the term_freq filter (defaults to 1000)
 *
 * @package CirrusSearch\Wikimedia
 * @see ArticleTopicFeature
 */
class WeightedTagsHooks implements SearchIndexFieldsHook {
    public const FIELD_NAME = 'weighted_tags';
    public const FIELD_SIMILARITY = 'weighted_tags_similarity';
    public const FIELD_INDEX_ANALYZER = 'weighted_tags';
    public const FIELD_SEARCH_ANALYZER = 'keyword';
    public const WMF_EXTRA_FEATURES = 'CirrusSearchWMFExtraFeatures';
    public const CONFIG_OPTIONS = 'weighted_tags';
    public const BUILD_OPTION = 'build';
    public const USE_OPTION = 'use';
    public const MAX_SCORE_OPTION = 'max_score';

    /** Max score applied when none (or an unusable one) is configured. */
    private const DEFAULT_MAX_SCORE = 1000;

    /**
     * Configure the similarity needed for the weighted_tags field.
     * @param array &$similarity similarity settings to update
     * @see https://www.mediawiki.org/wiki/Extension:CirrusSearch/Hooks/CirrusSearchSimilarityConfig
     */
    public static function onCirrusSearchSimilarityConfig( array &$similarity ) {
        self::configureWeightedTagsSimilarity(
            $similarity,
            MediaWikiServices::getInstance()->getMainConfig()
        );
    }

    /**
     * Register the scripted similarity used by the weighted_tags field.
     * Does nothing unless the 'build' option is enabled.
     *
     * Visible for testing.
     * @param array &$similarity similarity settings to update
     * @param Config $config current configuration
     */
    public static function configureWeightedTagsSimilarity(
        array &$similarity,
        Config $config
    ) {
        if ( !self::canBuild( $config ) ) {
            return;
        }

        // maxScore() always returns a strictly positive integer, so the generated
        // script can never divide by zero.
        $maxScore = self::maxScore( $config );
        $similarity[self::FIELD_SIMILARITY] = [
            'type' => 'scripted',
            // no 'weight_script': we do not want any document-independent weighting
            'script' => [
                // multiply doc.freq by the boost first to force int->float conversion
                // before the division
                'source' => "return (doc.freq*query.boost)/{$maxScore};"
            ]
        ];
    }

    /**
     * Define mapping for the weighted_tags field.
     * Only applies when the requesting engine is CirrusSearch.
     * @param array &$fields array of field definitions to update
     * @param SearchEngine $engine the search engine requesting field definitions
     * @see https://www.mediawiki.org/wiki/Manual:Hooks/SearchIndexFields
     */
    public function onSearchIndexFields( &$fields, $engine ) {
        if ( !( $engine instanceof CirrusSearch ) ) {
            return;
        }
        self::configureWeightedTagsFieldMapping(
            $fields,
            MediaWikiServices::getInstance()->getMainConfig()
        );
    }

    /**
     * Add the weighted_tags field definition to the list of index fields.
     * Does nothing unless the 'build' option is enabled.
     *
     * Visible for testing
     * @param \SearchIndexField[] &$fields array of field definitions to update
     * @param Config $config the wiki configuration
     */
    public static function configureWeightedTagsFieldMapping(
        array &$fields,
        Config $config
    ) {
        if ( !self::canBuild( $config ) ) {
            return;
        }

        $fields[self::FIELD_NAME] = new WeightedTags(
            self::FIELD_NAME,
            self::FIELD_NAME,
            self::FIELD_INDEX_ANALYZER,
            self::FIELD_SEARCH_ANALYZER,
            self::FIELD_SIMILARITY
        );
    }

    /**
     * Configure default analyzer for the weighted_tags field.
     * @param array &$config analysis settings to update
     * @param AnalysisConfigBuilder $analysisConfigBuilder unneeded
     * @see https://www.mediawiki.org/wiki/Extension:CirrusSearch/Hooks/CirrusSearchAnalysisConfig
     */
    public static function onCirrusSearchAnalysisConfig( array &$config, AnalysisConfigBuilder $analysisConfigBuilder ) {
        self::configureWeightedTagsFieldAnalysis(
            $config,
            MediaWikiServices::getInstance()->getMainConfig()
        );
    }

    /**
     * Make weighted_tags search features available.
     * Nothing is added unless the 'use' option is enabled.
     * @param SearchConfig $config
     * @param array &$extraFeatures Array holding KeywordFeature objects
     * @see ArticleTopicFeature
     */
    public static function onCirrusSearchAddQueryFeatures( SearchConfig $config, array &$extraFeatures ) {
        if ( !self::canUse( $config ) ) {
            return;
        }
        // articletopic keyword, matches by ORES topic scores
        $extraFeatures[] = new ArticleTopicFeature();
        // article recommendations filter
        $extraFeatures[] = new HasRecommendationFeature();
    }

    /**
     * Register the index analyzer and the term_freq token filter used by the
     * weighted_tags field. Does nothing unless the 'build' option is enabled.
     *
     * Visible only for testing
     * @param array &$analysisConfig analysis settings to update
     * @param Config $config the wiki configuration
     * @internal
     */
    public static function configureWeightedTagsFieldAnalysis(
        array &$analysisConfig,
        Config $config
    ) {
        if ( !self::canBuild( $config ) ) {
            return;
        }

        $analysisConfig['analyzer'][self::FIELD_INDEX_ANALYZER] = [
            'type' => 'custom',
            'tokenizer' => 'keyword',
            'filter' => [
                'weighted_tags_term_freq',
            ]
        ];
        $analysisConfig['filter']['weighted_tags_term_freq'] = [
            'type' => 'term_freq',
            // must be a char that never appears in the topic names/ids
            'split_char' => '|',
            // max score (clamped); with the default value we assume that the
            // stored score is orig_score * 1000
            'max_tf' => self::maxScore( $config ),
        ];
    }

    /**
     * Check whether weighted_tags data should be processed.
     * @param Config $config
     * @return bool
     */
    private static function canBuild( Config $config ): bool {
        return (bool)self::weightedTagsOption( $config, self::BUILD_OPTION, false );
    }

    /**
     * Check whether weighted_tags data is available for searching.
     * @param Config $config
     * @return bool
     */
    private static function canUse( Config $config ): bool {
        return (bool)self::weightedTagsOption( $config, self::USE_OPTION, false );
    }

    /**
     * Maximum score of a weighted tag, used both as the divisor of the similarity
     * script and as max_tf of the term_freq filter.
     *
     * A configured value that is not a strictly positive integer would produce a
     * division by zero (or a negative divisor) in the similarity script, so the
     * default is used instead.
     * @param Config $config
     * @return int always > 0
     */
    private static function maxScore( Config $config ): int {
        $maxScore = (int)self::weightedTagsOption(
            $config,
            self::MAX_SCORE_OPTION,
            self::DEFAULT_MAX_SCORE
        );
        return $maxScore > 0 ? $maxScore : self::DEFAULT_MAX_SCORE;
    }

    /**
     * Read a single option from the 'weighted_tags' entry of the
     * CirrusSearchWMFExtraFeatures setting.
     * @param Config $config
     * @param string $option one of the *_OPTION constants
     * @param mixed $default value returned when the option (or the whole
     *  'weighted_tags' entry) is missing or null
     * @return mixed
     */
    private static function weightedTagsOption( Config $config, string $option, $default ) {
        $extraFeatures = $config->get( self::WMF_EXTRA_FEATURES );
        $weightedTagsOptions = $extraFeatures[self::CONFIG_OPTIONS] ?? [];
        return $weightedTagsOptions[$option] ?? $default;
    }
}