Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
84.91% covered (warning)
84.91%
45 / 53
70.00% covered (warning)
70.00%
7 / 10
CRAP
0.00% covered (danger)
0.00%
0 / 1
WeightedTagsHooks
84.91% covered (warning)
84.91%
45 / 53
70.00% covered (warning)
70.00%
7 / 10
15.77
0.00% covered (danger)
0.00%
0 / 1
 onCirrusSearchSimilarityConfig
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
 configureWeightedTagsSimilarity
100.00% covered (success)
100.00%
9 / 9
100.00% covered (success)
100.00%
1 / 1
2
 onSearchIndexFields
0.00% covered (danger)
0.00%
0 / 4
0.00% covered (danger)
0.00%
0 / 1
6
 configureWeightedTagsFieldMapping
100.00% covered (success)
100.00%
9 / 9
100.00% covered (success)
100.00%
1 / 1
2
 onCirrusSearchAnalysisConfig
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
 onCirrusSearchAddQueryFeatures
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
2
 configureWeightedTagsFieldAnalysis
100.00% covered (success)
100.00%
15 / 15
100.00% covered (success)
100.00%
1 / 1
2
 canBuild
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 canUse
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 maxScore
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
1<?php
2
3namespace CirrusSearch\Wikimedia;
4
5use CirrusSearch\CirrusSearch;
6use CirrusSearch\Maintenance\AnalysisConfigBuilder;
7use CirrusSearch\Query\ArticleTopicFeature;
8use CirrusSearch\Query\HasRecommendationFeature;
9use CirrusSearch\SearchConfig;
10use Config;
11use MediaWiki\MediaWikiServices;
12use SearchEngine;
13
/**
 * Functionality related to the (Wikimedia-specific) weighted_tags search feature.
 *
 * Every handler is a no-op unless the relevant option ('build' or 'use') is
 * enabled under the 'weighted_tags' key of the CirrusSearchWMFExtraFeatures
 * configuration setting.
 * @package CirrusSearch\Wikimedia
 * @see ArticleTopicFeature
 */
class WeightedTagsHooks {
    public const FIELD_NAME = 'weighted_tags';
    public const FIELD_SIMILARITY = 'weighted_tags_similarity';
    public const FIELD_INDEX_ANALYZER = 'weighted_tags';
    public const FIELD_SEARCH_ANALYZER = 'keyword';
    public const WMF_EXTRA_FEATURES = 'CirrusSearchWMFExtraFeatures';
    public const CONFIG_OPTIONS = 'weighted_tags';
    public const BUILD_OPTION = 'build';
    public const USE_OPTION = 'use';
    public const MAX_SCORE_OPTION = 'max_score';

    /**
     * Configure the similarity needed for the weighted_tags field.
     * Hook handler: delegates to configureWeightedTagsSimilarity() with the main config.
     * @param array &$similarity similarity settings to update
     * @see https://www.mediawiki.org/wiki/Extension:CirrusSearch/Hooks/CirrusSearchSimilarityConfig
     */
    public static function onCirrusSearchSimilarityConfig( array &$similarity ) {
        self::configureWeightedTagsSimilarity( $similarity,
            MediaWikiServices::getInstance()->getMainConfig() );
    }

    /**
     * Register the scripted similarity used by the weighted_tags field.
     * Does nothing unless the 'build' option is enabled.
     * Visible for testing.
     * @param array &$similarity similarity settings to update
     * @param Config $config current configuration
     */
    public static function configureWeightedTagsSimilarity(
        array &$similarity,
        Config $config
    ) {
        if ( !self::canBuild( $config ) ) {
            return;
        }
        // maxScore() never returns less than 1, so the division below is safe.
        $maxScore = self::maxScore( $config );
        $similarity[self::FIELD_SIMILARITY] = [
            'type' => 'scripted',
            // no 'weight_script' is defined: we do not want document-independent weighting
            'script' => [
                // multiply doc.freq by query.boost before dividing,
                // to force the int->float conversion
                'source' => "return (doc.freq*query.boost)/$maxScore;"
            ]
        ];
    }

    /**
     * Define mapping for the weighted_tags field.
     * Only acts when the requesting engine is CirrusSearch.
     * @param array &$fields array of field definitions to update
     * @param SearchEngine $engine the search engine requesting field definitions
     * @see https://www.mediawiki.org/wiki/Manual:Hooks/SearchIndexFields
     */
    public static function onSearchIndexFields( array &$fields, SearchEngine $engine ) {
        if ( !( $engine instanceof CirrusSearch ) ) {
            return;
        }
        self::configureWeightedTagsFieldMapping( $fields,
            MediaWikiServices::getInstance()->getMainConfig() );
    }

    /**
     * Add the weighted_tags field definition to the list of index fields.
     * Does nothing unless the 'build' option is enabled.
     * Visible for testing.
     * @param \SearchIndexField[] &$fields array of field definitions to update
     * @param Config $config the wiki configuration
     */
    public static function configureWeightedTagsFieldMapping(
        array &$fields,
        Config $config
    ) {
        if ( !self::canBuild( $config ) ) {
            return;
        }

        $fields[self::FIELD_NAME] = new WeightedTags(
            self::FIELD_NAME,
            self::FIELD_NAME,
            self::FIELD_INDEX_ANALYZER,
            self::FIELD_SEARCH_ANALYZER,
            self::FIELD_SIMILARITY
        );
    }

    /**
     * Configure default analyzer for the weighted_tags field.
     * Hook handler: delegates to configureWeightedTagsFieldAnalysis() with the main config.
     * @param array &$config analysis settings to update
     * @param AnalysisConfigBuilder $analysisConfigBuilder unneeded
     * @see https://www.mediawiki.org/wiki/Extension:CirrusSearch/Hooks/CirrusSearchAnalysisConfig
     */
    public static function onCirrusSearchAnalysisConfig(
        array &$config,
        AnalysisConfigBuilder $analysisConfigBuilder
    ) {
        self::configureWeightedTagsFieldAnalysis( $config,
            MediaWikiServices::getInstance()->getMainConfig() );
    }

    /**
     * Make weighted_tags search features available.
     * The keywords are only added when the 'use' option is enabled.
     * @param SearchConfig $config
     * @param array &$extraFeatures Array holding KeywordFeature objects
     * @see ArticleTopicFeature
     */
    public static function onCirrusSearchAddQueryFeatures(
        SearchConfig $config,
        array &$extraFeatures
    ) {
        if ( self::canUse( $config ) ) {
            // articletopic keyword, matches by ORES topic scores
            $extraFeatures[] = new ArticleTopicFeature();
            // article recommendations filter
            $extraFeatures[] = new HasRecommendationFeature();
        }
    }

    /**
     * Register the index analyzer and the term_freq token filter used by the
     * weighted_tags field. Does nothing unless the 'build' option is enabled.
     * Visible only for testing.
     * @param array &$analysisConfig analysis settings to update
     * @param Config $config the wiki configuration
     * @internal
     */
    public static function configureWeightedTagsFieldAnalysis(
        array &$analysisConfig,
        Config $config
    ) {
        if ( !self::canBuild( $config ) ) {
            return;
        }
        $maxScore = self::maxScore( $config );
        $analysisConfig['analyzer'][self::FIELD_INDEX_ANALYZER] = [
            'type' => 'custom',
            'tokenizer' => 'keyword',
            'filter' => [
                'weighted_tags_term_freq',
            ]
        ];
        $analysisConfig['filter']['weighted_tags_term_freq'] = [
            'type' => 'term_freq',
            // must be a char that never appears in the topic names/ids
            'split_char' => '|',
            // max score (clamped); presumably scores are indexed as
            // orig_score * 1000, matching the default max_score - TODO confirm
            'max_tf' => $maxScore,
        ];
    }

    /**
     * Check whether weighted_tags data should be processed.
     * @param Config $config
     * @return bool true when the 'build' option is set to a truthy value
     */
    private static function canBuild( Config $config ): bool {
        $options = self::weightedTagsOptions( $config );
        return (bool)( $options[self::BUILD_OPTION] ?? false );
    }

    /**
     * Check whether weighted_tags data is available for searching.
     * @param Config $config
     * @return bool true when the 'use' option is set to a truthy value
     */
    private static function canUse( Config $config ): bool {
        $options = self::weightedTagsOptions( $config );
        return (bool)( $options[self::USE_OPTION] ?? false );
    }

    /**
     * Maximum score used both as the divisor of the scripted similarity and
     * as the max_tf of the term_freq filter.
     * @param Config $config
     * @return int the configured 'max_score' (default 1000), never less than 1
     */
    private static function maxScore( Config $config ): int {
        $options = self::weightedTagsOptions( $config );
        // A value below 1 would put a zero or negative divisor in the similarity
        // script and a non-positive max_tf in the analysis config, so clamp it.
        return max( 1, (int)( $options[self::MAX_SCORE_OPTION] ?? 1000 ) );
    }

    /**
     * Read the weighted_tags options from the CirrusSearchWMFExtraFeatures setting.
     * @param Config $config
     * @return array the options, or an empty array when they are absent or not an array
     */
    private static function weightedTagsOptions( Config $config ): array {
        $extraFeatures = $config->get( self::WMF_EXTRA_FEATURES );
        $options = $extraFeatures[self::CONFIG_OPTIONS] ?? [];
        // Treat a misconfigured (non-array) value as "no options" so callers
        // fall back to their defaults.
        return is_array( $options ) ? $options : [];
    }
}