Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
84.91% |
45 / 53 |
|
70.00% |
7 / 10 |
CRAP | |
0.00% |
0 / 1 |
WeightedTagsHooks | |
84.91% |
45 / 53 |
|
70.00% |
7 / 10 |
15.77 | |
0.00% |
0 / 1 |
onCirrusSearchSimilarityConfig | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
configureWeightedTagsSimilarity | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
2 | |||
onSearchIndexFields | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
configureWeightedTagsFieldMapping | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
2 | |||
onCirrusSearchAnalysisConfig | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
onCirrusSearchAddQueryFeatures | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
configureWeightedTagsFieldAnalysis | |
100.00% |
15 / 15 |
|
100.00% |
1 / 1 |
2 | |||
canBuild | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
canUse | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
maxScore | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | |
3 | namespace CirrusSearch\Wikimedia; |
4 | |
5 | use CirrusSearch\CirrusSearch; |
6 | use CirrusSearch\Maintenance\AnalysisConfigBuilder; |
7 | use CirrusSearch\Query\ArticleTopicFeature; |
8 | use CirrusSearch\Query\HasRecommendationFeature; |
9 | use CirrusSearch\SearchConfig; |
10 | use MediaWiki\Config\Config; |
11 | use MediaWiki\MediaWikiServices; |
12 | use MediaWiki\Search\Hook\SearchIndexFieldsHook; |
13 | use SearchEngine; |
14 | |
/**
 * Functionality related to the (Wikimedia-specific) weighted_tags search feature.
 *
 * Everything here is driven by the 'weighted_tags' entry of the
 * CirrusSearchWMFExtraFeatures configuration variable:
 *  - 'build': when truthy, the field mapping, its analysis chain and its
 *    similarity are added to the index configuration;
 *  - 'use': when truthy, the keyword features querying the field are registered;
 *  - 'max_score': positive integer used to normalise and clamp the stored
 *    scores (defaults to 1000).
 *
 * @package CirrusSearch\Wikimedia
 * @see ArticleTopicFeature
 */
class WeightedTagsHooks implements SearchIndexFieldsHook {
	public const FIELD_NAME = 'weighted_tags';
	public const FIELD_SIMILARITY = 'weighted_tags_similarity';
	public const FIELD_INDEX_ANALYZER = 'weighted_tags';
	public const FIELD_SEARCH_ANALYZER = 'keyword';
	public const WMF_EXTRA_FEATURES = 'CirrusSearchWMFExtraFeatures';
	public const CONFIG_OPTIONS = 'weighted_tags';
	public const BUILD_OPTION = 'build';
	public const USE_OPTION = 'use';
	public const MAX_SCORE_OPTION = 'max_score';

	/** Name of the token filter referenced by the index analyzer. */
	private const TERM_FREQ_FILTER = 'weighted_tags_term_freq';
	/** Value used when 'max_score' is unset or not a positive integer. */
	private const DEFAULT_MAX_SCORE = 1000;

	/**
	 * Configure the similarity needed for the weighted_tags field.
	 * @param array &$similarity similarity settings to update
	 * @see https://www.mediawiki.org/wiki/Extension:CirrusSearch/Hooks/CirrusSearchSimilarityConfig
	 */
	public static function onCirrusSearchSimilarityConfig( array &$similarity ) {
		self::configureWeightedTagsSimilarity( $similarity,
			MediaWikiServices::getInstance()->getMainConfig() );
	}

	/**
	 * Add the scripted similarity used by the weighted_tags field.
	 * Does nothing unless the 'build' option is enabled.
	 * Visible for testing.
	 * @param array &$similarity similarity settings to update
	 * @param Config $config current configuration
	 */
	public static function configureWeightedTagsSimilarity(
		array &$similarity,
		Config $config
	) {
		if ( !self::canBuild( $config ) ) {
			return;
		}
		$maxScore = self::maxScore( $config );
		$similarity[self::FIELD_SIMILARITY] = [
			'type' => 'scripted',
			// no 'weight_script': we do not want document-independent weighting
			'script' => [
				// apply boost close to doc.freq to force int->float conversion
				'source' => "return (doc.freq*query.boost)/$maxScore;"
			]
		];
	}

	/**
	 * Define mapping for the weighted_tags field.
	 * Only acts when the requesting engine is CirrusSearch.
	 * @param array &$fields array of field definitions to update
	 * @param SearchEngine $engine the search engine requesting field definitions
	 * @see https://www.mediawiki.org/wiki/Manual:Hooks/SearchIndexFields
	 */
	public function onSearchIndexFields( &$fields, $engine ) {
		if ( !( $engine instanceof CirrusSearch ) ) {
			return;
		}
		self::configureWeightedTagsFieldMapping( $fields,
			MediaWikiServices::getInstance()->getMainConfig() );
	}

	/**
	 * Register the weighted_tags field definition.
	 * Does nothing unless the 'build' option is enabled.
	 * Visible for testing.
	 * @param \SearchIndexField[] &$fields array of field definitions to update
	 * @param Config $config the wiki configuration
	 */
	public static function configureWeightedTagsFieldMapping(
		array &$fields,
		Config $config
	) {
		if ( !self::canBuild( $config ) ) {
			return;
		}

		$fields[self::FIELD_NAME] = new WeightedTags(
			self::FIELD_NAME,
			self::FIELD_NAME,
			self::FIELD_INDEX_ANALYZER,
			self::FIELD_SEARCH_ANALYZER,
			self::FIELD_SIMILARITY
		);
	}

	/**
	 * Configure default analyzer for the weighted_tags field.
	 * @param array &$config analysis settings to update
	 * @param AnalysisConfigBuilder $analysisConfigBuilder unneeded
	 * @see https://www.mediawiki.org/wiki/Extension:CirrusSearch/Hooks/CirrusSearchAnalysisConfig
	 */
	public static function onCirrusSearchAnalysisConfig( array &$config, AnalysisConfigBuilder $analysisConfigBuilder ) {
		self::configureWeightedTagsFieldAnalysis( $config,
			MediaWikiServices::getInstance()->getMainConfig() );
	}

	/**
	 * Make weighted_tags search features available.
	 * Nothing is registered unless the 'use' option is enabled.
	 * @param SearchConfig $config
	 * @param array &$extraFeatures Array holding KeywordFeature objects
	 * @see ArticleTopicFeature
	 */
	public static function onCirrusSearchAddQueryFeatures( SearchConfig $config, array &$extraFeatures ) {
		if ( self::canUse( $config ) ) {
			// articletopic keyword, matches by ORES topic scores
			$extraFeatures[] = new ArticleTopicFeature();
			// article recommendations filter
			$extraFeatures[] = new HasRecommendationFeature();
		}
	}

	/**
	 * Add the index analyzer of the weighted_tags field and the term_freq
	 * token filter it relies on.
	 * Does nothing unless the 'build' option is enabled.
	 * Visible only for testing.
	 * @param array &$analysisConfig analysis settings to update
	 * @param Config $config the wiki configuration
	 * @internal
	 */
	public static function configureWeightedTagsFieldAnalysis(
		array &$analysisConfig,
		Config $config
	) {
		if ( !self::canBuild( $config ) ) {
			return;
		}
		$maxScore = self::maxScore( $config );
		$analysisConfig['analyzer'][self::FIELD_INDEX_ANALYZER] = [
			'type' => 'custom',
			'tokenizer' => 'keyword',
			'filter' => [
				self::TERM_FREQ_FILTER,
			]
		];
		$analysisConfig['filter'][self::TERM_FREQ_FILTER] = [
			'type' => 'term_freq',
			// must be a char that never appears in the topic names/ids
			'split_char' => '|',
			// max score (clamped); presumably scores are indexed as
			// orig_score * 1000, matching the default max_score - TODO confirm
			'max_tf' => $maxScore,
		];
	}

	/**
	 * Check whether weighted_tags data should be processed.
	 * @param Config $config
	 * @return bool true when the 'build' option is set to a truthy value
	 */
	private static function canBuild( Config $config ): bool {
		return (bool)( self::weightedTagsOptions( $config )[self::BUILD_OPTION] ?? false );
	}

	/**
	 * Check whether weighted_tags data is available for searching.
	 * @param Config $config
	 * @return bool true when the 'use' option is set to a truthy value
	 */
	private static function canUse( Config $config ): bool {
		return (bool)( self::weightedTagsOptions( $config )[self::USE_OPTION] ?? false );
	}

	/**
	 * Read the configured maximum score.
	 *
	 * The value ends up as the divisor of the similarity script and as the
	 * max_tf of the term_freq filter, so anything that does not cast to a
	 * positive integer falls back to the default instead of producing a
	 * division by zero or a negative divisor.
	 * @param Config $config
	 * @return int a strictly positive integer, 1000 by default
	 */
	private static function maxScore( Config $config ): int {
		$configured = (int)( self::weightedTagsOptions( $config )[self::MAX_SCORE_OPTION]
			?? self::DEFAULT_MAX_SCORE );
		return $configured > 0 ? $configured : self::DEFAULT_MAX_SCORE;
	}

	/**
	 * Fetch the 'weighted_tags' entry of the CirrusSearchWMFExtraFeatures setting.
	 * @param Config $config
	 * @return mixed the configured value (expected to be an array of options),
	 *  or an empty array when the entry is missing
	 */
	private static function weightedTagsOptions( Config $config ) {
		$extraFeatures = $config->get( self::WMF_EXTRA_FEATURES );
		return $extraFeatures[self::CONFIG_OPTIONS] ?? [];
	}
}