Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
84.91% |
45 / 53 |
|
70.00% |
7 / 10 |
CRAP | |
0.00% |
0 / 1 |
WeightedTagsHooks | |
84.91% |
45 / 53 |
|
70.00% |
7 / 10 |
15.77 | |
0.00% |
0 / 1 |
onCirrusSearchSimilarityConfig | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
configureWeightedTagsSimilarity | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
2 | |||
onSearchIndexFields | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
configureWeightedTagsFieldMapping | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
2 | |||
onCirrusSearchAnalysisConfig | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
onCirrusSearchAddQueryFeatures | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
configureWeightedTagsFieldAnalysis | |
100.00% |
15 / 15 |
|
100.00% |
1 / 1 |
2 | |||
canBuild | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
canUse | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
maxScore | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | |
3 | namespace CirrusSearch\Wikimedia; |
4 | |
5 | use CirrusSearch\CirrusSearch; |
6 | use CirrusSearch\Maintenance\AnalysisConfigBuilder; |
7 | use CirrusSearch\Query\ArticleTopicFeature; |
8 | use CirrusSearch\Query\HasRecommendationFeature; |
9 | use CirrusSearch\SearchConfig; |
10 | use Config; |
11 | use MediaWiki\MediaWikiServices; |
12 | use SearchEngine; |
13 | |
14 | /** |
15 | * Functionality related to the (Wikimedia-specific) weighted_tags search feature. |
16 | * @package CirrusSearch\Wikimedia |
17 | * @see ArticleTopicFeature |
18 | */ |
19 | class WeightedTagsHooks { |
20 | public const FIELD_NAME = 'weighted_tags'; |
21 | public const FIELD_SIMILARITY = 'weighted_tags_similarity'; |
22 | public const FIELD_INDEX_ANALYZER = 'weighted_tags'; |
23 | public const FIELD_SEARCH_ANALYZER = 'keyword'; |
24 | public const WMF_EXTRA_FEATURES = 'CirrusSearchWMFExtraFeatures'; |
25 | public const CONFIG_OPTIONS = 'weighted_tags'; |
26 | public const BUILD_OPTION = 'build'; |
27 | public const USE_OPTION = 'use'; |
28 | public const MAX_SCORE_OPTION = 'max_score'; |
29 | |
30 | /** |
31 | * Configure the similarity needed for the weighted_tags field |
32 | * @param array &$similarity similarity settings to update |
33 | * @see https://www.mediawiki.org/wiki/Extension:CirrusSearch/Hooks/CirrusSearchSimilarityConfig |
34 | */ |
35 | public static function onCirrusSearchSimilarityConfig( array &$similarity ) { |
36 | self::configureWeightedTagsSimilarity( $similarity, |
37 | MediaWikiServices::getInstance()->getMainConfig() ); |
38 | } |
39 | |
40 | /** |
41 | * Visible for testing. |
42 | * @param array &$similarity similarity settings to update |
43 | * @param Config $config current configuration |
44 | */ |
45 | public static function configureWeightedTagsSimilarity( |
46 | array &$similarity, |
47 | Config $config |
48 | ) { |
49 | if ( !self::canBuild( $config ) ) { |
50 | return; |
51 | } |
52 | $maxScore = self::maxScore( $config ); |
53 | $similarity[self::FIELD_SIMILARITY] = [ |
54 | 'type' => 'scripted', |
55 | // no 'weight_script': we do not want document-independent weighting |
56 | 'script' => [ |
57 | // multiply doc.freq by query.boost before dividing to force int->float conversion |
58 | 'source' => "return (doc.freq*query.boost)/$maxScore;" |
59 | ] |
60 | ]; |
61 | } |
62 | |
63 | /** |
64 | * Define mapping for the weighted_tags field. |
65 | * @param array &$fields array of field definitions to update |
66 | * @param SearchEngine $engine the search engine requesting field definitions |
67 | * @see https://www.mediawiki.org/wiki/Manual:Hooks/SearchIndexFields |
68 | */ |
69 | public static function onSearchIndexFields( array &$fields, SearchEngine $engine ) { |
70 | if ( !( $engine instanceof CirrusSearch ) ) { |
71 | return; |
72 | } |
73 | self::configureWeightedTagsFieldMapping( $fields, |
74 | MediaWikiServices::getInstance()->getMainConfig() ); |
75 | } |
76 | |
77 | /** |
78 | * Visible for testing |
79 | * @param \SearchIndexField[] &$fields array of field definitions to update |
80 | * @param Config $config the wiki configuration |
81 | */ |
82 | public static function configureWeightedTagsFieldMapping( |
83 | array &$fields, |
84 | Config $config |
85 | ) { |
86 | if ( !self::canBuild( $config ) ) { |
87 | return; |
88 | } |
89 | |
90 | $fields[self::FIELD_NAME] = new WeightedTags( |
91 | self::FIELD_NAME, |
92 | self::FIELD_NAME, |
93 | self::FIELD_INDEX_ANALYZER, |
94 | self::FIELD_SEARCH_ANALYZER, |
95 | self::FIELD_SIMILARITY |
96 | ); |
97 | } |
98 | |
99 | /** |
100 | * Configure default analyzer for the weighted_tags field. |
101 | * @param array &$config analysis settings to update |
102 | * @param AnalysisConfigBuilder $analysisConfigBuilder unneeded |
103 | * @see https://www.mediawiki.org/wiki/Extension:CirrusSearch/Hooks/CirrusSearchAnalysisConfig |
104 | */ |
105 | public static function onCirrusSearchAnalysisConfig( array &$config, AnalysisConfigBuilder $analysisConfigBuilder ) { |
106 | self::configureWeightedTagsFieldAnalysis( $config, |
107 | MediaWikiServices::getInstance()->getMainConfig() ); |
108 | } |
109 | |
110 | /** |
111 | * Make weighted_tags search features available |
112 | * @param SearchConfig $config |
113 | * @param array &$extraFeatures Array holding KeywordFeature objects |
114 | * @see ArticleTopicFeature |
115 | */ |
116 | public static function onCirrusSearchAddQueryFeatures( SearchConfig $config, array &$extraFeatures ) { |
117 | if ( self::canUse( $config ) ) { |
118 | // articletopic keyword, matches by ORES topic scores |
119 | $extraFeatures[] = new ArticleTopicFeature(); |
120 | // article recommendations filter |
121 | $extraFeatures[] = new HasRecommendationFeature(); |
122 | } |
123 | } |
124 | |
125 | /** |
126 | * Visible only for testing |
127 | * @param array &$analysisConfig analysis settings to update |
128 | * @param Config $config the wiki configuration |
129 | * @internal |
130 | */ |
131 | public static function configureWeightedTagsFieldAnalysis( |
132 | array &$analysisConfig, |
133 | Config $config |
134 | ) { |
135 | if ( !self::canBuild( $config ) ) { |
136 | return; |
137 | } |
138 | $maxScore = self::maxScore( $config ); |
139 | $analysisConfig['analyzer'][self::FIELD_INDEX_ANALYZER] = [ |
140 | 'type' => 'custom', |
141 | 'tokenizer' => 'keyword', |
142 | 'filter' => [ |
143 | 'weighted_tags_term_freq', |
144 | ] |
145 | ]; |
146 | $analysisConfig['filter']['weighted_tags_term_freq'] = [ |
147 | 'type' => 'term_freq', |
148 | // must be a char that never appears in the topic names/ids |
149 | 'split_char' => '|', |
150 | // max score (clamped); we assume indexed scores are orig_score * 1000 (the default max_score) |
151 | 'max_tf' => $maxScore, |
152 | ]; |
153 | } |
154 | |
155 | /** |
156 | * Check whether weighted_tags data should be processed. |
157 | * @param Config $config |
158 | * @return bool |
159 | */ |
160 | private static function canBuild( Config $config ): bool { |
161 | $extraFeatures = $config->get( self::WMF_EXTRA_FEATURES ); |
162 | $weightedTagsOptions = $extraFeatures[self::CONFIG_OPTIONS] ?? []; |
163 | return (bool)( $weightedTagsOptions[self::BUILD_OPTION] ?? false ); |
164 | } |
165 | |
166 | /** |
167 | * Check whether weighted_tags data is available for searching. |
168 | * @param Config $config |
169 | * @return bool |
170 | */ |
171 | private static function canUse( Config $config ): bool { |
172 | $extraFeatures = $config->get( self::WMF_EXTRA_FEATURES ); |
173 | $weightedTagsOptions = $extraFeatures[self::CONFIG_OPTIONS] ?? []; |
174 | return (bool)( $weightedTagsOptions[self::USE_OPTION] ?? false ); |
175 | } |
176 | |
177 | private static function maxScore( Config $config ): int { |
178 | $extraFeatures = $config->get( self::WMF_EXTRA_FEATURES ); |
179 | $weightedTagsOptions = $extraFeatures[self::CONFIG_OPTIONS] ?? []; |
180 | return (int)( $weightedTagsOptions[self::MAX_SCORE_OPTION] ?? 1000 ); |
181 | } |
182 | } |