Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
97.78% covered (success)
97.78%
44 / 45
90.00% covered (success)
90.00%
9 / 10
CRAP
0.00% covered (danger)
0.00%
0 / 1
WeightedTagsHooks
97.78% covered (success)
97.78%
44 / 45
90.00% covered (success)
90.00%
9 / 10
15
0.00% covered (danger)
0.00%
0 / 1
 create
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 __construct
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getConfig
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 onCirrusSearchSimilarityConfig
100.00% covered (success)
100.00%
9 / 9
100.00% covered (success)
100.00%
1 / 1
2
 onSearchIndexFields
90.91% covered (success)
90.91%
10 / 11
0.00% covered (danger)
0.00%
0 / 1
3.01
 onCirrusSearchAnalysisConfig
100.00% covered (success)
100.00%
15 / 15
100.00% covered (success)
100.00%
1 / 1
2
 onCirrusSearchAddQueryFeatures
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
2
 canBuild
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 canUse
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 maxScore
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
1<?php
2
3namespace CirrusSearch\Search;
4
5use CirrusSearch\CirrusSearch;
6use CirrusSearch\Hooks\CirrusSearchAddQueryFeaturesHook;
7use CirrusSearch\Hooks\CirrusSearchAnalysisConfigHook;
8use CirrusSearch\Hooks\CirrusSearchSimilarityConfigHook;
9use CirrusSearch\Maintenance\AnalysisConfigBuilder;
10use CirrusSearch\Query\ArticlePredictionKeyword;
11use CirrusSearch\Query\HasRecommendationFeature;
12use CirrusSearch\SearchConfig;
13use MediaWiki\Config\ConfigFactory;
14use MediaWiki\Search\Hook\SearchIndexFieldsHook;
15
/**
 * Hook handlers that plug the weighted_tags search feature into CirrusSearch:
 * they register the index field, its analyzer, its similarity and the query
 * features that rely on it.
 * @package CirrusSearch\Search
 * @see ArticlePredictionKeyword
 */
class WeightedTagsHooks implements
    SearchIndexFieldsHook,
    CirrusSearchAddQueryFeaturesHook,
    CirrusSearchAnalysisConfigHook,
    CirrusSearchSimilarityConfigHook
{
    public const FIELD_NAME = 'weighted_tags';
    public const FIELD_SIMILARITY = 'weighted_tags_similarity';
    public const FIELD_INDEX_ANALYZER = 'weighted_tags';
    public const FIELD_SEARCH_ANALYZER = 'keyword';
    private SearchConfig $config;

    /**
     * Factory building the handler from the 'CirrusSearch' config instance.
     * @param ConfigFactory $configFactory
     * @return WeightedTagsHooks
     */
    public static function create( ConfigFactory $configFactory ): WeightedTagsHooks {
        /** @var SearchConfig $cirrusConfig */
        $cirrusConfig = $configFactory->makeConfig( 'CirrusSearch' );
        /** @phan-suppress-next-line PhanTypeMismatchArgumentSuperType $cirrusConfig is actually a SearchConfig */
        return new self( $cirrusConfig );
    }

    /**
     * @param SearchConfig $config configuration the CirrusSearchWeightedTags settings are read from
     */
    public function __construct( SearchConfig $config ) {
        $this->config = $config;
    }

    /**
     * Accessor for the injected configuration (visible for testing).
     * @return SearchConfig
     */
    public function getConfig(): SearchConfig {
        return $this->config;
    }

    /**
     * Registers the scripted similarity used by the weighted_tags field.
     * Does nothing unless the 'build' setting is enabled.
     * @inheritDoc
     */
    public function onCirrusSearchSimilarityConfig( array &$similarity ): void {
        if ( $this->canBuild() ) {
            $scoreCeiling = $this->maxScore();
            $similarity[self::FIELD_SIMILARITY] = [
                'type' => 'scripted',
                // Deliberately no weight script: we do not want any
                // document-independent weighting.
                'script' => [
                    // the boost is applied right next to doc.freq to force int->float conversion
                    'source' => 'return (doc.freq*query.boost)/' . $scoreCeiling . ';',
                ],
            ];
        }
    }

    /**
     * Adds the weighted_tags index field when the engine is CirrusSearch and
     * the 'build' setting is enabled.
     * @inheritDoc
     */
    public function onSearchIndexFields( &$fields, $engine ) {
        if ( $engine instanceof CirrusSearch && $this->canBuild() ) {
            $fields[self::FIELD_NAME] = new WeightedTags(
                self::FIELD_NAME,
                self::FIELD_NAME,
                self::FIELD_INDEX_ANALYZER,
                self::FIELD_SEARCH_ANALYZER,
                self::FIELD_SIMILARITY
            );
        }
    }

    /**
     * Declares the index analyzer of the weighted_tags field and the
     * term_freq token filter it relies on. Does nothing unless the 'build'
     * setting is enabled.
     * @inheritDoc
     */
    public function onCirrusSearchAnalysisConfig( array &$analysisConfig, AnalysisConfigBuilder $analysisConfigBuilder ): void {
        if ( !$this->canBuild() ) {
            return;
        }

        $scoreCeiling = $this->maxScore();
        $termFreqFilter = 'weighted_tags_term_freq';

        $analysisConfig['analyzer'][self::FIELD_INDEX_ANALYZER] = [
            'type' => 'custom',
            'tokenizer' => 'keyword',
            'filter' => [
                $termFreqFilter,
            ],
        ];
        $analysisConfig['filter'][$termFreqFilter] = [
            'type' => 'term_freq',
            // must be a char that never appears in the topic names/ids
            'split_char' => '|',
            // max score (clamped); presumably indexed scores are orig_score * 1000 - TODO confirm
            'max_tf' => $scoreCeiling,
        ];
    }

    /**
     * Appends the keywords backed by weighted_tags when the 'use' setting is
     * enabled. The $config argument is not consulted; the injected
     * configuration is used instead.
     * @inheritDoc
     */
    public function onCirrusSearchAddQueryFeatures( SearchConfig $config, array &$extraFeatures ): void {
        if ( !$this->canUse() ) {
            return;
        }
        // articletopic keyword, matches by ORES scores
        $extraFeatures[] = new ArticlePredictionKeyword();
        // article recommendations filter
        $extraFeatures[] = new HasRecommendationFeature( $this->maxScore() );
    }

    /**
     * Check whether weighted_tags data should be processed.
     * @return bool
     */
    private function canBuild(): bool {
        return (bool)$this->tagSetting( 'build', false );
    }

    /**
     * Check whether weighted_tags data is available for searching.
     * @return bool
     */
    private function canUse(): bool {
        return (bool)$this->tagSetting( 'use', false );
    }

    /**
     * Upper bound of the tag scores, 1000 when 'max_score' is not configured.
     * @return int
     */
    private function maxScore(): int {
        return (int)$this->tagSetting( 'max_score', 1000 );
    }

    /**
     * Reads one entry of the CirrusSearchWeightedTags setting.
     * @param string $key entry name
     * @param mixed $fallback value returned when the entry is unset or null
     * @return mixed
     */
    private function tagSetting( string $key, $fallback ) {
        return $this->config->get( 'CirrusSearchWeightedTags' )[$key] ?? $fallback;
    }
}