Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
97.78% |
44 / 45 |
|
90.00% |
9 / 10 |
CRAP | |
0.00% |
0 / 1 |
| WeightedTagsHooks | |
97.78% |
44 / 45 |
|
90.00% |
9 / 10 |
15 | |
0.00% |
0 / 1 |
| create | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| __construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| getConfig | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| onCirrusSearchSimilarityConfig | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
2 | |||
| onSearchIndexFields | |
90.91% |
10 / 11 |
|
0.00% |
0 / 1 |
3.01 | |||
| onCirrusSearchAnalysisConfig | |
100.00% |
15 / 15 |
|
100.00% |
1 / 1 |
2 | |||
| onCirrusSearchAddQueryFeatures | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
| canBuild | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| canUse | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| maxScore | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace CirrusSearch\Search; |
| 4 | |
| 5 | use CirrusSearch\CirrusSearch; |
| 6 | use CirrusSearch\Hooks\CirrusSearchAddQueryFeaturesHook; |
| 7 | use CirrusSearch\Hooks\CirrusSearchAnalysisConfigHook; |
| 8 | use CirrusSearch\Hooks\CirrusSearchSimilarityConfigHook; |
| 9 | use CirrusSearch\Maintenance\AnalysisConfigBuilder; |
| 10 | use CirrusSearch\Query\ArticlePredictionKeyword; |
| 11 | use CirrusSearch\Query\HasRecommendationFeature; |
| 12 | use CirrusSearch\SearchConfig; |
| 13 | use MediaWiki\Config\ConfigFactory; |
| 14 | use MediaWiki\Search\Hook\SearchIndexFieldsHook; |
| 15 | |
| 16 | /** |
| 17 | * Hook handlers for the weighted_tags search feature: they register the index field, its analyzer and similarity, and the query features built on it. |
| 18 | * @package CirrusSearch\Search |
| 19 | * @see ArticlePredictionKeyword |
| 20 | */ |
| 21 | class WeightedTagsHooks implements |
| 22 | SearchIndexFieldsHook, |
| 23 | CirrusSearchAddQueryFeaturesHook, |
| 24 | CirrusSearchAnalysisConfigHook, |
| 25 | CirrusSearchSimilarityConfigHook |
| 26 | { |
| 27 | public const FIELD_NAME = 'weighted_tags'; /* key under which the field is registered in onSearchIndexFields() */ |
| 28 | public const FIELD_SIMILARITY = 'weighted_tags_similarity'; /* key of the similarity added in onCirrusSearchSimilarityConfig() */ |
| 29 | public const FIELD_INDEX_ANALYZER = 'weighted_tags'; /* key of the analyzer added in onCirrusSearchAnalysisConfig() */ |
| 30 | public const FIELD_SEARCH_ANALYZER = 'keyword'; /* analyzer name passed to WeightedTags as the search analyzer */ |
| 31 | private SearchConfig $config; /* source of the 'CirrusSearchWeightedTags' setting read by canBuild()/canUse()/maxScore() */ |
| 32 | /** Factory: builds an instance from the 'CirrusSearch' config obtained through the given ConfigFactory. */ |
| 33 | public static function create( ConfigFactory $configFactory ): WeightedTagsHooks { |
| 34 | /** @var SearchConfig $searchConfig */ |
| 35 | $searchConfig = $configFactory->makeConfig( 'CirrusSearch' ); |
| 36 | /** @phan-suppress-next-line PhanTypeMismatchArgumentSuperType $searchConfig is actually a SearchConfig */ |
| 37 | return new self( $searchConfig ); |
| 38 | } |
| 39 | /** @param SearchConfig $config Config that is later queried for the 'CirrusSearchWeightedTags' setting. */ |
| 40 | public function __construct( SearchConfig $config ) { |
| 41 | $this->config = $config; |
| 42 | } |
| 43 | |
| 44 | /** |
| 45 | * Return the config this instance was constructed with; public only so that tests can inspect it. |
| 46 | * @return SearchConfig the config passed to the constructor |
| 47 | */ |
| 48 | public function getConfig(): SearchConfig { |
| 49 | return $this->config; |
| 50 | } |
| 51 | |
| 52 | /** Add the scripted similarity for the weighted_tags field under FIELD_SIMILARITY; does nothing unless canBuild() is true. |
| 53 | * @inheritDoc |
| 54 | */ |
| 55 | public function onCirrusSearchSimilarityConfig( array &$similarity ): void { |
| 56 | if ( !$this->canBuild() ) { |
| 57 | return; |
| 58 | } |
| 59 | $maxScore = $this->maxScore(); |
| 60 | $similarity[self::FIELD_SIMILARITY] = [ |
| 61 | 'type' => 'scripted', |
| 62 | // deliberately no 'weight_script': we do not want document-independent weighting |
| 63 | 'script' => [ |
| 64 | // multiply doc.freq by query.boost before dividing to force int->float conversion; $maxScore is interpolated as an integer literal |
| 65 | 'source' => "return (doc.freq*query.boost)/$maxScore;", |
| 66 | ], |
| 67 | ]; |
| 68 | } |
| 69 | |
| 70 | /** Register the WeightedTags field under FIELD_NAME, only when $engine is a CirrusSearch instance and canBuild() is true. |
| 71 | * @inheritDoc |
| 72 | */ |
| 73 | public function onSearchIndexFields( &$fields, $engine ) { |
| 74 | if ( !( $engine instanceof CirrusSearch ) ) { |
| 75 | return; |
| 76 | } |
| 77 | if ( !$this->canBuild() ) { |
| 78 | return; |
| 79 | } |
| 80 | |
| 81 | $fields[self::FIELD_NAME] = new WeightedTags( |
| 82 | self::FIELD_NAME, |
| 83 | self::FIELD_NAME, |
| 84 | self::FIELD_INDEX_ANALYZER, |
| 85 | self::FIELD_SEARCH_ANALYZER, |
| 86 | self::FIELD_SIMILARITY |
| 87 | ); |
| 88 | } |
| 89 | |
| 90 | /** Add the index analyzer (FIELD_INDEX_ANALYZER) and its 'weighted_tags_term_freq' token filter; does nothing unless canBuild() is true. |
| 91 | * @inheritDoc |
| 92 | */ |
| 93 | public function onCirrusSearchAnalysisConfig( array &$analysisConfig, AnalysisConfigBuilder $analysisConfigBuilder ): void { |
| 94 | if ( !$this->canBuild() ) { |
| 95 | return; |
| 96 | } |
| 97 | $maxScore = $this->maxScore(); |
| 98 | $analysisConfig['analyzer'][self::FIELD_INDEX_ANALYZER] = [ |
| 99 | 'type' => 'custom', |
| 100 | 'tokenizer' => 'keyword', |
| 101 | 'filter' => [ |
| 102 | 'weighted_tags_term_freq', |
| 103 | ], |
| 104 | ]; |
| 105 | $analysisConfig['filter']['weighted_tags_term_freq'] = [ |
| 106 | 'type' => 'term_freq', |
| 107 | // must be a character that never appears in the topic names/ids |
| 108 | 'split_char' => '|', |
| 109 | // maximum score (larger values are clamped); presumably the indexed value is orig_score * 1000 - TODO confirm against the indexing side |
| 110 | 'max_tf' => $maxScore, |
| 111 | ]; |
| 112 | } |
| 113 | |
| 114 | /** Append the weighted_tags based query features when canUse() is true; the $config argument is not consulted, the injected config decides. |
| 115 | * @inheritDoc |
| 116 | */ |
| 117 | public function onCirrusSearchAddQueryFeatures( SearchConfig $config, array &$extraFeatures ): void { |
| 118 | if ( $this->canUse() ) { |
| 119 | // articletopic keyword, matches by ORES scores |
| 120 | $extraFeatures[] = new ArticlePredictionKeyword(); |
| 121 | // article recommendations filter; receives the configured max score |
| 122 | $extraFeatures[] = new HasRecommendationFeature( $this->maxScore() ); |
| 123 | } |
| 124 | } |
| 125 | |
| 126 | /** |
| 127 | * Check whether weighted_tags data should be processed, i.e. whether the 'build' key of the CirrusSearchWeightedTags setting is truthy. |
| 128 | * @return bool false when the key is missing or null |
| 129 | */ |
| 130 | private function canBuild(): bool { |
| 131 | return (bool)( $this->config->get( 'CirrusSearchWeightedTags' )['build'] ?? false ); |
| 132 | } |
| 133 | |
| 134 | /** |
| 135 | * Check whether weighted_tags data is available for searching, i.e. whether the 'use' key of the CirrusSearchWeightedTags setting is truthy. |
| 136 | * @return bool false when the key is missing or null |
| 137 | */ |
| 138 | private function canUse(): bool { |
| 139 | return (bool)( $this->config->get( 'CirrusSearchWeightedTags' )['use'] ?? false ); |
| 140 | } |
| 141 | /** @return int the 'max_score' key of the CirrusSearchWeightedTags setting cast to int; 1000 when the key is missing or null */ |
| 142 | private function maxScore(): int { |
| 143 | return (int)( $this->config->get( 'CirrusSearchWeightedTags' )['max_score'] ?? 1000 ); |
| 144 | } |
| 145 | } |