Code Coverage
 
Classes and Traits
Functions and Methods
Lines
Total
0.00% covered (danger)
0.00%
0 / 1
70.00% covered (warning)
70.00%
7 / 10
CRAP
77.55% covered (warning)
77.55%
38 / 49
WeightedTagsHooks
0.00% covered (danger)
0.00%
0 / 1
70.00% covered (warning)
70.00%
7 / 10
17.55
77.55% covered (warning)
77.55%
38 / 49
 onCirrusSearchSimilarityConfig
0.00% covered (danger)
0.00%
0 / 1
2
0.00% covered (danger)
0.00%
0 / 3
 configureWeightedTagsSimilarity
100.00% covered (success)
100.00%
1 / 1
2
100.00% covered (success)
100.00%
7 / 7
 onSearchIndexFields
0.00% covered (danger)
0.00%
0 / 1
6
0.00% covered (danger)
0.00%
0 / 5
 configureWeightedTagsFieldMapping
100.00% covered (success)
100.00%
1 / 1
2
100.00% covered (success)
100.00%
9 / 9
 onCirrusSearchAnalysisConfig
0.00% covered (danger)
0.00%
0 / 1
2
0.00% covered (danger)
0.00%
0 / 3
 onCirrusSearchAddQueryFeatures
100.00% covered (success)
100.00%
1 / 1
2
100.00% covered (success)
100.00%
4 / 4
 configureWeightedTagsFieldAnalysis
100.00% covered (success)
100.00%
1 / 1
2
100.00% covered (success)
100.00%
9 / 9
 canBuild
100.00% covered (success)
100.00%
1 / 1
1
100.00% covered (success)
100.00%
3 / 3
 canUse
100.00% covered (success)
100.00%
1 / 1
1
100.00% covered (success)
100.00%
3 / 3
 maxScore
100.00% covered (success)
100.00%
1 / 1
1
100.00% covered (success)
100.00%
3 / 3
<?php
namespace CirrusSearch\Wikimedia;
use CirrusSearch\CirrusSearch;
use CirrusSearch\Maintenance\AnalysisConfigBuilder;
use CirrusSearch\Query\ArticleTopicFeature;
use CirrusSearch\Query\HasRecommendationFeature;
use CirrusSearch\SearchConfig;
use Config;
use MediaWiki\MediaWikiServices;
use SearchEngine;
/**
 * Hook handlers and helpers for the (Wikimedia-specific) weighted_tags search feature.
 *
 * Behaviour is driven by the `weighted_tags` entry of the CirrusSearchWMFExtraFeatures
 * setting, from which three options are read:
 *  - `build`: when truthy, the field mapping, its analyzer and its similarity are configured;
 *  - `use`: when truthy, the query keywords relying on the field are registered;
 *  - `max_score`: integer used both as the term frequency ceiling of the analyzer and as
 *    the divisor of the similarity script (defaults to 1000).
 *
 * @package CirrusSearch\Wikimedia
 * @see ArticleTopicFeature
 */
class WeightedTagsHooks {
    public const FIELD_NAME = 'weighted_tags';
    public const FIELD_SIMILARITY = 'weighted_tags_similarity';
    public const FIELD_INDEX_ANALYZER = 'weighted_tags';
    public const FIELD_SEARCH_ANALYZER = 'keyword';
    public const WMF_EXTRA_FEATURES = 'CirrusSearchWMFExtraFeatures';
    public const CONFIG_OPTIONS = 'weighted_tags';
    public const BUILD_OPTION = 'build';
    public const USE_OPTION = 'use';
    public const MAX_SCORE_OPTION = 'max_score';
    /** Value used when max_score is not configured or is not a positive integer. */
    private const DEFAULT_MAX_SCORE = 1000;
    /**
     * Configure the similarity needed for the weighted_tags field.
     * Delegates to configureWeightedTagsSimilarity() with the main wiki configuration.
     * @param array &$similarity similarity settings to update
     * @see https://www.mediawiki.org/wiki/Extension:CirrusSearch/Hooks/CirrusSearchSimilarityConfig
     */
    public static function onCirrusSearchSimilarityConfig( array &$similarity ) {
        self::configureWeightedTagsSimilarity( $similarity,
            MediaWikiServices::getInstance()->getMainConfig() );
    }
    /**
     * Register the scripted similarity used by the weighted_tags field.
     * Does nothing unless the `build` option is enabled.
     * Visible for testing.
     * @param array &$similarity similarity settings to update
     * @param Config $config current configuration
     */
    public static function configureWeightedTagsSimilarity(
        array &$similarity,
        Config $config
    ) {
        if ( !self::canBuild( $config ) ) {
            return;
        }
        // maxScore() always returns a positive int, so the division below is well defined.
        $maxScore = self::maxScore( $config );
        $similarity[self::FIELD_SIMILARITY] = [
            'type' => 'scripted',
            // no 'weight_script': we do not want document-independent weighting
            'script' => [
                // apply boost close to doc.freq to force int->float conversion
                'source' => "return (doc.freq*query.boost)/$maxScore;"
            ]
        ];
    }
    /**
     * Define mapping for the weighted_tags field.
     * Only acts when the requesting engine is CirrusSearch.
     * @param array &$fields array of field definitions to update
     * @param SearchEngine $engine the search engine requesting field definitions
     * @see https://www.mediawiki.org/wiki/Manual:Hooks/SearchIndexFields
     */
    public static function onSearchIndexFields( array &$fields, SearchEngine $engine ) {
        if ( !( $engine instanceof CirrusSearch ) ) {
            return;
        }
        self::configureWeightedTagsFieldMapping( $fields,
            MediaWikiServices::getInstance()->getMainConfig() );
    }
    /**
     * Add the weighted_tags field definition.
     * Does nothing unless the `build` option is enabled.
     * Visible for testing.
     * @param \SearchIndexField[] &$fields array of field definitions to update
     * @param Config $config the wiki configuration
     */
    public static function configureWeightedTagsFieldMapping(
        array &$fields,
        Config $config
    ) {
        if ( !self::canBuild( $config ) ) {
            return;
        }
        // FIELD_NAME is deliberately passed for both of the first two arguments,
        // as in the original code.
        $fields[self::FIELD_NAME] = new WeightedTags(
            self::FIELD_NAME,
            self::FIELD_NAME,
            self::FIELD_INDEX_ANALYZER,
            self::FIELD_SEARCH_ANALYZER,
            self::FIELD_SIMILARITY
        );
    }
    /**
     * Configure default analyzer for the weighted_tags field.
     * Delegates to configureWeightedTagsFieldAnalysis() with the main wiki configuration.
     * @param array &$config analysis settings to update
     * @param AnalysisConfigBuilder $analysisConfigBuilder unneeded
     * @see https://www.mediawiki.org/wiki/Extension:CirrusSearch/Hooks/CirrusSearchAnalysisConfig
     */
    public static function onCirrusSearchAnalysisConfig( array &$config, AnalysisConfigBuilder $analysisConfigBuilder ) {
        self::configureWeightedTagsFieldAnalysis( $config,
            MediaWikiServices::getInstance()->getMainConfig() );
    }
    /**
     * Make weighted_tags search features available.
     * Features are only appended when the `use` option is enabled.
     * @param SearchConfig $config
     * @param array &$extraFeatures Array holding KeywordFeature objects
     * @see ArticleTopicFeature
     */
    public static function onCirrusSearchAddQueryFeatures( SearchConfig $config, array &$extraFeatures ) {
        if ( self::canUse( $config ) ) {
            // articletopic keyword, matches by ORES topic scores
            $extraFeatures[] = new ArticleTopicFeature();
            // article recommendations filter
            $extraFeatures[] = new HasRecommendationFeature();
        }
    }
    /**
     * Register the index analyzer of the weighted_tags field and the token filter it uses.
     * Does nothing unless the `build` option is enabled.
     * Visible only for testing.
     * @param array &$analysisConfig analysis settings to update
     * @param Config $config the wiki configuration
     * @internal
     */
    public static function configureWeightedTagsFieldAnalysis(
        array &$analysisConfig,
        Config $config
    ) {
        if ( !self::canBuild( $config ) ) {
            return;
        }
        $maxScore = self::maxScore( $config );
        $analysisConfig['analyzer'][self::FIELD_INDEX_ANALYZER] = [
            'type' => 'custom',
            'tokenizer' => 'keyword',
            'filter' => [
                'weighted_tags_term_freq',
            ]
        ];
        $analysisConfig['filter']['weighted_tags_term_freq'] = [
            'type' => 'term_freq',
            // must be a char that never appears in the topic names/ids
            'split_char' => '|',
            // max score (clamped); presumably scores are indexed as orig_score * 1000,
            // matching the default max_score - TODO confirm against the indexing side
            'max_tf' => $maxScore,
        ];
    }
    /**
     * Check whether weighted_tags data should be processed.
     * @param Config $config
     * @return bool true when the `build` option is set to a truthy value
     */
    private static function canBuild( Config $config ): bool {
        return (bool)self::getOption( $config, self::BUILD_OPTION, false );
    }
    /**
     * Check whether weighted_tags data is available for searching.
     * @param Config $config
     * @return bool true when the `use` option is set to a truthy value
     */
    private static function canUse( Config $config ): bool {
        return (bool)self::getOption( $config, self::USE_OPTION, false );
    }
    /**
     * Get the maximum score of a weighted tag.
     *
     * The value ends up as the divisor of the similarity script and as the `max_tf`
     * of the term_freq filter, so it must be strictly positive: a configured value
     * that casts to an integer lower than 1 is replaced by the default.
     * @param Config $config
     * @return int a strictly positive integer
     */
    private static function maxScore( Config $config ): int {
        $maxScore = (int)self::getOption( $config, self::MAX_SCORE_OPTION, self::DEFAULT_MAX_SCORE );
        return $maxScore > 0 ? $maxScore : self::DEFAULT_MAX_SCORE;
    }
    /**
     * Read one option from the weighted_tags section of CirrusSearchWMFExtraFeatures.
     * @param Config $config
     * @param string $option option name, one of the *_OPTION constants
     * @param mixed $default value returned when the section or the option is unset or null
     * @return mixed the raw configured value, or $default
     */
    private static function getOption( Config $config, string $option, $default ) {
        $extraFeatures = $config->get( self::WMF_EXTRA_FEATURES );
        $weightedTagsOptions = $extraFeatures[self::CONFIG_OPTIONS] ?? [];
        return $weightedTagsOptions[$option] ?? $default;
    }
}