Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 110
0.00% covered (danger)
0.00%
0 / 1
CRAP
n/a
0 / 0
Wikibase\MediaInfo\Search\closureToAnonymousClass
0.00% covered (danger)
0.00%
0 / 5
0.00% covered (danger)
0.00%
0 / 1
2
1<?php
2
3namespace Wikibase\MediaInfo\Search;
4
5use CirrusSearch\Parser\FullTextKeywordRegistry;
6use CirrusSearch\SearchConfig;
7use MediaWiki\MediaWikiServices;
8use RequestContext;
9use UnexpectedValueException;
10use Wikibase\Repo\WikibaseRepo;
11
if ( !function_exists( 'Wikibase\MediaInfo\Search\closureToAnonymousClass' ) ) {
    /**
     * This is a bit of a hack, wrapping a closure (or any callable) in an
     * instance of an invokable anonymous class.
     *
     * Invoking the returned object executes the wrapped callable with the
     * very same arguments and returns its result. In addition, the object
     * has a __toString, which will allow ApiResult::validateValue (used in
     * ConfigDump to format this) to process the closure (which it is
     * otherwise unable to do).
     *
     * The definition is guarded by function_exists() so that evaluating
     * this file more than once does not redeclare the function.
     *
     * @param callable $callable The callable to wrap
     * @return callable Invokable object that forwards calls to $callable
     */
    function closureToAnonymousClass( callable $callable ) {
        return new class ( $callable ) {
            /** @var callable The wrapped callable */
            private $callable;

            /**
             * @param callable $callable
             */
            public function __construct( callable $callable ) {
                $this->callable = $callable;
            }

            /**
             * Forward the call, with all received arguments, to the
             * wrapped callable.
             *
             * @param mixed ...$args
             * @return mixed Whatever the wrapped callable returns
             */
            public function __invoke( ...$args ) {
                // parentheses are required: without them PHP would look
                // for a *method* named "callable" instead of calling the
                // value stored in the property
                return ( $this->callable )( ...$args );
            }

            /**
             * @return string Name of this anonymous class
             */
            public function __toString() {
                return self::class;
            }
        };
    }
}
43
$mwServices = MediaWikiServices::getInstance();
$config = $mwServices->getMainConfig();
// fall back to an empty list when the configured value is falsy, so that
// array_map() below always receives an array
$profiles = $config->get( 'MediaInfoMediaSearchProfiles' ) ?: [];

// Turn every configured profile (an array of settings) into an array with:
// - 'builder_factory': an invokable that, given the settings, constructs the
//   MediaSearchQueryBuilder
// - 'settings': the profile settings, completed with defaults and with any
//   overrides from the current request's query string
// Profile keys are preserved by array_map().
return array_map( static function ( array $settings ) use ( $config ) {
    // parse default settings into profile settings, to ensure all expected
    // settings have some (default) value if not explicitly specified
    $defaultSettings = [
        'boost' => [
            'statement' => array_fill_keys( array_values( $config->get( 'MediaInfoProperties' ) ), 1 ),
            'weighted_tags' => [],
            'descriptions.$language' => 1,
            'descriptions.$language.plain' => 1,
            'title' => 1,
            'title.plain' => 1,
            'category' => 1,
            'category.plain' => 1,
            'heading' => 1,
            'heading.plain' => 1,
            'auxiliary_text' => 1,
            'auxiliary_text.plain' => 1,
            'file_text' => 1,
            'file_text.plain' => 1,
            'redirect.title' => 1,
            'redirect.title.plain' => 1,
            'text' => 1,
            'text.plain' => 1,
            'suggest' => 1,
        ],
        'decay' => [
            'descriptions.$language' => 0.9,
            'descriptions.$language.plain' => 0.9,
            // below is not actually a field
            'synonyms' => 0.5,
        ],
        'entitiesVariableBoost' => true,
        'normalizeMultiClauseScores' => false,
        'applyLogisticFunction' => false,
        'useSynonyms' => false,
        'logisticRegressionIntercept' => 0,
        'entitySearchBaseUri' => $config->get( 'MediaInfoExternalEntitySearchBaseUri' ),
        'titleMatchBaseUri' => $config->get( 'MediaInfoMediaSearchTitleMatchBaseUri' ),
        'synonymsMaxAmount' => 5,
        'synonymsMinScoreThreshold' => 0.5,
        'synonymsMinByteLength' => 2,
        'synonymsMinSimilarityToCanonicalForm' => 0.75,
        'synonymsMinDifferenceFromOthers' => 0.25,
        'nearMatchBoost' => 3.0,
    ];
    $settings = array_replace_recursive( $defaultSettings, $settings );

    // allow settings (boost etc.) to be customized from URL query params;
    // a key like 'one:two' addresses $settings['one']['two'], and the value
    // is always interpreted as a float
    foreach ( RequestContext::getMain()->getRequest()->getQueryValues() as $key => $value ) {
        $path = explode( ':', (string)$key );

        // Walk the current settings along the requested path to find out
        // whether applying the override would change the *shape* of the
        // settings: array_replace_recursive() happily replaces an array
        // with a scalar (e.g. '?boost=1' would wipe out all boosts) or a
        // scalar with an array (e.g. '?nearMatchBoost:x=1'), either of
        // which leaves the settings in a state the consumers of these
        // settings do not expect. Such overrides are ignored.
        $node = $settings;
        $pathExists = true;
        $shapePreserved = true;
        foreach ( $path as $segment ) {
            if ( !is_array( $node ) ) {
                // the path tries to descend into an existing scalar
                $shapePreserved = false;
                break;
            }
            if ( !array_key_exists( $segment, $node ) ) {
                // the remainder of the path is new; nothing to clobber
                $pathExists = false;
                break;
            }
            $node = $node[$segment];
        }
        if ( !$shapePreserved || ( $pathExists && is_array( $node ) ) ) {
            continue;
        }

        // convert [ 'one:two' => 'three' ] into ['one']['two'] = 'three',
        // building the nested array from the innermost level outwards
        $override = floatval( $value );
        foreach ( array_reverse( $path ) as $segment ) {
            $override = [ $segment => $override ];
        }
        $settings = array_replace_recursive( $settings, $override );
    }

    // work around '.' being replaced by '_'
    if ( isset( $settings['boost']['redirect_title'] ) ) {
        $settings['boost']['redirect.title'] = $settings['boost']['redirect_title'];
        unset( $settings['boost']['redirect_title'] );
    }

    return [
        // wrapped by closureToAnonymousClass() so that the factory can also
        // be cast to a string (see that function's documentation)
        'builder_factory' => closureToAnonymousClass( static function ( array $settings ) {
            $languageCode = WikibaseRepo::getUserLanguage()->getCode();
            $languageFallbackChain = WikibaseRepo::getLanguageFallbackChainFactory()
                ->newFromLanguageCode( $languageCode );

            $mwServices = MediaWikiServices::getInstance();
            $config = $mwServices->getMainConfig();
            $configFactory = $mwServices->getConfigFactory();
            $searchConfig = $configFactory->makeConfig( 'CirrusSearch' );
            if ( !$searchConfig instanceof SearchConfig ) {
                throw new UnexpectedValueException( 'CirrusSearch config must be instanceof SearchConfig' );
            }
            $features = ( new FullTextKeywordRegistry( $searchConfig ) )->getKeywords();

            // the user's language first, followed by the languages of its
            // fallback chain, without duplicates
            $languages = array_merge( [ $languageCode ], $languageFallbackChain->getFetchLanguageCodes() );
            $languages = array_unique( $languages );

            // both base URIs are sprintf() format strings that receive the
            // user's language code as their only argument
            $entitySearchBaseUri = sprintf( $settings[ 'entitySearchBaseUri' ], $languageCode );
            $titleMatchBaseUri = sprintf( $settings[ 'titleMatchBaseUri' ], $languageCode );

            // value of the 'LanguageCode' main config setting, needed by
            // several of the objects constructed below
            $configuredLanguageCode = $config->get( 'LanguageCode' );

            // the fetcher doing the actual work is wrapped in a caching
            // fetcher (backed by the main WAN object cache), which is in
            // turn wrapped in a memory fetcher
            $entitiesFetcher = new MediaSearchMemoryEntitiesFetcher(
                new MediaSearchCachingEntitiesFetcher(
                    new MediaSearchEntitiesFetcher(
                        $mwServices->getHttpRequestFactory()->createMultiClient(),
                        $entitySearchBaseUri,
                        $titleMatchBaseUri,
                        $languageCode,
                        $configuredLanguageCode
                    ),
                    $mwServices->getMainWANObjectCache(),
                    $languageCode,
                    $configuredLanguageCode,
                    $entitySearchBaseUri . '-' . $titleMatchBaseUri
                )
            );

            return new MediaSearchQueryBuilder(
                $features,
                new MediaSearchASTQueryBuilder(
                    new MediaSearchASTEntitiesExtractor( $entitiesFetcher ),
                    $configFactory->makeConfig( 'WikibaseCirrusSearch' )->get( 'UseStemming' ),
                    $languages,
                    $configuredLanguageCode,
                    $settings
                )
            );
        } ),
        'settings' => $settings,
    ];
}, $profiles );