Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 110 |
|
0.00% |
0 / 1 |
CRAP | n/a |
0 / 0 |
|
Wikibase\MediaInfo\Search\closureToAnonymousClass | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | |
3 | namespace Wikibase\MediaInfo\Search; |
4 | |
5 | use CirrusSearch\Parser\FullTextKeywordRegistry; |
6 | use CirrusSearch\SearchConfig; |
7 | use MediaWiki\MediaWikiServices; |
8 | use RequestContext; |
9 | use UnexpectedValueException; |
10 | use Wikibase\Repo\WikibaseRepo; |
11 | |
if ( !function_exists( 'Wikibase\MediaInfo\Search\closureToAnonymousClass' ) ) {
	/**
	 * Wrap a callable in an invokable anonymous-class instance.
	 *
	 * Invoking the returned object forwards all arguments to the wrapped
	 * callable and returns its result, so it can be used as a drop-in
	 * replacement. The wrapper exists only because it also implements
	 * __toString: that lets ApiResult::validateValue (used in ConfigDump
	 * to format this) process the value, which it cannot do for a plain
	 * closure. This is admittedly a bit of a hack.
	 *
	 * @param callable $callable
	 * @return callable
	 */
	function closureToAnonymousClass( callable $callable ) {
		return new class ( $callable ) {
			/** @var callable the callable every invocation is delegated to */
			private $wrapped;

			public function __construct( callable $wrapped ) {
				$this->wrapped = $wrapped;
			}

			/**
			 * Forward whatever arguments were passed to the wrapped callable.
			 *
			 * @return mixed whatever the wrapped callable returns
			 */
			public function __invoke() {
				return ( $this->wrapped )( ...func_get_args() );
			}

			/**
			 * @return string the (generated) name of this anonymous class
			 */
			public function __toString() {
				return self::class;
			}
		};
	}
}
43 | |
$mwServices = MediaWikiServices::getInstance();
$config = $mwServices->getMainConfig();
$profiles = $config->get( 'MediaInfoMediaSearchProfiles' ) ?: [];

// Turn every configured profile into [ 'builder_factory' => callable, 'settings' => array ]:
// - 'settings' is the profile's own settings layered on top of the defaults
//   below, then on top of that any numeric overrides from the request URL;
// - 'builder_factory' receives a settings array and builds the
//   MediaSearchQueryBuilder for it.
return array_map( static function ( array $settings ) use ( $config ) {
	// parse default settings into profile settings, to ensure all expected
	// settings have some (default) value if not explicitly specified
	$defaultSettings = [
		'boost' => [
			'statement' => array_fill_keys( array_values( $config->get( 'MediaInfoProperties' ) ), 1 ),
			'weighted_tags' => [],
			'descriptions.$language' => 1,
			'descriptions.$language.plain' => 1,
			'title' => 1,
			'title.plain' => 1,
			'category' => 1,
			'category.plain' => 1,
			'heading' => 1,
			'heading.plain' => 1,
			'auxiliary_text' => 1,
			'auxiliary_text.plain' => 1,
			'file_text' => 1,
			'file_text.plain' => 1,
			'redirect.title' => 1,
			'redirect.title.plain' => 1,
			'text' => 1,
			'text.plain' => 1,
			'suggest' => 1,
		],
		'decay' => [
			'descriptions.$language' => 0.9,
			'descriptions.$language.plain' => 0.9,
			// below is not actually a field
			'synonyms' => 0.5,
		],
		'entitiesVariableBoost' => true,
		'normalizeMultiClauseScores' => false,
		'applyLogisticFunction' => false,
		'useSynonyms' => false,
		'logisticRegressionIntercept' => 0,
		'entitySearchBaseUri' => $config->get( 'MediaInfoExternalEntitySearchBaseUri' ),
		'titleMatchBaseUri' => $config->get( 'MediaInfoMediaSearchTitleMatchBaseUri' ),
		'synonymsMaxAmount' => 5,
		'synonymsMinScoreThreshold' => 0.5,
		'synonymsMinByteLength' => 2,
		'synonymsMinSimilarityToCanonicalForm' => 0.75,
		'synonymsMinDifferenceFromOthers' => 0.25,
		'nearMatchBoost' => 3.0,
	];
	$settings = array_replace_recursive( $defaultSettings, $settings );

	// allow settings (boost etc.) to be customized from URL query params;
	// a param named 'one:two' with value '3' becomes $settings['one']['two'] = 3.0
	foreach ( RequestContext::getMain()->getRequest()->getQueryValues() as $key => $value ) {
		// array-valued params (e.g. "ns[]=6") cannot describe a numeric setting
		if ( !is_scalar( $value ) ) {
			continue;
		}

		$path = explode( ':', (string)$key );

		// Look up what (if anything) currently lives at the addressed path.
		// Every query param passes through here, and every override is a
		// float, so it must never be allowed to change the *shape* of the
		// settings: not replace a whole sub-array (?boost=1), not turn a
		// scalar into an array (?nearMatchBoost:x=1), and not overwrite a
		// string such as the URI templates that are later fed to sprintf().
		$existing = $settings;
		$compatible = true;
		foreach ( $path as $segment ) {
			if ( is_array( $existing ) ) {
				$existing = $existing[$segment] ?? null;
			} elseif ( $existing === null ) {
				// nothing at this path yet: a new key is about to be added
				break;
			} else {
				// an existing scalar sits in the middle of the path
				$compatible = false;
				break;
			}
		}
		if ( !$compatible || is_array( $existing ) || is_string( $existing ) ) {
			continue;
		}

		// convert [ 'one:two' => 'three' ] into ['one']['two'] = 'three'
		$override = floatval( $value );
		foreach ( array_reverse( $path ) as $segment ) {
			$override = [ $segment => $override ];
		}
		$settings = array_replace_recursive( $settings, $override );
	}

	// work around '.' being replaced by '_'
	if ( isset( $settings['boost']['redirect_title'] ) ) {
		$settings['boost']['redirect.title'] = $settings['boost']['redirect_title'];
		unset( $settings['boost']['redirect_title'] );
	}

	return [
		// wrapped so that the closure can be cast to string (see closureToAnonymousClass)
		'builder_factory' => closureToAnonymousClass( static function ( array $settings ) {
			$languageCode = WikibaseRepo::getUserLanguage()->getCode();
			$languageFallbackChain = WikibaseRepo::getLanguageFallbackChainFactory()
				->newFromLanguageCode( $languageCode );

			$mwServices = MediaWikiServices::getInstance();
			$config = $mwServices->getMainConfig();
			$configFactory = $mwServices->getConfigFactory();
			$searchConfig = $configFactory->makeConfig( 'CirrusSearch' );
			if ( !$searchConfig instanceof SearchConfig ) {
				throw new UnexpectedValueException( 'CirrusSearch config must be instanceof SearchConfig' );
			}
			$features = ( new FullTextKeywordRegistry( $searchConfig ) )->getKeywords();

			// user language first, followed by its fallbacks, without duplicates
			$languages = array_merge( [ $languageCode ], $languageFallbackChain->getFetchLanguageCodes() );
			$languages = array_unique( $languages );
			// both URIs are sprintf templates that take the user language code
			$entitySearchBaseUri = sprintf( $settings[ 'entitySearchBaseUri' ], $languageCode );
			$titleMatchBaseUri = sprintf( $settings[ 'titleMatchBaseUri' ], $languageCode );

			// fetchers are layered: in-memory around caching around the actual fetcher
			$entitiesFetcher = new MediaSearchMemoryEntitiesFetcher(
				new MediaSearchCachingEntitiesFetcher(
					new MediaSearchEntitiesFetcher(
						$mwServices->getHttpRequestFactory()->createMultiClient(),
						$entitySearchBaseUri,
						$titleMatchBaseUri,
						$languageCode,
						$config->get( 'LanguageCode' )
					),
					$mwServices->getMainWANObjectCache(),
					$languageCode,
					$config->get( 'LanguageCode' ),
					$entitySearchBaseUri . '-' . $titleMatchBaseUri
				)
			);

			return new MediaSearchQueryBuilder(
				$features,
				new MediaSearchASTQueryBuilder(
					new MediaSearchASTEntitiesExtractor( $entitiesFetcher ),
					$configFactory->makeConfig( 'WikibaseCirrusSearch' )->get( 'UseStemming' ),
					$languages,
					$config->get( 'LanguageCode' ),
					$settings
				)
			);
		} ),
		'settings' => $settings,
	];
}, $profiles );