Code Coverage

| | Lines (%) | Lines (covered / total) | Functions and Methods (%) | Functions and Methods (covered / total) | CRAP | Classes and Traits (%) | Classes and Traits (covered / total) |
|---|---|---|---|---|---|---|---|
| Total | 91.67% | 66 / 72 | 50.00% | 2 / 4 | | 0.00% | 0 / 1 |
| ApiQueryZObjectLabels | 91.67% | 66 / 72 | 50.00% | 2 / 4 | 21.26 | 0.00% | 0 / 1 |
| __construct | n/a | 0 / 0 | n/a | 0 / 0 | 1 | | |
| execute | 100.00% | 3 / 3 | 100.00% | 1 / 1 | 1 | | |
| executeGenerator | 0.00% | 0 / 1 | 0.00% | 0 / 1 | 2 | | |
| run | 92.19% | 59 / 64 | 0.00% | 0 / 1 | 15.11 | | |
| getAllowedParams | n/a | 0 / 0 | n/a | 0 / 0 | 1 | | |
| getMatchRate | 100.00% | 4 / 4 | 100.00% | 1 / 1 | 1 | | |
| getExamplesMessages | n/a | 0 / 0 | n/a | 0 / 0 | 1 | | |

1 | <?php |
2 | /** |
3 | * WikiLambda ZObject labels helper for the query API |
4 | * |
5 | * @file |
6 | * @ingroup Extensions |
7 | * @copyright 2020– Abstract Wikipedia team; see AUTHORS.txt |
8 | * @license MIT |
9 | */ |
10 | |
11 | namespace MediaWiki\Extension\WikiLambda\ActionAPI; |
12 | |
13 | use ApiBase; |
14 | use ApiPageSet; |
15 | use ApiQueryGeneratorBase; |
16 | use MediaWiki\Extension\WikiLambda\WikiLambdaServices; |
17 | use MediaWiki\Extension\WikiLambda\ZObjectUtils; |
18 | use MediaWiki\MediaWikiServices; |
19 | use MediaWiki\Title\Title; |
20 | use Wikimedia\ParamValidator\ParamValidator; |
21 | use Wikimedia\ParamValidator\TypeDef\IntegerDef; |
22 | |
/**
 * Action API query module that searches ZObject labels.
 *
 * The module can run as a plain list module (execute()) or as a generator
 * (executeGenerator()); both paths share the implementation in run().
 */
class ApiQueryZObjectLabels extends ApiQueryGeneratorBase {

	/**
	 * @inheritDoc
	 * @codeCoverageIgnore
	 */
	public function __construct( $query, $moduleName ) {
		// All parameters of this module are prefixed with 'wikilambdasearch_'.
		parent::__construct( $query, $moduleName, 'wikilambdasearch_' );
	}

	/**
	 * @inheritDoc
	 */
	public function execute() {
		// (T362271) Emit appropriate cache headers for a 24 hour TTL
		// NOTE (T362273): MediaWiki out-guesses us and assumes we don't know what we're doing; to fix so it works
		$main = $this->getMain();
		$main->setCacheMode( 'public' );
		$main->setCacheMaxAge( 60 * 60 * 24 );

		$this->run();
	}

	/**
	 * @inheritDoc
	 */
	public function executeGenerator( $resultPageSet ) {
		// Note: unlike execute(), no cache headers are set on the generator path.
		$this->run( $resultPageSet );
	}

	/**
	 * Shared implementation behind execute() and executeGenerator().
	 *
	 * Searches the label store, keeps the best-matching row per ZObject, orders the
	 * results by match rate, slices out the requested page, and then either adds the
	 * hits to the API result or registers them as generator data on the page set.
	 *
	 * @param ApiPageSet|null $resultPageSet Page set to fill when running as a
	 *  generator; null when running as a plain list module.
	 */
	private function run( $resultPageSet = null ) {
		// NOTE(review): 'nofallback' is declared in getAllowedParams() but is not read here.
		[
			'search' => $searchTerm,
			'type' => $type,
			'exact' => $exact,
			'language' => $language,
			'return_type' => $returnType,
			'strict_return_type' => $strictReturnType,
			'limit' => $limit,
			'continue' => $continue,
		] = $this->extractRequestParams();

		// TODO (T348545): We can reduce this control limit to 100 when we
		// have a system to return results already pre-ranked from the DB.
		$controlLimit = 5000;

		$zObjectStore = WikiLambdaServices::getZObjectStore();
		$res = $zObjectStore->searchZObjectLabels(
			$searchTerm,
			$exact,
			[],
			$type,
			$returnType,
			$strictReturnType,
			null,
			$controlLimit
		);

		// 1. Set match_rate for every entry and eliminate duplicates with lower match rates
		// TODO (T349583): Improve this result sorting algorithm; e.g. should we prioritize matches with primary labels?
		$matches = [];
		$hasSearchTerm = ( $searchTerm !== '' );
		// When the search term is itself a valid ZObject reference, rate against the ZID column, not the label.
		$matchField = ZObjectUtils::isValidZObjectReference( $searchTerm ) ? 'wlzl_zobject_zid' : 'wlzl_label';

		foreach ( $res as $row ) {
			$zid = $row->wlzl_zobject_zid;
			$matchRate = $hasSearchTerm ? self::getMatchRate( $searchTerm, $row->{ $matchField } ) : 0;

			// If the current row is new or a better match, keep. Else, ignore.
			if ( array_key_exists( $zid, $matches ) && $matches[ $zid ][ 'match_rate' ] >= $matchRate ) {
				continue;
			}

			$matches[ $zid ] = [
				// TODO (T338248): Implement, otherwise the generator won't work.
				'page_id' => 0,
				// TODO (T258915): When we support redirects, implement.
				'page_is_redirect' => false,
				'page_namespace' => NS_MAIN,
				'page_content_model' => CONTENT_MODEL_ZOBJECT,
				'page_title' => $zid,
				'page_type' => $row->wlzl_type,
				'return_type' => $row->wlzl_return_type,
				'match_label' => $hasSearchTerm ? $row->{ $matchField } : null,
				'match_is_primary' => $hasSearchTerm ? $row->wlzl_label_primary : null,
				'match_lang' => $hasSearchTerm ? $row->wlzl_language : null,
				'match_rate' => $matchRate,
				// Labels in the user language will be set after selecting the page
				'label' => null,
				'type_label' => null,
			];
		}

		// 2. Sort all results by match_rate to get best hits (descending; usort() also re-indexes from 0)
		usort( $matches, static function ( $a, $b ) {
			return $b[ 'match_rate' ] <=> $a[ 'match_rate' ];
		} );

		// 3. Prune the result set to the limit, slice to requested page, and set continue.
		// 'continue' is a zero-based page number, not a row offset. It is clamped to zero so
		// that a negative value cannot turn into a from-the-end offset in array_slice().
		$page = max( 0, intval( $continue ) );
		$offset = $page * $limit;
		$hits = array_slice( $matches, $offset, $limit );
		$remaining = count( $matches ) - $offset;
		if ( $remaining > $limit ) {
			$this->setContinueEnumParameter( 'continue', strval( $page + 1 ) );
		}

		// 4. Add relevant user language labels to each hit: This will be the main
		// name shown in the selector, while the match_label set above will be used
		// as supporting text when the search text has matched an alias or a label in
		// a different language.
		foreach ( $hits as $index => $hit ) {
			$hits[ $index ][ 'label' ] = $zObjectStore->fetchZObjectLabel( $hit[ 'page_title' ], $language );
			$hits[ $index ][ 'type_label' ] = $zObjectStore->fetchZObjectLabel( $hit[ 'page_type' ], $language );
		}

		if ( $resultPageSet ) {
			// TODO (T362192): This needs to be an IResultWrapper, not an array of assoc. objects, irritatingly.
			// $resultPageSet->populateFromQueryResult( $dbr, $hits );
			foreach ( $hits as $index => $entry ) {
				$resultPageSet->setGeneratorData(
					Title::makeTitle( $entry['page_namespace'], $entry['page_title'] ),
					// One-based position within the whole ranked result set; the page number
					// must be scaled by the page size so indices do not overlap between pages.
					[ 'index' => $offset + $index + 1 ]
				);
			}
		} else {
			$result = $this->getResult();
			$path = [ 'query', $this->getModuleName() ];
			foreach ( $hits as $entry ) {
				$result->addValue( $path, null, $entry );
			}
		}
	}

	/**
	 * @inheritDoc
	 * @codeCoverageIgnore
	 */
	protected function getAllowedParams(): array {
		// TODO (T330033): Consider injecting this service rather than just fetching from main
		$languageCodes = array_keys(
			MediaWikiServices::getInstance()->getLanguageNameUtils()->getLanguageNames()
		);

		return [
			// An empty search term is allowed; run() then skips match rating entirely.
			'search' => [
				ParamValidator::PARAM_TYPE => 'string',
				ParamValidator::PARAM_DEFAULT => '',
			],
			// Language in which the 'label' and 'type_label' of each hit are fetched.
			'language' => [
				ParamValidator::PARAM_TYPE => $languageCodes,
				ParamValidator::PARAM_REQUIRED => true,
			],
			// This is the wrong way around logically, but MediaWiki's Action API doesn't allow for
			// default-true boolean flags to ever be set false.
			'nofallback' => [
				ParamValidator::PARAM_TYPE => 'boolean',
				ParamValidator::PARAM_DEFAULT => false,
			],
			'exact' => [
				ParamValidator::PARAM_TYPE => 'boolean',
				ParamValidator::PARAM_DEFAULT => false,
			],
			'type' => [
				ParamValidator::PARAM_TYPE => 'string',
			],
			'return_type' => [
				ParamValidator::PARAM_TYPE => 'string',
			],
			'strict_return_type' => [
				ParamValidator::PARAM_TYPE => 'boolean',
				ParamValidator::PARAM_DEFAULT => false,
			],
			'limit' => [
				ParamValidator::PARAM_TYPE => 'limit',
				ParamValidator::PARAM_DEFAULT => 10,
				IntegerDef::PARAM_MIN => 1,
				IntegerDef::PARAM_MAX => ApiBase::LIMIT_BIG1,
				IntegerDef::PARAM_MAX2 => ApiBase::LIMIT_BIG2,
			],
			// Zero-based page number as used by run(), not a row offset.
			'continue' => [
				ApiBase::PARAM_HELP_MSG => 'api-help-param-continue',
			],
		];
	}

	/**
	 * Rates how closely a hit matches the search term, based on Levenshtein distance.
	 *
	 * The rate is ( max length - distance ) / max length, where lengths are byte
	 * lengths (strlen): 1 for identical strings, lower for more different ones.
	 *
	 * @param string $substring The search term
	 * @param string $hit The matched label or ZID
	 * @return float|int Two empty strings are identical and rate 1.0; an exact
	 *  division yields an int.
	 */
	private static function getMatchRate( $substring, $hit ) {
		$longest = max( strlen( $substring ), strlen( $hit ) );
		if ( $longest === 0 ) {
			// Both strings are empty: avoid dividing by zero.
			return 1.0;
		}

		return ( $longest - levenshtein( $substring, $hit ) ) / $longest;
	}

	/**
	 * @see ApiBase::getExamplesMessages()
	 * @return array Map of example query string to the message key describing it
	 * @codeCoverageIgnore
	 */
	protected function getExamplesMessages() {
		return [
			'action=query&list=wikilambdasearch_labels&wikilambdasearch_search=foo&wikilambdasearch_language=en'
				=> 'apihelp-query+wikilambda-example-simple',
			'action=query&list=wikilambdasearch_labels&wikilambdasearch_search=foo&wikilambdasearch_language=fr'
				. '&wikilambdasearch_nofallback=true'
				=> 'apihelp-query+wikilambda-example-nofallback',
			'action=query&list=wikilambdasearch_labels&wikilambdasearch_type=Z4&wikilambdasearch_language=en'
				=> 'apihelp-query+wikilambda-example-type',
			'action=query&list=wikilambdasearch_labels&wikilambdasearch_return_type=Z40&wikilambdasearch_language=en'
				=> 'apihelp-query+wikilambda-example-return-type',
			'action=query&list=wikilambdasearch_labels&wikilambdasearch_return_type=Z40'
				. '&wikilambdasearch_strict_return_type=true&wikilambdasearch_language=en'
				=> 'apihelp-query+wikilambda-example-strict-return-type',
		];
	}
}