Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
45.05% |
91 / 202 |
|
22.73% |
5 / 22 |
CRAP | |
0.00% |
0 / 1 |
Util | |
45.05% |
91 / 202 |
|
22.73% |
5 / 22 |
1226.07 | |
0.00% |
0 / 1 |
getNamespaceText | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
recordPoolStats | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
20 | |||
wrapWithPoolStats | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
2 | |||
doPoolCounterWork | |
0.00% |
0 / 21 |
|
0.00% |
0 / 1 |
20 | |||
parsePotentialPercent | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
parseSettingsInMessage | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
overrideYesNo | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
overrideNumeric | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
72 | |||
getDefaultBoostTemplates | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
3 | |||
getOnWikiBoostTemplates | |
90.91% |
20 / 22 |
|
0.00% |
0 / 1 |
5.02 | |||
stripQuestionMarks | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
7 | |||
getExecutionId | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
resetExecutionId | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getRequestSetToken | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
6 | |||
generateIdentToken | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
6 | |||
getExecutionContext | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
20 | |||
identifyNamespace | |
87.50% |
21 / 24 |
|
0.00% |
0 / 1 |
8.12 | |||
isEmpty | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
7 | |||
setIfDefined | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
6 | |||
getStatsFactory | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
looksLikeAutomation | |
80.00% |
12 / 15 |
|
0.00% |
0 / 1 |
6.29 | |||
processSearchRawReturn | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
30 |
1 | <?php |
2 | |
3 | namespace CirrusSearch; |
4 | |
5 | use MediaWiki\Context\RequestContext; |
6 | use MediaWiki\Language\Language; |
7 | use MediaWiki\Logger\LoggerFactory; |
8 | use MediaWiki\MediaWikiServices; |
9 | use MediaWiki\PoolCounter\PoolCounterWorkViaCallback; |
10 | use MediaWiki\Request\WebRequest; |
11 | use MediaWiki\Status\Status; |
12 | use MediaWiki\Title\Title; |
13 | use MediaWiki\User\UserIdentity; |
14 | use MediaWiki\WikiMap\WikiMap; |
15 | use Wikimedia\Assert\Assert; |
16 | use Wikimedia\IPUtils; |
17 | use Wikimedia\Stats\StatsFactory; |
18 | |
19 | /** |
20 | * Random utility functions that don't have a better home |
21 | * |
22 | * This program is free software; you can redistribute it and/or modify |
23 | * it under the terms of the GNU General Public License as published by |
24 | * the Free Software Foundation; either version 2 of the License, or |
25 | * (at your option) any later version. |
26 | * |
27 | * This program is distributed in the hope that it will be useful, |
28 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
29 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
30 | * GNU General Public License for more details. |
31 | * |
32 | * You should have received a copy of the GNU General Public License along |
33 | * with this program; if not, write to the Free Software Foundation, Inc., |
34 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
35 | * http://www.gnu.org/copyleft/gpl.html |
36 | */ |
37 | class Util { |
38 | /** |
39 | * Cache getDefaultBoostTemplates() |
40 | * |
41 | * @var array|null boost templates |
42 | */ |
43 | private static $defaultBoostTemplates = null; |
44 | |
45 | /** |
46 | * @var string|null Id identifying this php execution |
47 | */ |
48 | private static $executionId; |
49 | |
/**
 * Return the namespace name of a title with its underscores replaced by
 * spaces. The name is obtained from Title::getNsText(); a non-string result
 * from that call is handed back unchanged.
 *
 * @param Title $title The page title to use
 * @return string|false
 */
public static function getNamespaceText( Title $title ) {
	$nsText = $title->getNsText();
	if ( !is_string( $nsText ) ) {
		// Nothing to rewrite, pass the value through as-is
		return $nsText;
	}
	return str_replace( '_', ' ', $nsText );
}
61 | |
/**
 * Record a single pool counter timing, labelled with the pool type and the
 * outcome, and copy it to the matching statsd key for backward compatibility.
 *
 * @param string $type The pool counter type, such as CirrusSearch-Search
 * @param bool $isSuccess If the pool counter gave a success, or failed the request
 * @param float $observation the time it took to update the counter
 * @return void
 */
private static function recordPoolStats( string $type, bool $isSuccess, float $observation ): void {
	// Only the part after the first '-' is used as the type label
	// (CirrusSearch-Search becomes Search); a type without '-' is kept whole.
	$dashAt = strpos( $type, '-' );
	$shortType = $dashAt === false ? $type : substr( $type, $dashAt + 1 );

	$status = $isSuccess ? "success" : "failure";
	$statsdSuffix = $isSuccess ? 'successMs' : 'failureMs';

	$timing = self::getStatsFactory()->getTiming( "pool_counter_seconds" );
	$timing
		->setLabel( "type", $shortType )
		->setLabel( "status", $status )
		->copyToStatsdAt( "CirrusSearch.poolCounter.$shortType.$statsdSuffix" )
		->observe( $observation );
}
82 | |
/**
 * Decorate a callback so that, each time it is invoked, the time elapsed
 * since $startPoolWork is recorded through recordPoolStats() before the
 * callback itself runs. Arguments and return value are forwarded untouched.
 *
 * @param float $startPoolWork The time this pool request started, from microtime( true )
 * @param string $type The pool counter type, such as CirrusSearch-Search
 * @param bool $isSuccess If the pool counter gave a success, or failed the request
 * @param callable $callback The function to wrap
 * @return callable The original callback wrapped to collect pool counter stats
 */
private static function wrapWithPoolStats( $startPoolWork,
	$type,
	$isSuccess,
	callable $callback
) {
	return function ( ...$args ) use ( $startPoolWork, $type, $isSuccess, $callback ) {
		// microtime() yields seconds, the recorded observation is in milliseconds
		$elapsedMs = 1000 * ( microtime( true ) - $startPoolWork );
		self::recordPoolStats( $type, $isSuccess, $elapsedMs );

		return $callback( ...$args );
	};
}
104 | |
/**
 * Run a callback under the pool counter, using the single call pattern that
 * Cirrus always uses instead of the full pool counter interface.
 *
 * Without a user the callback is run directly and the pool counter is not
 * involved. Otherwise both the work callback and the error handler are
 * wrapped so that the time spent is recorded as a success or a failure.
 *
 * @param string $type same as type parameter on PoolCounter::factory
 * @param UserIdentity|null $user
 * @param callable $workCallback callback when pool counter is acquired. Called with
 *  no parameters.
 * @param string|null $busyErrorMsg The i18n key to return when the queue
 *  is full, or null to use the default.
 * @return mixed
 */
public static function doPoolCounterWork( $type, $user, $workCallback, $busyErrorMsg = null ) {
	global $wgCirrusSearchPoolCounterKey;

	if ( !$user ) {
		// We don't want to even use the pool counter if there isn't a user.
		// Note that anonymous users are still users, this is most likely
		// maintenance scripts.
		// @todo Maintenance scripts and jobs should already override
		// poolcounters as necessary, can this be removed?
		return $workCallback();
	}

	// By default the pool counter allows you to lock the same key with
	// multiple types. That might be useful but it isn't how Cirrus thinks.
	// Instead, all keys are scoped to their type.
	$key = "$type:$wgCirrusSearchPoolCounterKey";

	$onPoolError = static function ( Status $status ) use ( $key, $busyErrorMsg ) {
		$error = $status->getMessages()[0]->getKey();

		LoggerFactory::getInstance( 'CirrusSearch' )->warning(
			"Pool error on {key}: {error}",
			[ 'key' => $key, 'error' => $error ]
		);

		// A full queue gets the (possibly caller supplied) "too busy" message,
		// anything else is reported as a generic backend error.
		$messageKey = 'cirrussearch-backend-error';
		if ( $error === 'pool-queuefull' ) {
			$messageKey = $busyErrorMsg ?: 'cirrussearch-too-busy-error';
		}
		return Status::newFatal( $messageKey );
	};

	// wrap some stats collection on the success/failure handlers
	$startPoolWork = microtime( true );
	$work = new PoolCounterWorkViaCallback( $type, $key, [
		'doWork' => self::wrapWithPoolStats( $startPoolWork, $type, true, $workCallback ),
		'error' => self::wrapWithPoolStats( $startPoolWork, $type, false, $onPoolError ),
	] );
	return $work->execute();
}
159 | |
/**
 * Convert a string to a float. When the string contains a '%' anywhere the
 * parsed number is divided by 100, otherwise it is returned as parsed.
 *
 * @param string $str
 * @return float
 */
public static function parsePotentialPercent( $str ) {
	$number = floatval( $str );
	$isPercent = strpos( $str, '%' ) !== false;
	return $isPercent ? $number / 100 : $number;
}
171 | |
/**
 * Parse a message content into an array. This function is generally used to
 * parse settings stored as i18n messages (see cirrussearch-boost-templates).
 *
 * The message is split on newlines; everything from a '#' to the end of a
 * line is treated as a comment and removed, surrounding whitespace is
 * trimmed, and lines that end up empty are dropped. The keys of the returned
 * array are the original zero-based line numbers, so they may have gaps.
 *
 * @param string $message
 * @return string[]
 */
public static function parseSettingsInMessage( $message ) {
	$lines = explode( "\n", $message );
	$lines = preg_replace( '/#.*$/', '', $lines ); // Remove comments
	$lines = array_map( 'trim', $lines ); // Remove extra spaces
	// Remove empty lines. The comparison is explicit because array_filter()
	// without a callback drops every falsy value, which would also discard a
	// line whose whole content is "0".
	return array_filter(
		$lines,
		static fn ( $line ) => $line !== ''
	);
}
186 | |
/**
 * Overwrite $dest with the boolean reading (via wfStringToBool) of the
 * request value $name. When the request has no such value $dest is left
 * untouched.
 *
 * @param mixed &$dest
 * @param WebRequest $request
 * @param string $name
 */
public static function overrideYesNo( &$dest, $request, $name ) {
	$raw = $request->getVal( $name );
	if ( $raw === null ) {
		// Value not supplied, keep whatever $dest already holds
		return;
	}
	$dest = wfStringToBool( $raw );
}
200 | |
/**
 * Set $dest to the numeric value from $request->getVal( $name ) if it is <= $limit,
 * or >= $limit if $upperLimit is false. Without a limit any numeric value is
 * accepted. Missing or non-numeric values leave $dest untouched.
 *
 * The value is stored exactly as returned by the request, it is not cast.
 *
 * @param mixed &$dest
 * @param WebRequest $request
 * @param string $name
 * @param int|null $limit
 * @param bool $upperLimit
 */
public static function overrideNumeric( &$dest, $request, $name, $limit = null, $upperLimit = true ) {
	$val = $request->getVal( $name );
	if ( $val === null || !is_numeric( $val ) ) {
		return;
	}

	if ( $limit === null ) {
		$accepted = true;
	} elseif ( $upperLimit ) {
		// $limit is a maximum
		$accepted = $val <= $limit;
	} else {
		// $limit is a minimum
		$accepted = $val >= $limit;
	}

	if ( $accepted ) {
		$dest = $val;
	}
}
223 | |
/**
 * Get the boost templates to apply by default.
 *
 * The value of CirrusSearchBoostTemplates is used when on wiki messages are
 * ignored (CirrusSearchIgnoreOnWikiBoostTemplates) or when the on wiki
 * lookup yields nothing; otherwise the on wiki templates win.
 *
 * @param SearchConfig|null $config Search config requesting the templates
 * @return float[]
 */
public static function getDefaultBoostTemplates( ?SearchConfig $config = null ) {
	if ( $config === null ) {
		$config = MediaWikiServices::getInstance()->getConfigFactory()->makeConfig( 'CirrusSearch' );
	}

	$fromConfig = $config->get( 'CirrusSearchBoostTemplates' );
	if ( $config->get( 'CirrusSearchIgnoreOnWikiBoostTemplates' ) ) {
		// on wiki messages disabled, we can return this config
		// directly
		return $fromConfig;
	}

	// An empty on wiki config (or an unknown one for non-local config)
	// falls back to the templates from config.
	return self::getOnWikiBoostTemplates( $config ) ?: $fromConfig;
}
247 | |
/**
 * Load and cache boost templates configured on wiki via the system
 * message 'cirrussearch-boost-templates'.
 *
 * For the local wiki the message is parsed, stored in the WAN cache for
 * 600 seconds and additionally memoised in a static property. For any other
 * wiki only the WAN cache is consulted. An empty array therefore means either
 * that nothing is configured on wiki, or, for a non local wiki, that the
 * value is not known.
 *
 * @param SearchConfig $config
 * @return float[] indexed by template name
 */
private static function getOnWikiBoostTemplates( SearchConfig $config ) {
	$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
	$cacheKey = $cache->makeGlobalKey( 'cirrussearch-boost-templates', $config->getWikiId() );

	if ( $config->getWikiId() != WikiMap::getCurrentWikiId() ) {
		// Boost templates of another wiki: use them if they happen to be
		// cached, otherwise don't bother and report an empty (unknown) value.
		$cached = $cache->get( $cacheKey );
		return is_array( $cached ) ? $cached : [];
	}

	// Local wiki. The static cache is never set with non-local wiki data.
	if ( self::$defaultBoostTemplates === null ) {
		self::$defaultBoostTemplates = $cache->getWithSetCallback(
			$cacheKey,
			600,
			static function () {
				$source = wfMessage( 'cirrussearch-boost-templates' )->inContentLanguage();
				if ( $source->isDisabled() ) {
					return [];
				}
				$lines = Util::parseSettingsInMessage( $source->plain() );
				// Now parse the templates
				return Query\BoostTemplatesFeature::parseBoostTemplates( implode( ' ', $lines ) );
			}
		);
	}
	return self::$defaultBoostTemplates;
}
297 | |
/**
 * Strip question marks from queries, according to the defined stripping
 * level, defined by $wgCirrusSearchStripQuestionMarks. Strip all ?s, those
 * at word breaks, or only string-final. Ignore queries that are all
 * punctuation or use insource. Don't remove escaped \?s, but unescape them.
 *
 * Queries containing 'insource:/' or 'intitle:/' and queries made only of
 * punctuation/separators are returned untouched, as is any query when
 * $strippingLevel is not one of the three known values.
 *
 * @param string $term
 * @param string $strippingLevel Either "all", "break", or "final"
 * @return string modified term, based on strippingLevel
 */
public static function stripQuestionMarks( $term, $strippingLevel ) {
	if ( strpos( $term, 'insource:/' ) !== false ||
		strpos( $term, 'intitle:/' ) !== false ||
		preg_match( '/^[\p{P}\p{Z}]+$/u', $term )
	) {
		return $term;
	}

	// FIXME: get rid of negative lookbehinds on (?<!\\\\)
	// it may improperly transform \\? into \? instead of \\ and destroy properly escaped \
	if ( $strippingLevel === 'final' ) {
		// strip only query-final question marks that are not escaped
		$term = preg_replace( "/((?<!\\\\)\?|\s)+$/", '', $term );
	} elseif ( $strippingLevel === 'break' ) {
		// strip question marks at word boundaries
		// \PL has to be evaluated on code points (u modifier). In byte mode
		// the first byte of a multi-byte letter is classified on its own, and
		// e.g. the 0xD7 lead byte of Hebrew letters is not a letter, so a
		// question mark inside a Hebrew word was treated as a word break.
		$stripped = preg_replace( '/(?<!\\\\)\?+(\PL|$)/u', '$1', $term );
		if ( $stripped === null ) {
			// The u modifier rejects invalid UTF-8; process such input
			// byte-wise rather than losing the query.
			$stripped = preg_replace( '/(?<!\\\\)\?+(\PL|$)/', '$1', $term );
		}
		$term = $stripped;
	} elseif ( $strippingLevel === 'all' ) {
		// strip all unescaped question marks
		$term = preg_replace( '/(?<!\\\\)\?+/', ' ', $term );
	} else {
		// Unknown level: no stripping and no unescaping
		return $term;
	}

	// Escaped question marks survived the stripping, unescape them now
	return preg_replace( '/\\\\\?/', '?', $term );
}
331 | |
/**
 * Identifies a specific execution of php. That might be one web
 * request, or multiple jobs run in the same executor. An execution id
 * is valid over a brief timespan, perhaps a minute or two for some jobs.
 *
 * The id is generated with mt_rand() on first use and then kept in a static
 * property, so every later call returns the same value.
 *
 * @return string unique identifier
 */
public static function getExecutionId() {
	self::$executionId ??= (string)mt_rand();
	return self::$executionId;
}
345 | |
/**
 * Forget the stored execution id so that the next call to getExecutionId()
 * generates a new one.
 *
 * Unit tests only.
 */
public static function resetExecutionId() {
	self::$executionId = null;
}
352 | |
/**
 * Get a token that (hopefully) uniquely identifies this search. It will be
 * added to the search result page js config vars, and put into the url with
 * history.replaceState(). This means click through's from supported browsers
 * will record this token as part of the referrer.
 *
 * The token is created once and then reused by every later call.
 *
 * @return string
 */
public static function getRequestSetToken() {
	static $token;
	if ( $token !== null ) {
		return $token;
	}

	// random UID, 70B tokens have a collision probability of 4*10^-16
	// so should work for marking unique queries.
	$uuid = MediaWikiServices::getInstance()->getGlobalIdGenerator()->newUUIDv4();

	// Join the five groups of the UUID, leaving out the separator found at
	// offsets 8, 13, 18 and 23.
	$hex = implode( '', [
		substr( $uuid, 0, 8 ),
		substr( $uuid, 9, 4 ),
		substr( $uuid, 14, 4 ),
		substr( $uuid, 19, 4 ),
		substr( $uuid, 24 ),
	] );

	// make it a little shorter by using straight base36
	$token = \Wikimedia\base_convert( $hex, 16, 36 );
	return $token;
}
376 | |
/**
 * Build a token from properties of the main request: the md5 of the extra
 * data, the client ip, the X-Forwarded-For header and the User-Agent header,
 * joined with ':'.
 *
 * @param string $extraData Extra information to mix into the hash
 * @return string A token that identifies the source of the request
 */
public static function generateIdentToken( $extraData = '' ) {
	$request = RequestContext::getMain()->getRequest();

	try {
		$ip = $request->getIP();
	} catch ( \MWException $e ) {
		// No ip, probably running cli?
		$ip = 'unknown';
	}

	$forwardedFor = $request->getHeader( 'X-Forwarded-For' );
	$userAgent = $request->getHeader( 'User-Agent' );

	return md5( implode( ':', [ $extraData, $ip, $forwardedFor, $userAgent ] ) );
}
396 | |
/**
 * Classify how php is currently being run. The command line SAPI is
 * reported as cli; otherwise the MW_ENTRY_POINT constant decides between
 * api, web (entry point "index") and misc for everything else.
 *
 * @return string The context the request is in. Either cli, api, web or misc.
 */
public static function getExecutionContext() {
	if ( PHP_SAPI === 'cli' ) {
		return 'cli';
	}

	switch ( MW_ENTRY_POINT ) {
		case 'api':
			return 'api';
		case 'index':
			return 'web';
		default:
			return 'misc';
	}
}
411 | |
/**
 * Identify a namespace by attempting some unicode folding techniques.
 * 2 methods supported:
 * - naive: case folding + naive accents removal (only some combined accents are removed)
 * - utr30: (slow to load) case folding + strong accent squashing based on the withdrawn UTR30 specs
 * all methods will apply something similar to near space flattener.
 *
 * The given name and every namespace name known to the language are folded
 * with the same transliterator and compared; the id of the first match is
 * returned. Transliterators are built once and kept in static variables.
 *
 * @param string $namespace name of the namespace to identify
 * @param string $method either naive or utr30
 * @param Language|null $language defaults to the content language
 * @return bool|int the namespace id, or false when the folded name is empty
 *  or nothing matches
 */
public static function identifyNamespace( $namespace, $method = 'naive', ?Language $language = null ) {
	static $naive = null;
	static $utr30 = null;

	$normalizer = null;
	if ( $method === 'naive' ) {
		$naive ??= \Transliterator::createFromRules(
			'::NFD;::Upper;::Lower;::[:Nonspacing Mark:] Remove;::NFC;[\_\-\'\u2019\u02BC]>\u0020;'
		);
		$normalizer = $naive;
	} elseif ( $method === 'utr30' ) {
		$utr30 ??= \Transliterator::createFromRules( file_get_contents( __DIR__ . '/../data/utr30.txt' ) );
		$normalizer = $utr30;
	}

	// Covers both an unknown $method and a transliterator that failed to build
	Assert::postcondition( $normalizer !== null,
		'Failed to load Transliterator with method ' . $method );

	$folded = $normalizer->transliterate( $namespace );
	if ( $folded === '' ) {
		return false;
	}

	$language ??= MediaWikiServices::getInstance()->getContentLanguage();
	foreach ( $language->getNamespaceIds() as $candidate => $nsId ) {
		if ( $normalizer->transliterate( $candidate ) === $folded ) {
			return $nsId;
		}
	}

	return false;
}
458 | |
/**
 * Helper for PHP's annoying emptiness check.
 * empty(0) should not be true!
 * empty(false) should not be true!
 *
 * Only null, the empty string, the empty array and an object that casts to
 * an empty array count as empty. Everything else, including 0, false and
 * the string "0", does not.
 *
 * @param mixed $v
 * @return bool
 */
public static function isEmpty( $v ) {
	if ( $v === null || $v === '' || $v === [] ) {
		return true;
	}
	return is_object( $v ) && count( (array)$v ) === 0;
}
475 | |
/**
 * Helper function to conditionally set a key in a dest array only if it
 * is defined in a source array. This is just to help DRY up what would
 * otherwise be a long series of
 * if ( isset($sourceArray[$key] )) { $destArray[$key] = $sourceArray[$key] }
 * statements. This also supports using a different key in the dest array,
 * as well as mapping the value when assigning to $destArray.
 *
 * Note that "defined" means the key exists in $sourceArray, even when its
 * value is null.
 *
 * Usage:
 * $arr1 = ['KEY1' => '123'];
 * $arr2 = [];
 *
 * setIfDefined($arr1, 'KEY1', $arr2, 'key1', 'intval');
 * // $arr2['key1'] is now set to 123 (integer value)
 *
 * setIfDefined($arr1, 'KEY2', $arr2);
 * // $arr2 stays the same, because $arr1 does not have 'KEY2' defined.
 *
 * @param array $sourceArray the array from which to look for $sourceKey
 * @param string $sourceKey the key to look for in $sourceArray
 * @param array &$destArray by reference destination array in which to set value if defined
 * @param string|null $destKey optional, key to use instead of $sourceKey in $destArray.
 *  Null or the empty string mean "use $sourceKey".
 * @param callable|null $mapFn optional, If set, this will be called on the value before setting it.
 * @param bool $checkEmpty If false, emptiness of result after $mapFn is called will not be
 * 	checked before setting on $destArray. If true, it will, using Util::isEmpty.
 * 	Default: true
 * @return array the (possibly updated) $destArray
 */
public static function setIfDefined(
	array $sourceArray,
	$sourceKey,
	array &$destArray,
	$destKey = null,
	$mapFn = null,
	$checkEmpty = true
) {
	if ( !array_key_exists( $sourceKey, $sourceArray ) ) {
		return $destArray;
	}

	$val = $sourceArray[$sourceKey];
	if ( $mapFn !== null ) {
		$val = $mapFn( $val );
	}

	// Only set in $destArray if we are not checking emptiness,
	// or if we are and the $val is not empty.
	if ( !$checkEmpty || !self::isEmpty( $val ) ) {
		// A plain truthiness test would also discard the valid keys '0'
		// and 0, so only null and '' are treated as "no key given".
		$hasDestKey = $destKey !== null && $destKey !== '';
		$destArray[$hasDestKey ? $destKey : $sourceKey] = $val;
	}
	return $destArray;
}
526 | |
/**
 * Fetch the stats factory from the MediaWiki service container, scoped to
 * this extension.
 *
 * @return StatsFactory prefixed with the "CirrusSearch" component
 */
public static function getStatsFactory(): StatsFactory {
	$services = MediaWikiServices::getInstance();
	return $services->getStatsFactory()->withComponent( "CirrusSearch" );
}
533 | |
/**
 * Guess whether a request comes from automation rather than from a person.
 *
 * Two signals are checked, in this order: an http header matching one of
 * the regexes in CirrusSearchAutomationHeaderRegexes, then the ip address
 * falling into one of the ranges in CirrusSearchAutomationCIDRs. Either
 * setting may be unset, in which case that check simply never matches.
 *
 * @param SearchConfig $config Configuration of the check
 * @param string $ip The address to check against, ipv4 or ipv6.
 * @param string[] $headers Map from http header name to value. All names must be uppercased.
 * @return bool True when the parameters appear to be a non-interactive use case.
 */
public static function looksLikeAutomation( SearchConfig $config, string $ip, array $headers ): bool {
	// Is there an http header that can be matched with regex to flag automation,
	// such as the user-agent or a flag applied by some infrastructure?
	$automationHeaders = $config->get( 'CirrusSearchAutomationHeaderRegexes' ) ?? [];
	foreach ( $automationHeaders as $name => $pattern ) {
		// Configured names may use any case, $headers is keyed in upper case
		$name = strtoupper( $name );
		if ( !isset( $headers[$name] ) ) {
			continue;
		}
		$ret = preg_match( $pattern, $headers[$name] );
		if ( $ret === 1 ) {
			return true;
		} elseif ( $ret === false ) {
			// A broken pattern is reported but does not stop the remaining checks
			LoggerFactory::getInstance( 'CirrusSearch' )->warning(
				"Invalid regex provided for header `$name` in `CirrusSearchAutomationHeaderRegexes`." );
		}
	}

	// Does the ip address fall into a subnet known for automation?
	// Default to no ranges, as done for the header regexes above, so that an
	// unset value is not passed on as a non-array.
	$ranges = $config->get( 'CirrusSearchAutomationCIDRs' ) ?? [];
	if ( IPUtils::isInRanges( $ip, $ranges ) ) {
		return true;
	}

	// Default assumption that requests are interactive
	return false;
}
567 | |
/**
 * If we're supposed to create raw result, create and return it,
 * or output it and finish.
 *
 * When the debug options request dump-and-die, the result is printed (as an
 * explain page if an explain format is set, as pretty printed JSON
 * otherwise) and the script exits. In every other case $result is returned
 * exactly as it was passed in.
 *
 * @param mixed $result Search result data
 * @param WebRequest $request Request context
 * @param CirrusDebugOptions $debugOptions
 * @return mixed $result, unchanged
 */
public static function processSearchRawReturn( $result, WebRequest $request,
	CirrusDebugOptions $debugOptions ) {
	$header = null;
	$output = null;
	if ( $debugOptions->getCirrusExplainFormat() !== null ) {
		$printer = new ExplainPrinter( $debugOptions->getCirrusExplainFormat() );
		$output = $printer->format( $result );
		$header = 'Content-type: text/html; charset=UTF-8';
	}

	// Dump-and-die should always be set, except in the case of the test suite
	// which wants the actual objects returned.
	if ( !$debugOptions->isDumpAndDie() ) {
		return $result;
	}

	if ( $output === null ) {
		// No explain output was produced, dump the result as JSON instead
		$header = 'Content-type: application/json; charset=UTF-8';
		$output = $result === null ? '{}' : json_encode( $result, JSON_PRETTY_PRINT );
	}

	// When dumping the query we skip _everything_ but echoing the query.
	RequestContext::getMain()->getOutput()->disable();
	// @phan-suppress-next-line PhanTypeMismatchArgumentNullable $header can't be null here
	$request->response()->header( $header );
	echo $output;
	exit();
}
608 | } |