Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
44.61% |
91 / 204 |
|
21.74% |
5 / 23 |
CRAP | |
0.00% |
0 / 1 |
Util | |
44.61% |
91 / 204 |
|
21.74% |
5 / 23 |
1312.96 | |
0.00% |
0 / 1 |
getNamespaceText | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
getPoolStatsKey | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
wrapWithPoolStats | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
2 | |||
doPoolCounterWork | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
20 | |||
parsePotentialPercent | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
parseSettingsInMessage | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
endsWith | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
overrideYesNo | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
overrideNumeric | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
72 | |||
getDefaultBoostTemplates | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
3 | |||
getOnWikiBoostTemplates | |
90.91% |
20 / 22 |
|
0.00% |
0 / 1 |
5.02 | |||
stripQuestionMarks | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
7 | |||
getExecutionId | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
resetExecutionId | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getRequestSetToken | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
6 | |||
generateIdentToken | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
6 | |||
getExecutionContext | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
20 | |||
identifyNamespace | |
87.50% |
21 / 24 |
|
0.00% |
0 / 1 |
8.12 | |||
isEmpty | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
7 | |||
setIfDefined | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
6 | |||
getStatsDataFactory | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
looksLikeAutomation | |
80.00% |
12 / 15 |
|
0.00% |
0 / 1 |
6.29 | |||
processSearchRawReturn | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
30 |
1 | <?php |
2 | |
3 | namespace CirrusSearch; |
4 | |
5 | use IBufferingStatsdDataFactory; |
6 | use MediaWiki\Logger\LoggerFactory; |
7 | use MediaWiki\MediaWikiServices; |
8 | use MediaWiki\Request\WebRequest; |
9 | use MediaWiki\Status\Status; |
10 | use MediaWiki\Title\Title; |
11 | use MediaWiki\User\UserIdentity; |
12 | use MediaWiki\WikiMap\WikiMap; |
13 | use NullStatsdDataFactory; |
14 | use PoolCounterWorkViaCallback; |
15 | use UIDGenerator; |
16 | use Wikimedia\Assert\Assert; |
17 | use Wikimedia\IPUtils; |
18 | |
19 | /** |
20 | * Random utility functions that don't have a better home |
21 | * |
22 | * This program is free software; you can redistribute it and/or modify |
23 | * it under the terms of the GNU General Public License as published by |
24 | * the Free Software Foundation; either version 2 of the License, or |
25 | * (at your option) any later version. |
26 | * |
27 | * This program is distributed in the hope that it will be useful, |
28 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
29 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
30 | * GNU General Public License for more details. |
31 | * |
32 | * You should have received a copy of the GNU General Public License along |
33 | * with this program; if not, write to the Free Software Foundation, Inc., |
34 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
35 | * http://www.gnu.org/copyleft/gpl.html |
36 | */ |
37 | class Util { |
/**
 * Static cache of the boost templates loaded for the local wiki.
 * Written by getOnWikiBoostTemplates(), which never stores non-local
 * wiki data here.
 *
 * @var array|null boost templates, null until they have been loaded
 */
private static $defaultBoostTemplates = null;

/**
 * Generated lazily by getExecutionId(); resetExecutionId() sets it back
 * to null.
 *
 * @var string|null Id identifying this php execution
 */
private static $executionId;
49 | |
/**
 * Return the namespace name of a title with underscores turned into
 * spaces. The name is obtained from Title::getNsText(), which may vary
 * by gender; a non-string result is handed back untouched.
 *
 * @param Title $title The page title to use
 * @return string|false
 */
public static function getNamespaceText( Title $title ) {
	$nsText = $title->getNsText();
	if ( !is_string( $nsText ) ) {
		return $nsText;
	}
	return str_replace( '_', ' ', $nsText );
}
61 | |
/**
 * Build the stats key under which timings of a pool counter request are
 * reported.
 *
 * @param string $type The pool counter type, such as CirrusSearch-Search
 * @param bool $isSuccess If the pool counter gave a success, or failed the request
 * @return string The key used for collecting timing stats about this pool counter request
 */
private static function getPoolStatsKey( $type, $isSuccess ) {
	// Keep only what follows the first dash: "CirrusSearch-Search" => "Search".
	// A type without any dash is used as is.
	$pieces = explode( '-', $type, 2 );
	$shortType = $pieces[1] ?? $pieces[0];
	$suffix = $isSuccess ? 'successMs' : 'failureMs';
	return "CirrusSearch.poolCounter.$shortType.$suffix";
}
75 | |
/**
 * Decorate a pool counter callback so that, when it is invoked, the time
 * elapsed since $startPoolWork is reported as a timing metric before the
 * callback itself runs.
 *
 * @param float $startPoolWork The time this pool request started, from microtime( true )
 * @param string $type The pool counter type, such as CirrusSearch-Search
 * @param bool $isSuccess If the pool counter gave a success, or failed the request
 * @param callable $callback The function to wrap
 * @return callable The original callback wrapped to collect pool counter stats
 */
private static function wrapWithPoolStats( $startPoolWork, $type, $isSuccess, callable $callback ) {
	return function () use ( $callback, $isSuccess, $startPoolWork, $type ) {
		$stats = MediaWikiServices::getInstance()->getStatsdDataFactory();
		$statsKey = self::getPoolStatsKey( $type, $isSuccess );
		// Elapsed wall time in whole milliseconds.
		$elapsedMs = intval( 1000 * ( microtime( true ) - $startPoolWork ) );
		$stats->timing( $statsKey, $elapsedMs );

		// Forward whatever arguments the wrapper received.
		$args = func_get_args();
		return $callback( ...$args );
	};
}
97 | |
/**
 * Wraps the complex pool counter interface to force the single call pattern
 * that Cirrus always uses.
 *
 * Without a user the work callback runs directly and the pool counter is
 * bypassed. Otherwise the work goes through PoolCounterWorkViaCallback under
 * a key scoped to $type, and both the success and the error handler report
 * their timing via wrapWithPoolStats().
 *
 * @param string $type same as type parameter on PoolCounter::factory
 * @param UserIdentity|null $user
 * @param callable $workCallback callback when pool counter is acquired. Called with
 *   no parameters.
 * @param string|null $busyErrorMsg The i18n key to return when the queue
 *   is full, or null to use the default.
 * @return mixed Whatever $workCallback returns when there is no user, otherwise
 *   the result of PoolCounterWorkViaCallback::execute()
 */
public static function doPoolCounterWork( $type, $user, $workCallback, $busyErrorMsg = null ) {
	global $wgCirrusSearchPoolCounterKey;

	// By default the pool counter allows you to lock the same key with
	// multiple types. That might be useful but it isn't how Cirrus thinks.
	// Instead, all keys are scoped to their type.

	if ( !$user ) {
		// We don't want to even use the pool counter if there isn't a user.
		// Note that anonymous users are still users, this is most likely
		// maintenance scripts.
		// @todo Maintenance scripts and jobs should already override
		// poolcounters as necessary, can this be removed?
		return $workCallback();
	}

	$key = "$type:$wgCirrusSearchPoolCounterKey";

	$errorCallback = static function ( Status $status ) use ( $key, $busyErrorMsg ) {
		/** @todo No good replacements for getErrorsArray */
		$errors = $status->getErrorsArray();
		// Guard against a status carrying no error entry: reading
		// $errors[0][0] directly would raise an undefined offset warning.
		// Such a status is reported as a generic backend error below.
		$error = $errors[0][0] ?? null;

		LoggerFactory::getInstance( 'CirrusSearch' )->warning(
			"Pool error on {key}: {error}",
			[ 'key' => $key, 'error' => $error ]
		);
		if ( $error === 'pool-queuefull' ) {
			return Status::newFatal( $busyErrorMsg ?: 'cirrussearch-too-busy-error' );
		}
		return Status::newFatal( 'cirrussearch-backend-error' );
	};

	// wrap some stats collection on the success/failure handlers
	$startPoolWork = microtime( true );
	$workCallback = self::wrapWithPoolStats( $startPoolWork, $type, true, $workCallback );
	$errorCallback = self::wrapWithPoolStats( $startPoolWork, $type, false, $errorCallback );

	$work = new PoolCounterWorkViaCallback( $type, $key, [
		'doWork' => $workCallback,
		'error' => $errorCallback,
	] );
	return $work->execute();
}
154 | |
/**
 * Parse a number that may be written as a percentage. When the string
 * contains a '%' the parsed value is divided by 100, otherwise it is
 * returned as parsed by floatval().
 *
 * @param string $str
 * @return float
 */
public static function parsePotentialPercent( $str ) {
	$number = floatval( $str );
	$isPercent = strpos( $str, '%' ) !== false;
	return $isPercent ? $number / 100 : $number;
}
166 | |
/**
 * Split a message into its meaningful lines. This function is generally
 * used to parse settings stored as i18n messages (see
 * cirrussearch-boost-templates).
 *
 * Everything from a '#' to the end of a line is treated as a comment and
 * dropped, surrounding whitespace is trimmed, and lines that end up empty
 * are discarded. The keys of the returned array are the original line
 * indexes, so they may have gaps.
 *
 * @param string $message
 * @return string[]
 */
public static function parseSettingsInMessage( $message ) {
	$withoutComments = preg_replace(
		'/#.*$/',
		'',
		explode( "\n", $message )
	);
	// Trim each line, then let array_filter() drop the falsy (empty) ones.
	return array_filter( array_map( 'trim', $withoutComments ) );
}
181 | |
/**
 * Tell whether $string ends with $suffix (byte-wise comparison).
 *
 * @param string $string string to test
 * @param string $suffix
 * @return bool true if $string ends with $suffix
 */
public static function endsWith( $string, $suffix ) {
	$suffixLength = strlen( $suffix );
	$offset = strlen( $string ) - $suffixLength;
	// A negative offset means the suffix is longer than the string,
	// which can never match.
	return $offset >= 0
		&& substr_compare( $string, $suffix, $offset, $suffixLength ) === 0;
}
197 | |
/**
 * Overwrite $dest with the boolean interpretation (wfStringToBool) of
 * the request parameter $name. $dest is left untouched when the request
 * does not carry that parameter.
 *
 * @param mixed &$dest
 * @param WebRequest $request
 * @param string $name
 */
public static function overrideYesNo( &$dest, $request, $name ) {
	$raw = $request->getVal( $name );
	if ( $raw === null ) {
		return;
	}
	$dest = wfStringToBool( $raw );
}
211 | |
/**
 * Overwrite $dest with the request parameter $name when that parameter is
 * numeric and respects $limit: it must be <= $limit when $upperLimit is
 * true, or >= $limit when $upperLimit is false. Without a limit any
 * numeric value is accepted. The value is assigned as returned by
 * $request->getVal(), without conversion.
 *
 * @param mixed &$dest
 * @param WebRequest $request
 * @param string $name
 * @param int|null $limit
 * @param bool $upperLimit
 */
public static function overrideNumeric( &$dest, $request, $name, $limit = null, $upperLimit = true ) {
	$candidate = $request->getVal( $name );
	if ( $candidate === null || !is_numeric( $candidate ) ) {
		return;
	}
	if ( isset( $limit ) ) {
		$withinLimit = $upperLimit ? $candidate <= $limit : $candidate >= $limit;
		if ( !$withinLimit ) {
			return;
		}
	}
	$dest = $candidate;
}
234 | |
/**
 * Get the boost templates to apply by default.
 *
 * The templates come from the CirrusSearchBoostTemplates setting, unless
 * on-wiki templates are enabled (CirrusSearchIgnoreOnWikiBoostTemplates
 * is falsy) and getOnWikiBoostTemplates() yields a non-empty result, in
 * which case the on-wiki templates win.
 *
 * @param SearchConfig|null $config Search config requesting the templates,
 *   or null to use the 'CirrusSearch' config from the config factory
 * @return float[]
 */
public static function getDefaultBoostTemplates( ?SearchConfig $config = null ) {
	$config ??= MediaWikiServices::getInstance()->getConfigFactory()->makeConfig( 'CirrusSearch' );

	$fromConfig = $config->get( 'CirrusSearchBoostTemplates' );
	if ( $config->get( 'CirrusSearchIgnoreOnWikiBoostTemplates' ) ) {
		// on wiki messages disabled, we can return this config
		// directly
		return $fromConfig;
	}

	// When the onwiki config is empty (or unknown for non-local config)
	// fall back to the templates from config.
	return self::getOnWikiBoostTemplates( $config ) ?: $fromConfig;
}
258 | |
/**
 * Load the boost templates configured on wiki through the system message
 * 'cirrussearch-boost-templates'.
 *
 * For the local wiki the message is parsed, stored in the WAN cache (TTL
 * of 600) and memoized in a static property. For any other wiki only the
 * WAN cache is consulted. An empty array therefore means that no config
 * is available on wiki, or that the value is unknown when asked about a
 * non local wiki.
 *
 * @param SearchConfig $config
 * @return float[] indexed by template name
 */
private static function getOnWikiBoostTemplates( SearchConfig $config ) {
	$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
	$cacheKey = $cache->makeGlobalKey( 'cirrussearch-boost-templates', $config->getWikiId() );

	if ( $config->getWikiId() != WikiMap::getCurrentWikiId() ) {
		// Boost templates of another wiki: use them if they happen to be
		// cached, otherwise the value is unknown and we don't bother.
		$cached = $cache->get( $cacheKey );
		return is_array( $cached ) ? $cached : [];
	}

	// Local wiki: the static cache is never set with non-local wiki data.
	if ( self::$defaultBoostTemplates === null ) {
		self::$defaultBoostTemplates = $cache->getWithSetCallback(
			$cacheKey,
			600,
			static function () {
				$source = wfMessage( 'cirrussearch-boost-templates' )->inContentLanguage();
				if ( $source->isDisabled() ) {
					return [];
				}
				$lines = self::parseSettingsInMessage( $source->plain() );
				// Now parse the templates
				return Query\BoostTemplatesFeature::parseBoostTemplates( implode( ' ', $lines ) );
			}
		);
	}
	return self::$defaultBoostTemplates;
}
308 | |
/**
 * Strip question marks from queries according to the requested stripping
 * level (see $wgCirrusSearchStripQuestionMarks): "all" replaces every
 * unescaped run of ?s with a space, "break" removes those followed by a
 * non-letter or the end of the string, "final" removes only trailing ones
 * (together with trailing whitespace). Escaped \?s are kept and unescaped.
 *
 * Terms containing "insource:/" or "intitle:/", terms made only of
 * punctuation and separators, and unknown stripping levels are returned
 * untouched.
 *
 * @param string $term
 * @param string $strippingLevel Either "all", "break", or "final"
 * @return string modified term, based on strippingLevel
 */
public static function stripQuestionMarks( $term, $strippingLevel ) {
	$isRegexQuery = strpos( $term, 'insource:/' ) !== false
		|| strpos( $term, 'intitle:/' ) !== false;
	if ( $isRegexQuery || preg_match( '/^[\p{P}\p{Z}]+$/u', $term ) ) {
		return $term;
	}

	// FIXME: get rid of negative lookbehinds on (?<!\\\\)
	// it may improperly transform \\? into \? instead of \\ and destroy properly escaped \
	if ( $strippingLevel === 'final' ) {
		// strip only query-final question marks that are not escaped
		$pattern = "/((?<!\\\\)\?|\s)+$/";
		$replacement = '';
	} elseif ( $strippingLevel === 'break' ) {
		// strip question marks at word boundaries
		$pattern = '/(?<!\\\\)\?+(\PL|$)/';
		$replacement = '$1';
	} elseif ( $strippingLevel === 'all' ) {
		// strip all unescaped question marks
		$pattern = '/(?<!\\\\)\?+/';
		$replacement = ' ';
	} else {
		return $term;
	}

	$term = preg_replace( $pattern, $replacement, $term );
	// Finally unescape the question marks that were protected with a backslash.
	return preg_replace( '/\\\\\?/', '?', $term );
}
342 | |
/**
 * Identifies a specific execution of php. That might be one web
 * request, or multiple jobs run in the same executor. An execution id
 * is valid over a brief timespan, perhaps a minute or two for some jobs.
 *
 * The id is drawn from mt_rand() on first use and then reused until
 * resetExecutionId() is called.
 *
 * @return string unique identifier
 */
public static function getExecutionId() {
	self::$executionId ??= (string)mt_rand();
	return self::$executionId;
}
356 | |
/**
 * Forget the current execution id, so that the next call to
 * getExecutionId() generates a new one.
 *
 * Unit tests only
 */
public static function resetExecutionId() {
	self::$executionId = null;
}
363 | |
/**
 * Get a token that (hopefully) uniquely identifies this search. It will be
 * added to the search result page js config vars, and put into the url with
 * history.replaceState(). This means click through's from supported browsers
 * will record this token as part of the referrer.
 *
 * The token is generated once and then reused for every later call.
 *
 * @return string
 */
public static function getRequestSetToken() {
	static $token;
	if ( $token !== null ) {
		return $token;
	}

	// random UID, 70B tokens have a collision probability of 4*10^-16
	// so should work for marking unique queries.
	$uuid = UIDGenerator::newUUIDv4();
	// Pick the hex groups of the UUID, skipping the separator that sits
	// at offsets 8, 13, 18 and 23.
	$hexGroups = [
		substr( $uuid, 0, 8 ),
		substr( $uuid, 9, 4 ),
		substr( $uuid, 14, 4 ),
		substr( $uuid, 19, 4 ),
		substr( $uuid, 24 ),
	];
	// make it a little shorter by using straight base36
	$token = \Wikimedia\base_convert( implode( '', $hexGroups ), 16, 36 );
	return $token;
}
386 | |
/**
 * Hash the caller supplied data together with the client ip and the
 * X-Forwarded-For and User-Agent headers of the main request.
 *
 * @param string $extraData Extra information to mix into the hash
 * @return string A token that identifies the source of the request
 */
public static function generateIdentToken( $extraData = '' ) {
	$request = \RequestContext::getMain()->getRequest();
	try {
		$ip = $request->getIP();
	} catch ( \MWException $e ) {
		// No ip, probably running cli?
		$ip = 'unknown';
	}
	$forwardedFor = $request->getHeader( 'X-Forwarded-For' );
	$userAgent = $request->getHeader( 'User-Agent' );

	$parts = [ $extraData, $ip, $forwardedFor, $userAgent ];
	return md5( implode( ':', $parts ) );
}
406 | |
/**
 * Classify the current execution: 'cli' when running under the cli
 * SAPI, otherwise 'api' or 'web' depending on MW_ENTRY_POINT, and 'misc'
 * for any other entry point.
 *
 * @return string The context the request is in. Either cli, api, web or misc.
 */
public static function getExecutionContext() {
	if ( PHP_SAPI === 'cli' ) {
		return 'cli';
	}
	if ( MW_ENTRY_POINT == 'api' ) {
		return 'api';
	}
	if ( MW_ENTRY_POINT == 'index' ) {
		return 'web';
	}
	return 'misc';
}
421 | |
/**
 * Identify a namespace by attempting some unicode folding techniques.
 * 2 methods supported:
 * - naive: case folding + naive accents removal (only some combined accents are removed)
 * - utr30: (slow to load) case folding + strong accent squashing based on the withdrawn UTR30 specs
 * all methods will apply something similar to near space flattener.
 *
 * The normalized $namespace is compared against the normalized form of every
 * name returned by $language->getNamespaceIds(); the id of the first match is
 * returned. The transliterators are created once and kept in static variables.
 *
 * @param string $namespace name of the namespace to identify
 * @param string $method either naive or utr30
 * @param \Language|null $language language providing the namespace names,
 *   defaults to the content language
 * @return bool|int the namespace id, or false when $namespace normalizes to
 *   an empty string or matches no namespace
 */
public static function identifyNamespace( $namespace, $method = 'naive', ?\Language $language = null ) {
	static $naive = null;
	static $utr30 = null;

	$normalizer = null;
	if ( $method === 'naive' ) {
		$naive ??= \Transliterator::createFromRules(
			'::NFD;::Upper;::Lower;::[:Nonspacing Mark:] Remove;::NFC;[\_\-\'\u2019\u02BC]>\u0020;'
		);
		$normalizer = $naive;
	} elseif ( $method === 'utr30' ) {
		$utr30 ??= \Transliterator::createFromRules(
			file_get_contents( __DIR__ . '/../data/utr30.txt' )
		);
		$normalizer = $utr30;
	}

	// Also trips for an unsupported $method, which leaves $normalizer null.
	Assert::postcondition( $normalizer !== null,
		'Failed to load Transliterator with method ' . $method );
	$namespace = $normalizer->transliterate( $namespace );
	if ( $namespace === '' ) {
		return false;
	}
	$language ??= MediaWikiServices::getInstance()->getContentLanguage();
	foreach ( $language->getNamespaceIds() as $candidate => $nsId ) {
		if ( $normalizer->transliterate( $candidate ) === $namespace ) {
			return $nsId;
		}
	}

	return false;
}
468 | |
/**
 * Stricter replacement for PHP's empty().
 *
 * Only null, the empty string, the empty array and objects that cast to
 * an empty array count as empty. Unlike empty(), values such as 0, false
 * or '0' are NOT considered empty.
 *
 * @param mixed $v
 * @return bool
 */
public static function isEmpty( $v ) {
	if ( $v === null ) {
		return true;
	}
	if ( is_string( $v ) ) {
		return strlen( $v ) === 0;
	}
	if ( is_array( $v ) ) {
		return count( $v ) === 0;
	}
	if ( is_object( $v ) ) {
		return count( (array)$v ) === 0;
	}
	return false;
}
485 | |
/**
 * Helper function to conditionally set a key in a dest array only if it
 * is defined in a source array. This is just to help DRY up what would
 * otherwise be a long series of
 * if ( isset($sourceArray[$key] )) { $destArray[$key] = $sourceArray[$key] }
 * statements. This also supports using a different key in the dest array,
 * as well as mapping the value when assigning to $destArray.
 *
 * Usage:
 * $arr1 = ['KEY1' => '123'];
 * $arr2 = [];
 *
 * setIfDefined($arr1, 'KEY1', $arr2, 'key1', 'intval');
 * // $arr2['key1'] is now set to 123 (integer value)
 *
 * setIfDefined($arr1, 'KEY2', $arr2);
 * // $arr2 stays the same, because $arr1 does not have 'KEY2' defined.
 *
 * @param array $sourceArray the array from which to look for $sourceKey
 * @param string $sourceKey the key to look for in $sourceArray
 * @param array &$destArray by reference destination array in which to set value if defined
 * @param string|null $destKey optional, key to use instead of $sourceKey in $destArray.
 *   A falsy value means $sourceKey is used.
 * @param callable|null $mapFn optional, If set, this will be called on the value before setting it.
 * @param bool $checkEmpty If false, emptiness of result after $mapFn is called will not be
 *   checked before setting on $destArray. If true, it will, using Util::isEmpty.
 *   Default: true
 * @return array $destArray, after the possible assignment
 */
public static function setIfDefined(
	array $sourceArray,
	$sourceKey,
	array &$destArray,
	$destKey = null,
	$mapFn = null,
	$checkEmpty = true
) {
	if ( !array_key_exists( $sourceKey, $sourceArray ) ) {
		return $destArray;
	}

	$value = $sourceArray[$sourceKey];
	if ( $mapFn !== null ) {
		$value = $mapFn( $value );
	}

	// Skip the assignment only when emptiness is checked and the
	// (mapped) value turns out to be empty.
	$skip = $checkEmpty && self::isEmpty( $value );
	if ( !$skip ) {
		$destArray[$destKey ?: $sourceKey] = $value;
	}
	return $destArray;
}
536 | |
/**
 * Get the stats factory to report metrics to. A NullStatsdDataFactory is
 * returned whenever MW_PHPUNIT_TEST is defined, otherwise the factory
 * provided by MediaWikiServices.
 *
 * @return IBufferingStatsdDataFactory
 */
public static function getStatsDataFactory(): IBufferingStatsdDataFactory {
	return defined( 'MW_PHPUNIT_TEST' )
		? new NullStatsdDataFactory()
		: MediaWikiServices::getInstance()->getStatsdDataFactory();
}
546 | |
/**
 * Guess whether a request comes from an automated client. A request is
 * flagged when one of its headers matches the regex configured for it in
 * CirrusSearchAutomationHeaderRegexes, or when its ip address belongs to
 * one of the ranges listed in CirrusSearchAutomationCIDRs.
 *
 * @param SearchConfig $config Configuration of the check
 * @param string $ip The address to check against, ipv4 or ipv6.
 * @param string[] $headers Map from http header name to value. All names must be uppercased.
 * @return bool True when the parameters appear to be a non-interactive use case.
 */
public static function looksLikeAutomation( SearchConfig $config, string $ip, array $headers ): bool {
	// Is there an http header that can be matched with regex to flag automation,
	// such as the user-agent or a flag applied by some infrastructure?
	$headerRegexes = $config->get( 'CirrusSearchAutomationHeaderRegexes' ) ?? [];
	foreach ( $headerRegexes as $name => $pattern ) {
		$name = strtoupper( $name );
		if ( isset( $headers[$name] ) ) {
			$matched = preg_match( $pattern, $headers[$name] );
			if ( $matched === 1 ) {
				return true;
			}
			if ( $matched === false ) {
				// preg_match() failed: report the broken pattern and carry on.
				LoggerFactory::getInstance( 'CirrusSearch' )->warning(
					"Invalid regex provided for header `$name` in `CirrusSearchAutomationHeaderRegexes`." );
			}
		}
	}

	// Does the ip address fall into a subnet known for automation?
	// When it does not, the default assumption is that requests are interactive.
	$ranges = $config->get( 'CirrusSearchAutomationCIDRs' );
	return (bool)IPUtils::isInRanges( $ip, $ranges );
}
580 | |
/**
 * If we're supposed to create raw result, output it and finish; otherwise
 * hand the result back.
 *
 * When an explain format is requested the result is first rendered with
 * ExplainPrinter. If dump-and-die is enabled, the rendered output (or the
 * pretty printed JSON encoding of the result when nothing was rendered)
 * is echoed with a matching Content-type header and the script exits.
 *
 * @param mixed $result Search result data
 * @param WebRequest $request Request context
 * @param CirrusDebugOptions $debugOptions
 * @return mixed $result unchanged, when the output is not dumped
 */
public static function processSearchRawReturn( $result, WebRequest $request,
	CirrusDebugOptions $debugOptions ) {
	$header = null;
	$output = null;
	if ( $debugOptions->getCirrusExplainFormat() !== null ) {
		$header = 'Content-type: text/html; charset=UTF-8';
		$printer = new ExplainPrinter( $debugOptions->getCirrusExplainFormat() );
		$output = $printer->format( $result );
	}

	// Dumping should always be enabled, except in the case of the test suite
	// which wants the actual objects returned.
	if ( !$debugOptions->isDumpAndDie() ) {
		return $result;
	}

	if ( $output === null ) {
		$header = 'Content-type: application/json; charset=UTF-8';
		$output = $result === null ? '{}' : json_encode( $result, JSON_PRETTY_PRINT );
	}

	// When dumping the query we skip _everything_ but echoing the query.
	\RequestContext::getMain()->getOutput()->disable();
	// @phan-suppress-next-line PhanTypeMismatchArgumentNullable $header can't be null here
	$request->response()->header( $header );
	echo $output;
	exit();
}
621 | } |