Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
45.69% |
90 / 197 |
|
22.73% |
5 / 22 |
CRAP | |
0.00% |
0 / 1 |
| Util | |
45.69% |
90 / 197 |
|
22.73% |
5 / 22 |
1105.49 | |
0.00% |
0 / 1 |
| getNamespaceText | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
| recordPoolStats | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
12 | |||
| wrapWithPoolStats | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
2 | |||
| doPoolCounterWork | |
0.00% |
0 / 21 |
|
0.00% |
0 / 1 |
20 | |||
| parsePotentialPercent | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
| parseSettingsInMessage | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
| overrideYesNo | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
| overrideNumeric | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
72 | |||
| getDefaultBoostTemplates | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
3 | |||
| getOnWikiBoostTemplates | |
90.91% |
20 / 22 |
|
0.00% |
0 / 1 |
5.02 | |||
| stripQuestionMarks | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
7 | |||
| getExecutionId | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
| resetExecutionId | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| getRequestSetToken | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
6 | |||
| generateIdentToken | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
6 | |||
| getExecutionContext | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
20 | |||
| identifyNamespace | |
95.24% |
20 / 21 |
|
0.00% |
0 / 1 |
6 | |||
| isEmpty | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
7 | |||
| setIfDefined | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
6 | |||
| getStatsFactory | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| looksLikeAutomation | |
80.00% |
12 / 15 |
|
0.00% |
0 / 1 |
6.29 | |||
| processSearchRawReturn | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
30 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace CirrusSearch; |
| 4 | |
| 5 | use MediaWiki\Context\RequestContext; |
| 6 | use MediaWiki\Exception\MWException; |
| 7 | use MediaWiki\Language\Language; |
| 8 | use MediaWiki\Logger\LoggerFactory; |
| 9 | use MediaWiki\MediaWikiServices; |
| 10 | use MediaWiki\PoolCounter\PoolCounterWorkViaCallback; |
| 11 | use MediaWiki\Request\WebRequest; |
| 12 | use MediaWiki\Status\Status; |
| 13 | use MediaWiki\Title\Title; |
| 14 | use MediaWiki\User\UserIdentity; |
| 15 | use MediaWiki\WikiMap\WikiMap; |
| 16 | use Wikimedia\Assert\Assert; |
| 17 | use Wikimedia\IPUtils; |
| 18 | use Wikimedia\Stats\StatsFactory; |
| 19 | |
/**
 * Random utility functions that don't have a better home
 *
 * @license GPL-2.0-or-later
 */
class Util {
	/**
	 * Per-process cache of the local wiki's on-wiki boost templates,
	 * filled by getOnWikiBoostTemplates().
	 *
	 * @var array|null boost templates, null until first loaded
	 */
	private static $defaultBoostTemplates = null;

	/**
	 * @var string|null Id identifying this php execution
	 */
	private static $executionId;

	/**
	 * Get the textual representation of a namespace with underscores stripped, varying
	 * by gender if need be (using Title::getNsText()).
	 *
	 * @param Title $title The page title to use
	 * @return string|false The namespace text with '_' replaced by ' '; any
	 *  non-string value returned by Title::getNsText() is passed through untouched.
	 */
	public static function getNamespaceText( Title $title ) {
		$ret = $title->getNsText();
		return is_string( $ret ) ? strtr( $ret, '_', ' ' ) : $ret;
	}

	/**
	 * Record one observation of the "pool_counter_seconds" timing metric,
	 * labelled with the pool counter type and the outcome.
	 *
	 * @param string $type The pool counter type, such as CirrusSearch-Search. Everything
	 *  up to and including the first '-' is dropped before it is used as a label.
	 * @param bool $isSuccess If the pool counter gave a success, or failed the request
	 * @param float $observation Elapsed time handed to the timing metric;
	 *  wrapWithPoolStats() passes milliseconds.
	 * @return void
	 */
	private static function recordPoolStats( string $type, bool $isSuccess, float $observation ): void {
		$pos = strpos( $type, '-' );
		if ( $pos !== false ) {
			// "CirrusSearch-Search" is reported as "Search"
			$type = substr( $type, $pos + 1 );
		}
		self::getStatsFactory()
			->getTiming( "pool_counter_seconds" )
			->setLabel( "type", $type )
			->setLabel( "status", $isSuccess ? "success" : "failure" )
			->observe( $observation );
	}

	/**
	 * Decorate a callback so that invoking it first records how long it took
	 * to get from $startPoolWork to the invocation, then delegates to the
	 * original callback with the same arguments.
	 *
	 * @param float $startPoolWork The time this pool request started, from microtime( true )
	 * @param string $type The pool counter type, such as CirrusSearch-Search
	 * @param bool $isSuccess If the pool counter gave a success, or failed the request
	 * @param callable $callback The function to wrap
	 * @return callable The original callback wrapped to collect pool counter stats
	 */
	private static function wrapWithPoolStats( $startPoolWork,
		$type,
		$isSuccess,
		callable $callback
	) {
		return static function ( ...$args ) use ( $type, $isSuccess, $callback, $startPoolWork ) {
			// microtime( true ) is in seconds; the metric is fed milliseconds.
			self::recordPoolStats(
				$type,
				$isSuccess,
				1000 * ( microtime( true ) - $startPoolWork ) );

			return $callback( ...$args );
		};
	}

	/**
	 * Wraps the complex pool counter interface to force the single call pattern
	 * that Cirrus always uses.
	 *
	 * @param string $type same as type parameter on PoolCounter::factory
	 * @param UserIdentity|null $user When falsy the pool counter is bypassed and
	 *  $workCallback is invoked directly.
	 * @param callable $workCallback callback when pool counter is acquired. Called with
	 *  no parameters.
	 * @param string|null $busyErrorMsg The i18n key to return when the queue
	 *  is full, or null to use the default.
	 * @return mixed The return value of $workCallback, or a fatal Status built by
	 *  the error handler when the pool counter reports an error.
	 */
	public static function doPoolCounterWork( $type, $user, $workCallback, $busyErrorMsg = null ) {
		global $wgCirrusSearchPoolCounterKey;

		// By default the pool counter allows you to lock the same key with
		// multiple types. That might be useful but it isn't how Cirrus thinks.
		// Instead, all keys are scoped to their type.

		if ( !$user ) {
			// We don't want to even use the pool counter if there isn't a user.
			// Note that anonymous users are still users, this is most likely
			// maintenance scripts.
			// @todo Maintenance scripts and jobs should already override
			// poolcounters as necessary, can this be removed?
			return $workCallback();
		}

		$key = "$type:$wgCirrusSearchPoolCounterKey";

		$errorCallback = static function ( Status $status ) use ( $key, $busyErrorMsg ) {
			$error = $status->getMessages()[0]->getKey();

			LoggerFactory::getInstance( 'CirrusSearch' )->warning(
				"Pool error on {key}: {error}",
				[ 'key' => $key, 'error' => $error ]
			);
			if ( $error === 'pool-queuefull' ) {
				return Status::newFatal( $busyErrorMsg ?: 'cirrussearch-too-busy-error' );
			}
			return Status::newFatal( 'cirrussearch-backend-error' );
		};

		// wrap some stats collection on the success/failure handlers
		$startPoolWork = microtime( true );
		$workCallback = self::wrapWithPoolStats( $startPoolWork, $type, true, $workCallback );
		$errorCallback = self::wrapWithPoolStats( $startPoolWork, $type, false, $errorCallback );

		$work = new PoolCounterWorkViaCallback( $type, $key, [
			'doWork' => $workCallback,
			'error' => $errorCallback,
		] );
		return $work->execute();
	}

	/**
	 * Parse a number that may be written as a percentage.
	 *
	 * The leading numeric part is read with floatval(); if the string contains
	 * a '%' anywhere the value is divided by 100 ("50%" gives 0.5).
	 *
	 * @param string $str
	 * @return float
	 */
	public static function parsePotentialPercent( $str ) {
		$result = floatval( $str );
		return str_contains( $str, '%' ) ? $result / 100 : $result;
	}

	/**
	 * Parse a message content into an array. This function is generally used to
	 * parse settings stored as i18n messages (see cirrussearch-boost-templates).
	 *
	 * Everything from a '#' to the end of its line is treated as a comment.
	 * Lines that are empty (or otherwise falsy, such as "0") after trimming
	 * are dropped. The original line indexes are kept as array keys.
	 *
	 * @param string $message
	 * @return string[]
	 */
	public static function parseSettingsInMessage( $message ) {
		$lines = explode( "\n", $message );
		$lines = preg_replace( '/#.*$/', '', $lines ); // Remove comments
		$lines = array_map( 'trim', $lines ); // Remove extra spaces
		$lines = array_filter( $lines ); // Remove empty lines
		return $lines;
	}

	/**
	 * Set $dest to the true/false from $request->getVal( $name ) if yes/no.
	 * $dest is left untouched when the request does not carry $name.
	 *
	 * @param mixed &$dest
	 * @param WebRequest $request
	 * @param string $name
	 */
	public static function overrideYesNo( &$dest, $request, $name ) {
		$val = $request->getVal( $name );
		if ( $val !== null ) {
			$dest = wfStringToBool( $val );
		}
	}

	/**
	 * Set $dest to the numeric value from $request->getVal( $name ) if it is <= $limit,
	 * or >= $limit if $upperLimit is false. Without a $limit any numeric value is
	 * accepted. $dest is left untouched when the value is missing, not numeric
	 * or outside the limit.
	 *
	 * @param mixed &$dest Receives the request value as returned by getVal(), uncast
	 * @param WebRequest $request
	 * @param string $name
	 * @param int|null $limit
	 * @param bool $upperLimit true: $limit is a maximum; false: $limit is a minimum
	 */
	public static function overrideNumeric( &$dest, $request, $name, $limit = null, $upperLimit = true ) {
		$val = $request->getVal( $name );
		// is_numeric() is false for null, so a missing parameter stops here too.
		if ( !is_numeric( $val ) ) {
			return;
		}
		if ( $limit === null || ( $upperLimit ? $val <= $limit : $val >= $limit ) ) {
			$dest = $val;
		}
	}

	/**
	 * Get boost templates configured in messages.
	 *
	 * Falls back to the CirrusSearchBoostTemplates setting when on-wiki templates
	 * are disabled through CirrusSearchIgnoreOnWikiBoostTemplates, or when no
	 * on-wiki templates are available.
	 *
	 * @param SearchConfig|null $config Search config requesting the templates
	 * @return float[]
	 */
	public static function getDefaultBoostTemplates( ?SearchConfig $config = null ) {
		$config ??= MediaWikiServices::getInstance()->getConfigFactory()->makeConfig( 'CirrusSearch' );

		$fromConfig = $config->get( 'CirrusSearchBoostTemplates' );
		if ( $config->get( 'CirrusSearchIgnoreOnWikiBoostTemplates' ) ) {
			// on wiki messages disabled, we can return this config
			// directly
			return $fromConfig;
		}

		$fromMessage = self::getOnWikiBoostTemplates( $config );
		if ( !$fromMessage ) {
			// the onwiki config is empty (or unknown for non-local
			// config), we can fallback to templates from config
			return $fromConfig;
		}
		return $fromMessage;
	}

	/**
	 * Load and cache boost templates configured on wiki via the system
	 * message 'cirrussearch-boost-templates'.
	 * If called from the local wiki the message will be cached.
	 * If called from a non local wiki an attempt to fetch this data from the cache is made.
	 * If an empty array is returned it means that no config is available on wiki
	 * or the value possibly unknown if run from a non local wiki.
	 *
	 * @param SearchConfig $config
	 * @return float[] indexed by template name
	 */
	private static function getOnWikiBoostTemplates( SearchConfig $config ) {
		$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
		$cacheKey = $cache->makeGlobalKey( 'cirrussearch-boost-templates', $config->getWikiId() );
		if ( $config->getWikiId() == WikiMap::getCurrentWikiId() ) {
			// Local wiki we can fetch boost templates from system
			// message
			if ( self::$defaultBoostTemplates !== null ) {
				// This static cache is never set with non-local
				// wiki data.
				return self::$defaultBoostTemplates;
			}

			$templates = $cache->getWithSetCallback(
				$cacheKey,
				600,
				static function () {
					$source = wfMessage( 'cirrussearch-boost-templates' )->inContentLanguage();
					if ( !$source->isDisabled() ) {
						$lines = Util::parseSettingsInMessage( $source->plain() );
						// Now parse the templates
						return Query\BoostTemplatesFeature::parseBoostTemplates( implode( ' ', $lines ) );
					}
					return [];
				}
			);
			self::$defaultBoostTemplates = $templates;
			return $templates;
		}
		// Here we're dealing with boost template from other wiki, try to fetch it if it exists
		// otherwise, don't bother.
		$nonLocalCache = $cache->get( $cacheKey );
		if ( !is_array( $nonLocalCache ) ) {
			// not yet in cache, value is unknown
			// return empty array
			return [];
		}
		return $nonLocalCache;
	}

	/**
	 * Strip question marks from queries, according to the defined stripping
	 * level, defined by $wgCirrusSearchStripQuestionMarks. Strip all ?s, those
	 * at word breaks, or only string-final. Ignore queries that are all
	 * punctuation or that contain "insource:/" or "intitle:/". Don't remove
	 * escaped \?s, but unescape them.
	 *
	 * @param string $term
	 * @param string $strippingLevel Either "all", "break", "final", or "none"
	 * @return string modified term, based on strippingLevel; any other level
	 *  returns $term unchanged
	 */
	public static function stripQuestionMarks( $term, $strippingLevel ) {
		if ( str_contains( $term, 'insource:/' ) ||
			str_contains( $term, 'intitle:/' ) ||
			preg_match( '/^[\p{P}\p{Z}]+$/u', $term )
		) {
			// Regex queries and punctuation-only queries are left alone.
			return $term;
		}

		// FIXME: get rid of negative lookbehinds on (?<!\\\\)
		// it may improperly transform \\? into \? instead of \\ and destroy properly escaped \
		if ( $strippingLevel === 'final' ) {
			// strip only query-final question marks that are not escaped
			$term = preg_replace( "/((?<!\\\\)\?|\s)+$/", '', $term );
			$term = preg_replace( '/\\\\\?/', '?', $term );
		} elseif ( $strippingLevel === 'break' ) {
			// strip question marks at word boundaries
			$term = preg_replace( '/(?<!\\\\)\?+(\PL|$)/', '$1', $term );
			$term = preg_replace( '/\\\\\?/', '?', $term );
		} elseif ( $strippingLevel === 'all' ) {
			// strip all unescaped question marks
			$term = preg_replace( '/(?<!\\\\)\?+/', ' ', $term );
			$term = preg_replace( '/\\\\\?/', '?', $term );
		}
		return $term;
	}

	/**
	 * Identifies a specific execution of php. That might be one web
	 * request, or multiple jobs run in the same executor. An execution id
	 * is valid over a brief timespan, perhaps a minute or two for some jobs.
	 *
	 * The id is generated with mt_rand() on first use and then reused until
	 * resetExecutionId() is called.
	 *
	 * @return string unique identifier
	 */
	public static function getExecutionId() {
		self::$executionId ??= (string)mt_rand();
		return self::$executionId;
	}

	/**
	 * Forget the current execution id so the next getExecutionId() call
	 * generates a fresh one. Unit tests only.
	 */
	public static function resetExecutionId() {
		self::$executionId = null;
	}

	/**
	 * Get a token that (hopefully) uniquely identifies this search. It will be
	 * added to the search result page js config vars, and put into the url with
	 * history.replaceState(). This means click through's from supported browsers
	 * will record this token as part of the referrer.
	 *
	 * The token is generated once and then reused for every later call.
	 *
	 * @return string
	 */
	public static function getRequestSetToken() {
		static $token;
		if ( $token === null ) {
			// random UID, 70B tokens have a collision probability of 4*10^-16
			// so should work for marking unique queries.
			$gen = MediaWikiServices::getInstance()->getGlobalIdGenerator();
			$uuid = $gen->newUUIDv4();
			// make it a little shorter by using straight base36
			// The substr() calls skip offsets 8, 13, 18 and 23 of the UUID string.
			$hex = substr( $uuid, 0, 8 ) . substr( $uuid, 9, 4 ) .
				substr( $uuid, 14, 4 ) . substr( $uuid, 19, 4 ) .
				substr( $uuid, 24 );
			$token = \Wikimedia\base_convert( $hex, 16, 36 );
		}
		return $token;
	}

	/**
	 * Build an md5 hash out of the requester's IP address, X-Forwarded-For
	 * header and User-Agent header, plus any caller supplied data.
	 *
	 * @param string $extraData Extra information to mix into the hash
	 * @return string A token that identifies the source of the request
	 */
	public static function generateIdentToken( $extraData = '' ) {
		$request = RequestContext::getMain()->getRequest();
		try {
			$ip = $request->getIP();
		} catch ( MWException ) {
			// No ip, probably running cli?
			$ip = 'unknown';
		}
		return md5( implode( ':', [
			$extraData,
			$ip,
			$request->getHeader( 'X-Forwarded-For' ),
			$request->getHeader( 'User-Agent' ),
		] ) );
	}

	/**
	 * @return string The context the request is in. Either cli, api, web or misc.
	 */
	public static function getExecutionContext() {
		if ( PHP_SAPI === 'cli' ) {
			return 'cli';
		} elseif ( MW_ENTRY_POINT == 'api' ) {
			return 'api';
		} elseif ( MW_ENTRY_POINT == 'index' ) {
			return 'web';
		} else {
			return 'misc';
		}
	}

	/**
	 * Identify a namespace by attempting some unicode folding techniques.
	 * 2 methods supported:
	 * - naive: case folding + naive accents removal (only some combined accents are removed)
	 * - utr30: (slow to load) case folding + strong accent squashing based on the withdrawn UTR30 specs
	 * all methods will apply something similar to near space flattener.
	 *
	 * The transliterators are created lazily and kept for later calls. Any other
	 * $method leaves no transliterator available and fails the postcondition.
	 *
	 * @param string $namespace name of the namespace to identify
	 * @param string $method either naive or utr30
	 * @param Language|null $language Language providing the namespace names;
	 *  defaults to the content language
	 * @return bool|int The namespace id, or false when nothing matches or the
	 *  normalized name is empty
	 */
	public static function identifyNamespace( $namespace, $method = 'naive', ?Language $language = null ) {
		static $naive = null;
		static $utr30 = null;

		$normalizer = null;
		if ( $method === 'naive' ) {
			$naive ??= \Transliterator::createFromRules(
				'::NFD;::Upper;::Lower;::[:Nonspacing Mark:] Remove;::NFC;[\_\-\'\u2019\u02BC]>\u0020;'
			);
			$normalizer = $naive;
		} elseif ( $method === 'utr30' ) {
			$utr30 ??= \Transliterator::createFromRules( file_get_contents( __DIR__ . '/../data/utr30.txt' ) );
			$normalizer = $utr30;
		}

		Assert::postcondition( $normalizer !== null,
			'Failed to load Transliterator with method ' . $method );
		$namespace = $normalizer->transliterate( $namespace );
		if ( $namespace === '' ) {
			return false;
		}
		$language ??= MediaWikiServices::getInstance()->getContentLanguage();
		// Compare the normalized input against every normalized namespace name.
		foreach ( $language->getNamespaceIds() as $candidate => $nsId ) {
			if ( $normalizer->transliterate( $candidate ) === $namespace ) {
				return $nsId;
			}
		}

		return false;
	}

	/**
	 * Helper for PHP's annoying emptiness check.
	 * empty(0) should not be true!
	 * empty(false) should not be true!
	 * Empty arrays, strings, and nulls/undefined count as empty, as do
	 * objects whose array cast has no entries.
	 *
	 * False otherwise.
	 * @param mixed $v
	 * @return bool
	 */
	public static function isEmpty( $v ) {
		return ( is_array( $v ) && count( $v ) === 0 ) ||
			( is_object( $v ) && count( (array)$v ) === 0 ) ||
			( is_string( $v ) && strlen( $v ) === 0 ) ||
			( $v === null );
	}

	/**
	 * Helper function to conditionally set a key in a dest array only if it
	 * is defined in a source array. This is just to help DRY up what would
	 * otherwise could be a long series of
	 * if ( isset($sourceArray[$key] )) { $destArray[$key] = $sourceArray[$key] }
	 * statements. This also supports using a different key in the dest array,
	 * as well as mapping the value when assigning to $destArray.
	 *
	 * Usage:
	 * $arr1 = ['KEY1' => '123'];
	 * $arr2 = [];
	 *
	 * setIfDefined($arr1, 'KEY1', $arr2, 'key1', 'intval');
	 * // $arr2['key1'] is now set to 123 (integer value)
	 *
	 * setIfDefined($arr1, 'KEY2', $arr2);
	 * // $arr2 stays the same, because $arr1 does not have 'KEY2' defined.
	 *
	 * @param array $sourceArray the array from which to look for $sourceKey
	 * @param string $sourceKey the key to look for in $sourceArray
	 * @param array &$destArray by reference destination array in which to set value if defined
	 * @param string|null $destKey optional, key to use instead of $sourceKey in $destArray.
	 *  null or '' means "use $sourceKey"; any other key, including '0', is honoured.
	 * @param callable|null $mapFn optional, If set, this will be called on the value before setting it.
	 * @param bool $checkEmpty If false, emptiness of result after $mapFn is called will not be
	 *  checked before setting on $destArray. If true, it will, using Util::isEmpty.
	 *  Default: true
	 * @return array The (possibly updated) $destArray
	 */
	public static function setIfDefined(
		array $sourceArray,
		$sourceKey,
		array &$destArray,
		$destKey = null,
		$mapFn = null,
		$checkEmpty = true
	) {
		if ( array_key_exists( $sourceKey, $sourceArray ) ) {
			$val = $sourceArray[$sourceKey];
			if ( $mapFn !== null ) {
				$val = $mapFn( $val );
			}
			// Only set in $destArray if we are not checking emptiness,
			// or if we are and the $val is not empty.
			if ( !$checkEmpty || !self::isEmpty( $val ) ) {
				// Compare explicitly rather than by truthiness so that a
				// destination key of '0' is not mistaken for "no key given".
				$key = ( $destKey === null || $destKey === '' ) ? $sourceKey : $destKey;
				$destArray[$key] = $val;
			}
		}
		return $destArray;
	}

	/**
	 * @return StatsFactory prefixed with the "CirrusSearch" component
	 */
	public static function getStatsFactory(): StatsFactory {
		return MediaWikiServices::getInstance()->getStatsFactory()->withComponent( "CirrusSearch" );
	}

	/**
	 * Guess whether a request comes from automation, based on configured
	 * header regexes (CirrusSearchAutomationHeaderRegexes) and configured
	 * address ranges (CirrusSearchAutomationCIDRs).
	 *
	 * @param SearchConfig $config Configuration of the check
	 * @param string $ip The address to check against, ipv4 or ipv6.
	 * @param string[] $headers Map from http header name to value. All names must be uppercased.
	 * @return bool True when the parameters appear to be a non-interactive use case.
	 */
	public static function looksLikeAutomation( SearchConfig $config, string $ip, array $headers ): bool {
		// Is there an http header that can be matched with regex to flag automation,
		// such as the user-agent or a flag applied by some infrastructure?
		$automationHeaders = $config->get( 'CirrusSearchAutomationHeaderRegexes' ) ?? [];
		foreach ( $automationHeaders as $name => $pattern ) {
			// Configured names may be in any case; $headers keys are uppercased.
			$name = strtoupper( $name );
			if ( !isset( $headers[$name] ) ) {
				continue;
			}
			$ret = preg_match( $pattern, $headers[$name] );
			if ( $ret === 1 ) {
				return true;
			} elseif ( $ret === false ) {
				// A broken pattern is reported and skipped, it never flags the request.
				LoggerFactory::getInstance( 'CirrusSearch' )->warning(
					"Invalid regex provided for header `$name` in `CirrusSearchAutomationHeaderRegexes`." );
			}
		}

		// Does the ip address fall into a subnet known for automation?
		// Guard against an unset value the same way the header regexes are guarded.
		$ranges = $config->get( 'CirrusSearchAutomationCIDRs' ) ?? [];
		if ( IPUtils::isInRanges( $ip, $ranges ) ) {
			return true;
		}

		// Default assumption that requests are interactive
		return false;
	}

	/**
	 * If we're supposed to create raw result, create and return it,
	 * or output it and finish.
	 *
	 * When the debug options ask for dump-and-die, the result is echoed
	 * (as explain HTML if an explain format is set, otherwise as pretty
	 * printed JSON) and the script exits. Otherwise $result is returned as is.
	 *
	 * @template T the type of the result passed and the return value of this function
	 *
	 * @param T $result Search result data
	 * @param WebRequest $request Request context
	 * @param CirrusDebugOptions $debugOptions
	 * @return T
	 */
	public static function processSearchRawReturn( $result, WebRequest $request,
		CirrusDebugOptions $debugOptions ) {
		$output = null;
		$header = null;
		if ( $debugOptions->getCirrusExplainFormat() !== null ) {
			$header = 'Content-type: text/html; charset=UTF-8';
			$printer = new ExplainPrinter( $debugOptions->getCirrusExplainFormat() );
			$output = $printer->format( $result );
		}

		// This should always be true, except in the case of the test suite which wants the actual
		// objects returned.
		if ( $debugOptions->isDumpAndDie() ) {
			if ( $output === null ) {
				$header = 'Content-type: application/json; charset=UTF-8';
				if ( $result === null ) {
					$output = '{}';
				} else {
					$output = json_encode( $result, JSON_PRETTY_PRINT );
				}
			}

			// When dumping the query we skip _everything_ but echoing the query.
			RequestContext::getMain()->getOutput()->disable();
			// @phan-suppress-next-line PhanTypeMismatchArgumentNullable $header can't be null here
			$request->response()->header( $header );
			echo $output;
			exit();
		}

		return $result;
	}
}