Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
96.81% |
303 / 313 |
|
57.14% |
4 / 7 |
CRAP | |
0.00% |
0 / 1 |
| LazyVariableComputer | |
96.81% |
303 / 313 |
|
57.14% |
4 / 7 |
79 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| compute | |
97.61% |
245 / 251 |
|
0.00% |
0 / 1 |
64 | |||
| getLinksFromDB | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
2 | |||
| getLastPageAuthors | |
96.97% |
32 / 33 |
|
0.00% |
0 / 1 |
4 | |||
| getRevisionFromParameters | |
70.00% |
7 / 10 |
|
0.00% |
0 / 1 |
5.68 | |||
| getContentModelFromRevision | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| parseNonEditWikitext | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
2 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace MediaWiki\Extension\AbuseFilter\Variables; |
| 4 | |
| 5 | use InvalidArgumentException; |
| 6 | use MediaWiki\Content\ContentHandler; |
| 7 | use MediaWiki\Content\TextContent; |
| 8 | use MediaWiki\Extension\AbuseFilter\Hooks\AbuseFilterHookRunner; |
| 9 | use MediaWiki\Extension\AbuseFilter\Parser\AFPData; |
| 10 | use MediaWiki\Extension\AbuseFilter\TextExtractor; |
| 11 | use MediaWiki\ExternalLinks\ExternalLinksLookup; |
| 12 | use MediaWiki\ExternalLinks\LinkFilter; |
| 13 | use MediaWiki\Language\Language; |
| 14 | use MediaWiki\Page\PageIdentity; |
| 15 | use MediaWiki\Page\WikiPage; |
| 16 | use MediaWiki\Parser\ParserFactory; |
| 17 | use MediaWiki\Parser\ParserOptions; |
| 18 | use MediaWiki\Permissions\PermissionManager; |
| 19 | use MediaWiki\Permissions\RestrictionStore; |
| 20 | use MediaWiki\RecentChanges\RecentChange; |
| 21 | use MediaWiki\Revision\RevisionLookup; |
| 22 | use MediaWiki\Revision\RevisionRecord; |
| 23 | use MediaWiki\Revision\RevisionStore; |
| 24 | use MediaWiki\Revision\SlotRecord; |
| 25 | use MediaWiki\Storage\PreparedUpdate; |
| 26 | use MediaWiki\Title\Title; |
| 27 | use MediaWiki\User\ExternalUserNames; |
| 28 | use MediaWiki\User\User; |
| 29 | use MediaWiki\User\UserEditTracker; |
| 30 | use MediaWiki\User\UserGroupManager; |
| 31 | use MediaWiki\User\UserIdentity; |
| 32 | use MediaWiki\User\UserIdentityUtils; |
| 33 | use MediaWiki\User\UserNameUtils; |
| 34 | use Psr\Log\LoggerInterface; |
| 35 | use stdClass; |
| 36 | use StringUtils; |
| 37 | use UnexpectedValueException; |
| 38 | use Wikimedia\Diff\Diff; |
| 39 | use Wikimedia\Diff\UnifiedDiffFormatter; |
| 40 | use Wikimedia\IPUtils; |
| 41 | use Wikimedia\ObjectCache\WANObjectCache; |
| 42 | use Wikimedia\Rdbms\Database; |
| 43 | use Wikimedia\Rdbms\LBFactory; |
| 44 | use Wikimedia\Rdbms\SelectQueryBuilder; |
| 45 | |
| 46 | /** |
| 47 | * Service used to compute lazy-loaded variables. |
| 48 | * @internal |
| 49 | */ |
| 50 | class LazyVariableComputer { |
| 51 | public const SERVICE_NAME = 'AbuseFilterLazyVariableComputer'; |
| 52 | |
| 53 | /** |
| 54 | * @var float The amount of time to subtract from profiling |
| 55 | * @todo This is a hack |
| 56 | */ |
| 57 | public static $profilingExtraTime = 0; |
| 58 | |
/**
 * Store the collaborating services as promoted, read-only private properties.
 * Nothing else happens at construction time.
 *
 * @param TextExtractor $textExtractor Turns revisions into text (pst-from-update, revision-text)
 * @param AbuseFilterHookRunner $hookRunner Runs the interceptVariable and computeVariable hooks
 * @param LoggerInterface $logger Receives debug messages about where links are loaded from
 * @param LBFactory $lbFactory Database access for external links and recent page authors
 * @param WANObjectCache $wanCache Caches the list of recent page authors
 * @param RevisionLookup $revisionLookup Finds first, previous and specific revisions
 * @param RevisionStore $revisionStore Provides the revision query info for the authors query
 * @param Language $contentLanguage Source of the code returned for get-wiki-language
 * @param ParserFactory $parserFactory Parser used for wikitext that is not part of an edit
 * @param UserEditTracker $userEditTracker Source of user edit counts
 * @param UserGroupManager $userGroupManager Source of effective user groups
 * @param PermissionManager $permissionManager Source of user permissions
 * @param RestrictionStore $restrictionStore Source of page restrictions
 * @param UserIdentityUtils $userIdentityUtils Temp/named account checks
 * @param UserNameUtils $userNameUtils Checks whether a username is creatable
 * @param string $wikiID Value returned for get-wiki-name
 */
public function __construct(
	private readonly TextExtractor $textExtractor,
	private readonly AbuseFilterHookRunner $hookRunner,
	private readonly LoggerInterface $logger,
	private readonly LBFactory $lbFactory,
	private readonly WANObjectCache $wanCache,
	private readonly RevisionLookup $revisionLookup,
	private readonly RevisionStore $revisionStore,
	private readonly Language $contentLanguage,
	private readonly ParserFactory $parserFactory,
	private readonly UserEditTracker $userEditTracker,
	private readonly UserGroupManager $userGroupManager,
	private readonly PermissionManager $permissionManager,
	private readonly RestrictionStore $restrictionStore,
	private readonly UserIdentityUtils $userIdentityUtils,
	private readonly UserNameUtils $userNameUtils,
	private readonly string $wikiID,
) {}
| 78 | |
/**
 * Resolve a lazy-loaded variable to its concrete value.
 *
 * The AbuseFilter_interceptVariable hook is consulted first; when it returns false, whatever it
 * left in the result is returned right away. Otherwise the computation method named by the
 * variable selects one of the built-in strategies below, and methods unknown to this class are
 * handed to the AbuseFilter_computeVariable hook.
 *
 * XXX: $getVarCB is a hack to hide the cyclic dependency with VariablesManager. See T261069 for possible
 * solutions. This might also be merged into VariablesManager, but it would bring a ton of dependencies.
 * @todo Should we remove $vars parameter (check hooks)?
 *
 * @param LazyLoadedVariable $var Supplies the computation method and its parameters
 * @param VariableHolder $vars Only passed on to the hooks
 * @param callable $getVarCB Returns the value of another variable, given its name
 * @phan-param callable(string $name):AFPData $getVarCB
 * @return AFPData The computed value, wrapped with AFPData::newFromPHPVar() unless it already is an AFPData
 * @throws UnexpectedValueException If the method is unknown here and the computeVariable hook
 *   returns a truthy value
 */
public function compute( LazyLoadedVariable $var, VariableHolder $vars, callable $getVarCB ) {
	$params = $var->getParameters();
	$method = $var->getMethod();
	$result = null;

	// A false return means a handler took over the computation and left its answer in $result.
	$proceed = $this->hookRunner->onAbuseFilter_interceptVariable( $method, $vars, $params, $result );
	if ( !$proceed ) {
		return $result instanceof AFPData ? $result : AFPData::newFromPHPVar( $result );
	}

	switch ( $method ) {
		case 'diff':
			$oldName = $params['oldtext-var'];
			$newName = $params['newtext-var'];
			$oldText = $getVarCB( $oldName )->toString();
			$newText = $getVarCB( $newName )->toString();
			// T74329: if there's no text, don't return an array with the empty string
			$oldLines = [];
			if ( $oldText !== '' ) {
				$oldLines = explode( "\n", $oldText );
			}
			$newLines = [];
			if ( $newText !== '' ) {
				$newLines = explode( "\n", $newText );
			}
			$lineDiff = new Diff( $oldLines, $newLines );
			$result = ( new UnifiedDiffFormatter() )->format( $lineDiff );
			break;
		case 'diff-split':
			$diffText = $getVarCB( $params['diff-var'] )->toString();
			$prefix = $params['line-prefix'];
			// Keep the lines whose first character is the requested prefix, minus that character
			$wanted = array_filter(
				explode( "\n", $diffText ),
				static fn ( $line ) => ( $line[0] ?? '' ) === $prefix
			);
			$result = array_map(
				static fn ( $line ) => substr( $line, 1 ),
				array_values( $wanted )
			);
			break;
		case 'array-diff':
			$baseName = $params['base-var'];
			$minusName = $params['minus-var'];

			$base = $getVarCB( $baseName )->toNative();
			$minus = $getVarCB( $minusName )->toNative();

			$result = array_diff( $base, $minus );
			break;
		case 'links-from-wikitext':
			// This should ONLY be used when sharing a parse operation with the edit.

			/** @var WikiPage $wikiPage */
			$wikiPage = $params['article'];
			if ( $wikiPage->getContentModel() === CONTENT_MODEL_WIKITEXT ) {
				// Shared with the edit, don't count it in profiling
				$startedAt = microtime( true );
				$textName = $params['text-var'];

				$newWikitext = $getVarCB( $textName )->toString();
				$newContent = ContentHandler::makeContent( $newWikitext, $wikiPage->getTitle() );
				$prepared = $wikiPage->prepareContentForEdit(
					$newContent,
					null,
					$params['contextUserIdentity']
				);
				$externalLinks = array_keys( $prepared->output->getExternalLinks() );
				$result = LinkFilter::getIndexedUrlsNonReversed( $externalLinks );
				self::$profilingExtraTime += ( microtime( true ) - $startedAt );
				break;
			}
			// Not wikitext: fall through to the lookup below
		case 'links-from-wikitext-or-database':
			// TODO: use Content object instead, if available!
			// $wikiPage is already set when we got here by falling through from the case above
			/** @var WikiPage $wikiPage */
			$wikiPage ??= $params['article'];

			// this inference is ugly, but the name isn't accessible from here
			// and we only want this for debugging
			$textName = $params['text-var'];
			$label = str_starts_with( $textName, 'old_' ) ? 'old_links' : 'new_links';
			if ( $params['forFilter'] ?? false ) {
				$this->logger->debug( "Loading {$label} from DB" );
				$result = $this->getLinksFromDB( $wikiPage );
			} elseif ( $wikiPage->getContentModel() === CONTENT_MODEL_WIKITEXT ) {
				$this->logger->debug( "Loading {$label} from Parser" );

				$parsed = $this->parseNonEditWikitext(
					$getVarCB( $textName )->toString(),
					$wikiPage,
					$params['contextUserIdentity']
				);
				$externalLinks = array_keys( $parsed->output->getExternalLinks() );
				$result = LinkFilter::getIndexedUrlsNonReversed( $externalLinks );
			} else {
				// TODO: Get links from Content object. But we don't have the content object.
				// And for non-text content, the text is usually not going to be a valid
				// serialization, but rather some dummy text for filtering.
				$result = [];
			}
			break;
		case 'links-from-update':
			/** @var PreparedUpdate $preparedUpdate */
			$preparedUpdate = $params['update'];
			// Shared with the edit, don't count it in profiling
			$startedAt = microtime( true );
			$externalLinks = array_keys( $preparedUpdate->getParserOutputForMetaData()->getExternalLinks() );
			$result = LinkFilter::getIndexedUrlsNonReversed( $externalLinks );
			self::$profilingExtraTime += ( microtime( true ) - $startedAt );
			break;
		case 'links-from-database':
			/** @var PageIdentity $page */
			$page = $params['article'];
			$this->logger->debug( 'Loading old_links from DB' );
			$result = $this->getLinksFromDB( $page );
			break;
		case 'parse-wikitext':
			// Should ONLY be used when sharing a parse operation with the edit.
			// TODO: use Content object instead, if available!
			/** @var WikiPage $wikiPage */
			$wikiPage = $params['article'];
			if ( $wikiPage->getContentModel() !== CONTENT_MODEL_WIKITEXT ) {
				$result = '';
				break;
			}

			// Shared with the edit, don't count it in profiling
			$startedAt = microtime( true );
			$textName = $params['wikitext-var'];

			$newWikitext = $getVarCB( $textName )->toString();
			$newContent = ContentHandler::makeContent( $newWikitext, $wikiPage->getTitle() );
			$prepared = $wikiPage->prepareContentForEdit(
				$newContent,
				null,
				$params['contextUserIdentity']
			);
			if ( !empty( $params['pst'] ) ) {
				$result = $prepared->pstContent->serialize( $prepared->format );
			} else {
				// Note: as of core change r727361, the PP limit comments (which we don't want to be here)
				// are already excluded.
				$parserOptions = $prepared->popts;
				$result = $prepared->getOutput()
					->runOutputPipeline( $parserOptions, [] )
					->getContentHolderText();
			}
			self::$profilingExtraTime += ( microtime( true ) - $startedAt );
			break;
		case 'pst-from-update':
			/** @var PreparedUpdate $preparedUpdate */
			$preparedUpdate = $params['update'];
			$result = $this->textExtractor->revisionToString(
				$preparedUpdate->getRevision(),
				$params['contextUser']
			);
			break;
		case 'html-from-update':
			/** @var PreparedUpdate $preparedUpdate */
			$preparedUpdate = $params['update'];
			// Shared with the edit, don't count it in profiling
			$startedAt = microtime( true );
			$parserOptions = $preparedUpdate->getRenderedRevision()->getOptions();
			$result = $preparedUpdate->getCanonicalParserOutput()
				->runOutputPipeline( $parserOptions, [] )
				->getContentHolderText();
			self::$profilingExtraTime += ( microtime( true ) - $startedAt );
			break;
		case 'strip-html':
			$htmlName = $params['html-var'];
			$markup = $getVarCB( $htmlName )->toString();
			$withoutTags = StringUtils::delimiterReplace( '<', '>', '', $markup );
			// We strip extra spaces to the right because the stripping above
			// could leave a lot of whitespace.
			// @fixme Find a better way to do this.
			$result = TextContent::normalizeLineEndings( $withoutTags );
			break;
		case 'load-recent-authors':
			$result = $this->getLastPageAuthors( $params['title'] );
			break;
		case 'load-first-author':
			$firstRevision = $this->revisionLookup->getFirstRevision( $params['title'] );
			// TODO T233241
			$firstAuthor = $firstRevision ? $firstRevision->getUser() : null;
			$result = $firstAuthor === null ? '' : $firstAuthor->getName();
			break;
		case 'get-page-restrictions':
			$restrictedAction = $params['action'];
			/** @var Title $title */
			$title = $params['title'];
			$result = $this->restrictionStore->getRestrictions( $title, $restrictedAction );
			break;
		case 'account-type':
			/** @var User $newUser */
			$newUser = $params['createdUser'];
			$isTemp = $this->userIdentityUtils->isTemp( $newUser );
			$result = match ( true ) {
				$params['autocreate'] && $isTemp => 'temp',
				// At this point the account hasn't been written to the DB yet, so:
				// - User::getId() is still 0
				// - User::isRegistered() will always be false
				// - and User::isNamed() can't be trusted here
				//
				// That means the only thing we can really rely on during pre-auth/pre-creation
				// is the username itself. If it's not a temporary account and the username is
				// creatable, then this is effectively a named account creation attempt.
				!$isTemp && $this->userNameUtils->isCreatable( $newUser->getName() ) => 'named',
				default => 'unknown',
			};
			break;
		case 'user-unnamed-ip':
			/** @var User $user */
			$user = $params['user'];
			$result = null;

			// Reveal IPs for:
			// - temporary accounts: temporary account names will replace the IP in the `user_name`
			//   variable. This variable restores this access.
			// - logged-out users: This supports the transition to the use of temporary accounts
			//   so that filter maintainers on pre-transition wikis can migrate `user_name` to `user_unnamed_ip`
			//   where necessary and see no disruption on transition.
			//
			// This variable should only ever be exposed for these use cases and shouldn't be extended
			// to registered accounts, as that would leak account PII to users without the right to see
			// that information
			$mayReveal = $this->userIdentityUtils->isTemp( $user ) ||
				IPUtils::isIPAddress( $user->getName() );
			if ( $mayReveal ) {
				/** @var RecentChange|null $rc */
				$rc = $params['rc'];
				$result = $rc !== null
					? $rc->getAttribute( 'rc_ip' )
					: $user->getRequest()->getIP();
			}
			break;
		case 'user-type':
			/** @var UserIdentity $identity */
			$identity = $params['user-identity'];
			if ( $this->userIdentityUtils->isNamed( $identity ) ) {
				$result = 'named';
			} elseif ( $this->userIdentityUtils->isTemp( $identity ) ) {
				$result = 'temp';
			} elseif ( IPUtils::isIPAddress( $identity->getName() ) ) {
				$result = 'ip';
			} elseif ( ExternalUserNames::isExternal( $identity->getName() ) ) {
				$result = 'external';
			} else {
				$result = 'unknown';
			}
			break;
		case 'user-editcount':
			/** @var UserIdentity $identity */
			$identity = $params['user-identity'];
			$result = $this->userEditTracker->getUserEditCount( $identity );
			break;
		case 'user-emailconfirm':
			/** @var User $user */
			$user = $params['user'];
			$result = $user->getEmailAuthenticationTimestamp();
			break;
		case 'user-groups':
			/** @var UserIdentity $identity */
			$identity = $params['user-identity'];
			$result = $this->userGroupManager->getUserEffectiveGroups( $identity );
			break;
		case 'user-rights':
			/** @var UserIdentity $identity */
			$identity = $params['user-identity'];
			$result = $this->permissionManager->getUserPermissions( $identity );
			break;
		case 'user-block':
			// @todo Support partial blocks?
			/** @var User $user */
			$user = $params['user'];
			$result = (bool)$user->getBlock();
			break;
		case 'user-age':
			/** @var User $user */
			$user = $params['user'];
			$asOf = $params['asof'];

			if ( !$user->isRegistered() ) {
				$result = 0;
				break;
			}

			// HACK: If there's no registration date, assume 2008-01-15, Wikipedia Day
			// in the year before the new user log was created. See T243469.
			$registeredAt = $user->getRegistration() ?? "20080115000000";
			$result = (int)wfTimestamp( TS_UNIX, $asOf ) - (int)wfTimestamp( TS_UNIX, $registeredAt );
			break;
		case 'page-age':
			/** @var Title $title */
			$title = $params['title'];

			$createdAt = $this->revisionLookup->getFirstRevision( $title )?->getTimestamp();
			if ( $createdAt ) {
				$asOf = $params['asof'];
				$result = (int)wfTimestamp( TS_UNIX, $asOf ) - (int)wfTimestamp( TS_UNIX, $createdAt );
			} else {
				$result = 0;
			}
			break;
		case 'revision-age':
			$revision = $this->getRevisionFromParameters( $params );
			if ( $revision ) {
				$asOf = $params['asof'];
				$result = (int)wfTimestamp( TS_UNIX, $asOf ) -
					(int)wfTimestamp( TS_UNIX, $revision->getTimestamp() );
			} else {
				$result = null;
			}
			break;
		case 'length':
			$result = strlen( $getVarCB( $params['length-var'] )->toString() );
			break;
		case 'subtract-int':
			$minuend = $getVarCB( $params['val1-var'] )->toInt();
			$subtrahend = $getVarCB( $params['val2-var'] )->toInt();
			$result = $minuend - $subtrahend;
			break;
		case 'content-model':
			$result = $this->getContentModelFromRevision(
				$this->getRevisionFromParameters( $params )
			);
			break;
		case 'revision-text':
			$revision = $this->getRevisionFromParameters( $params );
			$result = $this->textExtractor->revisionToString( $revision, $params['contextUser'] );
			break;
		case 'get-wiki-name':
			$result = $this->wikiID;
			break;
		case 'get-wiki-language':
			$result = $this->contentLanguage->getCode();
			break;
		default:
			// Let the hook compute it; a truthy return means the method is unknown
			$unhandled = $this->hookRunner->onAbuseFilter_computeVariable(
				$method,
				$vars,
				$params,
				$result
			);
			if ( $unhandled ) {
				throw new UnexpectedValueException( 'Unknown variable compute type ' . $method );
			}
	}

	return $result instanceof AFPData ? $result : AFPData::newFromPHPVar( $result );
}
| 448 | |
/**
 * Fetch the external links stored in the database for a page.
 *
 * @param PageIdentity $page
 * @return array The result of ExternalLinksLookup::getExtLinksForPage(), or an empty array
 *   when the page ID is falsy and there is nothing to look up
 */
private function getLinksFromDB( PageIdentity $page ): array {
	$pageId = $page->getId();

	return $pageId
		? ExternalLinksLookup::getExtLinksForPage( $pageId, $this->lbFactory, __METHOD__ )
		: [];
}
| 465 | |
/**
 * Collect the most recent distinct authors of a page.
 *
 * Only the latest 100 non-deleted revisions are inspected, so fewer than 10 names can be
 * returned even when the page has more authors further back in its history. The list is
 * cached for a minute under a key derived from the latest revision ID of the page.
 *
 * @todo Move to MW core (T272050)
 * @param Title $title
 * @return string[] Usernames of the last 10 (unique) authors from $title, most recent first;
 *   empty when the page does not exist
 */
private function getLastPageAuthors( Title $title ) {
	if ( !$title->exists() ) {
		return [];
	}

	// Captured now: inside the closure below __METHOD__ would name the closure instead.
	$fname = __METHOD__;

	return $this->wanCache->getWithSetCallback(
		$this->wanCache->makeKey( 'last-10-authors', 'revision', $title->getLatestRevID() ),
		WANObjectCache::TTL_MINUTE,
		function ( $oldValue, &$ttl, array &$setOpts ) use ( $title, $fname ) {
			$dbr = $this->lbFactory->getReplicaDatabase();

			$setOpts += Database::getCacheSetOptions( $dbr );
			// Get the last 100 edit authors with a trivial query (avoid T116557)
			$revQuery = $this->revisionStore->getQueryInfo();
			$revAuthors = $dbr->newSelectQueryBuilder()
				->tables( $revQuery['tables'] )
				->field( $revQuery['fields']['rev_user_text'] )
				->where( [
					'rev_page' => $title->getArticleID(),
					// TODO Should deleted names be counted in the 10 authors? If yes, this check should
					// be moved inside the foreach
					'rev_deleted' => 0
				] )
				->caller( $fname )
				// Some pages have < 10 authors but many revisions (e.g. bot pages)
				->orderBy( [ 'rev_timestamp', 'rev_id' ], SelectQueryBuilder::SORT_DESC )
				->limit( 100 )
				// Force index per T116557
				->useIndex( [ 'revision' => 'rev_page_timestamp' ] )
				->joinConds( $revQuery['joins'] )
				->fetchFieldValues();
			// Get the last 10 distinct authors within this set of edits; using the names as
			// array keys deduplicates them while keeping the order of first appearance.
			$users = [];
			foreach ( $revAuthors as $author ) {
				$users[$author] = 1;
				if ( count( $users ) >= 10 ) {
					break;
				}
			}

			// PHP silently turns keys that look like decimal integers (e.g. the username "12345")
			// into ints, so cast the keys back to keep the documented string[] contract.
			return array_map( 'strval', array_keys( $users ) );
		}
	);
}
| 517 | |
/**
 * Find the revision designated by the parameters of a lazy-loaded variable.
 *
 * 'revid' takes precedence over 'title'. When 'parent' is truthy and a revision was found,
 * the revision preceding it is returned instead.
 *
 * @param array{revid?:int,title?:Title,parent?:true} $params
 * @return ?RevisionRecord Null when the lookup yields no revision
 * @throws InvalidArgumentException When neither 'revid' nor 'title' is set
 */
private function getRevisionFromParameters( array $params ): ?RevisionRecord {
	$found = match ( true ) {
		isset( $params['revid'] ) => $this->revisionLookup->getRevisionById( $params['revid'] ),
		isset( $params['title'] ) => $this->revisionLookup->getRevisionByTitle( $params['title'] ),
		default => throw new InvalidArgumentException(
			"Either 'revid' or 'title' are mandatory revision specifiers"
		),
	};

	$wantsParent = $params['parent'] ?? false;
	if ( !$wantsParent || $found === null ) {
		return $found;
	}

	return $this->revisionLookup->getPreviousRevision( $found );
}
| 537 | |
/**
 * Read the content model of the main slot of a revision, using raw (unrestricted) access.
 *
 * @param ?RevisionRecord $revision
 * @return string The model ID, or an empty string when there is no revision
 */
private function getContentModelFromRevision( ?RevisionRecord $revision ): string {
	if ( $revision === null ) {
		return '';
	}

	// this is consistent with what is done on various places in RunVariableGenerator
	// and RCVariableGenerator
	$mainContent = $revision->getContent( SlotRecord::MAIN, RevisionRecord::RAW );

	return $mainContent->getModel() ?? '';
}
| 544 | |
/**
 * Parse wikitext that is not being saved (usually the old text of a page). The returned
 * object mimics the result of WikiPage::prepareContentForEdit() only as far as exposing
 * the parser output in its 'output' property.
 *
 * Results are memoized in a static array keyed by the MD5 of the wikitext plus the prefixed
 * page title; the user is not part of the key.
 *
 * @param string $wikitext
 * @param WikiPage $article
 * @param UserIdentity $userIdentity Context user
 *
 * @return stdClass Object whose 'output' property holds the parser output
 */
private function parseNonEditWikitext( $wikitext, WikiPage $article, UserIdentity $userIdentity ) {
	static $cache = [];

	$title = $article->getTitle();
	$key = md5( $wikitext ) . ':' . $title->getPrefixedText();

	if ( isset( $cache[$key] ) ) {
		return $cache[$key];
	}

	$parserOptions = ParserOptions::newFromUser( $userIdentity );
	$parsed = new stdClass();
	$parsed->output = $this->parserFactory->getInstance()->parse( $wikitext, $title, $parserOptions );
	$cache[$key] = $parsed;

	return $parsed;
}
| 568 | } |