Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
90.16% covered (success)
90.16%
284 / 315
50.00% covered (danger)
50.00%
3 / 6
CRAP
0.00% covered (danger)
0.00%
0 / 1
LazyVariableComputer
90.16% covered (success)
90.16%
284 / 315
50.00% covered (danger)
50.00%
3 / 6
79.22
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
15 / 15
100.00% covered (success)
100.00%
1 / 1
1
 compute
88.71% covered (warning)
88.71%
220 / 248
0.00% covered (danger)
0.00%
0 / 1
68.71
 getLinksFromDB
100.00% covered (success)
100.00%
8 / 8
100.00% covered (success)
100.00%
1 / 1
2
 getLastPageAuthors
93.75% covered (success)
93.75%
30 / 32
0.00% covered (danger)
0.00%
0 / 1
4.00
 getContentModelFromRevision
75.00% covered (warning)
75.00%
3 / 4
0.00% covered (danger)
0.00%
0 / 1
2.06
 parseNonEditWikitext
100.00% covered (success)
100.00%
8 / 8
100.00% covered (success)
100.00%
1 / 1
2
1<?php
2
3namespace MediaWiki\Extension\AbuseFilter\Variables;
4
5use ContentHandler;
6use Language;
7use MediaWiki\Extension\AbuseFilter\Hooks\AbuseFilterHookRunner;
8use MediaWiki\Extension\AbuseFilter\Parser\AFPData;
9use MediaWiki\Extension\AbuseFilter\TextExtractor;
10use MediaWiki\ExternalLinks\ExternalLinksLookup;
11use MediaWiki\ExternalLinks\LinkFilter;
12use MediaWiki\Permissions\PermissionManager;
13use MediaWiki\Permissions\RestrictionStore;
14use MediaWiki\Revision\RevisionLookup;
15use MediaWiki\Revision\RevisionRecord;
16use MediaWiki\Revision\RevisionStore;
17use MediaWiki\Revision\SlotRecord;
18use MediaWiki\Storage\PreparedUpdate;
19use MediaWiki\Title\Title;
20use MediaWiki\User\ExternalUserNames;
21use MediaWiki\User\User;
22use MediaWiki\User\UserEditTracker;
23use MediaWiki\User\UserGroupManager;
24use MediaWiki\User\UserIdentity;
25use MediaWiki\User\UserIdentityUtils;
26use ParserFactory;
27use ParserOptions;
28use Psr\Log\LoggerInterface;
29use stdClass;
30use StringUtils;
31use TextContent;
32use UnexpectedValueException;
33use WANObjectCache;
34use Wikimedia\Diff\Diff;
35use Wikimedia\Diff\UnifiedDiffFormatter;
36use Wikimedia\IPUtils;
37use Wikimedia\Rdbms\Database;
38use Wikimedia\Rdbms\LBFactory;
39use WikiPage;
40
41/**
42 * Service used to compute lazy-loaded variables.
43 * @internal
44 */
45class LazyVariableComputer {
46    public const SERVICE_NAME = 'AbuseFilterLazyVariableComputer';
47
48    /**
49     * @var float The amount of time to subtract from profiling
50     * @todo This is a hack
51     */
52    public static $profilingExtraTime = 0;
53
54    /** @var TextExtractor */
55    private $textExtractor;
56
57    /** @var AbuseFilterHookRunner */
58    private $hookRunner;
59
60    /** @var LoggerInterface */
61    private $logger;
62
63    /** @var LBFactory */
64    private $lbFactory;
65
66    /** @var WANObjectCache */
67    private $wanCache;
68
69    /** @var RevisionLookup */
70    private $revisionLookup;
71
72    /** @var RevisionStore */
73    private $revisionStore;
74
75    /** @var Language */
76    private $contentLanguage;
77
78    /** @var ParserFactory */
79    private $parserFactory;
80
81    /** @var UserEditTracker */
82    private $userEditTracker;
83
84    /** @var UserGroupManager */
85    private $userGroupManager;
86
87    /** @var PermissionManager */
88    private $permissionManager;
89
90    /** @var RestrictionStore */
91    private $restrictionStore;
92
93    /** @var UserIdentityUtils */
94    private $userIdentityUtils;
95
96    /** @var string */
97    private $wikiID;
98
/**
 * Stores the collaborators that compute() and the private helpers of this class call into.
 *
 * @param TextExtractor $textExtractor Converts a revision to text ('revision-text-by-id')
 * @param AbuseFilterHookRunner $hookRunner Runs the interceptVariable/computeVariable hooks
 * @param LoggerInterface $logger Receives debug messages about where links are loaded from
 * @param LBFactory $lbFactory Source of the replica database connection
 * @param WANObjectCache $wanCache Caches the list of recent page authors
 * @param RevisionLookup $revisionLookup Looks up revisions and their timestamps
 * @param RevisionStore $revisionStore Provides the query info for the recent-authors query
 * @param Language $contentLanguage Its code is returned for 'get-wiki-language'
 * @param ParserFactory $parserFactory Creates the parser used for non-edit wikitext
 * @param UserEditTracker $userEditTracker Provides user edit counts
 * @param UserGroupManager $userGroupManager Provides effective user groups
 * @param PermissionManager $permissionManager Provides user permissions
 * @param RestrictionStore $restrictionStore Provides page restrictions
 * @param UserIdentityUtils $userIdentityUtils Tells named and temporary users apart
 * @param string $wikiID Returned as-is for 'get-wiki-name'
 */
public function __construct(
    TextExtractor $textExtractor,
    AbuseFilterHookRunner $hookRunner,
    LoggerInterface $logger,
    LBFactory $lbFactory,
    WANObjectCache $wanCache,
    RevisionLookup $revisionLookup,
    RevisionStore $revisionStore,
    Language $contentLanguage,
    ParserFactory $parserFactory,
    UserEditTracker $userEditTracker,
    UserGroupManager $userGroupManager,
    PermissionManager $permissionManager,
    RestrictionStore $restrictionStore,
    UserIdentityUtils $userIdentityUtils,
    string $wikiID
) {
    // Site-level values
    $this->wikiID = $wikiID;
    $this->contentLanguage = $contentLanguage;

    // Infrastructure: hooks, logging, storage and caching
    $this->hookRunner = $hookRunner;
    $this->logger = $logger;
    $this->lbFactory = $lbFactory;
    $this->wanCache = $wanCache;

    // Revisions, text and parsing
    $this->textExtractor = $textExtractor;
    $this->revisionLookup = $revisionLookup;
    $this->revisionStore = $revisionStore;
    $this->parserFactory = $parserFactory;

    // Users, permissions and page protection
    $this->userEditTracker = $userEditTracker;
    $this->userGroupManager = $userGroupManager;
    $this->permissionManager = $permissionManager;
    $this->restrictionStore = $restrictionStore;
    $this->userIdentityUtils = $userIdentityUtils;
}
149
150    /**
     * Computes the value of a lazy-loaded variable and returns it as AFPData.
     *
     * The AbuseFilter_interceptVariable hook is consulted first: if it returns false, whatever it
     * stored in $result is returned and the built-in logic is skipped. Otherwise the variable's
     * compute method selects one of the cases below; methods that are not built in are handed to
     * the AbuseFilter_computeVariable hook.
     *
151     * XXX: $getVarCB is a hack to hide the cyclic dependency with VariablesManager. See T261069 for possible
152     * solutions. This might also be merged into VariablesManager, but it would bring a ton of dependencies.
153     * @todo Should we remove $vars parameter (check hooks)?
154     *
155     * @param LazyLoadedVariable $var Provides the compute method name and its parameters
156     * @param VariableHolder $vars Only passed through to the hooks
157     * @param callable $getVarCB Resolves another variable by name, for variables this one depends on
158     * @phan-param callable(string $name):AFPData $getVarCB
159     * @return AFPData
     * @throws UnexpectedValueException If the method is not built in and the computeVariable hook
     *   returns true
160     */
161    public function compute( LazyLoadedVariable $var, VariableHolder $vars, callable $getVarCB ) {
162        $parameters = $var->getParameters();
163        $varMethod = $var->getMethod();
164        $result = null;
165
        // A false return from the intercept hook means $result is final; skip the switch below.
166        if ( !$this->hookRunner->onAbuseFilter_interceptVariable(
167            $varMethod,
168            $vars,
169            $parameters,
170            $result
171        ) ) {
172            return $result instanceof AFPData
173                ? $result : AFPData::newFromPHPVar( $result );
174        }
175
176        switch ( $varMethod ) {
177            case 'diff':
178                $text1Var = $parameters['oldtext-var'];
179                $text2Var = $parameters['newtext-var'];
180                $text1 = $getVarCB( $text1Var )->toString();
181                $text2 = $getVarCB( $text2Var )->toString();
182                // T74329: if there's no text, don't return an array with the empty string
183                $text1 = $text1 === '' ? [] : explode( "\n", $text1 );
184                $text2 = $text2 === '' ? [] : explode( "\n", $text2 );
185                $diffs = new Diff( $text1, $text2 );
186                $format = new UnifiedDiffFormatter();
187                $result = $format->format( $diffs );
188                break;
189            case 'diff-split':
190                $diff = $getVarCB( $parameters['diff-var'] )->toString();
191                $line_prefix = $parameters['line-prefix'];
192                $diff_lines = explode( "\n", $diff );
193                $result = [];
                // Keep only the lines whose first character is the prefix, without that character.
194                foreach ( $diff_lines as $line ) {
195                    if ( ( $line[0] ?? '' ) === $line_prefix ) {
196                        $result[] = substr( $line, 1 );
197                    }
198                }
199                break;
200            case 'links-from-wikitext':
201                // This should ONLY be used when sharing a parse operation with the edit.
202
203                /** @var WikiPage $article */
204                $article = $parameters['article'];
205                if ( $article->getContentModel() === CONTENT_MODEL_WIKITEXT ) {
206                    // Shared with the edit, don't count it in profiling
207                    $startTime = microtime( true );
208                    $textVar = $parameters['text-var'];
209
210                    $new_text = $getVarCB( $textVar )->toString();
211                    $content = ContentHandler::makeContent( $new_text, $article->getTitle() );
212                    $editInfo = $article->prepareContentForEdit(
213                        $content,
214                        null,
215                        $parameters['contextUserIdentity']
216                    );
217                    $result = LinkFilter::getIndexedUrlsNonReversed(
218                        array_keys( $editInfo->output->getExternalLinks() )
219                    );
220                    self::$profilingExtraTime += ( microtime( true ) - $startTime );
221                    break;
222                }
223            // Otherwise fall back to database
224            case 'links-from-wikitext-or-database':
225                // TODO: use Content object instead, if available!
226                /** @var WikiPage $article */
                // $article is already set when control falls through from 'links-from-wikitext'.
227                $article ??= $parameters['article'];
228
229                // this inference is ugly, but the name isn't accessible from here
230                // and we only want this for debugging
231                $textVar = $parameters['text-var'];
232                $varName = str_starts_with( $textVar, 'old_' ) ? 'old_links' : 'all_links';
233                if ( $parameters['forFilter'] ?? false ) {
234                    $this->logger->debug( "Loading $varName from DB" );
235                    $links = $this->getLinksFromDB( $article );
236                } elseif ( $article->getContentModel() === CONTENT_MODEL_WIKITEXT ) {
237                    $this->logger->debug( "Loading $varName from Parser" );
238
239                    $wikitext = $getVarCB( $textVar )->toString();
240                    $editInfo = $this->parseNonEditWikitext(
241                        $wikitext,
242                        $article,
243                        $parameters['contextUserIdentity']
244                    );
245                    $links = LinkFilter::getIndexedUrlsNonReversed(
246                        array_keys( $editInfo->output->getExternalLinks() )
247                    );
248                } else {
249                    // TODO: Get links from Content object. But we don't have the content object.
250                    // And for non-text content, $wikitext is usually not going to be a valid
251                    // serialization, but rather some dummy text for filtering.
252                    $links = [];
253                }
254
255                $result = $links;
256                break;
257            case 'links-from-update':
258                /** @var PreparedUpdate $update */
259                $update = $parameters['update'];
260                // Shared with the edit, don't count it in profiling
261                $startTime = microtime( true );
262                $result = LinkFilter::getIndexedUrlsNonReversed(
263                    array_keys( $update->getParserOutputForMetaData()->getExternalLinks() )
264                );
265                self::$profilingExtraTime += ( microtime( true ) - $startTime );
266                break;
267            case 'links-from-database':
268                /** @var WikiPage $article */
269                $article = $parameters['article'];
270                $this->logger->debug( 'Loading old_links from DB' );
271                $result = $this->getLinksFromDB( $article );
272                break;
273            case 'link-diff-added':
274            case 'link-diff-removed':
275                $oldLinkVar = $parameters['oldlink-var'];
276                $newLinkVar = $parameters['newlink-var'];
277
278                $oldLinks = $getVarCB( $oldLinkVar )->toNative();
279                $newLinks = $getVarCB( $newLinkVar )->toNative();
280
                // Both methods share the lookups above and differ only in the diff direction.
281                if ( $varMethod === 'link-diff-added' ) {
282                    $result = array_diff( $newLinks, $oldLinks );
283                }
284                if ( $varMethod === 'link-diff-removed' ) {
285                    $result = array_diff( $oldLinks, $newLinks );
286                }
287                break;
288            case 'parse-wikitext':
289                // Should ONLY be used when sharing a parse operation with the edit.
290                // TODO: use Content object instead, if available!
291                /** @var WikiPage $article */
292                $article = $parameters['article'];
293                if ( $article->getContentModel() === CONTENT_MODEL_WIKITEXT ) {
294                    // Shared with the edit, don't count it in profiling
295                    $startTime = microtime( true );
296                    $textVar = $parameters['wikitext-var'];
297
298                    $new_text = $getVarCB( $textVar )->toString();
299                    $content = ContentHandler::makeContent( $new_text, $article->getTitle() );
300                    $editInfo = $article->prepareContentForEdit(
301                        $content,
302                        null,
303                        $parameters['contextUserIdentity']
304                    );
                    // With a truthy 'pst' parameter return the serialized pstContent,
                    // otherwise the text of the parser output.
305                    if ( isset( $parameters['pst'] ) && $parameters['pst'] ) {
306                        $result = $editInfo->pstContent->serialize( $editInfo->format );
307                    } else {
308                        // Note: as of core change r727361, the PP limit comments (which we don't want to be here)
309                        // are already excluded.
310                        $result = $editInfo->getOutput()->getText();
311                    }
312                    self::$profilingExtraTime += ( microtime( true ) - $startTime );
313                } else {
314                    $result = '';
315                }
316                break;
317            case 'html-from-update':
318                /** @var PreparedUpdate $update */
319                $update = $parameters['update'];
320                // Shared with the edit, don't count it in profiling
321                $startTime = microtime( true );
322                $result = $update->getCanonicalParserOutput()->getText();
323                self::$profilingExtraTime += ( microtime( true ) - $startTime );
324                break;
325            case 'strip-html':
326                $htmlVar = $parameters['html-var'];
327                $html = $getVarCB( $htmlVar )->toString();
328                $stripped = StringUtils::delimiterReplace( '<', '>', '', $html );
329                // We strip extra spaces to the right because the stripping above
330                // could leave a lot of whitespace.
331                // @fixme Find a better way to do this.
332                $result = TextContent::normalizeLineEndings( $stripped );
333                break;
334            case 'load-recent-authors':
335                $result = $this->getLastPageAuthors( $parameters['title'] );
336                break;
337            case 'load-first-author':
338                $revision = $this->revisionLookup->getFirstRevision( $parameters['title'] );
339                if ( $revision ) {
340                    // TODO T233241
341                    $user = $revision->getUser();
342                    $result = $user === null ? '' : $user->getName();
343                } else {
344                    $result = '';
345                }
346                break;
347            case 'get-page-restrictions':
348                $action = $parameters['action'];
349                /** @var Title $title */
350                $title = $parameters['title'];
351                $result = $this->restrictionStore->getRestrictions( $title, $action );
352                break;
353            case 'user-type':
354                /** @var UserIdentity $userIdentity */
355                $userIdentity = $parameters['user-identity'];
                // The checks are ordered; the first one that matches decides the type.
356                if ( $this->userIdentityUtils->isNamed( $userIdentity ) ) {
357                    $result = 'named';
358                } elseif ( $this->userIdentityUtils->isTemp( $userIdentity ) ) {
359                    $result = 'temp';
360                } elseif ( IPUtils::isIPAddress( $userIdentity->getName() ) ) {
361                    $result = 'ip';
362                } elseif ( ExternalUserNames::isExternal( $userIdentity->getName() ) ) {
363                    $result = 'external';
364                } else {
365                    $result = 'unknown';
366                }
367                break;
368            case 'user-editcount':
369                /** @var UserIdentity $userIdentity */
370                $userIdentity = $parameters['user-identity'];
371                $result = $this->userEditTracker->getUserEditCount( $userIdentity );
372                break;
373            case 'user-emailconfirm':
374                /** @var User $user */
375                $user = $parameters['user'];
376                $result = $user->getEmailAuthenticationTimestamp();
377                break;
378            case 'user-groups':
379                /** @var UserIdentity $userIdentity */
380                $userIdentity = $parameters['user-identity'];
381                $result = $this->userGroupManager->getUserEffectiveGroups( $userIdentity );
382                break;
383            case 'user-rights':
384                /** @var UserIdentity $userIdentity */
385                $userIdentity = $parameters['user-identity'];
386                $result = $this->permissionManager->getUserPermissions( $userIdentity );
387                break;
388            case 'user-block':
389                // @todo Support partial blocks?
390                /** @var User $user */
391                $user = $parameters['user'];
392                $result = (bool)$user->getBlock();
393                break;
394            case 'user-age':
395                /** @var User $user */
396                $user = $parameters['user'];
397                $asOf = $parameters['asof'];
398
399                if ( !$user->isRegistered() ) {
400                    $result = 0;
401                } else {
402                    // HACK: If there's no registration date, assume 2008-01-15, Wikipedia Day
403                    // in the year before the new user log was created. See T243469.
404                    $registration = $user->getRegistration() ?? "20080115000000";
405                    $result = (int)wfTimestamp( TS_UNIX, $asOf ) - (int)wfTimestamp( TS_UNIX, $registration );
406                }
407                break;
408            case 'page-age':
409                /** @var Title $title */
410                $title = $parameters['title'];
411
412                $firstRev = $this->revisionLookup->getFirstRevision( $title );
413                $firstRevisionTime = $firstRev ? $firstRev->getTimestamp() : null;
                // No first revision (or no timestamp) yields an age of 0 rather than null.
414                if ( !$firstRevisionTime ) {
415                    $result = 0;
416                    break;
417                }
418
419                $asOf = $parameters['asof'];
420                $result = (int)wfTimestamp( TS_UNIX, $asOf ) - (int)wfTimestamp( TS_UNIX, $firstRevisionTime );
421                break;
422            case 'revision-age-by-id':
423                $timestamp = $this->revisionLookup->getTimestampFromId( $parameters['revid'] );
424                if ( !$timestamp ) {
425                    $result = null;
426                    break;
427                }
428                $asOf = $parameters['asof'];
429                $result = (int)wfTimestamp( TS_UNIX, $asOf ) - (int)wfTimestamp( TS_UNIX, $timestamp );
430                break;
431            case 'revision-age-by-title':
432                /** @var Title $title */
433                $title = $parameters['title'];
434                $revRec = $this->revisionLookup->getRevisionByTitle( $title );
435                if ( !$revRec ) {
436                    $result = null;
437                    break;
438                }
439                $asOf = $parameters['asof'];
440                $result = (int)wfTimestamp( TS_UNIX, $asOf ) - (int)wfTimestamp( TS_UNIX, $revRec->getTimestamp() );
441                break;
442            case 'previous-revision-age':
443                $revRec = $this->revisionLookup->getRevisionById( $parameters['revid'] );
444                if ( !$revRec ) {
445                    $result = null;
446                    break;
447                }
448                $prev = $this->revisionLookup->getPreviousRevision( $revRec );
449                if ( !$prev ) {
450                    $result = null;
451                    break;
452                }
                // Unlike the other age cases, 'asof' is optional here and defaults to the
                // timestamp of the given revision.
453                $asOf = $parameters['asof'] ?? $revRec->getTimestamp();
454                $result = (int)wfTimestamp( TS_UNIX, $asOf ) - (int)wfTimestamp( TS_UNIX, $prev->getTimestamp() );
455                break;
456            case 'length':
457                $s = $getVarCB( $parameters['length-var'] )->toString();
                // strlen() counts bytes, not characters.
458                $result = strlen( $s );
459                break;
460            case 'subtract-int':
461                $v1 = $getVarCB( $parameters['val1-var'] )->toInt();
462                $v2 = $getVarCB( $parameters['val2-var'] )->toInt();
463                $result = $v1 - $v2;
464                break;
465            case 'content-model-by-id':
466                $revRec = $this->revisionLookup->getRevisionById( $parameters['revid'] );
467                $result = $this->getContentModelFromRevision( $revRec );
468                break;
469            case 'revision-text-by-id':
470                $revRec = $this->revisionLookup->getRevisionById( $parameters['revid'] );
471                $result = $this->textExtractor->revisionToString( $revRec, $parameters['contextUser'] );
472                break;
473            case 'get-wiki-name':
474                $result = $this->wikiID;
475                break;
476            case 'get-wiki-language':
477                $result = $this->contentLanguage->getCode();
478                break;
479            default:
                // Not a built-in method: let the computeVariable hook fill in $result.
                // A true return from the hook is treated as "unknown method".
480                if ( $this->hookRunner->onAbuseFilter_computeVariable(
481                    $varMethod,
482                    $vars,
483                    $parameters,
484                    $result
485                ) ) {
486                    throw new UnexpectedValueException( 'Unknown variable compute type ' . $varMethod );
487                }
488        }
489
        // Wrap plain PHP values; values that are already AFPData are returned untouched.
490        return $result instanceof AFPData ? $result : AFPData::newFromPHPVar( $result );
491    }
492
/**
 * Loads the external links recorded in the database for a page.
 *
 * @param WikiPage $article
 * @return array Result of ExternalLinksLookup::getExternalLinksForPage(), or an empty
 *   array when the page has no ID
 */
private function getLinksFromDB( WikiPage $article ) {
    $pageId = $article->getId();

    if ( $pageId ) {
        $dbr = $this->lbFactory->getReplicaDatabase();

        return ExternalLinksLookup::getExternalLinksForPage( $pageId, $dbr, __METHOD__ );
    }

    // A falsy page ID means there is nothing to look up.
    return [];
}
509
/**
 * Returns the names of the most recent distinct authors of a page.
 *
 * Only the 100 most recent revisions with rev_deleted = 0 are inspected, so fewer than 10
 * names are returned when that window contains fewer distinct authors. The list is cached
 * in the WAN cache for one minute under a key containing the latest revision ID.
 *
 * @todo Move to MW core (T272050)
 * @param Title $title
 * @return string[] Usernames of the last 10 (unique) authors from $title
 */
private function getLastPageAuthors( Title $title ) {
    if ( !$title->exists() ) {
        return [];
    }

    // __METHOD__ inside the closure would name the closure, so capture it here.
    $fname = __METHOD__;

    return $this->wanCache->getWithSetCallback(
        $this->wanCache->makeKey( 'last-10-authors', 'revision', $title->getLatestRevID() ),
        WANObjectCache::TTL_MINUTE,
        function ( $oldValue, &$ttl, array &$setOpts ) use ( $title, $fname ) {
            $dbr = $this->lbFactory->getReplicaDatabase();

            $setOpts += Database::getCacheSetOptions( $dbr );
            // Get the last 100 edit authors with a trivial query (avoid T116557)
            $revQuery = $this->revisionStore->getQueryInfo();
            $revAuthors = $dbr->selectFieldValues(
                $revQuery['tables'],
                $revQuery['fields']['rev_user_text'],
                [
                    'rev_page' => $title->getArticleID(),
                    // TODO Should deleted names be counted in the 10 authors? If yes, this check should
                    // be moved inside the foreach
                    'rev_deleted' => 0
                ],
                $fname,
                // Some pages have < 10 authors but many revisions (e.g. bot pages)
                [ 'ORDER BY' => 'rev_timestamp DESC, rev_id DESC',
                    'LIMIT' => 100,
                    // Force index per T116557
                    'USE INDEX' => [ 'revision' => 'rev_page_timestamp' ],
                ],
                $revQuery['joins']
            );
            // Get the last 10 distinct authors within this set of edits.
            // Names are used as array keys so that duplicates collapse while order is kept.
            $users = [];
            foreach ( $revAuthors as $author ) {
                $users[$author] = 1;
                if ( count( $users ) >= 10 ) {
                    break;
                }
            }

            // PHP casts integer-like string keys (e.g. a user named "12345") to int, so
            // array_keys() alone would return ints for such names. Cast back to honour
            // the documented string[] return type.
            return array_map( 'strval', array_keys( $users ) );
        }
    );
}
562
/**
 * Returns the content model of the main slot of a revision.
 *
 * @param ?RevisionRecord $revision
 * @return string The model ID, or an empty string when there is no revision or its
 *   main-slot content is unavailable
 */
private function getContentModelFromRevision( ?RevisionRecord $revision ): string {
    // this is consistent with what is done on various places in RunVariableGenerator
    // and RCVariableGenerator
    if ( $revision === null ) {
        return '';
    }

    $content = $revision->getContent( SlotRecord::MAIN, RevisionRecord::RAW );

    // getContent() is documented in MediaWiki core as nullable (content could not be
    // loaded). Calling getModel() on null would be a fatal error, so fall back to the
    // same empty string used for a missing revision.
    return $content !== null ? $content->getModel() : '';
}
576
/**
 * Parses wikitext that is not part of the edit being saved (usually old wikitext), as a
 * stand-in for WikiPage::prepareContentForEdit.
 *
 * Results are memoized in a static array for the lifetime of the process, keyed by the
 * MD5 of the wikitext plus the prefixed title text.
 *
 * @param string $wikitext
 * @param WikiPage $article
 * @param UserIdentity $userIdentity Context user
 *
 * @return stdClass Object whose 'output' property holds the parse result
 */
private function parseNonEditWikitext( $wikitext, WikiPage $article, UserIdentity $userIdentity ) {
    static $cache = [];

    $cacheKey = md5( $wikitext ) . ':' . $article->getTitle()->getPrefixedText();

    if ( isset( $cache[$cacheKey] ) ) {
        return $cache[$cacheKey];
    }

    // Cache miss: parse with options derived from the context user.
    $parserOptions = ParserOptions::newFromUser( $userIdentity );
    $parser = $this->parserFactory->getInstance();
    $parsed = (object)[
        'output' => $parser->parse( $wikitext, $article->getTitle(), $parserOptions )
    ];
    $cache[$cacheKey] = $parsed;

    return $parsed;
}
600}