Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 122
0.00% covered (danger)
0.00%
0 / 11
CRAP
0.00% covered (danger)
0.00%
0 / 1
RevalidateLinkRecommendations
0.00% covered (danger)
0.00%
0 / 116
0.00% covered (danger)
0.00%
0 / 11
1406
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 18
0.00% covered (danger)
0.00%
0 / 1
2
 checkRequiredExtensions
0.00% covered (danger)
0.00%
0 / 5
0.00% covered (danger)
0.00%
0 / 1
6
 execute
0.00% covered (danger)
0.00%
0 / 31
0.00% covered (danger)
0.00%
0 / 1
132
 initGrowthConfig
0.00% covered (danger)
0.00%
0 / 3
0.00% covered (danger)
0.00%
0 / 1
2
 initServices
0.00% covered (danger)
0.00%
0 / 6
0.00% covered (danger)
0.00%
0 / 1
2
 validateRecommendation
0.00% covered (danger)
0.00%
0 / 18
0.00% covered (danger)
0.00%
0 / 1
72
 regenerateRecommendation
0.00% covered (danger)
0.00%
0 / 9
0.00% covered (danger)
0.00%
0 / 1
12
 getTitle
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 verboseLog
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
6
 getAllowedChecksums
0.00% covered (danger)
0.00%
0 / 14
0.00% covered (danger)
0.00%
0 / 1
20
 getOlderThanTimestamp
0.00% covered (danger)
0.00%
0 / 9
0.00% covered (danger)
0.00%
0 / 1
12
1<?php
2
3namespace GrowthExperiments\Maintenance;
4
5use GrowthExperiments\GrowthExperimentsServices;
6use GrowthExperiments\NewcomerTasks\AddLink\LinkRecommendation;
7use GrowthExperiments\NewcomerTasks\AddLink\LinkRecommendationHelper;
8use GrowthExperiments\NewcomerTasks\AddLink\LinkRecommendationLink;
9use GrowthExperiments\NewcomerTasks\AddLink\LinkRecommendationStore;
10use GrowthExperiments\NewcomerTasks\AddLink\LinkRecommendationUpdater;
11use GrowthExperiments\WikiConfigException;
12use LogicException;
13use Maintenance;
14use MediaWiki\Config\Config;
15use MediaWiki\MediaWikiServices;
16use MediaWiki\Status\Status;
17use MediaWiki\Title\Title;
18use MediaWiki\Title\TitleFactory;
19use UnexpectedValueException;
20use Wikimedia\Rdbms\DBReadOnlyError;
21
// Locate the MediaWiki installation: the MW_INSTALL_PATH environment variable wins when it is
// set; otherwise fall back to the directory three levels above this file's directory.
$path = getenv( 'MW_INSTALL_PATH' );
if ( $path === false ) {
    $path = dirname( __DIR__, 3 );
}

require_once $path . '/maintenance/Maintenance.php';
29
/**
 * Iterate through the growthexperiments_link_recommendations table and regenerate the
 * recommendations which fail the criteria given on the command line (--all,
 * --exceptDatasetChecksums, --olderThan, --scoreLessThan). The existing recommendation is
 * deleted before a replacement is requested, so if a valid task cannot be generated, the
 * existing task is discarded.
 * This is mainly meant for updating tasks after the recommendation algorithm changes.
 */
class RevalidateLinkRecommendations extends Maintenance {

    /** @var TitleFactory */
    private $titleFactory;

    /** @var LinkRecommendationStore */
    private $linkRecommendationStore;

    /** @var LinkRecommendationHelper */
    private $linkRecommendationHelper;

    /** @var LinkRecommendationUpdater */
    private $linkRecommendationUpdater;

    /** @var Config */
    private $growthConfig;

    /** @var string[]|null Checksums read from the --exceptDatasetChecksums file; null until loaded. */
    private ?array $allowedChecksums = null;

    /** @var int|null Parsed value of --olderThan; null until first requested. */
    private ?int $olderThanTimestamp = null;

    public function __construct() {
        parent::__construct();
        $this->requireExtension( 'GrowthExperiments' );

        $this->addDescription( 'Iterate through the growthexperiments_link_recommendations table and '
            . 'regenerate the ones which do not match the specified criteria. If a valid task cannot be '
            . 'generated, the existing task will be discarded.' );
        $this->addOption( 'fromPageId', 'Start iterating upwards from this page ID.', false, true );
        $this->addOption( 'all', 'Regenerate all tasks.' );
        $this->addOption( 'exceptDatasetChecksums', 'Regenerate a task unless its '
            . 'model checksum appears in the given file (one checksum per line)', false, true );
        $this->addOption( 'olderThan', 'Regenerate a task which was generated '
            . 'before this date', false, true );
        $this->addOption( 'scoreLessThan', 'Regenerate a task when any suggested link has '
            . 'a lower score than this one.', false, true );
        $this->addOption( 'limit', 'Limit the number of changes.', false, true );
        $this->addOption( 'force', 'Store the new recommendation even if it fails quality criteria.' );
        $this->addOption( 'dry-run', 'Do not actually make any changes.' );
        $this->addOption( 'verbose', 'Show debug output.' );
        $this->setBatchSize( 500 );
    }

    /**
     * Require EventBus or CirrusSearch, depending on the GELinkRecommendationsUseEventGate
     * setting, before delegating to the parent check.
     */
    public function checkRequiredExtensions() {
        // Hack: must be early enough for requireExtension to work but late enough for config
        // to be available.
        $growthServices = GrowthExperimentsServices::wrap( MediaWikiServices::getInstance() );
        $useEventGate = $growthServices->getGrowthConfig()->get( 'GELinkRecommendationsUseEventGate' );
        $this->requireExtension( $useEventGate ? 'EventBus' : 'CirrusSearch' );
        parent::checkRequiredExtensions();
    }

    /**
     * Walk the stored recommendations in batches and regenerate every one that fails
     * validateRecommendation(), until the store is exhausted or --limit changes were made.
     * Does nothing when link recommendations are disabled or a remote task API is configured.
     */
    public function execute() {
        $this->initGrowthConfig();
        if ( !$this->growthConfig->get( 'GENewcomerTasksLinkRecommendationsEnabled' ) ) {
            $this->output( "Disabled\n" );
            return;
        }
        if ( $this->growthConfig->get( 'GENewcomerTasksRemoteApiUrl' ) ) {
            $this->output( "Local tasks disabled\n" );
            return;
        }
        $this->initServices();

        $this->output( "Revalidating link recommendations:\n" );

        // Loop-invariant options. The default limit of -1 can never equal the (positive)
        // number of changes, i.e. it means "no limit".
        $isDryRun = (bool)$this->getOption( 'dry-run' );
        $limit = (int)$this->getOption( 'limit', -1 );

        $replaced = 0;
        $discarded = 0;
        $fromPageId = (int)$this->getOption( 'fromPageId', 0 );
        // NOTE(review): $fromPageId is never reassigned in this method, so the loop can only
        // terminate (other than via the limit) if getAllRecommendations() takes it by reference
        // and sets it to false once there are no more rows - confirm against
        // LinkRecommendationStore.
        while ( $fromPageId !== false ) {
            $this->output( "  fetching task batch starting with page $fromPageId\n" );
            $linkRecommendations = $this->linkRecommendationStore->getAllRecommendations(
                $this->getBatchSize(), $fromPageId );
            foreach ( $linkRecommendations as $linkRecommendation ) {
                if ( $this->validateRecommendation( $linkRecommendation ) ) {
                    continue;
                }

                $this->verboseLog( '  ' . $this->getTitle( $linkRecommendation )->getPrefixedText()
                    . ' is outdated, regenerating... ' );
                if ( $isDryRun ) {
                    // Count the would-be change so that --limit behaves the same in a dry run.
                    $replaced++;
                    $this->verboseLog( "(dry-run)\n" );
                } else {
                    $status = $this->regenerateRecommendation( $linkRecommendation );
                    if ( $status->isOK() ) {
                        $replaced++;
                        $this->verboseLog( "success\n" );
                    } else {
                        $discarded++;
                        $this->verboseLog( $status->getWikiText( false, false, 'en' ) . "\n" );
                    }
                }

                if ( $replaced + $discarded === $limit ) {
                    $this->verboseLog( "Limit reached, aborting.\n" );
                    break 2;
                }
            }
        }
        $this->output( "Done; replaced $replaced, discarded $discarded\n" );
    }

    /**
     * Load only the Growth configuration object.
     */
    protected function initGrowthConfig(): void {
        // Needs to be separate from initServices/initConfig as checking whether the script
        // should run on a given wiki relies on this, but initServices/initConfig will break
        // on some wikis where the script is not supposed to run and the task configuration
        // is missing.
        $services = MediaWikiServices::getInstance();
        $growthServices = GrowthExperimentsServices::wrap( $services );
        $this->growthConfig = $growthServices->getGrowthConfig();
    }

    /**
     * Fetch the services needed for the actual revalidation run.
     */
    protected function initServices(): void {
        $services = MediaWikiServices::getInstance();
        $growthServices = GrowthExperimentsServices::wrap( $services );
        $this->titleFactory = $services->getTitleFactory();
        $this->linkRecommendationStore = $growthServices->getLinkRecommendationStore();
        $this->linkRecommendationHelper = $growthServices->getLinkRecommendationHelper();
        $this->linkRecommendationUpdater = $growthServices->getLinkRecommendationUpdater();
    }

    /**
     * Check whether the recommendation still meets our standards.
     *
     * Each criterion given on the command line is checked in turn; the recommendation is
     * rejected as soon as one of them fails and accepted only if all of them pass.
     *
     * @param LinkRecommendation $linkRecommendation
     * @return bool True when the recommendation can be kept, false when it must be regenerated.
     * @throws UnexpectedValueException When the checksum file or the olderThan value is unusable.
     */
    private function validateRecommendation( LinkRecommendation $linkRecommendation ): bool {
        if ( $this->hasOption( 'all' ) ) {
            return false;
        }

        if ( $this->hasOption( 'exceptDatasetChecksums' ) ) {
            // A recommendation without a model checksum gets a placeholder value, which is
            // not expected to appear in the allow-list file.
            $actualChecksum = $linkRecommendation->getMetadata()->getDatasetChecksums()['model'] ?? 'wrong';
            if ( !in_array( $actualChecksum, $this->getAllowedChecksums(), true ) ) {
                return false;
            }
        }

        if ( $this->hasOption( 'olderThan' ) ) {
            $taskTimestamp = $linkRecommendation->getMetadata()->getTaskTimestamp();
            if ( $taskTimestamp < $this->getOlderThanTimestamp() ) {
                return false;
            }
        }

        if ( $this->hasOption( 'scoreLessThan' ) ) {
            $threshold = (float)$this->getOption( 'scoreLessThan' );
            // Iterating (rather than taking min() of the scores) keeps this safe for a
            // recommendation without links: min() of an empty array throws on PHP 8, while
            // "any link scores below the threshold" is simply false for an empty list.
            foreach ( $linkRecommendation->getLinks() as $link ) {
                if ( $link->getScore() < $threshold ) {
                    return false;
                }
            }
        }

        return true;
    }

    /**
     * Discard the existing recommendation and try to fetch a new one.
     *
     * Ends the script via fatalError() when the database is read-only or the wiki
     * configuration is broken.
     *
     * @param LinkRecommendation $linkRecommendation
     * @return Status Result of processing the page as a new candidate.
     */
    private function regenerateRecommendation( LinkRecommendation $linkRecommendation ): Status {
        $title = $this->titleFactory->newFromLinkTarget( $linkRecommendation->getTitle() );
        // Deleting from the search index is instantaneous, adding to the search index takes a few
        // hours, so in theory the script could deplete the existing pool temporarily. In practice
        // fetching a new candidate takes about a second, so the script shouldn't progress fast
        // enough for that to be a problem.
        $this->linkRecommendationHelper->deleteLinkRecommendation( $title->toPageIdentity(), true );
        try {
            $force = $this->hasOption( 'force' );
            return Status::wrap( $this->linkRecommendationUpdater->processCandidate( $title, $force ) );
        } catch ( DBReadOnlyError $e ) {
            $this->fatalError( 'DB is readonly, aborting' );
        } catch ( WikiConfigException $e ) {
            $this->fatalError( $e->getMessage() );
        }
        // fatalError() is expected to terminate the script; this only guards against it returning.
        throw new LogicException( 'Cannot reach here' );
    }

    /**
     * Get the Title object of the page the recommendation belongs to.
     * @param LinkRecommendation $linkRecommendation
     * @return Title
     */
    private function getTitle( LinkRecommendation $linkRecommendation ): Title {
        // The title is already cached by this point so no need for a LinkBatch.
        return $this->titleFactory->newFromLinkTarget( $linkRecommendation->getTitle() );
    }

    /**
     * Output a message, but only when --verbose was given.
     * @param string $message
     */
    private function verboseLog( string $message ): void {
        if ( $this->hasOption( 'verbose' ) ) {
            $this->output( $message );
        }
    }

    /**
     * Helper method to handle caching of the checksum file.
     *
     * The file named by --exceptDatasetChecksums is read once; surrounding whitespace is
     * trimmed from every line and empty lines are ignored.
     *
     * @return string[] Non-empty list of checksums.
     * @throws UnexpectedValueException When the file cannot be read or holds no checksums.
     */
    private function getAllowedChecksums(): array {
        if ( $this->allowedChecksums === null ) {
            $filename = $this->getOption( 'exceptDatasetChecksums' );
            // phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
            $content = @file_get_contents( $filename );
            if ( $content === false ) {
                throw new UnexpectedValueException( "File $filename could not be opened" );
            }
            // Parse the content which was already read instead of reading the file a second
            // time; trim() also takes care of the "\r" of Windows line endings.
            $checksums = array_values( array_filter(
                array_map( 'trim', explode( "\n", $content ) ),
                static function ( string $line ): bool {
                    return $line !== '';
                }
            ) );
            if ( !$checksums ) {
                throw new UnexpectedValueException( "File $filename did not contain checksums" );
            }
            $this->allowedChecksums = $checksums;
        }
        return $this->allowedChecksums;
    }

    /**
     * Helper method to handle caching/fetching of the older than timestamp
     *
     * @return int The --olderThan option converted with wfTimestamp( TS_UNIX, ... ).
     * @throws UnexpectedValueException When the option cannot be converted to a timestamp.
     */
    private function getOlderThanTimestamp(): int {
        if ( $this->olderThanTimestamp === null ) {
            $rawTS = wfTimestamp(
                TS_UNIX,
                $this->getOption( 'olderThan' )
            );
            if ( !$rawTS ) {
                throw new UnexpectedValueException( "Parameter olderThan does not contain a valid timestamp" );
            }
            $this->olderThanTimestamp = (int)$rawTS;
        }
        return $this->olderThanTimestamp;
    }

}
272
// Name the class which implements this maintenance script, then include the runner stub
// (presumably it only executes the script when this file is the entry point - see
// Maintenance.php, which defines RUN_MAINTENANCE_IF_MAIN).
$maintClass = RevalidateLinkRecommendations::class;
require_once RUN_MAINTENANCE_IF_MAIN;