Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 147
0.00% covered (danger)
0.00%
0 / 10
CRAP
0.00% covered (danger)
0.00%
0 / 1
RefreshLinkRecommendations
0.00% covered (danger)
0.00%
0 / 141
0.00% covered (danger)
0.00%
0 / 10
1332
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 10
0.00% covered (danger)
0.00%
0 / 1
2
 checkRequiredExtensions
0.00% covered (danger)
0.00%
0 / 4
0.00% covered (danger)
0.00%
0 / 1
6
 execute
0.00% covered (danger)
0.00%
0 / 58
0.00% covered (danger)
0.00%
0 / 1
240
 initGrowthConfig
0.00% covered (danger)
0.00%
0 / 3
0.00% covered (danger)
0.00%
0 / 1
2
 initServices
0.00% covered (danger)
0.00%
0 / 15
0.00% covered (danger)
0.00%
0 / 1
2
 initConfig
0.00% covered (danger)
0.00%
0 / 7
0.00% covered (danger)
0.00%
0 / 1
12
 getOresTopics
0.00% covered (danger)
0.00%
0 / 7
0.00% covered (danger)
0.00%
0 / 1
12
 findArticlesInTopic
0.00% covered (danger)
0.00%
0 / 23
0.00% covered (danger)
0.00%
0 / 1
20
 processCandidate
0.00% covered (danger)
0.00%
0 / 12
0.00% covered (danger)
0.00%
0 / 1
20
 verboseLog
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
6
1<?php
2
3namespace GrowthExperiments\Maintenance;
4
5use CirrusSearch\Query\ArticleTopicFeature;
6use Generator;
7use GrowthExperiments\GrowthExperimentsServices;
8use GrowthExperiments\NewcomerTasks\AddLink\LinkRecommendationStore;
9use GrowthExperiments\NewcomerTasks\AddLink\LinkRecommendationUpdater;
10use GrowthExperiments\NewcomerTasks\ConfigurationLoader\ConfigurationLoader;
11use GrowthExperiments\NewcomerTasks\ConfigurationLoader\TopicDecorator;
12use GrowthExperiments\NewcomerTasks\Task\TaskSetFilters;
13use GrowthExperiments\NewcomerTasks\TaskSuggester\TaskSuggester;
14use GrowthExperiments\NewcomerTasks\TaskType\LinkRecommendationTaskType;
15use GrowthExperiments\NewcomerTasks\TaskType\LinkRecommendationTaskTypeHandler;
16use GrowthExperiments\NewcomerTasks\TaskType\NullTaskTypeHandler;
17use GrowthExperiments\WikiConfigException;
18use Maintenance;
19use MediaWiki\Cache\LinkBatchFactory;
20use MediaWiki\Config\Config;
21use MediaWiki\MediaWikiServices;
22use MediaWiki\Status\Status;
23use MediaWiki\Title\Title;
24use MediaWiki\Title\TitleFactory;
25use MediaWiki\User\User;
26use MediaWiki\WikiMap\WikiMap;
27use RuntimeException;
28use StatusValue;
29use Wikimedia\Rdbms\DBReadOnlyError;
30
// Locate the MediaWiki installation root: use the MW_INSTALL_PATH environment
// variable when it is set, otherwise fall back to the directory three levels
// above this file. Then load the maintenance script base class from there.
$IP = getenv( 'MW_INSTALL_PATH' );
$IP = ( $IP === false ) ? __DIR__ . '/../../..' : $IP;
require_once $IP . '/maintenance/Maintenance.php';
36
/**
 * Update the growthexperiments_link_recommendations table to ensure there are enough
 * recommendations for all topics.
 *
 * For every ORES topic the script asks the task suggester how many link recommendation
 * tasks are currently available, and when that is below the task type's configured
 * minimum it looks for candidate articles without a recommendation and tries to
 * generate one for each of them until the pool is filled or no progress is made.
 *
 * Options:
 *  --topic   only process the given ORES topic
 *  --page    only process the given page (skips the per-topic logic entirely)
 *  --force   generate recommendations even if they fail quality criteria
 *  --verbose show per-candidate debug output
 */
class RefreshLinkRecommendations extends Maintenance {

    /** @var Config Set by initGrowthConfig(). */
    private $growthConfig;

    /** @var TitleFactory Set by initServices(). */
    private $titleFactory;

    /** @var LinkBatchFactory Set by initServices(). */
    private $linkBatchFactory;

    /** @var ConfigurationLoader Set by initServices(); includes the candidate task type. */
    private $configurationLoader;

    /** @var TaskSuggester Set by initServices(). */
    private $taskSuggester;

    /** @var LinkRecommendationStore Set by initServices(). */
    private $linkRecommendationStore;

    /** @var LinkRecommendationUpdater Set by initServices(). */
    private $linkRecommendationUpdater;

    /** @var LinkRecommendationTaskType Set by initConfig(). */
    private $recommendationTaskType;

    /** @var User User the search queries are run as. Set by initConfig(). */
    private $searchUser;

    /**
     * Declare required extensions, the script description, options and batch size.
     */
    public function __construct() {
        parent::__construct();
        $this->requireExtension( 'GrowthExperiments' );
        $this->requireExtension( 'CirrusSearch' );

        $this->addDescription( 'Update the growthexperiments_link_recommendations table to ensure '
            . 'there are enough recommendations for all topics.' );
        $this->addOption( 'topic', 'Only update articles in the given ORES topic.', false, true );
        $this->addOption( 'page', 'Only update a specific page.', false, true );
        $this->addOption( 'force', 'Generate recommendations even if they fail quality criteria.' );
        $this->addOption( 'verbose', 'Show debug output.' );
        $this->setBatchSize( 500 );
    }

    /**
     * Additionally require EventBus when GELinkRecommendationsUseEventGate is enabled,
     * then defer to the parent implementation.
     */
    public function checkRequiredExtensions() {
        // Hack: must be early enough for requireExtension to work but late enough for config
        // to be available.
        $growthServices = GrowthExperimentsServices::wrap( MediaWikiServices::getInstance() );
        if ( $growthServices->getGrowthConfig()->get( 'GELinkRecommendationsUseEventGate' ) ) {
            $this->requireExtension( 'EventBus' );
        }
        parent::checkRequiredExtensions();
    }

    /**
     * Script entry point.
     *
     * Bails out early when link recommendations are disabled, when tasks are served
     * from a remote API, or when another invocation already holds the per-wiki lock.
     * With --page only that page is processed; otherwise every selected ORES topic
     * is refreshed in turn.
     */
    public function execute() {
        $this->initGrowthConfig();
        if ( !$this->growthConfig->get( 'GENewcomerTasksLinkRecommendationsEnabled' ) ) {
            $this->output( "Disabled\n" );
            return;
        } elseif ( $this->growthConfig->get( 'GENewcomerTasksRemoteApiUrl' ) ) {
            $this->output( "Local tasks disabled\n" );
            return;
        }
        $this->initServices();
        $this->initConfig();

        // Non-blocking (timeout 0) named lock so that overlapping runs on the same wiki
        // exit instead of competing with each other.
        $lockName = 'GrowthExperiments-RefreshLinkRecommendations-' . WikiMap::getCurrentWikiId();
        if ( !$this->linkRecommendationStore->getDB( DB_PRIMARY )->lock( $lockName, __METHOD__, 0 ) ) {
            $this->output( "Previous invocation of the script is still running\n" );
            return;
        }

        $force = $this->hasOption( 'force' );
        $this->output( "Refreshing link recommendations...\n" );

        $pageName = $this->getOption( 'page' );
        // Compare against null rather than relying on truthiness: "0" is falsy in PHP,
        // so a truthiness check would ignore `--page 0` and refresh every topic instead.
        if ( $pageName !== null ) {
            $title = $this->titleFactory->newFromText( $pageName );
            if ( $title ) {
                $this->processCandidate( $title, $force );
            } else {
                $this->fatalError( 'Invalid title: ' . $pageName );
            }
            return;
        }

        foreach ( $this->getOresTopics() as $oresTopic ) {
            $this->refreshTopic( $oresTopic, $force );
        }
    }

    /**
     * Top up the link recommendation task pool of a single ORES topic.
     *
     * @param string $oresTopic
     * @param bool $force Passed through to processCandidate().
     */
    private function refreshTopic( $oresTopic, bool $force ): void {
        $this->output( "  processing topic $oresTopic...\n" );
        $suggestions = $this->taskSuggester->suggest(
            $this->searchUser,
            new TaskSetFilters(
                [ LinkRecommendationTaskTypeHandler::TASK_TYPE_ID ],
                [ $oresTopic ]
            ),
            1,
            0,
            // Enabling the debug flag is relatively harmless, and disables all caching,
            // which we need here. useCache would prevent reading the cache, but would
            // still write it, which would be just a waste of space.
            [ 'debug' => true ]
        );

        // TaskSuggester::suggest() only returns StatusValue when there's an error.
        if ( $suggestions instanceof StatusValue ) {
            $this->error( Status::wrap( $suggestions )->getWikiText( false, false, 'en' ) );
            return;
        }

        $recommendationsNeeded = $this->recommendationTaskType->getMinimumTasksPerTopic()
            - $suggestions->getTotalCount();

        if ( $recommendationsNeeded <= 0 ) {
            $this->output( "    no new tasks needed\n" );
            return;
        }
        $this->output( "    $recommendationsNeeded new tasks needed\n" );
        foreach ( $this->findArticlesInTopic( $oresTopic ) as $titleBatch ) {
            $recommendationsFound = 0;
            foreach ( $titleBatch as $title ) {
                // TODO filter out protected pages. Needs to be batched. Or wait for T259346.
                $success = $this->processCandidate( $title, $force );
                if ( $success ) {
                    $recommendationsFound++;
                    $recommendationsNeeded--;
                    if ( $recommendationsNeeded <= 0 ) {
                        // Pool is full: stop both the batch and the batch generator.
                        break 2;
                    }
                }
            }
            $this->waitForReplication();
            // findArticlesInTopic() picks articles at random, so we need to abort the loop
            // at some point. Do it when no new tasks were generated from the current batch
            // (which is also the case for the empty batch yielded when the search runs dry).
            if ( $recommendationsFound === 0 ) {
                break;
            }
        }
        $this->output( ( $recommendationsNeeded === 0 ) ? "    task pool filled\n"
            : "    topic exhausted, $recommendationsNeeded tasks still needed\n" );
    }

    /**
     * Load the GrowthExperiments configuration into $this->growthConfig.
     */
    protected function initGrowthConfig(): void {
        // Needs to be separate from initServices/initConfig as checking whether the script
        // should run on a given wiki relies on this, but initServices/initConfig will break
        // on some wikis where the script is not supposed to run and the task configuration
        // is missing.
        $services = MediaWikiServices::getInstance();
        $growthServices = GrowthExperimentsServices::wrap( $services );
        $this->growthConfig = $growthServices->getGrowthConfig();
    }

    /**
     * Fetch the services used by the script and build a task suggester whose
     * configuration also knows the internal "_nolinkrecommendations" task type.
     */
    protected function initServices(): void {
        // Extend the task type configuration with a custom "candidate" task type, which
        // finds articles which do not have link recommendations.
        $linkRecommendationCandidateTaskType = NullTaskTypeHandler::getNullTaskType(
            '_nolinkrecommendations', '-hasrecommendation:link' );

        $services = MediaWikiServices::getInstance();
        $growthServices = GrowthExperimentsServices::wrap( $services );
        $newcomerTaskConfigurationLoader = $growthServices->getNewcomerTasksConfigurationLoader();
        $this->configurationLoader = new TopicDecorator(
            $newcomerTaskConfigurationLoader,
            true,
            [ $linkRecommendationCandidateTaskType ]
        );
        $this->titleFactory = $services->getTitleFactory();
        $this->linkBatchFactory = $services->getLinkBatchFactory();
        $this->taskSuggester = $growthServices->getTaskSuggesterFactory()->create( $this->configurationLoader );
        $this->linkRecommendationStore = $growthServices->getLinkRecommendationStore();
        $this->linkRecommendationUpdater = $growthServices->getLinkRecommendationUpdater();
    }

    /**
     * Look up the link recommendation task type and create the user searches run as.
     * Aborts via fatalError() when the configured task type is missing or is not a
     * LinkRecommendationTaskType.
     */
    protected function initConfig(): void {
        $taskTypes = $this->configurationLoader->getTaskTypes();
        $taskType = $taskTypes[LinkRecommendationTaskTypeHandler::TASK_TYPE_ID] ?? null;
        if ( !$taskType || !$taskType instanceof LinkRecommendationTaskType ) {
            $this->fatalError( sprintf( "'%s' is not a link recommendation task type",
                LinkRecommendationTaskTypeHandler::TASK_TYPE_ID ) );
        } else {
            $this->recommendationTaskType = $taskType;
        }
        $this->searchUser = User::newSystemUser( 'Maintenance script', [ 'steal' => true ] );
    }

    /**
     * Get the ORES topics to process: all keys of ArticleTopicFeature::TERMS_TO_LABELS,
     * or just the one named by --topic. An unknown --topic value is a fatal error.
     *
     * @return string[]
     */
    private function getOresTopics(): array {
        $topic = $this->getOption( 'topic' );
        $oresTopics = array_keys( ArticleTopicFeature::TERMS_TO_LABELS );
        // Explicit null check: a truthiness test would treat `--topic 0` or an empty
        // value as "no topic given" and silently process every topic.
        if ( $topic !== null ) {
            $oresTopics = array_values( array_intersect( $oresTopics, [ $topic ] ) );
            if ( !$oresTopics ) {
                $this->fatalError( "invalid topic $topic" );
            }
        }
        return $oresTopics;
    }

    /**
     * Lazily yield batches of candidate articles (pages without a link recommendation)
     * belonging to the given topic. Each batch holds up to getBatchSize() titles, and
     * the generator ends after a search returns no candidates; that last, empty batch
     * is still yielded.
     *
     * @param string $oresTopic
     * @return Generator<Title[]>
     * @throws RuntimeException When the search for candidates fails.
     */
    private function findArticlesInTopic( $oresTopic ): Generator {
        $batchSize = $this->getBatchSize();
        do {
            $this->output( "    fetching $batchSize tasks...\n" );
            $candidates = $this->taskSuggester->suggest(
                $this->searchUser,
                new TaskSetFilters(
                    [ '_nolinkrecommendations' ],
                    [ $oresTopic ]
                ),
                $batchSize,
                null,
                [ 'debug' => true ]
            );
            if ( $candidates instanceof StatusValue ) {
                // FIXME exiting will make the cronjob unreliable. Not exiting might result
                //  in an infinite error loop. Neither looks like a great option.
                throw new RuntimeException( 'Search error: '
                    . Status::wrap( $candidates )->getWikiText( false, false, 'en' ) );
            }

            $linkTargets = $titles = [];
            foreach ( $candidates as $candidate ) {
                $linkTargets[] = $candidate->getTitle();
            }
            // Run one link batch over all candidates before turning them into Title objects.
            $this->linkBatchFactory->newLinkBatch( $linkTargets )->execute();
            foreach ( $linkTargets as $linkTarget ) {
                $titles[] = $this->titleFactory->newFromLinkTarget( $linkTarget );
            }
            yield $titles;
        } while ( $candidates->count() );
    }

    /**
     * Evaluate a task candidate and potentially generate the task.
     *
     * A read-only database or a link recommendation configuration error aborts the
     * whole script via fatalError(); any other failure is only reported in verbose mode.
     *
     * @param Title $title
     * @param bool $force Ignore all failed conditions that can be safely ignored.
     * @return bool Whether a new task was generated.
     */
    private function processCandidate( Title $title, bool $force = false ): bool {
        $this->verboseLog( "    checking candidate " . $title->getPrefixedDBkey() . "... " );
        try {
            $status = $this->linkRecommendationUpdater->processCandidate( $title, $force );
            if ( $status->isOK() ) {
                $this->verboseLog( "success, updating index\n" );
                return true;
            } else {
                $error = Status::wrap( $status )->getWikiText( false, false, 'en' );
                $this->verboseLog( "$error\n" );
            }
        } catch ( DBReadOnlyError $e ) {
            // This is a long-running script, read-only state can change in the middle.
            // It's run frequently so just do the easy thing and abort.
            $this->fatalError( 'DB is readonly, aborting' );
        } catch ( WikiConfigException $e ) {
            // Link recommendations are not configured correctly.
            $this->fatalError( $e->getMessage() );
        }
        return false;
    }

    /**
     * Output a message only when the --verbose option was given.
     *
     * @param string $message
     */
    private function verboseLog( string $message ): void {
        if ( $this->hasOption( 'verbose' ) ) {
            $this->output( $message );
        }
    }

}
311
// Name this script's class in $maintClass (presumably read by the runner included
// below - confirm against MediaWiki's Maintenance bootstrap), then include the file
// referenced by RUN_MAINTENANCE_IF_MAIN.
$maintClass = \GrowthExperiments\Maintenance\RefreshLinkRecommendations::class;
require_once RUN_MAINTENANCE_IF_MAIN;