Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 150
0.00% covered (danger)
0.00%
0 / 10
CRAP
0.00% covered (danger)
0.00%
0 / 1
RefreshLinkRecommendations
0.00% covered (danger)
0.00%
0 / 144
0.00% covered (danger)
0.00%
0 / 10
1406
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 10
0.00% covered (danger)
0.00%
0 / 1
2
 checkRequiredExtensions
0.00% covered (danger)
0.00%
0 / 4
0.00% covered (danger)
0.00%
0 / 1
6
 execute
0.00% covered (danger)
0.00%
0 / 59
0.00% covered (danger)
0.00%
0 / 1
240
 initGrowthConfig
0.00% covered (danger)
0.00%
0 / 3
0.00% covered (danger)
0.00%
0 / 1
2
 initServices
0.00% covered (danger)
0.00%
0 / 17
0.00% covered (danger)
0.00%
0 / 1
6
 initConfig
0.00% covered (danger)
0.00%
0 / 7
0.00% covered (danger)
0.00%
0 / 1
12
 getOresTopics
0.00% covered (danger)
0.00%
0 / 7
0.00% covered (danger)
0.00%
0 / 1
12
 findArticlesInTopic
0.00% covered (danger)
0.00%
0 / 23
0.00% covered (danger)
0.00%
0 / 1
20
 processCandidate
0.00% covered (danger)
0.00%
0 / 12
0.00% covered (danger)
0.00%
0 / 1
20
 verboseLog
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
6
1<?php
2
3namespace GrowthExperiments\Maintenance;
4
5use CirrusSearch\Query\ArticleTopicFeature;
6use Generator;
7use GrowthExperiments\GrowthExperimentsServices;
8use GrowthExperiments\NewcomerTasks\AddLink\LinkRecommendationStore;
9use GrowthExperiments\NewcomerTasks\AddLink\LinkRecommendationUpdater;
10use GrowthExperiments\NewcomerTasks\ConfigurationLoader\AbstractDataConfigurationLoader;
11use GrowthExperiments\NewcomerTasks\ConfigurationLoader\ConfigurationLoader;
12use GrowthExperiments\NewcomerTasks\ConfigurationLoader\TopicDecorator;
13use GrowthExperiments\NewcomerTasks\Task\TaskSetFilters;
14use GrowthExperiments\NewcomerTasks\TaskSuggester\TaskSuggester;
15use GrowthExperiments\NewcomerTasks\TaskType\LinkRecommendationTaskType;
16use GrowthExperiments\NewcomerTasks\TaskType\LinkRecommendationTaskTypeHandler;
17use GrowthExperiments\NewcomerTasks\TaskType\NullTaskTypeHandler;
18use GrowthExperiments\WikiConfigException;
19use MediaWiki\Cache\LinkBatchFactory;
20use MediaWiki\Config\Config;
21use MediaWiki\Maintenance\Maintenance;
22use MediaWiki\Status\Status;
23use MediaWiki\Title\Title;
24use MediaWiki\Title\TitleFactory;
25use MediaWiki\User\User;
26use MediaWiki\WikiMap\WikiMap;
27use RuntimeException;
28use StatusValue;
29use Wikimedia\Rdbms\DBReadOnlyError;
30
// Locate the MediaWiki installation: use the MW_INSTALL_PATH environment variable when it
// is set, otherwise fall back to the directory three levels above this file.
$IP = getenv( 'MW_INSTALL_PATH' );
$IP = ( $IP === false ) ? __DIR__ . '/../../..' : $IP;
require_once "{$IP}/maintenance/Maintenance.php";
36
37/**
38 * Update the growthexperiments_link_recommendations table to ensure there are enough
39 * recommendations for all topics
40 */
41class RefreshLinkRecommendations extends Maintenance {
42
43    /** @var Config */
44    private $growthConfig;
45
46    /** @var TitleFactory */
47    private $titleFactory;
48
49    /** @var LinkBatchFactory */
50    private $linkBatchFactory;
51
52    /** @var ConfigurationLoader */
53    private $configurationLoader;
54
55    /** @var TaskSuggester */
56    private $taskSuggester;
57
58    /** @var LinkRecommendationStore */
59    private $linkRecommendationStore;
60
61    /** @var LinkRecommendationUpdater */
62    private $linkRecommendationUpdater;
63
64    /** @var LinkRecommendationTaskType */
65    private $recommendationTaskType;
66
67    /** @var User */
68    private $searchUser;
69
70    public function __construct() {
71        parent::__construct();
72        $this->requireExtension( 'GrowthExperiments' );
73        $this->requireExtension( 'CirrusSearch' );
74
75        $this->addDescription( 'Update the growthexperiments_link_recommendations table to ensure '
76            . 'there are enough recommendations for all topics.' );
77        $this->addOption( 'topic', 'Only update articles in the given ORES topic.', false, true );
78        $this->addOption( 'page', 'Only update a specific page.', false, true );
79        $this->addOption( 'force', 'Generate recommendations even if they fail quality criteria.' );
80        $this->addOption( 'verbose', 'Show debug output.' );
81        $this->setBatchSize( 500 );
82    }
83
84    public function checkRequiredExtensions() {
85        // Hack: must be early enough for requireExtension to work but late enough for config
86        // to be available.
87        $growthServices = GrowthExperimentsServices::wrap( $this->getServiceContainer() );
88        if ( $growthServices->getGrowthConfig()->get( 'GELinkRecommendationsUseEventGate' ) ) {
89            $this->requireExtension( 'EventBus' );
90        }
91        parent::checkRequiredExtensions();
92    }
93
94    public function execute() {
95        $this->initGrowthConfig();
96        if ( !$this->growthConfig->get( 'GENewcomerTasksLinkRecommendationsEnabled' ) ) {
97            $this->output( "Disabled\n" );
98            return;
99        } elseif ( $this->growthConfig->get( 'GENewcomerTasksRemoteApiUrl' ) ) {
100            $this->output( "Local tasks disabled\n" );
101            return;
102        }
103        $this->initServices();
104        $this->initConfig();
105        $lockName = 'GrowthExperiments-RefreshLinkRecommendations-' . WikiMap::getCurrentWikiId();
106        if ( !$this->linkRecommendationStore->getDB( DB_PRIMARY )->lock( $lockName, __METHOD__, 0 ) ) {
107            $this->output( "Previous invocation of the script is still running\n" );
108            return;
109        }
110
111        $force = $this->hasOption( 'force' );
112        $this->output( "Refreshing link recommendations...\n" );
113
114        $pageName = $this->getOption( 'page' );
115        if ( $pageName ) {
116            $title = $this->titleFactory->newFromText( $pageName );
117            if ( $title ) {
118                $this->processCandidate( $title, $force );
119            } else {
120                $this->fatalError( 'Invalid title: ' . $pageName );
121            }
122            return;
123        }
124
125        $oresTopics = $this->getOresTopics();
126        foreach ( $oresTopics as $oresTopic ) {
127            $this->output( "  processing topic $oresTopic...\n" );
128            $suggestions = $this->taskSuggester->suggest(
129                $this->searchUser,
130                new TaskSetFilters(
131                    [ LinkRecommendationTaskTypeHandler::TASK_TYPE_ID ],
132                    [ $oresTopic ]
133                ),
134                1,
135                0,
136                // Enabling the debug flag is relatively harmless, and disables all caching,
137                // which we need here. useCache would prevent reading the cache, but would
138                // still write it, which would be just a waste of space.
139                [ 'debug' => true ]
140            );
141
142            // TaskSuggester::suggest() only returns StatusValue when there's an error.
143            if ( $suggestions instanceof StatusValue ) {
144                $this->error( Status::wrap( $suggestions )->getWikiText( false, false, 'en' ) );
145                continue;
146            }
147
148            $totalExistingSuggestionsCount = $suggestions->getTotalCount();
149            $recommendationsNeeded = $this->recommendationTaskType->getMinimumTasksPerTopic()
150                - $totalExistingSuggestionsCount;
151
152            if ( $recommendationsNeeded <= 0 ) {
153                $this->output( "    no new tasks needed, $totalExistingSuggestionsCount existing suggestions\n" );
154                continue;
155            }
156            $this->output( "    $recommendationsNeeded new tasks needed\n" );
157            foreach ( $this->findArticlesInTopic( $oresTopic ) as $titleBatch ) {
158                $recommendationsFound = 0;
159                foreach ( $titleBatch as $title ) {
160                    // TODO filter out protected pages. Needs to be batched. Or wait for T259346.
161                    $success = $this->processCandidate( $title, $force );
162                    if ( $success ) {
163                        $recommendationsFound++;
164                        $recommendationsNeeded--;
165                        if ( $recommendationsNeeded <= 0 ) {
166                            break 2;
167                        }
168                    }
169                }
170                $this->waitForReplication();
171                // findArticlesInTopic() picks articles at random, so we need to abort the loop
172                // at some point. Do it when no new tasks were generated from the current batch.
173                if ( $recommendationsFound === 0 ) {
174                    break;
175                }
176            }
177            $this->output( ( $recommendationsNeeded === 0 ) ? "    task pool filled\n"
178                : "    topic exhausted, $recommendationsNeeded tasks still needed\n" );
179        }
180    }
181
182    protected function initGrowthConfig(): void {
183        // Needs to be separate from initServices/initConfig as checking whether the script
184        // should run on a given wiki relies on this, but initServices/initConfig will break
185        // on some wikis where the script is not supposed to run and the task configuration
186        // is missing.
187        $services = $this->getServiceContainer();
188        $growthServices = GrowthExperimentsServices::wrap( $services );
189        $this->growthConfig = $growthServices->getGrowthConfig();
190    }
191
192    protected function initServices(): void {
193        // Extend the task type configuration with a custom "candidate" task type, which
194        // finds articles which do not have link recommendations.
195        $linkRecommendationCandidateTaskType = NullTaskTypeHandler::getNullTaskType(
196            '_nolinkrecommendations', '-hasrecommendation:link' );
197
198        $services = $this->getServiceContainer();
199        $growthServices = GrowthExperimentsServices::wrap( $services );
200        $newcomerTaskConfigurationLoader = $growthServices->getNewcomerTasksConfigurationLoader();
201        if ( $newcomerTaskConfigurationLoader instanceof AbstractDataConfigurationLoader ) {
202            // Pretend link-recommendation is enabled (T371316)
203            // Task suggester cannot be adapted to query disabled task types, because it is also
204            // used in Homepage (where the disabled flag has to be honored).
205            $newcomerTaskConfigurationLoader->enableTaskType( LinkRecommendationTaskTypeHandler::TASK_TYPE_ID );
206        }
207        $this->configurationLoader = new TopicDecorator(
208            $newcomerTaskConfigurationLoader,
209            true,
210            [ $linkRecommendationCandidateTaskType ]
211        );
212        $this->titleFactory = $services->getTitleFactory();
213        $this->linkBatchFactory = $services->getLinkBatchFactory();
214        $this->taskSuggester = $growthServices->getTaskSuggesterFactory()->create( $this->configurationLoader );
215        $this->linkRecommendationStore = $growthServices->getLinkRecommendationStore();
216        $this->linkRecommendationUpdater = $growthServices->getLinkRecommendationUpdater();
217    }
218
219    protected function initConfig(): void {
220        $taskTypes = $this->configurationLoader->getTaskTypes();
221        $taskType = $taskTypes[LinkRecommendationTaskTypeHandler::TASK_TYPE_ID] ?? null;
222        if ( !$taskType || !$taskType instanceof LinkRecommendationTaskType ) {
223            $this->fatalError( sprintf( "'%s' is not a link recommendation task type",
224                LinkRecommendationTaskTypeHandler::TASK_TYPE_ID ) );
225        } else {
226            $this->recommendationTaskType = $taskType;
227        }
228        $this->searchUser = User::newSystemUser( 'Maintenance script', [ 'steal' => true ] );
229    }
230
231    /**
232     * @return string[]
233     */
234    private function getOresTopics(): array {
235        $topic = $this->getOption( 'topic' );
236        $oresTopics = array_keys( ArticleTopicFeature::TERMS_TO_LABELS );
237        if ( $topic ) {
238            $oresTopics = array_intersect( $oresTopics, [ $topic ] );
239            if ( !$oresTopics ) {
240                $this->fatalError( "invalid topic $topic" );
241            }
242        }
243        return $oresTopics;
244    }
245
246    /**
247     * @param string $oresTopic
248     * @return Generator<Title[]>
249     */
250    private function findArticlesInTopic( $oresTopic ) {
251        $batchSize = $this->getBatchSize();
252        do {
253            $this->output( "    fetching $batchSize tasks...\n" );
254            $candidates = $this->taskSuggester->suggest(
255                $this->searchUser,
256                new TaskSetFilters(
257                    [ '_nolinkrecommendations' ],
258                    [ $oresTopic ]
259                ),
260                $batchSize,
261                null,
262                [ 'debug' => true ]
263            );
264            if ( $candidates instanceof StatusValue ) {
265                // FIXME exiting will make the cronjob unreliable. Not exiting might result
266                //  in an infinite error loop. Neither looks like a great option.
267                throw new RuntimeException( 'Search error: '
268                    . Status::wrap( $candidates )->getWikiText( false, false, 'en' ) );
269            }
270
271            $linkTargets = $titles = [];
272            foreach ( $candidates as $candidate ) {
273                $linkTargets[] = $candidate->getTitle();
274            }
275            $this->linkBatchFactory->newLinkBatch( $linkTargets )->execute();
276            foreach ( $linkTargets as $linkTarget ) {
277                $titles[] = $this->titleFactory->newFromLinkTarget( $linkTarget );
278            }
279            yield $titles;
280        } while ( $candidates->count() );
281    }
282
283    /**
284     * Evaluate a task candidate and potentially generate the task.
285     * @param Title $title
286     * @param bool $force Ignore all failed conditions that can be safely ignored.
287     * @return bool Whether a new task was generated.
288     */
289    private function processCandidate( Title $title, bool $force = false ): bool {
290        $this->verboseLog( "    checking candidate " . $title->getPrefixedDBkey() . "... " );
291        try {
292            $status = $this->linkRecommendationUpdater->processCandidate( $title, $force );
293            if ( $status->isOK() ) {
294                $this->verboseLog( "success, updating index\n" );
295                return true;
296            } else {
297                $error = Status::wrap( $status )->getWikiText( false, false, 'en' );
298                $this->verboseLog( "$error\n" );
299            }
300        } catch ( DBReadOnlyError $e ) {
301            // This is a long-running script, read-only state can change in the middle.
302            // It's run frequently so just do the easy thing and abort.
303            $this->fatalError( 'DB is readonly, aborting' );
304        } catch ( WikiConfigException $e ) {
305            // Link recommendations are not configured correctly.
306            $this->fatalError( $e->getMessage() );
307        }
308        return false;
309    }
310
311    private function verboseLog( string $message ): void {
312        if ( $this->hasOption( 'verbose' ) ) {
313            $this->output( $message );
314        }
315    }
316
317}
318
// Tell the maintenance bootstrap which class to run, then include the file named by the
// RUN_MAINTENANCE_IF_MAIN constant.
$maintClass = RefreshLinkRecommendations::class;
require_once RUN_MAINTENANCE_IF_MAIN;