Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 147 |
|
0.00% |
0 / 10 |
CRAP | |
0.00% |
0 / 1 |
RefreshLinkRecommendations | |
0.00% |
0 / 141 |
|
0.00% |
0 / 10 |
1332 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
2 | |||
checkRequiredExtensions | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
execute | |
0.00% |
0 / 58 |
|
0.00% |
0 / 1 |
240 | |||
initGrowthConfig | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
initServices | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
2 | |||
initConfig | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
12 | |||
getOresTopics | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
12 | |||
findArticlesInTopic | |
0.00% |
0 / 23 |
|
0.00% |
0 / 1 |
20 | |||
processCandidate | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
20 | |||
verboseLog | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 |
1 | <?php |
2 | |
3 | namespace GrowthExperiments\Maintenance; |
4 | |
5 | use CirrusSearch\Query\ArticleTopicFeature; |
6 | use Generator; |
7 | use GrowthExperiments\GrowthExperimentsServices; |
8 | use GrowthExperiments\NewcomerTasks\AddLink\LinkRecommendationStore; |
9 | use GrowthExperiments\NewcomerTasks\AddLink\LinkRecommendationUpdater; |
10 | use GrowthExperiments\NewcomerTasks\ConfigurationLoader\ConfigurationLoader; |
11 | use GrowthExperiments\NewcomerTasks\ConfigurationLoader\TopicDecorator; |
12 | use GrowthExperiments\NewcomerTasks\Task\TaskSetFilters; |
13 | use GrowthExperiments\NewcomerTasks\TaskSuggester\TaskSuggester; |
14 | use GrowthExperiments\NewcomerTasks\TaskType\LinkRecommendationTaskType; |
15 | use GrowthExperiments\NewcomerTasks\TaskType\LinkRecommendationTaskTypeHandler; |
16 | use GrowthExperiments\NewcomerTasks\TaskType\NullTaskTypeHandler; |
17 | use GrowthExperiments\WikiConfigException; |
18 | use Maintenance; |
19 | use MediaWiki\Cache\LinkBatchFactory; |
20 | use MediaWiki\Config\Config; |
21 | use MediaWiki\MediaWikiServices; |
22 | use MediaWiki\Status\Status; |
23 | use MediaWiki\Title\Title; |
24 | use MediaWiki\Title\TitleFactory; |
25 | use MediaWiki\User\User; |
26 | use MediaWiki\WikiMap\WikiMap; |
27 | use RuntimeException; |
28 | use StatusValue; |
29 | use Wikimedia\Rdbms\DBReadOnlyError; |
30 | |
// Locate the MediaWiki installation: use MW_INSTALL_PATH when the environment defines it,
// otherwise fall back to the directory three levels above this file.
$IP = getenv( 'MW_INSTALL_PATH' );
$IP = ( $IP === false ) ? __DIR__ . '/../../..' : $IP;
require_once "$IP/maintenance/Maintenance.php";
36 | |
/**
 * Update the growthexperiments_link_recommendations table to ensure there are enough
 * recommendations for all topics.
 *
 * For every ORES topic (or only the one passed via --topic) the script asks the task
 * suggester how many link recommendation tasks are available. When that is below the task
 * type's minimum per topic, it fetches batches of candidate articles that have no link
 * recommendation yet and tries to generate one for each, until enough were created or a
 * whole batch produced nothing new. With --page, only that single page is processed.
 */
class RefreshLinkRecommendations extends Maintenance {

	/** @var Config Growth configuration; set by initGrowthConfig(). */
	private $growthConfig;

	/** @var TitleFactory Set by initServices(). */
	private $titleFactory;

	/** @var LinkBatchFactory Set by initServices(). */
	private $linkBatchFactory;

	/** @var ConfigurationLoader Task configuration extended with the candidate task type. */
	private $configurationLoader;

	/** @var TaskSuggester Suggester built on top of $configurationLoader. */
	private $taskSuggester;

	/** @var LinkRecommendationStore Also used to obtain the DB handle for the run lock. */
	private $linkRecommendationStore;

	/** @var LinkRecommendationUpdater Does the actual per-page work. */
	private $linkRecommendationUpdater;

	/** @var LinkRecommendationTaskType Set by initConfig(). */
	private $recommendationTaskType;

	/** @var User System user on whose behalf searches are made; set by initConfig(). */
	private $searchUser;

	/**
	 * Declare required extensions, the script description, its options and the batch size.
	 */
	public function __construct() {
		parent::__construct();
		$this->requireExtension( 'GrowthExperiments' );
		$this->requireExtension( 'CirrusSearch' );

		$this->addDescription( 'Update the growthexperiments_link_recommendations table to ensure '
			. 'there are enough recommendations for all topics.' );
		$this->addOption( 'topic', 'Only update articles in the given ORES topic.', false, true );
		$this->addOption( 'page', 'Only update a specific page.', false, true );
		$this->addOption( 'force', 'Generate recommendations even if they fail quality criteria.' );
		$this->addOption( 'verbose', 'Show debug output.' );
		$this->setBatchSize( 500 );
	}

	/**
	 * Additionally require EventBus when GELinkRecommendationsUseEventGate is enabled, then
	 * defer to the parent check.
	 */
	public function checkRequiredExtensions() {
		// Hack: must be early enough for requireExtension to work but late enough for config
		// to be available.
		$growthServices = GrowthExperimentsServices::wrap( MediaWikiServices::getInstance() );
		if ( $growthServices->getGrowthConfig()->get( 'GELinkRecommendationsUseEventGate' ) ) {
			$this->requireExtension( 'EventBus' );
		}
		parent::checkRequiredExtensions();
	}

	/**
	 * Script entry point.
	 *
	 * Bails out early when link recommendations are disabled, when tasks are served from a
	 * remote API, or when another invocation holds the per-wiki lock. Otherwise processes
	 * either the single page given via --page or every selected ORES topic.
	 */
	public function execute() {
		$this->initGrowthConfig();
		if ( !$this->growthConfig->get( 'GENewcomerTasksLinkRecommendationsEnabled' ) ) {
			$this->output( "Disabled\n" );
			return;
		} elseif ( $this->growthConfig->get( 'GENewcomerTasksRemoteApiUrl' ) ) {
			$this->output( "Local tasks disabled\n" );
			return;
		}
		$this->initServices();
		$this->initConfig();

		// One run per wiki at a time. The third argument (0) is passed as the lock timeout,
		// so an already-held lock makes this invocation give up instead of queueing.
		$lockName = 'GrowthExperiments-RefreshLinkRecommendations-' . WikiMap::getCurrentWikiId();
		$primaryDb = $this->linkRecommendationStore->getDB( DB_PRIMARY );
		if ( !$primaryDb->lock( $lockName, __METHOD__, 0 ) ) {
			$this->output( "Previous invocation of the script is still running\n" );
			return;
		}

		$force = $this->hasOption( 'force' );
		$this->output( "Refreshing link recommendations...\n" );

		// Single-page mode: process just that page and skip the topic loop entirely.
		$pageName = $this->getOption( 'page' );
		if ( $pageName ) {
			$title = $this->titleFactory->newFromText( $pageName );
			if ( $title ) {
				$this->processCandidate( $title, $force );
			} else {
				$this->fatalError( 'Invalid title: ' . $pageName );
			}
			return;
		}

		foreach ( $this->getOresTopics() as $oresTopic ) {
			$this->refreshTopic( $oresTopic, $force );
		}
	}

	/**
	 * Bring the task pool of a single ORES topic up to the configured minimum.
	 *
	 * @param string $oresTopic
	 * @param bool $force Passed through to processCandidate().
	 */
	private function refreshTopic( $oresTopic, bool $force ): void {
		$this->output( " processing topic $oresTopic...\n" );
		$suggestions = $this->taskSuggester->suggest(
			$this->searchUser,
			new TaskSetFilters(
				[ LinkRecommendationTaskTypeHandler::TASK_TYPE_ID ],
				[ $oresTopic ]
			),
			1,
			0,
			// Enabling the debug flag is relatively harmless, and disables all caching,
			// which we need here. useCache would prevent reading the cache, but would
			// still write it, which would be just a waste of space.
			[ 'debug' => true ]
		);

		// TaskSuggester::suggest() only returns StatusValue when there's an error.
		if ( $suggestions instanceof StatusValue ) {
			$this->error( Status::wrap( $suggestions )->getWikiText( false, false, 'en' ) );
			return;
		}

		$recommendationsNeeded = $this->recommendationTaskType->getMinimumTasksPerTopic()
			- $suggestions->getTotalCount();

		if ( $recommendationsNeeded <= 0 ) {
			$this->output( " no new tasks needed\n" );
			return;
		}
		$this->output( " $recommendationsNeeded new tasks needed\n" );

		foreach ( $this->findArticlesInTopic( $oresTopic ) as $titleBatch ) {
			$recommendationsFound = 0;
			foreach ( $titleBatch as $title ) {
				// TODO filter out protected pages. Needs to be batched. Or wait for T259346.
				if ( !$this->processCandidate( $title, $force ) ) {
					continue;
				}
				$recommendationsFound++;
				$recommendationsNeeded--;
				if ( $recommendationsNeeded <= 0 ) {
					// Pool is full: leave both the batch loop and the generator loop.
					break 2;
				}
			}
			$this->waitForReplication();
			// findArticlesInTopic() picks articles at random, so we need to abort the loop
			// at some point. Do it when no new tasks were generated from the current batch.
			if ( $recommendationsFound === 0 ) {
				break;
			}
		}

		// Same "<= 0" test as inside the loop, so both places agree on what "filled" means.
		if ( $recommendationsNeeded <= 0 ) {
			$this->output( " task pool filled\n" );
		} else {
			$this->output( " topic exhausted, $recommendationsNeeded tasks still needed\n" );
		}
	}

	/**
	 * Load only the Growth configuration object.
	 */
	protected function initGrowthConfig(): void {
		// Needs to be separate from initServices/initConfig as checking whether the script
		// should run on a given wiki relies on this, but initServices/initConfig will break
		// on some wikis where the script is not supposed to run and the task configuration
		// is missing.
		$services = MediaWikiServices::getInstance();
		$growthServices = GrowthExperimentsServices::wrap( $services );
		$this->growthConfig = $growthServices->getGrowthConfig();
	}

	/**
	 * Fetch the core and Growth services this script works with and store them on the
	 * instance.
	 */
	protected function initServices(): void {
		// Extend the task type configuration with a custom "candidate" task type, which
		// finds articles which do not have link recommendations.
		$linkRecommendationCandidateTaskType = NullTaskTypeHandler::getNullTaskType(
			'_nolinkrecommendations', '-hasrecommendation:link' );

		$services = MediaWikiServices::getInstance();
		$growthServices = GrowthExperimentsServices::wrap( $services );
		$newcomerTaskConfigurationLoader = $growthServices->getNewcomerTasksConfigurationLoader();
		$this->configurationLoader = new TopicDecorator(
			$newcomerTaskConfigurationLoader,
			true,
			[ $linkRecommendationCandidateTaskType ]
		);
		$this->titleFactory = $services->getTitleFactory();
		$this->linkBatchFactory = $services->getLinkBatchFactory();
		$this->taskSuggester = $growthServices->getTaskSuggesterFactory()->create( $this->configurationLoader );
		$this->linkRecommendationStore = $growthServices->getLinkRecommendationStore();
		$this->linkRecommendationUpdater = $growthServices->getLinkRecommendationUpdater();
	}

	/**
	 * Resolve the link recommendation task type from the loaded task configuration and
	 * create the system user used for searches. Calls fatalError() when the configured
	 * task type is missing or is not a LinkRecommendationTaskType.
	 */
	protected function initConfig(): void {
		$taskTypes = $this->configurationLoader->getTaskTypes();
		$taskType = $taskTypes[LinkRecommendationTaskTypeHandler::TASK_TYPE_ID] ?? null;
		// instanceof is false for null as well, so this covers the "not configured" case too.
		if ( !( $taskType instanceof LinkRecommendationTaskType ) ) {
			$this->fatalError( sprintf( "'%s' is not a link recommendation task type",
				LinkRecommendationTaskTypeHandler::TASK_TYPE_ID ) );
		} else {
			$this->recommendationTaskType = $taskType;
		}
		$this->searchUser = User::newSystemUser( 'Maintenance script', [ 'steal' => true ] );
	}

	/**
	 * List the ORES topics to process: all keys of ArticleTopicFeature::TERMS_TO_LABELS, or
	 * only the one given via --topic. Calls fatalError() when --topic is not a known topic.
	 *
	 * @return string[]
	 */
	private function getOresTopics(): array {
		$topic = $this->getOption( 'topic' );
		$oresTopics = array_keys( ArticleTopicFeature::TERMS_TO_LABELS );
		if ( $topic ) {
			$oresTopics = array_intersect( $oresTopics, [ $topic ] );
			if ( !$oresTopics ) {
				$this->fatalError( "invalid topic $topic" );
			}
		}
		return $oresTopics;
	}

	/**
	 * Lazily produce batches of candidate pages in the given topic that have no link
	 * recommendation yet. Keeps fetching new batches for as long as the search returns
	 * results, so the caller is responsible for stopping the iteration.
	 *
	 * @param string $oresTopic
	 * @return Generator<Title[]>
	 * @throws RuntimeException When the search returns an error status.
	 */
	private function findArticlesInTopic( $oresTopic ) {
		$batchSize = $this->getBatchSize();
		while ( true ) {
			$this->output( " fetching $batchSize tasks...\n" );
			$candidates = $this->taskSuggester->suggest(
				$this->searchUser,
				new TaskSetFilters(
					[ '_nolinkrecommendations' ],
					[ $oresTopic ]
				),
				$batchSize,
				null,
				[ 'debug' => true ]
			);
			if ( $candidates instanceof StatusValue ) {
				// FIXME exiting will make the cronjob unreliable. Not exiting might result
				// in an infinite error loop. Neither looks like a great option.
				throw new RuntimeException( 'Search error: '
					. Status::wrap( $candidates )->getWikiText( false, false, 'en' ) );
			}

			// Nothing left to offer: stop before yielding so that callers never receive an
			// empty batch.
			if ( !$candidates->count() ) {
				return;
			}

			$linkTargets = [];
			foreach ( $candidates as $candidate ) {
				$linkTargets[] = $candidate->getTitle();
			}
			// Run the link batch for the whole set before the Title objects are created.
			$this->linkBatchFactory->newLinkBatch( $linkTargets )->execute();

			$titles = [];
			foreach ( $linkTargets as $linkTarget ) {
				$titles[] = $this->titleFactory->newFromLinkTarget( $linkTarget );
			}
			yield $titles;
		}
	}

	/**
	 * Evaluate a task candidate and potentially generate the task.
	 *
	 * A read-only database or a wiki configuration error aborts the whole script via
	 * fatalError(); any other failure is only reported in verbose mode.
	 *
	 * @param Title $title
	 * @param bool $force Ignore all failed conditions that can be safely ignored.
	 * @return bool Whether a new task was generated.
	 */
	private function processCandidate( Title $title, bool $force = false ): bool {
		$this->verboseLog( " checking candidate " . $title->getPrefixedDBkey() . "... " );
		try {
			$status = $this->linkRecommendationUpdater->processCandidate( $title, $force );
			if ( $status->isOK() ) {
				$this->verboseLog( "success, updating index\n" );
				return true;
			}
			$error = Status::wrap( $status )->getWikiText( false, false, 'en' );
			$this->verboseLog( "$error\n" );
		} catch ( DBReadOnlyError $e ) {
			// This is a long-running script, read-only state can change in the middle.
			// It's run frequently so just do the easy thing and abort.
			$this->fatalError( 'DB is readonly, aborting' );
		} catch ( WikiConfigException $e ) {
			// Link recommendations are not configured correctly.
			$this->fatalError( $e->getMessage() );
		}
		return false;
	}

	/**
	 * Print a message only when the --verbose option was given.
	 *
	 * @param string $message
	 */
	private function verboseLog( string $message ): void {
		if ( $this->hasOption( 'verbose' ) ) {
			$this->output( $message );
		}
	}

}
311 | |
// Name the maintenance class defined in this file, then include the file referenced by
// RUN_MAINTENANCE_IF_MAIN. The assignment must come first; presumably the included runner
// reads $maintClass to decide what to execute - confirm against MediaWiki core.
$maintClass = RefreshLinkRecommendations::class;
require_once RUN_MAINTENANCE_IF_MAIN;