Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 150 |
|
0.00% |
0 / 10 |
CRAP | |
0.00% |
0 / 1 |
RefreshLinkRecommendations | |
0.00% |
0 / 144 |
|
0.00% |
0 / 10 |
1406 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
2 | |||
checkRequiredExtensions | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
execute | |
0.00% |
0 / 59 |
|
0.00% |
0 / 1 |
240 | |||
initGrowthConfig | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
initServices | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
6 | |||
initConfig | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
12 | |||
getOresTopics | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
12 | |||
findArticlesInTopic | |
0.00% |
0 / 23 |
|
0.00% |
0 / 1 |
20 | |||
processCandidate | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
20 | |||
verboseLog | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 |
1 | <?php |
2 | |
3 | namespace GrowthExperiments\Maintenance; |
4 | |
5 | use CirrusSearch\Query\ArticleTopicFeature; |
6 | use Generator; |
7 | use GrowthExperiments\GrowthExperimentsServices; |
8 | use GrowthExperiments\NewcomerTasks\AddLink\LinkRecommendationStore; |
9 | use GrowthExperiments\NewcomerTasks\AddLink\LinkRecommendationUpdater; |
10 | use GrowthExperiments\NewcomerTasks\ConfigurationLoader\AbstractDataConfigurationLoader; |
11 | use GrowthExperiments\NewcomerTasks\ConfigurationLoader\ConfigurationLoader; |
12 | use GrowthExperiments\NewcomerTasks\ConfigurationLoader\TopicDecorator; |
13 | use GrowthExperiments\NewcomerTasks\Task\TaskSetFilters; |
14 | use GrowthExperiments\NewcomerTasks\TaskSuggester\TaskSuggester; |
15 | use GrowthExperiments\NewcomerTasks\TaskType\LinkRecommendationTaskType; |
16 | use GrowthExperiments\NewcomerTasks\TaskType\LinkRecommendationTaskTypeHandler; |
17 | use GrowthExperiments\NewcomerTasks\TaskType\NullTaskTypeHandler; |
18 | use GrowthExperiments\WikiConfigException; |
19 | use MediaWiki\Cache\LinkBatchFactory; |
20 | use MediaWiki\Config\Config; |
21 | use MediaWiki\Maintenance\Maintenance; |
22 | use MediaWiki\Status\Status; |
23 | use MediaWiki\Title\Title; |
24 | use MediaWiki\Title\TitleFactory; |
25 | use MediaWiki\User\User; |
26 | use MediaWiki\WikiMap\WikiMap; |
27 | use RuntimeException; |
28 | use StatusValue; |
29 | use Wikimedia\Rdbms\DBReadOnlyError; |
30 | |
// Resolve the MediaWiki installation directory: the MW_INSTALL_PATH environment
// variable wins when it is set; otherwise fall back to three directories above
// this file. Only an unset variable (getenv() returning false) triggers the fallback.
$IP = getenv( 'MW_INSTALL_PATH' );
$IP = ( $IP === false ) ? __DIR__ . '/../../..' : $IP;
require_once $IP . '/maintenance/Maintenance.php';
36 | |
/**
 * Update the growthexperiments_link_recommendations table to ensure there are enough
 * recommendations for all topics.
 *
 * For each ORES topic (or only the one given with --topic) the script asks the task
 * suggester how many link recommendation tasks exist. When that number is below the
 * task type's minimum, it fetches batches of articles in the topic which have no link
 * recommendation yet and tries to generate one for each of them, until enough were
 * generated or a whole batch produced nothing new. With --page, only that single page
 * is processed.
 */
class RefreshLinkRecommendations extends Maintenance {

	/** Name of the internal "candidate" task type that matches articles without recommendations. */
	private const CANDIDATE_TASK_TYPE_ID = '_nolinkrecommendations';

	/** @var Config GrowthExperiments configuration; set by initGrowthConfig(). */
	private $growthConfig;

	/** @var TitleFactory Set by initServices(). */
	private $titleFactory;

	/** @var LinkBatchFactory Set by initServices(). */
	private $linkBatchFactory;

	/** @var ConfigurationLoader Task configuration extended with the candidate task type. */
	private $configurationLoader;

	/** @var TaskSuggester Suggester built on top of $configurationLoader. */
	private $taskSuggester;

	/** @var LinkRecommendationStore Also provides the DB connection used for locking. */
	private $linkRecommendationStore;

	/** @var LinkRecommendationUpdater Does the actual per-page work. */
	private $linkRecommendationUpdater;

	/** @var LinkRecommendationTaskType Set by initConfig(). */
	private $recommendationTaskType;

	/** @var User User on whose behalf searches are made; set by initConfig(). */
	private $searchUser;

	/**
	 * Declare the required extensions, the command line options and the batch size.
	 */
	public function __construct() {
		parent::__construct();
		$this->requireExtension( 'GrowthExperiments' );
		$this->requireExtension( 'CirrusSearch' );

		$this->addDescription( 'Update the growthexperiments_link_recommendations table to ensure '
			. 'there are enough recommendations for all topics.' );
		$this->addOption( 'topic', 'Only update articles in the given ORES topic.', false, true );
		$this->addOption( 'page', 'Only update a specific page.', false, true );
		$this->addOption( 'force', 'Generate recommendations even if they fail quality criteria.' );
		$this->addOption( 'verbose', 'Show debug output.' );
		$this->setBatchSize( 500 );
	}

	/**
	 * Additionally require EventBus when GELinkRecommendationsUseEventGate is enabled,
	 * then run the parent's extension check.
	 */
	public function checkRequiredExtensions() {
		// Hack: must be early enough for requireExtension to work but late enough for config
		// to be available.
		$growthServices = GrowthExperimentsServices::wrap( $this->getServiceContainer() );
		if ( $growthServices->getGrowthConfig()->get( 'GELinkRecommendationsUseEventGate' ) ) {
			$this->requireExtension( 'EventBus' );
		}
		parent::checkRequiredExtensions();
	}

	/**
	 * Script entry point.
	 *
	 * Exits early (with a message) when link recommendations are disabled, when tasks are
	 * served from a remote API, or when the per-wiki lock cannot be acquired. Otherwise
	 * processes either the single page given with --page or every selected ORES topic.
	 */
	public function execute() {
		$this->initGrowthConfig();
		if ( !$this->growthConfig->get( 'GENewcomerTasksLinkRecommendationsEnabled' ) ) {
			$this->output( "Disabled\n" );
			return;
		} elseif ( $this->growthConfig->get( 'GENewcomerTasksRemoteApiUrl' ) ) {
			$this->output( "Local tasks disabled\n" );
			return;
		}
		$this->initServices();
		$this->initConfig();

		// One lock per wiki. The timeout of 0 means we do not wait for a running instance.
		$lockName = 'GrowthExperiments-RefreshLinkRecommendations-' . WikiMap::getCurrentWikiId();
		if ( !$this->linkRecommendationStore->getDB( DB_PRIMARY )->lock( $lockName, __METHOD__, 0 ) ) {
			$this->output( "Previous invocation of the script is still running\n" );
			return;
		}

		$force = $this->hasOption( 'force' );
		$this->output( "Refreshing link recommendations...\n" );

		$pageName = $this->getOption( 'page' );
		// Compare with null instead of relying on truthiness: "0" is a valid page name but
		// is falsy in PHP, and would otherwise silently trigger a refresh of all topics.
		if ( $pageName !== null ) {
			$title = $this->titleFactory->newFromText( $pageName );
			if ( $title ) {
				$this->processCandidate( $title, $force );
			} else {
				$this->fatalError( 'Invalid title: ' . $pageName );
			}
			return;
		}

		foreach ( $this->getOresTopics() as $oresTopic ) {
			$this->refreshTopic( $oresTopic, $force );
		}
	}

	/**
	 * Top up the link recommendation task pool of a single ORES topic.
	 *
	 * @param string $oresTopic
	 * @param bool $force Passed through to processCandidate().
	 */
	private function refreshTopic( string $oresTopic, bool $force ): void {
		$this->output( " processing topic $oresTopic...\n" );
		$suggestions = $this->taskSuggester->suggest(
			$this->searchUser,
			new TaskSetFilters(
				[ LinkRecommendationTaskTypeHandler::TASK_TYPE_ID ],
				[ $oresTopic ]
			),
			1,
			0,
			// Enabling the debug flag is relatively harmless, and disables all caching,
			// which we need here. useCache would prevent reading the cache, but would
			// still write it, which would be just a waste of space.
			[ 'debug' => true ]
		);

		// TaskSuggester::suggest() only returns StatusValue when there's an error.
		if ( $suggestions instanceof StatusValue ) {
			$this->error( Status::wrap( $suggestions )->getWikiText( false, false, 'en' ) );
			return;
		}

		$totalExistingSuggestionsCount = $suggestions->getTotalCount();
		$recommendationsNeeded = $this->recommendationTaskType->getMinimumTasksPerTopic()
			- $totalExistingSuggestionsCount;

		if ( $recommendationsNeeded <= 0 ) {
			$this->output( " no new tasks needed, $totalExistingSuggestionsCount existing suggestions\n" );
			return;
		}
		$this->output( " $recommendationsNeeded new tasks needed\n" );

		foreach ( $this->findArticlesInTopic( $oresTopic ) as $titleBatch ) {
			$recommendationsFound = 0;
			foreach ( $titleBatch as $title ) {
				// TODO filter out protected pages. Needs to be batched. Or wait for T259346.
				if ( $this->processCandidate( $title, $force ) ) {
					$recommendationsFound++;
					$recommendationsNeeded--;
					if ( $recommendationsNeeded <= 0 ) {
						// Pool is full; stop both the batch and the batch-fetching loop.
						break 2;
					}
				}
			}
			$this->waitForReplication();
			// findArticlesInTopic() picks articles at random, so we need to abort the loop
			// at some point. Do it when no new tasks were generated from the current batch.
			if ( $recommendationsFound === 0 ) {
				break;
			}
		}

		// Same condition as the loop exit above, so the message always matches the outcome.
		$this->output( ( $recommendationsNeeded <= 0 ) ? " task pool filled\n"
			: " topic exhausted, $recommendationsNeeded tasks still needed\n" );
	}

	/**
	 * Load the GrowthExperiments configuration into $this->growthConfig.
	 */
	protected function initGrowthConfig(): void {
		// Needs to be separate from initServices/initConfig as checking whether the script
		// should run on a given wiki relies on this, but initServices/initConfig will break
		// on some wikis where the script is not supposed to run and the task configuration
		// is missing.
		$growthServices = GrowthExperimentsServices::wrap( $this->getServiceContainer() );
		$this->growthConfig = $growthServices->getGrowthConfig();
	}

	/**
	 * Fetch the services used by the script and build a task suggester whose
	 * configuration also knows the internal candidate task type.
	 */
	protected function initServices(): void {
		// Extend the task type configuration with a custom "candidate" task type, which
		// finds articles which do not have link recommendations.
		$linkRecommendationCandidateTaskType = NullTaskTypeHandler::getNullTaskType(
			self::CANDIDATE_TASK_TYPE_ID, '-hasrecommendation:link' );

		$services = $this->getServiceContainer();
		$growthServices = GrowthExperimentsServices::wrap( $services );
		$newcomerTaskConfigurationLoader = $growthServices->getNewcomerTasksConfigurationLoader();
		if ( $newcomerTaskConfigurationLoader instanceof AbstractDataConfigurationLoader ) {
			// Pretend link-recommendation is enabled (T371316)
			// Task suggester cannot be adapted to query disabled task types, because it is also
			// used in Homepage (where the disabled flag has to be honored).
			$newcomerTaskConfigurationLoader->enableTaskType( LinkRecommendationTaskTypeHandler::TASK_TYPE_ID );
		}
		$this->configurationLoader = new TopicDecorator(
			$newcomerTaskConfigurationLoader,
			true,
			[ $linkRecommendationCandidateTaskType ]
		);
		$this->titleFactory = $services->getTitleFactory();
		$this->linkBatchFactory = $services->getLinkBatchFactory();
		$this->taskSuggester = $growthServices->getTaskSuggesterFactory()->create( $this->configurationLoader );
		$this->linkRecommendationStore = $growthServices->getLinkRecommendationStore();
		$this->linkRecommendationUpdater = $growthServices->getLinkRecommendationUpdater();
	}

	/**
	 * Look up the link recommendation task type (fatal error if it is missing or of the
	 * wrong class) and create the system user used for searching.
	 */
	protected function initConfig(): void {
		$taskTypes = $this->configurationLoader->getTaskTypes();
		$taskType = $taskTypes[LinkRecommendationTaskTypeHandler::TASK_TYPE_ID] ?? null;
		// instanceof is false for null, so this also covers a missing task type.
		if ( !( $taskType instanceof LinkRecommendationTaskType ) ) {
			$this->fatalError( sprintf( "'%s' is not a link recommendation task type",
				LinkRecommendationTaskTypeHandler::TASK_TYPE_ID ) );
		} else {
			$this->recommendationTaskType = $taskType;
		}
		$this->searchUser = User::newSystemUser( 'Maintenance script', [ 'steal' => true ] );
	}

	/**
	 * Get the ORES topics to process: all known topics, or just the one given with
	 * --topic. An unknown --topic value is a fatal error.
	 *
	 * @return string[]
	 */
	private function getOresTopics(): array {
		$topic = $this->getOption( 'topic' );
		$oresTopics = array_keys( ArticleTopicFeature::TERMS_TO_LABELS );
		// Null check rather than truthiness, so that falsy values such as "0" or "" are
		// rejected as invalid topics instead of being treated as "no --topic given".
		if ( $topic !== null ) {
			$oresTopics = array_intersect( $oresTopics, [ $topic ] );
			if ( !$oresTopics ) {
				$this->fatalError( "invalid topic $topic" );
			}
		}
		return $oresTopics;
	}

	/**
	 * Lazily fetch batches of articles in the given topic which have no link
	 * recommendation yet. The generator has no end condition of its own besides the
	 * search returning nothing; the caller is expected to stop iterating.
	 *
	 * @param string $oresTopic
	 * @return Generator<Title[]> Each yielded batch is non-empty.
	 * @throws RuntimeException When the search fails.
	 */
	private function findArticlesInTopic( string $oresTopic ): Generator {
		$batchSize = $this->getBatchSize();
		while ( true ) {
			$this->output( " fetching $batchSize tasks...\n" );
			$candidates = $this->taskSuggester->suggest(
				$this->searchUser,
				new TaskSetFilters(
					[ self::CANDIDATE_TASK_TYPE_ID ],
					[ $oresTopic ]
				),
				$batchSize,
				null,
				[ 'debug' => true ]
			);
			if ( $candidates instanceof StatusValue ) {
				// FIXME exiting will make the cronjob unreliable. Not exiting might result
				// in an infinite error loop. Neither looks like a great option.
				throw new RuntimeException( 'Search error: '
					. Status::wrap( $candidates )->getWikiText( false, false, 'en' ) );
			}
			if ( !$candidates->count() ) {
				// Nothing left to offer; end the generator instead of yielding an empty
				// batch (which would cost a no-op link batch and a replication wait).
				return;
			}

			$linkTargets = [];
			foreach ( $candidates as $candidate ) {
				$linkTargets[] = $candidate->getTitle();
			}
			// NOTE(review): presumably preloads page data for the whole batch so the Title
			// objects created below do not each need their own lookup - confirm.
			$this->linkBatchFactory->newLinkBatch( $linkTargets )->execute();

			$titles = [];
			foreach ( $linkTargets as $linkTarget ) {
				$titles[] = $this->titleFactory->newFromLinkTarget( $linkTarget );
			}
			yield $titles;
		}
	}

	/**
	 * Evaluate a task candidate and potentially generate the task.
	 *
	 * A read-only database or a GrowthExperiments configuration error aborts the script
	 * with a fatal error.
	 *
	 * @param Title $title
	 * @param bool $force Ignore all failed conditions that can be safely ignored.
	 * @return bool Whether a new task was generated.
	 */
	private function processCandidate( Title $title, bool $force = false ): bool {
		$this->verboseLog( " checking candidate " . $title->getPrefixedDBkey() . "... " );
		try {
			$status = $this->linkRecommendationUpdater->processCandidate( $title, $force );
			if ( $status->isOK() ) {
				$this->verboseLog( "success, updating index\n" );
				return true;
			}
			// Not an error for the script as a whole: report why this page was skipped.
			$error = Status::wrap( $status )->getWikiText( false, false, 'en' );
			$this->verboseLog( "$error\n" );
		} catch ( DBReadOnlyError $e ) {
			// This is a long-running script, read-only state can change in the middle.
			// It's run frequently so just do the easy thing and abort.
			$this->fatalError( 'DB is readonly, aborting' );
		} catch ( WikiConfigException $e ) {
			// Link recommendations are not configured correctly.
			$this->fatalError( $e->getMessage() );
		}
		return false;
	}

	/**
	 * Output a message only when the --verbose option was given.
	 *
	 * @param string $message
	 */
	private function verboseLog( string $message ): void {
		if ( $this->hasOption( 'verbose' ) ) {
			$this->output( $message );
		}
	}

}
318 | |
// Name the class that the maintenance runner should instantiate for this script.
$maintClass = RefreshLinkRecommendations::class;
// NOTE(review): RUN_MAINTENANCE_IF_MAIN is not defined in this file (presumably it comes
// from the Maintenance.php required near the top); judging by its name it runs the script
// only when this file is the entry point - confirm.
require_once RUN_MAINTENANCE_IF_MAIN;