Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 122 |
|
0.00% |
0 / 11 |
CRAP | |
0.00% |
0 / 1 |
RevalidateLinkRecommendations | |
0.00% |
0 / 116 |
|
0.00% |
0 / 11 |
1406 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
2 | |||
checkRequiredExtensions | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
execute | |
0.00% |
0 / 31 |
|
0.00% |
0 / 1 |
132 | |||
initGrowthConfig | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
initServices | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
validateRecommendation | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
72 | |||
regenerateRecommendation | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
12 | |||
getTitle | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
verboseLog | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
getAllowedChecksums | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
20 | |||
getOlderThanTimestamp | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
12 |
1 | <?php |
2 | |
3 | namespace GrowthExperiments\Maintenance; |
4 | |
5 | use GrowthExperiments\GrowthExperimentsServices; |
6 | use GrowthExperiments\NewcomerTasks\AddLink\LinkRecommendation; |
7 | use GrowthExperiments\NewcomerTasks\AddLink\LinkRecommendationHelper; |
8 | use GrowthExperiments\NewcomerTasks\AddLink\LinkRecommendationLink; |
9 | use GrowthExperiments\NewcomerTasks\AddLink\LinkRecommendationStore; |
10 | use GrowthExperiments\NewcomerTasks\AddLink\LinkRecommendationUpdater; |
11 | use GrowthExperiments\WikiConfigException; |
12 | use LogicException; |
13 | use Maintenance; |
14 | use MediaWiki\Config\Config; |
15 | use MediaWiki\MediaWikiServices; |
16 | use MediaWiki\Status\Status; |
17 | use MediaWiki\Title\Title; |
18 | use MediaWiki\Title\TitleFactory; |
19 | use UnexpectedValueException; |
20 | use Wikimedia\Rdbms\DBReadOnlyError; |
21 | |
// Locate the MediaWiki installation: an explicit MW_INSTALL_PATH environment variable wins,
// otherwise fall back to the directory three levels above this script's directory.
$path = getenv( 'MW_INSTALL_PATH' );
if ( $path === false ) {
	$path = dirname( __DIR__, 3 );
}

require_once $path . '/maintenance/Maintenance.php';
29 | |
30 | /** |
31 | * Iterate through the growthexperiments_link_recommendations table and regenerate the ones which |
32 | * do not match the specified criteria. If a valid task cannot be generated, the existing task will |
33 | * be discarded. |
34 | * This is mainly meant for updating tasks after the recommendation algorithm changes. |
35 | */ |
36 | class RevalidateLinkRecommendations extends Maintenance { |
37 | |
38 | /** @var TitleFactory */ |
39 | private $titleFactory; |
40 | |
41 | /** @var LinkRecommendationStore */ |
42 | private $linkRecommendationStore; |
43 | |
44 | /** @var LinkRecommendationHelper */ |
45 | private $linkRecommendationHelper; |
46 | |
47 | /** @var LinkRecommendationUpdater */ |
48 | private $linkRecommendationUpdater; |
49 | |
50 | /** @var Config */ |
51 | private $growthConfig; |
52 | |
53 | /** @var string[] */ |
54 | private $allowedChecksums; |
55 | |
56 | private ?int $olderThanTimestamp = null; |
57 | |
/**
 * Register the script description, its command line options and the default batch size.
 */
public function __construct() {
	parent::__construct();
	$this->requireExtension( 'GrowthExperiments' );

	$this->addDescription(
		'Iterate through the growthexperiments_link_recommendations table and regenerate '
		. 'the ones which do not match the specified criteria. '
		. 'If a valid task cannot be generated, the existing task will be discarded.'
	);

	// Each entry is [ option name, help text, whether the option takes a value ].
	// The order is kept as-is because options are registered in this sequence.
	$optionSpecs = [
		[ 'fromPageId', 'Start iterating upwards from this page ID.', true ],
		[ 'all', 'Regenerate all tasks.', false ],
		[
			'exceptDatasetChecksums',
			'Regenerate a task unless its model checksum appears in the given file '
				. '(one checksum per line)',
			true,
		],
		[ 'olderThan', 'Regenerate a task which was generated before this date', true ],
		[
			'scoreLessThan',
			'Regenerate a task when any suggested link has a lower score than this one.',
			true,
		],
		[ 'limit', 'Limit the number of changes.', true ],
		[ 'force', 'Store the new recommendation even if it fails quality criteria.', false ],
		[ 'dry-run', 'Do not actually make any changes.', false ],
		[ 'verbose', 'Show debug output.', false ],
	];
	foreach ( $optionSpecs as [ $optionName, $helpText, $takesValue ] ) {
		// None of the options is required, hence the constant `false`.
		$this->addOption( $optionName, $helpText, false, $takesValue );
	}

	$this->setBatchSize( 500 );
}
79 | |
/**
 * Declare one additional extension dependency based on configuration, then run the
 * parent's extension check.
 *
 * EventBus is required when GELinkRecommendationsUseEventGate is set, CirrusSearch otherwise.
 */
public function checkRequiredExtensions() {
	// Hack: must be early enough for requireExtension to work but late enough for config
	// to be available.
	$growthConfig = GrowthExperimentsServices::wrap( MediaWikiServices::getInstance() )
		->getGrowthConfig();
	$dependency = $growthConfig->get( 'GELinkRecommendationsUseEventGate' )
		? 'EventBus'
		: 'CirrusSearch';
	$this->requireExtension( $dependency );

	parent::checkRequiredExtensions();
}
91 | |
/**
 * Entry point: fetch stored link recommendations batch by batch and regenerate every one
 * that validateRecommendation() rejects.
 *
 * Exits early (with a short message) when link recommendations are disabled or when a
 * remote API URL is configured. Stops as soon as the number of replaced plus discarded
 * recommendations equals the --limit option. With --dry-run nothing is regenerated and
 * every rejected recommendation is counted as replaced.
 */
public function execute() {
	$this->initGrowthConfig();
	if ( !$this->growthConfig->get( 'GENewcomerTasksLinkRecommendationsEnabled' ) ) {
		$this->output( "Disabled\n" );
		return;
	}
	if ( $this->growthConfig->get( 'GENewcomerTasksRemoteApiUrl' ) ) {
		$this->output( "Local tasks disabled\n" );
		return;
	}
	$this->initServices();

	$this->output( "Revalidating link recommendations:\n" );

	$replaced = 0;
	$discarded = 0;
	$fromPageId = (int)$this->getOption( 'fromPageId', 0 );
	// NOTE(review): nothing in this method reassigns $fromPageId, so the loop can only end
	// (other than via the limit) if getAllRecommendations() takes it by reference and sets
	// it to false once there are no more rows - confirm against LinkRecommendationStore.
	// It must therefore be passed as a plain variable.
	while ( $fromPageId !== false ) {
		$this->output( " fetching task batch starting with page $fromPageId\n" );
		$linkRecommendations = $this->linkRecommendationStore->getAllRecommendations(
			$this->getBatchSize(),
			$fromPageId
		);
		foreach ( $linkRecommendations as $linkRecommendation ) {
			if ( $this->validateRecommendation( $linkRecommendation ) ) {
				// Still acceptable, leave it alone.
				continue;
			}

			$prefixedText = $this->getTitle( $linkRecommendation )->getPrefixedText();
			$this->verboseLog( ' ' . $prefixedText . ' is outdated, regenerating... ' );

			if ( $this->getOption( 'dry-run' ) ) {
				$replaced++;
				$this->verboseLog( "(dry-run)\n" );
			} else {
				$status = $this->regenerateRecommendation( $linkRecommendation );
				if ( $status->isOK() ) {
					$this->verboseLog( "success\n" );
					$replaced++;
				} else {
					$this->verboseLog( $status->getWikiText( false, false, 'en' ) . "\n" );
					$discarded++;
				}
			}

			// Without --limit the default of -1 can never match a non-negative count.
			if ( $replaced + $discarded == $this->getOption( 'limit', -1 ) ) {
				$this->verboseLog( "Limit reached, aborting.\n" );
				// Leave both the batch loop and the paging loop.
				break 2;
			}
		}
	}
	$this->output( "Done; replaced $replaced, discarded $discarded\n" );
}
134 | |
/**
 * Load the GrowthExperiments configuration into $this->growthConfig.
 */
protected function initGrowthConfig(): void {
	// Needs to be separate from initServices/initConfig as checking whether the script
	// should run on a given wiki relies on this, but initServices/initConfig will break
	// on some wikis where the script is not supposed to run and the task configuration
	// is missing.
	$this->growthConfig = GrowthExperimentsServices::wrap( MediaWikiServices::getInstance() )
		->getGrowthConfig();
}
144 | |
/**
 * Fetch the title factory and the link recommendation store, helper and updater from the
 * service containers and keep them on the instance.
 */
protected function initServices(): void {
	$mwServices = MediaWikiServices::getInstance();
	$this->titleFactory = $mwServices->getTitleFactory();

	$geServices = GrowthExperimentsServices::wrap( $mwServices );
	$this->linkRecommendationStore = $geServices->getLinkRecommendationStore();
	$this->linkRecommendationHelper = $geServices->getLinkRecommendationHelper();
	$this->linkRecommendationUpdater = $geServices->getLinkRecommendationUpdater();
}
153 | |
/**
 * Decide whether a stored recommendation may be kept.
 *
 * The command line criteria are checked in a fixed order (all, exceptDatasetChecksums,
 * olderThan, scoreLessThan); the first one that the recommendation fails makes it invalid.
 * A recommendation that fails none of the given criteria is valid.
 *
 * @param LinkRecommendation $linkRecommendation
 * @return bool False when the recommendation should be regenerated.
 */
private function validateRecommendation( LinkRecommendation $linkRecommendation ): bool {
	// --all: everything is treated as outdated.
	if ( $this->hasOption( 'all' ) ) {
		return false;
	}

	// --exceptDatasetChecksums: the model checksum must be one of the listed ones.
	if ( $this->hasOption( 'exceptDatasetChecksums' ) ) {
		$permitted = $this->getAllowedChecksums();
		$datasetChecksums = $linkRecommendation->getMetadata()->getDatasetChecksums();
		// 'wrong' stands in for a missing model checksum.
		$modelChecksum = $datasetChecksums['model'] ?? 'wrong';
		if ( !in_array( $modelChecksum, $permitted, true ) ) {
			return false;
		}
	}

	// --olderThan: the task must not have been generated before the cutoff.
	if (
		$this->hasOption( 'olderThan' )
		&& $linkRecommendation->getMetadata()->getTaskTimestamp() < $this->getOlderThanTimestamp()
	) {
		return false;
	}

	// --scoreLessThan: the weakest suggested link must reach the threshold.
	if ( $this->hasOption( 'scoreLessThan' ) ) {
		// NOTE(review): min() throws on an empty array, so this assumes every stored
		// recommendation has at least one link - confirm.
		$lowestScore = min( array_map(
			static fn ( LinkRecommendationLink $link ) => $link->getScore(),
			$linkRecommendation->getLinks()
		) );
		$threshold = (float)$this->getOption( 'scoreLessThan' );
		if ( $lowestScore < $threshold ) {
			return false;
		}
	}

	return true;
}
193 | |
/**
 * Delete the stored recommendation for a page and ask the updater for a fresh one.
 *
 * A read-only database or a wiki configuration problem ends the script via fatalError().
 *
 * @param LinkRecommendation $linkRecommendation
 * @return Status Result of processing the page as a new candidate.
 */
private function regenerateRecommendation( LinkRecommendation $linkRecommendation ): Status {
	$title = $this->getTitle( $linkRecommendation );

	// Deleting from the search index is instantaneous, adding to the search index takes a few
	// hours, so in theory the script could deplete the existing pool temporarily. In practice
	// fetching a new candidate takes about a second, so the script shouldn't progress fast
	// enough for that to be a problem.
	$this->linkRecommendationHelper->deleteLinkRecommendation( $title->toPageIdentity(), true );

	$force = $this->hasOption( 'force' );
	try {
		$result = $this->linkRecommendationUpdater->processCandidate( $title, $force );
		return Status::wrap( $result );
	} catch ( DBReadOnlyError $e ) {
		$fatalMessage = 'DB is readonly, aborting';
	} catch ( WikiConfigException $e ) {
		$fatalMessage = $e->getMessage();
	}

	$this->fatalError( $fatalMessage );
	// fatalError() is not expected to return; this satisfies the declared return type.
	throw new LogicException( 'Cannot reach here' );
}
216 | |
/**
 * Turn the recommendation's link target into a Title object.
 *
 * @param LinkRecommendation $linkRecommendation
 * @return Title
 */
private function getTitle( LinkRecommendation $linkRecommendation ): Title {
	// The title is already cached by this point so no need for a LinkBatch.
	$linkTarget = $linkRecommendation->getTitle();
	return $this->titleFactory->newFromLinkTarget( $linkTarget );
}
221 | |
/**
 * Print a message only when the --verbose option was given.
 *
 * @param string $message Text to output verbatim (no newline is appended).
 */
private function verboseLog( string $message ): void {
	if ( !$this->hasOption( 'verbose' ) ) {
		return;
	}
	$this->output( $message );
}
227 | |
/**
 * Return the checksums listed in the file given via --exceptDatasetChecksums.
 *
 * The file is read once on first use and the parsed list is cached on the instance.
 * Lines are trimmed and empty lines are dropped.
 *
 * @return string[]
 * @throws UnexpectedValueException When the file cannot be read or yields no checksums.
 */
private function getAllowedChecksums(): array {
	if ( !$this->allowedChecksums ) {
		$filename = $this->getOption( 'exceptDatasetChecksums' );
		// phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
		$content = @file_get_contents( $filename );
		if ( $content === false ) {
			throw new UnexpectedValueException( "File $filename could not be opened" );
		}
		// Parse the content that was just read instead of opening the file a second time:
		// a second read is wasted I/O and could fail with a plain warning after the check
		// above has already passed. trim() also removes a "\r" left over from CRLF files.
		$this->allowedChecksums = array_filter(
			array_map( 'trim', explode( "\n", $content ) )
		);
		if ( !$this->allowedChecksums ) {
			throw new UnexpectedValueException( "File $filename did not contain checksums" );
		}
	}
	return $this->allowedChecksums;
}
252 | |
/**
 * Return the --olderThan option as a Unix timestamp, converting it on first use and
 * caching the result on the instance.
 *
 * @return int
 * @throws UnexpectedValueException When the option cannot be converted to a timestamp.
 */
private function getOlderThanTimestamp(): int {
	if ( $this->olderThanTimestamp ) {
		return $this->olderThanTimestamp;
	}

	$unixTimestamp = wfTimestamp( TS_UNIX, $this->getOption( 'olderThan' ) );
	if ( !$unixTimestamp ) {
		throw new UnexpectedValueException( "Parameter olderThan does not contain a valid timestamp" );
	}

	$this->olderThanTimestamp = (int)$unixTimestamp;
	return $this->olderThanTimestamp;
}
270 | |
271 | } |
272 | |
// Name the maintenance class for the runner, then include the file referenced by
// RUN_MAINTENANCE_IF_MAIN.
$maintClass = RevalidateLinkRecommendations::class;
require_once RUN_MAINTENANCE_IF_MAIN;