Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 127 |
|
0.00% |
0 / 6 |
CRAP | |
0.00% |
0 / 1 |
LinkRecommendationUpdater | |
0.00% |
0 / 127 |
|
0.00% |
0 / 6 |
1056 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
2 | |||
processCandidate | |
0.00% |
0 / 41 |
|
0.00% |
0 / 1 |
42 | |||
evaluateTitle | |
0.00% |
0 / 43 |
|
0.00% |
0 / 1 |
182 | |||
evaluateRecommendation | |
0.00% |
0 / 20 |
|
0.00% |
0 / 1 |
56 | |||
getLinkRecommendationTaskType | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
20 | |||
failure | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | |
3 | namespace GrowthExperiments\NewcomerTasks\AddLink; |
4 | |
5 | use ChangeTags; |
6 | use Exception; |
7 | use GrowthExperiments\NewcomerTasks\ConfigurationLoader\ConfigurationLoader; |
8 | use GrowthExperiments\NewcomerTasks\TaskType\LinkRecommendationTaskType; |
9 | use GrowthExperiments\NewcomerTasks\TaskType\LinkRecommendationTaskTypeHandler; |
10 | use GrowthExperiments\WikiConfigException; |
11 | use MediaWiki\ChangeTags\ChangeTagsStore; |
12 | use MediaWiki\Content\WikitextContent; |
13 | use MediaWiki\Language\RawMessage; |
14 | use MediaWiki\Page\PageIdentityValue; |
15 | use MediaWiki\Page\PageProps; |
16 | use MediaWiki\Revision\RevisionRecord; |
17 | use MediaWiki\Revision\RevisionStore; |
18 | use MediaWiki\Revision\SlotRecord; |
19 | use MediaWiki\Status\Status; |
20 | use MediaWiki\Storage\NameTableStore; |
21 | use MediaWiki\Title\Title; |
22 | use MediaWiki\Utils\MWTimestamp; |
23 | use Psr\Log\LoggerInterface; |
24 | use StatusValue; |
25 | use Wikimedia\Rdbms\DBReadOnlyError; |
26 | use Wikimedia\Rdbms\IConnectionProvider; |
27 | use Wikimedia\Rdbms\IDatabase; |
28 | use Wikimedia\Rdbms\IDBAccessObject; |
29 | |
30 | /** |
31 | * Handles creating or updating a link recommendation entry. |
32 | * This includes fetching a recommendation from the service, validating it, and updating |
33 | * the database and the search index. |
34 | */ |
35 | class LinkRecommendationUpdater { |
36 | |
37 | private LoggerInterface $logger; |
38 | private IConnectionProvider $connectionProvider; |
39 | private RevisionStore $revisionStore; |
40 | private NameTableStore $changeDefNameTableStore; |
41 | private PageProps $pageProps; |
42 | private ConfigurationLoader $configurationLoader; |
43 | private ChangeTagsStore $changeTagsStore; |
44 | /** |
45 | * @var callable returning {@link \CirrusSearch\WeightedTagsUpdater} |
46 | */ |
47 | private $weightedTagsUpdaterProvider; |
48 | private LinkRecommendationStore $linkRecommendationStore; |
49 | private LinkRecommendationProvider $linkRecommendationProvider; |
50 | private ?LinkRecommendationTaskType $linkRecommendationTaskType = null; |
51 | |
/**
 * Store the injected collaborators; no other work is done at construction time.
 *
 * @param LoggerInterface $logger
 * @param IConnectionProvider $connectionProvider
 * @param RevisionStore $revisionStore
 * @param NameTableStore $changeDefNameTableStore
 * @param PageProps $pageProps
 * @param ChangeTagsStore $changeTagsStore
 * @param ConfigurationLoader $configurationLoader
 * @param callable(): \CirrusSearch\WeightedTagsUpdater $weightedTagsUpdaterProvider
 * @param LinkRecommendationProvider $linkRecommendationProvider Note that this needs to be
 *   the uncached provider, as caching is done by LinkRecommendationUpdater.
 * @param LinkRecommendationStore $linkRecommendationStore
 */
public function __construct(
	LoggerInterface $logger,
	IConnectionProvider $connectionProvider,
	RevisionStore $revisionStore,
	NameTableStore $changeDefNameTableStore,
	PageProps $pageProps,
	ChangeTagsStore $changeTagsStore,
	ConfigurationLoader $configurationLoader,
	callable $weightedTagsUpdaterProvider,
	LinkRecommendationProvider $linkRecommendationProvider,
	LinkRecommendationStore $linkRecommendationStore
) {
	// Recommendation retrieval and persistence.
	$this->linkRecommendationProvider = $linkRecommendationProvider;
	$this->linkRecommendationStore = $linkRecommendationStore;
	$this->weightedTagsUpdaterProvider = $weightedTagsUpdaterProvider;

	// Task type configuration (resolved lazily, see getLinkRecommendationTaskType()).
	$this->configurationLoader = $configurationLoader;

	// Services used while evaluating the candidate page.
	$this->revisionStore = $revisionStore;
	$this->pageProps = $pageProps;
	$this->changeTagsStore = $changeTagsStore;
	$this->changeDefNameTableStore = $changeDefNameTableStore;
	$this->connectionProvider = $connectionProvider;

	$this->logger = $logger;
}
89 | |
/**
 * Evaluate a task candidate and generate the task if the candidate is viable.
 * If a link recommendation task already exists for the given page, it will be overwritten.
 *
 * The steps are: check the title/revision, skip revisions that already have a stored
 * recommendation, fetch and validate a recommendation, then insert it and request a
 * weighted tags update for the page. If requesting the update throws, the insert is
 * cancelled and a failure status is returned.
 *
 * @param Title $title
 * @param bool $force Ignore all failed conditions that can be safely ignored.
 * @return StatusValue Success status. Note that the error messages are not intended
 *   for users (and as such not localizable).
 * @throws WikiConfigException if the task type is not properly configured.
 * @throws DBReadOnlyError
 */
public function processCandidate( Title $title, bool $force = false ): StatusValue {
	$lastRevision = $this->revisionStore->getRevisionByTitle( $title );
	$status = $this->evaluateTitle( $title, $lastRevision, $force );
	if ( !$status->isOK() ) {
		// This also covers a missing revision, so $lastRevision is non-null below.
		return $status;
	}

	// Prevent infinite loop. Cirrus updates are not realtime so pages we have
	// just created recommendations for will be included again in the next batch.
	// Skip them to ensure $recommendationsFound is only nonzero when we have
	// actually added a new recommendation.
	// FIXME there is probably a better way to do this via search offsets.
	if ( $this->linkRecommendationStore->getByRevId( $lastRevision->getId(),
		IDBAccessObject::READ_LATEST )
	) {
		return $this->failure( 'link recommendation already stored' );
	}

	$recommendation = $this->linkRecommendationProvider->get( $title,
		$this->getLinkRecommendationTaskType() );
	if ( $recommendation instanceof StatusValue ) {
		// Returning a StatusValue is always an error for the provider. When returning it
		// from this class, it isn't necessarily interpreted that way.
		$recommendation->setOK( false );
		return $recommendation;
	}
	$status = $this->evaluateRecommendation( $recommendation, $lastRevision, $force );
	if ( !$status->isOK() ) {
		return $status;
	}

	// If an error happens later, uncommitted DB writes get discarded, while
	// updateCirrusSearchIndex() is immediate. Minimize the likelihood of the DB
	// and the search index getting out of sync by wrapping the insert into a
	// transaction (in general start/endAtomic doesn't guarantee that but this method
	// will usually be called from maintenance scripts).
	$db = $this->linkRecommendationStore->getDB( DB_PRIMARY );
	$db->startAtomic( __METHOD__, IDatabase::ATOMIC_CANCELABLE );
	$this->linkRecommendationStore->insert( $recommendation );

	$pageIdentity = new PageIdentityValue(
		$lastRevision->getPageId( $lastRevision->getWikiId() ),
		$lastRevision->getPage()->getNamespace(),
		$lastRevision->getPage()->getDBkey(),
		$lastRevision->getWikiId()
	);

	try {
		( $this->weightedTagsUpdaterProvider )()->updateWeightedTags(
			$pageIdentity,
			LinkRecommendationTaskTypeHandler::WEIGHTED_TAG_PREFIX
		);
	} catch ( Exception $e ) {
		// Undo the insert so the DB does not hold a recommendation for which no
		// weighted tags update was requested.
		$db->cancelAtomic( __METHOD__ );

		$this->logger->error( __METHOD__ . ' failed to update weighted tags', [
			'exception' => $e,
			'pageTitle' => $title->getPrefixedText(),
		] );
		// Report through failure() like every other error path of this class: the text
		// is free-form English, not a message key, so it must be wrapped as a raw
		// message. The full exception (with trace) is in the log entry above.
		return $this->failure(
			'Failed to request weighted tags update ('
			. LinkRecommendationTaskTypeHandler::WEIGHTED_TAG_PREFIX . '): '
			. $e->getMessage()
		);
	}

	$db->endAtomic( __METHOD__ );
	return StatusValue::newGood();
}
169 | |
/**
 * Check all conditions which are not related to the recommendation.
 *
 * The existence and content-type checks always apply; every later check is skipped
 * when $force is set.
 *
 * @param Title $title The title for which a recommendation is being requested.
 * @param RevisionRecord|null $revision The current revision of the title.
 * @param bool $force Ignore all failed conditions that can be safely ignored.
 * @return StatusValue Success status. Note that the error messages are not intended
 *   for users (and as such not localizable).
 * @throws WikiConfigException if the task type is not properly configured.
 */
private function evaluateTitle( Title $title, ?RevisionRecord $revision, bool $force ): StatusValue {
	// 1. the revision must exist and the mwaddlink service must be able to interpret it.
	if ( $revision === null ) {
		// Maybe the article has just been deleted and the search index is behind?
		return $this->failure( 'page not found' );
	}
	$content = $revision->getContent( SlotRecord::MAIN );
	if ( !$content instanceof WikitextContent ) {
		return $this->failure( 'content not found' );
	}

	if ( $force ) {
		return StatusValue::newGood();
	}

	// 2. the article must match size conditions.
	// NOTE(review): the pattern has no `u` modifier, so \w is matched byte-wise and
	// non-ASCII letters are presumably not counted as word characters - confirm this
	// is the intended way to count words on non-Latin wikis.
	$wordCount = preg_match_all( '/\w+/', $content->getText() );
	$taskType = $this->getLinkRecommendationTaskType();
	if ( $wordCount < $taskType->getMinimumWordCount() ) {
		return $this->failure( "word count too small ($wordCount)" );
	} elseif ( $wordCount > $taskType->getMaximumWordCount() ) {
		return $this->failure( "word count too large ($wordCount)" );
	}

	// 3. exclude articles which have been edited very recently.
	$revisionTime = (int)MWTimestamp::convert( TS_UNIX, $revision->getTimestamp() );
	if ( time() - $revisionTime < $taskType->getMinimumTimeSinceLastEdit() ) {
		return $this->failure( 'minimum time since last edit did not pass' );
	}

	// 4. exclude disambiguation pages.
	if ( $this->pageProps->getProperties( $title, 'disambiguation' ) ) {
		return $this->failure( 'disambiguation page' );
	}

	// 5. exclude pages where the last edit is a link recommendation edit or its revert.
	$dbr = $this->connectionProvider->getReplicaDatabase();
	$tags = $this->changeTagsStore->getTagsWithData( $dbr, null, $revision->getId() );
	if ( array_key_exists( LinkRecommendationTaskTypeHandler::CHANGE_TAG, $tags ) ) {
		return $this->failure( 'last edit is a link recommendation' );
	}
	// Use the data of the first revert tag that carries any.
	$revertTagData = null;
	foreach ( ChangeTags::REVERT_TAGS as $revertTagName ) {
		if ( !empty( $tags[$revertTagName] ) ) {
			$revertTagData = json_decode( $tags[$revertTagName], true );
			break;
		}
	}
	// Only run the range query when both ends of the reverted range are known. A missing
	// key would otherwise raise an "undefined array key" warning and be cast to 0, which
	// turns the condition into an unbounded (or empty) range.
	if ( is_array( $revertTagData )
		&& isset( $revertTagData['newestRevertedRevId'], $revertTagData['oldestRevertedRevId'] )
	) {
		$linkRecommendationChangeTagId = $this->changeDefNameTableStore
			->acquireId( LinkRecommendationTaskTypeHandler::CHANGE_TAG );
		// Count revisions of this page inside the reverted range that carry the
		// link recommendation change tag.
		$revertedAddLinkEditCount = $dbr->newSelectQueryBuilder()
			->from( 'revision' )
			->join( 'change_tag', null, [ 'rev_id = ct_rev_id' ] )
			->where( [
				'rev_page' => $title->getArticleID(),
				$dbr->expr( 'rev_id', '<=', (int)$revertTagData['newestRevertedRevId'] ),
				$dbr->expr( 'rev_id', '>=', (int)$revertTagData['oldestRevertedRevId'] ),
				'ct_tag_id' => $linkRecommendationChangeTagId,
			] )
			->caller( __METHOD__ )
			->fetchRowCount();
		if ( $revertedAddLinkEditCount > 0 ) {
			return $this->failure( 'last edit reverts a link recommendation edit' );
		}
	}
	return StatusValue::newGood();
}
245 | |
/**
 * Check a fetched recommendation against the task type criteria and safety checks.
 *
 * @param LinkRecommendation $recommendation
 * @param RevisionRecord $revision The current revision of the page the recommendation is for.
 * @param bool $force Ignore all failed conditions that can be safely ignored.
 * @return StatusValue Success status. Note that the error messages are not intended
 *   for users (and as such not localizable).
 */
private function evaluateRecommendation(
	LinkRecommendation $recommendation,
	RevisionRecord $revision,
	bool $force
): StatusValue {
	$revisionId = $revision->getId();
	if ( $recommendation->getRevisionId() !== $revisionId ) {
		// Some kind of race condition? Generating another task is easy so just discard this.
		return $this->failure( 'revision ID mismatch' );
	}

	// T291253
	if ( !$force ) {
		$alreadySubmitted = $this->linkRecommendationStore->hasSubmission(
			$recommendation,
			IDBAccessObject::READ_LATEST
		);
		if ( $alreadySubmitted ) {
			return $this->failure( 'submission already exists for revision ' . $revision->getId() );
		}
	}

	// We could check here for more race conditions, ie. whether the revision in the
	// recommendation matches the live revision. But there are plenty of other ways for race
	// conditions to happen, so we'll have to deal with them on the client side anyway. No
	// point in getting a primary database connection just for that.

	// Keep only the links which reach the minimum score. The task type is looked up
	// inside the callback, so it is only loaded when there is at least one link.
	$linksAboveThreshold = array_filter(
		$recommendation->getLinks(),
		fn ( LinkRecommendationLink $link ) =>
			$link->getScore() >= $this->getLinkRecommendationTaskType()->getMinimumLinkScore()
	);
	// NOTE(review): this rebuilt recommendation is local to the method - only its link
	// count is used below and the caller's object is left untouched. Confirm whether the
	// filtered link set was meant to be what gets stored.
	$recommendation = new LinkRecommendation(
		$recommendation->getTitle(),
		$recommendation->getPageId(),
		$recommendation->getRevisionId(),
		$linksAboveThreshold,
		$recommendation->getMetadata()
	);
	$goodLinkCount = count( $recommendation->getLinks() );

	// Having no good links at all is never acceptable, not even when forced.
	if ( $goodLinkCount === 0 ) {
		return $this->failure( "number of good links too small ($goodLinkCount)" );
	}
	if ( !$force && $goodLinkCount < $this->getLinkRecommendationTaskType()->getMinimumLinksPerTask() ) {
		return $this->failure( "number of good links too small ($goodLinkCount)" );
	}

	return StatusValue::newGood();
}
295 | |
/**
 * Lazily resolve the Add Link task type and memoize it on the instance.
 * The task type depends on on-wiki configuration, which is not available at setup time,
 * so it cannot be dependency-injected.
 * @return LinkRecommendationTaskType
 * @throws WikiConfigException if the task type is not properly configured.
 */
private function getLinkRecommendationTaskType(): LinkRecommendationTaskType {
	if ( $this->linkRecommendationTaskType !== null ) {
		return $this->linkRecommendationTaskType;
	}

	// A StatusValue result from the loader signals that loading failed.
	$loadResult = $this->configurationLoader->loadTaskTypes();
	if ( $loadResult instanceof StatusValue ) {
		$details = Status::wrap( $loadResult )->getWikiText( false, false, 'en' );
		throw new WikiConfigException( 'Could not load task types: ' . $details );
	}

	// Look among enabled and disabled task types alike; with the array union an
	// enabled entry takes precedence over a disabled one with the same ID.
	$allTaskTypes = $this->configurationLoader->getTaskTypes()
		+ $this->configurationLoader->getDisabledTaskTypes();
	$candidate = $allTaskTypes[LinkRecommendationTaskTypeHandler::TASK_TYPE_ID] ?? null;
	if ( $candidate instanceof LinkRecommendationTaskType ) {
		$this->linkRecommendationTaskType = $candidate;
		return $candidate;
	}

	throw new WikiConfigException( 'Could not load link recommendation task type' );
}
319 | |
/**
 * Build a fatal StatusValue from plain, non-localized text by wrapping it in a RawMessage.
 * @param string $error
 * @return StatusValue
 */
private function failure( string $error ): StatusValue {
	$message = new RawMessage( $error );
	return StatusValue::newFatal( $message );
}
328 | |
329 | } |