Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 108 |
|
0.00% |
0 / 6 |
CRAP | |
0.00% |
0 / 1 |
LinkRecommendationUpdater | |
0.00% |
0 / 108 |
|
0.00% |
0 / 6 |
1056 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
2 | |||
processCandidate | |
0.00% |
0 / 24 |
|
0.00% |
0 / 1 |
42 | |||
evaluateTitle | |
0.00% |
0 / 43 |
|
0.00% |
0 / 1 |
182 | |||
evaluateRecommendation | |
0.00% |
0 / 20 |
|
0.00% |
0 / 1 |
56 | |||
getLinkRecommendationTaskType | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
20 | |||
failure | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | |
3 | namespace GrowthExperiments\NewcomerTasks\AddLink; |
4 | |
5 | use ChangeTags; |
6 | use GrowthExperiments\NewcomerTasks\AddLink\SearchIndexUpdater\SearchIndexUpdater; |
7 | use GrowthExperiments\NewcomerTasks\ConfigurationLoader\ConfigurationLoader; |
8 | use GrowthExperiments\NewcomerTasks\TaskType\LinkRecommendationTaskType; |
9 | use GrowthExperiments\NewcomerTasks\TaskType\LinkRecommendationTaskTypeHandler; |
10 | use GrowthExperiments\WikiConfigException; |
11 | use IDBAccessObject; |
12 | use MediaWiki\Language\RawMessage; |
13 | use MediaWiki\Page\PageProps; |
14 | use MediaWiki\Revision\RevisionRecord; |
15 | use MediaWiki\Revision\RevisionStore; |
16 | use MediaWiki\Revision\SlotRecord; |
17 | use MediaWiki\Status\Status; |
18 | use MediaWiki\Storage\NameTableStore; |
19 | use MediaWiki\Title\Title; |
20 | use MediaWiki\Utils\MWTimestamp; |
21 | use StatusValue; |
22 | use Wikimedia\Rdbms\DBReadOnlyError; |
23 | use Wikimedia\Rdbms\IConnectionProvider; |
24 | use Wikimedia\Rdbms\IDatabase; |
25 | use WikitextContent; |
26 | |
/**
 * Handles creating or updating a link recommendation entry.
 *
 * This includes fetching a recommendation from the service, validating both the page and
 * the recommendation, and updating the database and the search index.
 */
class LinkRecommendationUpdater {

	private IConnectionProvider $connectionProvider;
	private RevisionStore $revisionStore;
	private NameTableStore $changeDefNameTableStore;
	private PageProps $pageProps;
	private ConfigurationLoader $configurationLoader;
	private SearchIndexUpdater $searchIndexUpdater;
	private LinkRecommendationStore $linkRecommendationStore;
	private LinkRecommendationProvider $linkRecommendationProvider;
	/** @var LinkRecommendationTaskType|null Lazy-loaded, see getLinkRecommendationTaskType() */
	private ?LinkRecommendationTaskType $linkRecommendationTaskType = null;

	/**
	 * @param IConnectionProvider $connectionProvider
	 * @param RevisionStore $revisionStore
	 * @param NameTableStore $changeDefNameTableStore
	 * @param PageProps $pageProps
	 * @param ConfigurationLoader $configurationLoader
	 * @param SearchIndexUpdater $searchIndexUpdater
	 * @param LinkRecommendationProvider $linkRecommendationProvider Note that this needs to be
	 *   the uncached provider, as caching is done by LinkRecommendationUpdater.
	 * @param LinkRecommendationStore $linkRecommendationStore
	 */
	public function __construct(
		IConnectionProvider $connectionProvider,
		RevisionStore $revisionStore,
		NameTableStore $changeDefNameTableStore,
		PageProps $pageProps,
		ConfigurationLoader $configurationLoader,
		SearchIndexUpdater $searchIndexUpdater,
		LinkRecommendationProvider $linkRecommendationProvider,
		LinkRecommendationStore $linkRecommendationStore
	) {
		$this->connectionProvider = $connectionProvider;
		$this->revisionStore = $revisionStore;
		$this->changeDefNameTableStore = $changeDefNameTableStore;
		$this->pageProps = $pageProps;
		$this->configurationLoader = $configurationLoader;
		$this->searchIndexUpdater = $searchIndexUpdater;
		$this->linkRecommendationProvider = $linkRecommendationProvider;
		$this->linkRecommendationStore = $linkRecommendationStore;
	}

	/**
	 * Evaluate a task candidate and generate the task if the candidate is viable.
	 * If a link recommendation task already exists for the given page, it will be overwritten.
	 * @param Title $title
	 * @param bool $force Ignore all failed conditions that can be safely ignored.
	 * @return StatusValue Success status. Note that the error messages are not intended
	 *   for users (and as such not localizable).
	 * @throws WikiConfigException if the task type is not properly configured.
	 * @throws DBReadOnlyError
	 */
	public function processCandidate( Title $title, bool $force = false ): StatusValue {
		$lastRevision = $this->revisionStore->getRevisionByTitle( $title );
		// evaluateTitle() fails when $lastRevision is null, so it is safe to dereference below.
		$status = $this->evaluateTitle( $title, $lastRevision, $force );
		if ( !$status->isOK() ) {
			return $status;
		}

		// Prevent infinite loops in callers. Cirrus updates are not realtime so pages we have
		// just created recommendations for will be included again in the next batch.
		// Skip them, so that a good status is only returned when we have actually added
		// a new recommendation.
		// FIXME there is probably a better way to do this via search offsets.
		if ( $this->linkRecommendationStore->getByRevId( $lastRevision->getId(),
			IDBAccessObject::READ_LATEST )
		) {
			return $this->failure( 'link recommendation already stored' );
		}

		$recommendation = $this->linkRecommendationProvider->get( $title,
			$this->getLinkRecommendationTaskType() );
		if ( $recommendation instanceof StatusValue ) {
			// Returning a StatusValue is always an error for the provider. When returning it
			// from this class, it isn't necessarily interpreted that way, so mark it as failed.
			$recommendation->setOK( false );
			return $recommendation;
		}
		$status = $this->evaluateRecommendation( $recommendation, $lastRevision, $force );
		if ( !$status->isOK() ) {
			return $status;
		}

		// If an error happens later, uncommitted DB writes get discarded, while the search
		// index update is immediate. Minimize the likelihood of the DB and the search index
		// getting out of sync by wrapping the insert into a transaction (in general
		// start/endAtomic doesn't guarantee that but this method will usually be called
		// from maintenance scripts).
		$db = $this->linkRecommendationStore->getDB( DB_PRIMARY );
		$db->startAtomic( __METHOD__, IDatabase::ATOMIC_CANCELABLE );
		$this->linkRecommendationStore->insert( $recommendation );
		$status = $this->searchIndexUpdater->update( $lastRevision );
		if ( !$status->isOK() ) {
			// Discard the insert so the DB does not contain a task the index does not know about.
			$db->cancelAtomic( __METHOD__ );
			return $status;
		}
		$db->endAtomic( __METHOD__ );
		return StatusValue::newGood();
	}

	/**
	 * Check all conditions which are not related to the recommendation.
	 * @param Title $title The title for which a recommendation is being requested.
	 * @param RevisionRecord|null $revision The current revision of the title.
	 * @param bool $force Ignore all failed conditions that can be safely ignored. The
	 *   existence and content type checks are always enforced.
	 * @return StatusValue Success status. Note that the error messages are not intended
	 *   for users (and as such not localizable).
	 * @throws WikiConfigException if the task type is not properly configured.
	 */
	private function evaluateTitle( Title $title, ?RevisionRecord $revision, bool $force ): StatusValue {
		// 1. the revision must exist and the mwaddlink service must be able to interpret it.
		if ( $revision === null ) {
			// Maybe the article has just been deleted and the search index is behind?
			return $this->failure( 'page not found' );
		}
		$content = $revision->getContent( SlotRecord::MAIN );
		if ( !$content instanceof WikitextContent ) {
			return $this->failure( 'content not found' );
		}

		if ( $force ) {
			return StatusValue::newGood();
		}

		$taskType = $this->getLinkRecommendationTaskType();

		// 2. the article must match size conditions.
		// The "u" modifier makes PCRE treat the text as UTF-8, so \w also matches letters and
		// digits of non-Latin scripts. Without it only ASCII word characters are counted and
		// articles in most non-Latin scripts get a word count close to zero.
		$text = $content->getText();
		$wordCount = preg_match_all( '/\w+/u', $text );
		if ( $wordCount === false ) {
			// preg_match_all() returns false on failure (e.g. malformed UTF-8 in "u" mode);
			// fall back to the byte-wise count rather than comparing false to the limits.
			$wordCount = (int)preg_match_all( '/\w+/', $text );
		}
		if ( $wordCount < $taskType->getMinimumWordCount() ) {
			return $this->failure( "word count too small ($wordCount)" );
		} elseif ( $wordCount > $taskType->getMaximumWordCount() ) {
			return $this->failure( "word count too large ($wordCount)" );
		}

		// 3. exclude articles which have been edited very recently.
		$revisionTime = (int)MWTimestamp::convert( TS_UNIX, $revision->getTimestamp() );
		if ( time() - $revisionTime < $taskType->getMinimumTimeSinceLastEdit() ) {
			return $this->failure( 'minimum time since last edit did not pass' );
		}

		// 4. exclude disambiguation pages.
		if ( $this->pageProps->getProperties( $title, 'disambiguation' ) ) {
			return $this->failure( 'disambiguation page' );
		}

		// 5. exclude pages where the last edit is a link recommendation edit or its revert.
		$dbr = $this->connectionProvider->getReplicaDatabase();
		$tags = ChangeTags::getTagsWithData( $dbr, null, $revision->getId() );
		if ( array_key_exists( LinkRecommendationTaskTypeHandler::CHANGE_TAG, $tags ) ) {
			return $this->failure( 'last edit is a link recommendation' );
		}
		// The data of the first non-empty revert tag is JSON-decoded and used below to find
		// the range of revision IDs which were reverted by the last edit.
		$revertTagData = null;
		foreach ( ChangeTags::REVERT_TAGS as $revertTagName ) {
			if ( !empty( $tags[$revertTagName] ) ) {
				$revertTagData = json_decode( $tags[$revertTagName], true );
				break;
			}
		}
		// Only query when the decoded data actually has both range boundaries; malformed tag
		// data would otherwise trigger "undefined array key" warnings and a query for
		// revision ID 0, which cannot match anything.
		if ( is_array( $revertTagData )
			&& isset( $revertTagData['newestRevertedRevId'], $revertTagData['oldestRevertedRevId'] )
		) {
			$linkRecommendationChangeTagId = $this->changeDefNameTableStore
				->acquireId( LinkRecommendationTaskTypeHandler::CHANGE_TAG );
			// Count the reverted revisions of this page which carry the link recommendation tag.
			// The revision IDs are cast to int, so concatenating them into the condition is safe.
			$revertedAddLinkEditCount = $dbr->newSelectQueryBuilder()
				->from( 'revision' )
				->join( 'change_tag', null, [ 'rev_id = ct_rev_id' ] )
				->where( [
					'rev_page' => $title->getArticleID(),
					'rev_id <=' . (int)$revertTagData['newestRevertedRevId'],
					'rev_id >=' . (int)$revertTagData['oldestRevertedRevId'],
					'ct_tag_id' => $linkRecommendationChangeTagId,
				] )
				->caller( __METHOD__ )
				->fetchRowCount();
			if ( $revertedAddLinkEditCount > 0 ) {
				return $this->failure( 'last edit reverts a link recommendation edit' );
			}
		}
		return StatusValue::newGood();
	}

	/**
	 * Validate a recommendation against the criteria in the task type and safety checks.
	 * @param LinkRecommendation $recommendation
	 * @param RevisionRecord $revision The current revision of the page the recommendation is for.
	 * @param bool $force Ignore all failed conditions that can be safely ignored. A revision ID
	 *   mismatch and a recommendation without any good link are never ignored.
	 * @return StatusValue Success status. Note that the error messages are not intended
	 *   for users (and as such not localizable).
	 * @throws WikiConfigException if the task type is not properly configured.
	 */
	private function evaluateRecommendation(
		LinkRecommendation $recommendation,
		RevisionRecord $revision,
		bool $force
	): StatusValue {
		if ( $recommendation->getRevisionId() !== $revision->getId() ) {
			// Some kind of race condition? Generating another task is easy so just discard this.
			return $this->failure( 'revision ID mismatch' );
		}

		// T291253
		if ( !$force && $this->linkRecommendationStore->hasSubmission( $recommendation,
			IDBAccessObject::READ_LATEST )
		) {
			return $this->failure( 'submission already exists for revision ' . $revision->getId() );
		}

		// We could check here for more race conditions, ie. whether the revision in the
		// recommendation matches the live revision. But there are plenty of other ways for race
		// conditions to happen, so we'll have to deal with them on the client side anyway. No
		// point in getting a primary database connection just for that.

		// Look the limits up once instead of once per link.
		$taskType = $this->getLinkRecommendationTaskType();
		$minimumLinkScore = $taskType->getMinimumLinkScore();
		$goodLinks = array_filter(
			$recommendation->getLinks(),
			static fn ( LinkRecommendationLink $link ): bool => $link->getScore() >= $minimumLinkScore
		);
		// NOTE(review): the filtered copy is local to this method and only used for counting;
		// the caller still stores the recommendation it passed in, including the links below
		// the minimum score. Confirm that this is intended.
		$recommendation = new LinkRecommendation(
			$recommendation->getTitle(),
			$recommendation->getPageId(),
			$recommendation->getRevisionId(),
			$goodLinks,
			$recommendation->getMetadata()
		);
		$goodLinkCount = count( $recommendation->getLinks() );
		// A recommendation without any good link is useless even when forced.
		if ( $goodLinkCount === 0
			|| ( !$force && $goodLinkCount < $taskType->getMinimumLinksPerTask() )
		) {
			return $this->failure( "number of good links too small ($goodLinkCount)" );
		}

		return StatusValue::newGood();
	}

	/**
	 * Internal helper for loading the Add Link task type. Due to the involvement of on-wiki
	 * configuration, this is not available at setup time so it cannot be dependency-injected.
	 * The task type is looked up among both the enabled and the disabled task types, and is
	 * cached on the object after the first successful load.
	 * @return LinkRecommendationTaskType
	 * @throws WikiConfigException if the task type is not properly configured.
	 */
	private function getLinkRecommendationTaskType(): LinkRecommendationTaskType {
		if ( !$this->linkRecommendationTaskType ) {
			// loadTaskTypes() reports configuration errors by returning a StatusValue.
			$taskTypes = $this->configurationLoader->loadTaskTypes();
			if ( $taskTypes instanceof StatusValue ) {
				throw new WikiConfigException( 'Could not load task types: ' .
					Status::wrap( $taskTypes )->getWikiText( false, false, 'en' ) );
			}
			$taskTypes = $this->configurationLoader->getTaskTypes() +
				$this->configurationLoader->getDisabledTaskTypes();
			$taskType = $taskTypes[LinkRecommendationTaskTypeHandler::TASK_TYPE_ID] ?? null;
			if ( !( $taskType instanceof LinkRecommendationTaskType ) ) {
				throw new WikiConfigException( 'Could not load link recommendation task type' );
			}
			$this->linkRecommendationTaskType = $taskType;
		}
		return $this->linkRecommendationTaskType;
	}

	/**
	 * Convenience shortcut for making fatal StatusValue objects with non-localized messages.
	 * @param string $error Error text; wrapped into a RawMessage as is.
	 * @return StatusValue
	 */
	private function failure( string $error ): StatusValue {
		return StatusValue::newFatal( new RawMessage( $error ) );
	}

}