Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
17.80% |
34 / 191 |
|
25.00% |
4 / 16 |
CRAP | |
0.00% |
0 / 1 |
LinkRecommendationStore | |
17.80% |
34 / 191 |
|
25.00% |
4 / 16 |
974.61 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
getByCondition | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
12 | |||
getByRevId | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getByPageId | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getByLinkTarget | |
80.00% |
8 / 10 |
|
0.00% |
0 / 1 |
4.13 | |||
getAllRecommendations | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
6 | |||
filterPageIds | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
20 | |||
listPageIds | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
6 | |||
insert | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
1 | |||
deleteByPageIds | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
1 | |||
deleteByLinkTarget | |
80.00% |
4 / 5 |
|
0.00% |
0 / 1 |
2.03 | |||
getExcludedLinkIds | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
2 | |||
recordSubmission | |
0.00% |
0 / 46 |
|
0.00% |
0 / 1 |
72 | |||
hasSubmission | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
6 | |||
getDB | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getLinkRecommendationsFromRows | |
0.00% |
0 / 29 |
|
0.00% |
0 / 1 |
56 |
1 | <?php |
2 | |
3 | namespace GrowthExperiments\NewcomerTasks\AddLink; |
4 | |
5 | use DomainException; |
6 | use GrowthExperiments\Util; |
7 | use IDBAccessObject; |
8 | use MediaWiki\Cache\LinkBatchFactory; |
9 | use MediaWiki\Linker\LinkTarget; |
10 | use MediaWiki\Page\PageRecord; |
11 | use MediaWiki\Page\PageStore; |
12 | use MediaWiki\Title\TitleFactory; |
13 | use MediaWiki\Title\TitleValue; |
14 | use MediaWiki\User\UserIdentity; |
15 | use RuntimeException; |
16 | use stdClass; |
17 | use Wikimedia\Rdbms\IDatabase; |
18 | use Wikimedia\Rdbms\ILoadBalancer; |
19 | use Wikimedia\Rdbms\OrExpressionGroup; |
20 | use Wikimedia\Rdbms\SelectQueryBuilder; |
21 | |
22 | /** |
23 | * Service that handles access to the link recommendation related database tables. |
24 | */ |
25 | class LinkRecommendationStore { |
26 | |
/** @var ILoadBalancer Source of the primary/replica database connections. */
private ILoadBalancer $loadBalancer;

/** @var TitleFactory Turns link targets and title strings into Title objects. */
private TitleFactory $titleFactory;

/** @var LinkBatchFactory Creates link batches for looking up page IDs in bulk. */
private LinkBatchFactory $linkBatchFactory;

/** @var PageStore Loads page records by page ID. */
private PageStore $pageStore;

/**
 * Store the injected services; no other work is done at construction time.
 * @param ILoadBalancer $loadBalancer
 * @param TitleFactory $titleFactory
 * @param LinkBatchFactory $linkBatchFactory
 * @param PageStore $pageStore
 */
public function __construct(
	ILoadBalancer $loadBalancer,
	TitleFactory $titleFactory,
	LinkBatchFactory $linkBatchFactory,
	PageStore $pageStore
) {
	$this->pageStore = $pageStore;
	$this->linkBatchFactory = $linkBatchFactory;
	$this->titleFactory = $titleFactory;
	$this->loadBalancer = $loadBalancer;
}
49 | |
50 | // growthexperiments_link_recommendations |
51 | |
/**
 * Fetch a single link recommendation matching an arbitrary condition.
 * @param array $condition A Database::select() condition array.
 * @param int $flags IDBAccessObject flags; READ_LATEST switches the query to the primary DB.
 * @return LinkRecommendation|null Null when no row matches, or when the matching row
 *   could not be converted into a recommendation.
 */
protected function getByCondition( array $condition, int $flags = 0 ): ?LinkRecommendation {
	$wantsLatest = ( $flags & IDBAccessObject::READ_LATEST ) === IDBAccessObject::READ_LATEST;
	$db = $this->getDB( $wantsLatest ? DB_PRIMARY : DB_REPLICA );

	$queryBuilder = $db->newSelectQueryBuilder()
		->select( [ 'gelr_page', 'gelr_revision', 'gelr_data' ] )
		->from( 'growthexperiments_link_recommendations' )
		->where( $condition )
		->caller( __METHOD__ )
		->recency( $flags )
		// The condition should identify one row at most; should it ever match several,
		// prefer the recommendation for the newest revision.
		->orderBy( 'gelr_revision', SelectQueryBuilder::SORT_DESC );

	$row = $queryBuilder->fetchRow();
	if ( $row === false ) {
		return null;
	}

	$recommendations = $this->getLinkRecommendationsFromRows( [ $row ], $flags );
	return $recommendations[0] ?? null;
}
79 | |
/**
 * Look up the link recommendation stored for a given revision.
 * @param int $revId
 * @param int $flags IDBAccessObject flags
 * @return LinkRecommendation|null
 */
public function getByRevId( int $revId, int $flags = 0 ): ?LinkRecommendation {
	$condition = [ 'gelr_revision' => $revId ];
	return $this->getByCondition( $condition, $flags );
}
89 | |
/**
 * Look up the link recommendation stored for a given page.
 * @param int $pageId
 * @param int $flags IDBAccessObject flags
 * @return LinkRecommendation|null
 */
public function getByPageId( int $pageId, int $flags = 0 ): ?LinkRecommendation {
	$condition = [ 'gelr_page' => $pageId ];
	return $this->getByCondition( $condition, $flags );
}
99 | |
/**
 * Look up the link recommendation for the page a link target points to.
 * @param LinkTarget $linkTarget
 * @param int $flags IDBAccessObject flags
 * @param bool $allowOldRevision When true, return any recommendation for the given page;
 *   otherwise, only use a recommendation if it's for the current revision.
 * @return LinkRecommendation|null Null when the page / revision ID resolves to 0 or
 *   no recommendation is stored.
 */
public function getByLinkTarget(
	LinkTarget $linkTarget,
	int $flags = 0,
	bool $allowOldRevision = false
): ?LinkRecommendation {
	$title = $this->titleFactory->newFromLinkTarget( $linkTarget );

	if ( !$allowOldRevision ) {
		// Strict mode: the recommendation must belong to the latest revision.
		$latestRevId = $title->getLatestRevID( $flags );
		return $latestRevId === 0 ? null : $this->getByRevId( $latestRevId, $flags );
	}

	// Lenient mode: any recommendation stored for the page will do.
	$articleId = $title->getArticleID( $flags );
	return $articleId === 0 ? null : $this->getByPageId( $articleId, $flags );
}
128 | |
/**
 * Iterate through all link recommendations, in ascending page ID order.
 * @param int $limit Maximum number of rows to read in this batch.
 * @param int|false &$fromPageId Starting page ID (inclusive). Will be set to the last
 *   fetched page ID plus one. (This cannot be done on the caller side because records
 *   with non-existing page IDs are omitted from the result.) Will be set to false when
 *   there are no more rows, i.e. when the batch came back empty or shorter than $limit.
 * @return LinkRecommendation[]
 */
public function getAllRecommendations( int $limit, int &$fromPageId ): array {
	$dbr = $this->getDB( DB_REPLICA );
	$res = $dbr->newSelectQueryBuilder()
		->select( [ 'gelr_revision', 'gelr_page', 'gelr_data' ] )
		->from( 'growthexperiments_link_recommendations' )
		->where( $dbr->expr( 'gelr_page', '>=', $fromPageId ) )
		->orderBy( 'gelr_page ASC' )
		->limit( $limit )
		->caller( __METHOD__ )->fetchResultSet();
	$rows = iterator_to_array( $res );

	// Only a full, non-empty batch can be followed by more rows. Checking for emptiness
	// first avoids dereferencing a non-existent last row (e.g. when $limit is 0).
	if ( $rows && $res->numRows() === $limit ) {
		$lastRow = $rows[array_key_last( $rows )];
		$fromPageId = (int)$lastRow->gelr_page + 1;
	} else {
		$fromPageId = false;
	}

	return $this->getLinkRecommendationsFromRows( $rows );
}
151 | |
/**
 * Given a set of page IDs, return the ones which have a valid link recommendation
 * (valid as in it's for the latest revision).
 * @param int[] $pageIds
 * @return int[] Empty when no page IDs are given or none of them belongs to an
 *   existing page with a latest revision.
 */
public function filterPageIds( array $pageIds ): array {
	if ( !$pageIds ) {
		return [];
	}

	$pageRecords = $this->pageStore
		->newSelectQueryBuilder()
		->wherePageIds( $pageIds )
		->caller( __METHOD__ )
		->fetchPageRecords();

	$conds = [];
	$dbr = $this->loadBalancer->getConnection( DB_REPLICA );
	/** @var PageRecord $pageRecord */
	foreach ( $pageRecords as $pageRecord ) {
		$pageId = $pageRecord->getId();
		$revId = $pageRecord->getLatest();
		if ( !$pageId || !$revId ) {
			continue;
		}
		// $revId can be outdated due to replag, so accept recommendations for the same
		// or a newer revision rather than treating them as invalid.
		$conds[] = $dbr->expr( 'gelr_page', '=', $pageId )->and( 'gelr_revision', '>=', $revId );
	}

	// An OR group with no members cannot be turned into a meaningful WHERE clause;
	// with nothing to match, no page has a valid recommendation.
	if ( !$conds ) {
		return [];
	}

	return array_map( 'intval', $dbr->newSelectQueryBuilder()
		->select( 'gelr_page' )
		->from( 'growthexperiments_link_recommendations' )
		->where( new OrExpressionGroup( ...$conds ) )
		->caller( __METHOD__ )
		->fetchFieldValues() );
}
184 | |
/**
 * List all pages with link recommendations, by page ID, in ascending order.
 * @param int $limit Maximum number of page IDs to return.
 * @param int|null $from ID to list from, exclusive; null lists from the beginning.
 * @return int[]
 */
public function listPageIds( int $limit, ?int $from = null ): array {
	$dbr = $this->loadBalancer->getConnection( DB_REPLICA );
	$queryBuilder = $dbr->newSelectQueryBuilder()
		->select( 'gelr_page' )
		->from( 'growthexperiments_link_recommendations' )
		// Grouping collapses multiple recommendation rows of the same page into one ID.
		->groupBy( 'gelr_page' )
		->orderBy( 'gelr_page ASC' )
		->limit( $limit )
		->caller( __METHOD__ );

	if ( $from !== null ) {
		$queryBuilder->where( $dbr->expr( 'gelr_page', '>', $from ) );
	}

	return array_map( 'intval', $queryBuilder->fetchFieldValues() );
}
203 | |
/**
 * Store a link recommendation, replacing any existing row for the same revision.
 * The recommendation is serialized to JSON via its toArray() representation.
 * @param LinkRecommendation $linkRecommendation
 */
public function insert( LinkRecommendation $linkRecommendation ): void {
	$pageId = $linkRecommendation->getPageId();
	$revisionId = $linkRecommendation->getRevisionId();
	$serializedData = json_encode( $linkRecommendation->toArray() );

	$dbw = $this->loadBalancer->getConnection( DB_PRIMARY );
	$dbw->newReplaceQueryBuilder()
		->replaceInto( 'growthexperiments_link_recommendations' )
		->uniqueIndexFields( 'gelr_revision' )
		->row( [
			'gelr_revision' => $revisionId,
			'gelr_page' => $pageId,
			'gelr_data' => $serializedData,
		] )
		->caller( __METHOD__ )
		->execute();
}
223 | |
/**
 * Delete all link recommendations for the given pages.
 * @param int[] $pageIds
 * @return int The number of deleted rows (0 when no page IDs are given).
 */
public function deleteByPageIds( array $pageIds ): int {
	// An empty ID list would produce an empty IN() condition, which the database
	// layer refuses to build; there is nothing to delete in that case anyway.
	if ( !$pageIds ) {
		return 0;
	}

	$dbw = $this->loadBalancer->getConnection( DB_PRIMARY );
	$dbw->newDeleteQueryBuilder()
		->deleteFrom( 'growthexperiments_link_recommendations' )
		->where( [ 'gelr_page' => $pageIds ] )
		->caller( __METHOD__ )
		->execute();
	return $dbw->affectedRows();
}
238 | |
/**
 * Delete all link recommendations for the page a link target points to.
 * The page ID is resolved with READ_LATEST.
 * @param LinkTarget $linkTarget
 * @return bool True if at least one row was deleted; false if nothing was deleted
 *   or the page ID resolved to 0.
 */
public function deleteByLinkTarget( LinkTarget $linkTarget ): bool {
	$title = $this->titleFactory->newFromLinkTarget( $linkTarget );
	$pageId = $title->getArticleID( IDBAccessObject::READ_LATEST );
	if ( $pageId === 0 ) {
		return false;
	}

	$deletedRowCount = $this->deleteByPageIds( [ $pageId ] );
	return (bool)$deletedRowCount;
}
252 | |
253 | // growthexperiments_link_submissions |
254 | |
/**
 * Get the link targets of a page which have been rejected by users often enough
 * that they should not be recommended anymore.
 * @param int $pageId Page the recommendations were made for.
 * @param int $limit Link targets rejected at least this many times are included.
 * @return int[] The gels_target values of the excluded link targets.
 */
public function getExcludedLinkIds( int $pageId, int $limit ): array {
	$dbr = $this->loadBalancer->getConnection( DB_REPLICA );
	$rejectedTargetIds = $dbr->newSelectQueryBuilder()
		->select( 'gels_target' )
		->from( 'growthexperiments_link_submissions' )
		// 'r' is the feedback code recordSubmission() stores for rejected targets.
		->where( [ 'gels_page' => $pageId, 'gels_feedback' => 'r' ] )
		->groupBy( 'gels_target' )
		// $limit is type-checked as int, so interpolating it here is safe.
		->having( "COUNT(*) >= $limit" )
		->caller( __METHOD__ )
		->fetchFieldValues();

	return array_map( 'intval', $rejectedTargetIds );
}
273 | |
/**
 * Record user feedback about a set of recommended links.
 * Caller should make sure there is no feedback recorded for this revision yet.
 *
 * One row is written per submitted target ID that can be matched to a link of the
 * recommendation; target IDs that cannot be matched are logged and skipped.
 *
 * @param UserIdentity $user
 * @param LinkRecommendation $linkRecommendation
 * @param int[] $acceptedTargetIds Page IDs of accepted link targets.
 * @param int[] $rejectedTargetIds Page IDs of rejected link targets.
 * @param int[] $skippedTargetIds Page IDs of skipped link targets.
 * @param int|null $editRevId Revision ID of the edit adding the links (might be null since
 *   it's not necessary that any links have been added).
 */
public function recordSubmission(
	UserIdentity $user,
	LinkRecommendation $linkRecommendation,
	array $acceptedTargetIds,
	array $rejectedTargetIds,
	array $skippedTargetIds,
	?int $editRevId
): void {
	$pageId = $linkRecommendation->getPageId();
	$revId = $linkRecommendation->getRevisionId();
	$recommendedLinks = $linkRecommendation->getLinks();
	// Keys are the single-letter feedback codes stored in gels_feedback.
	$targetIdsByFeedback = [
		'a' => $acceptedTargetIds,
		'r' => $rejectedTargetIds,
		's' => $skippedTargetIds,
	];

	// Resolve every recommended link's title to a page ID in one batch, so that the
	// submitted page IDs can be mapped back to the links they refer to.
	$batch = $this->linkBatchFactory->newLinkBatch();
	$dbKeyByLinkIndex = [];
	foreach ( $recommendedLinks as $index => $recommendedLink ) {
		$targetTitle = $this->titleFactory->newFromTextThrow( $recommendedLink->getLinkTarget() );
		$dbKeyByLinkIndex[$index] = $targetTitle->getPrefixedDBkey();
		$batch->addObj( $targetTitle );
	}
	$linkIndexByDbKey = array_flip( $dbKeyByLinkIndex );
	$dbKeyByPageId = array_flip( $batch->execute() );

	$linkByTargetId = [];
	$submittedTargetIds = array_merge( $acceptedTargetIds, $rejectedTargetIds, $skippedTargetIds );
	foreach ( $submittedTargetIds as $targetId ) {
		if ( !isset( $dbKeyByPageId[$targetId] ) ) {
			// User-submitted page ID does not exist. Could be some kind of race condition.
			Util::logException( new RuntimeException( 'Page ID does not exist ' ), [
				'pageID' => $targetId,
			] );
			continue;
		}
		$dbKey = $dbKeyByPageId[$targetId];
		$linkByTargetId[$targetId] = $recommendedLinks[$linkIndexByDbKey[$dbKey]];
	}

	// Fields shared by every row of this submission.
	$sharedFields = [
		'gels_page' => $pageId,
		'gels_revision' => $revId,
		'gels_edit_revision' => $editRevId,
		'gels_user' => $user->getId(),
	];
	$rows = [];
	foreach ( $targetIdsByFeedback as $feedback => $targetIds ) {
		foreach ( $targetIds as $targetId ) {
			if ( empty( $linkByTargetId[$targetId] ) ) {
				// Unmatched target; already logged above.
				continue;
			}
			$matchedLink = $linkByTargetId[$targetId];
			$rows[] = $sharedFields + [
				'gels_target' => $targetId,
				'gels_feedback' => $feedback,
				'gels_anchor_offset' => $matchedLink->getWikitextOffset(),
				// Length in characters, not bytes.
				'gels_anchor_length' => mb_strlen( $matchedLink->getText(), 'UTF-8' ),
			];
		}
	}

	if ( !$rows ) {
		return;
	}
	$dbw = $this->loadBalancer->getConnection( DB_PRIMARY );
	$dbw->newInsertQueryBuilder()
		->insertInto( 'growthexperiments_link_submissions' )
		->rows( $rows )
		->caller( __METHOD__ )
		->execute();
}
351 | |
/**
 * Tell whether any submission has been recorded for the revision of a recommendation.
 * @param LinkRecommendation $linkRecommendation
 * @param int $flags IDBAccessObject flags; READ_LATEST switches the query to the primary DB.
 * @return bool
 */
public function hasSubmission( LinkRecommendation $linkRecommendation, int $flags ): bool {
	$wantsLatest = ( $flags & IDBAccessObject::READ_LATEST ) === IDBAccessObject::READ_LATEST;
	$db = $this->getDB( $wantsLatest ? DB_PRIMARY : DB_REPLICA );

	$submissionCount = $db->newSelectQueryBuilder()
		->select( '*' )
		->from( 'growthexperiments_link_submissions' )
		->where( [ 'gels_revision' => $linkRecommendation->getRevisionId() ] )
		->caller( __METHOD__ )
		->fetchRowCount();

	return (bool)$submissionCount;
}
370 | |
371 | // common |
372 | |
/**
 * Get a database connection from the load balancer.
 * @param int $index DB_PRIMARY or DB_REPLICA
 * @return IDatabase
 */
public function getDB( int $index ): IDatabase {
	$connection = $this->loadBalancer->getConnection( $index );
	return $connection;
}
380 | |
/**
 * Convert growthexperiments_link_recommendations rows into objects.
 * Rows with no matching page are skipped.
 * @param stdClass[] $rows Rows with gelr_page, gelr_revision and gelr_data fields.
 * @param int $flags IDBAccessObject flags, used when loading the page records.
 * @return LinkRecommendation[]
 * @throws DomainException When a row's gelr_data is not valid JSON, or when the data
 *   of a row with a matching page is not an array with a 'links' list.
 */
private function getLinkRecommendationsFromRows( array $rows, int $flags = 0 ): array {
	if ( !$rows ) {
		return [];
	}

	$pageIds = [];
	foreach ( $rows as $row ) {
		$pageIds[] = $row->gelr_page;
	}

	// Load all page records in one query and index the resulting link targets by page ID.
	$pageRecords = $this->pageStore
		->newSelectQueryBuilder( $flags )
		->wherePageIds( $pageIds )
		->caller( __METHOD__ )
		->fetchPageRecords();

	$linkTargets = [];
	/** @var PageRecord $pageRecord */
	foreach ( $pageRecords as $pageRecord ) {
		$linkTargets[$pageRecord->getId()] = TitleValue::castPageToLinkTarget( $pageRecord );
	}

	$linkRecommendations = [];
	foreach ( $rows as $row ) {
		// json_decode() reports failure by returning null; surface that as an exception.
		$data = json_decode( $row->gelr_data, true );
		if ( $data === null ) {
			throw new DomainException( 'Invalid JSON: ' . json_last_error_msg() );
		}
		$linkTarget = $linkTargets[$row->gelr_page] ?? null;
		if ( !$linkTarget ) {
			continue;
		}
		// Valid JSON is not necessarily well-formed recommendation data; fail with a
		// clear error instead of reading a missing key and passing null on.
		if ( !is_array( $data ) || !is_array( $data['links'] ?? null ) ) {
			throw new DomainException(
				'Malformed link recommendation data for page ' . $row->gelr_page
			);
		}

		$linkRecommendations[] = new LinkRecommendation(
			$linkTarget,
			$row->gelr_page,
			$row->gelr_revision,
			LinkRecommendation::getLinksFromArray( $data['links'] ),
			// Backwards compatibility for recommendations added before metadata was included in output and stored.
			LinkRecommendation::getMetadataFromArray( $data['meta'] ?? [] )
		);
	}
	return $linkRecommendations;
}
433 | |
434 | } |