Code Coverage

| | Lines | Lines (covered / total) | Functions and Methods | Functions and Methods (covered / total) | CRAP | Classes and Traits | Classes and Traits (covered / total) |
|---|---|---|---|---|---|---|---|
| Total | 85.71% | 132 / 154 | 50.00% | 5 / 10 | | 0.00% | 0 / 1 |
| ArticleMetadata | 85.71% | 132 / 154 | 50.00% | 5 / 10 | 40.99 | 0.00% | 0 / 1 |
| __construct | 66.67% | 2 / 3 | 0.00% | 0 / 1 | 2.15 | | |
| deleteMetadata | 100.00% | 9 / 9 | 100.00% | 1 / 1 | 2 | | |
| flushMetadataFromCache | 100.00% | 5 / 5 | 100.00% | 1 / 1 | 3 | | |
| getMetadataForArticles | 100.00% | 40 / 40 | 100.00% | 1 / 1 | 4 | | |
| getMetadata | 68.42% | 26 / 38 | 0.00% | 0 / 1 | 8.54 | | |
| getPagesWithoutMetadata | 100.00% | 4 / 4 | 100.00% | 1 / 1 | 3 | | |
| getValidTags | 95.45% | 21 / 22 | 0.00% | 0 / 1 | 3 | | |
| clearStaticCache | 100.00% | 1 / 1 | 100.00% | 1 / 1 | 1 | | |
| validatePageIds | 96.00% | 24 / 25 | 0.00% | 0 / 1 | 8 | | |
| isValidMetadata | 0.00% | 0 / 7 | 0.00% | 0 / 1 | 20 | | |
1 | <?php |
2 | |
3 | namespace MediaWiki\Extension\PageTriage; |
4 | |
5 | use MediaWiki\Context\RequestContext; |
6 | use MediaWiki\Extension\PageTriage\ArticleCompile\ArticleCompileProcessor; |
7 | use MediaWiki\Logger\LoggerFactory; |
8 | use MediaWiki\MediaWikiServices; |
9 | use MediaWiki\Title\Title; |
10 | use Wikimedia\ObjectCache\WANObjectCache; |
11 | use Wikimedia\Rdbms\Database; |
12 | |
/**
 * Handles article metadata retrieval and saving to cache
 */
class ArticleMetadata {
	/** @var int[] List of page IDs */
	protected $pageIds;

	/**
	 * @var bool[] Memo used by validatePageIds(): map of page ID => whether the page
	 *  was found in the pagetriage_page table. An ID is recorded as false as soon as
	 *  it is looked up and flipped to true only when the query returns it.
	 */
	private static $cache = [];

	/** @var string First component of the WAN cache key that holds one page's metadata */
	private const KEY_COLLECTION = 'pagetriage-article-metadata';

	/**
	 * @param int[] $pageIds List of page IDs.
	 * @param bool $validated Whether the page IDs have already been validated. When false,
	 *  they are passed through validatePageIds() and only the surviving IDs are kept.
	 * @param int $validateDb const DB_PRIMARY/DB_REPLICA, only used when $validated is false
	 */
	public function __construct( array $pageIds, $validated = true, $validateDb = DB_PRIMARY ) {
		if ( $validated ) {
			$this->pageIds = $pageIds;
		} else {
			$this->pageIds = self::validatePageIds( $pageIds, $validateDb );
		}
	}

	/**
	 * Delete all the metadata for the articles held by this object.
	 *
	 * Removes the matching rows from pagetriage_page_tags and then purges the
	 * corresponding cache entries. Does nothing when there are no page IDs.
	 *
	 * @return bool Always true
	 */
	public function deleteMetadata() {
		if ( $this->pageIds ) {
			$dbw = PageTriageUtil::getPrimaryConnection();
			$dbw->newDeleteQueryBuilder()
				->deleteFrom( 'pagetriage_page_tags' )
				->where( [ 'ptrpt_page_id' => $this->pageIds ] )
				->caller( __METHOD__ )
				->execute();
			// also remove it from the cache
			$this->flushMetadataFromCache();
		}

		return true;
	}

	/**
	 * Flush the metadata in cache
	 *
	 * @param int|null $pageId Page ID to be flushed. If null is provided, every
	 *  page ID in $this->pageIds will be flushed.
	 */
	public function flushMetadataFromCache( $pageId = null ) {
		$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();

		$pageIdsPurge = ( $pageId === null ) ? $this->pageIds : [ $pageId ];
		foreach ( $pageIdsPurge as $pageIdPurge ) {
			$cache->delete( $cache->makeKey( self::KEY_COLLECTION, $pageIdPurge ) );
			// For Hooks::isNewEnoughToNoIndex
			$cache->delete( $cache->makeKey( 'pagetriage-page-created', $pageIdPurge ) );
		}
	}

	/**
	 * Get metadata from the replica for an array of article IDs.
	 *
	 * @param int[] $pageIds
	 * @return array[] Map of (page ID => article metadata). Pages without any row in
	 *  pagetriage_page_tags are absent from the result.
	 */
	public static function getMetadataForArticles( array $pageIds ) {
		// Nothing to look up; also avoids building an IN() condition from an empty
		// list, consistent with the guards in deleteMetadata() and validatePageIds().
		if ( !$pageIds ) {
			return [];
		}

		$dbr = PageTriageUtil::getReplicaConnection();

		$res = $dbr->newSelectQueryBuilder()
			->select( [
				'ptrpt_page_id',
				'ptrt_tag_name',
				'ptrpt_value',
				'ptrp_reviewed',
				'ptrp_created',
				'page_title',
				'page_namespace',
				'page_is_redirect',
				'ptrp_last_reviewed_by',
				'ptrp_reviewed_updated',
				'reviewer' => 'user_name'
			] )
			->from( 'pagetriage_page_tags' )
			->join( 'pagetriage_tags', null, 'ptrpt_tag_id = ptrt_tag_id' )
			->join( 'pagetriage_page', null, 'ptrpt_page_id = ptrp_page_id' )
			->join( 'page', null, 'page_id = ptrp_page_id' )
			->leftJoin( 'user', 'user', 'user_id = ptrp_last_reviewed_by' )
			->where( [ 'ptrpt_page_id' => $pageIds ] )
			->caller( __METHOD__ )
			->fetchResultSet();

		$pageData = [];
		// One row per tag per page. So 2 pages with 3 tags each will generate 6 rows.
		foreach ( $res as $row ) {
			$pageId = $row->ptrpt_page_id;

			// Set the tag
			$pageData[$pageId][$row->ptrt_tag_name] = $row->ptrpt_value;

			// If not set yet, add some other basic page data too. These columns are
			// identical on every row of the same page, so they are copied only once.
			if ( !isset( $pageData[$pageId]['creation_date'] ) ) {
				$pageData[$pageId]['creation_date'] = wfTimestamp( TS_MW, $row->ptrp_created );
				// The patrol_status has 4 possible values:
				// 0 = unreviewed, 1 = reviewed, 2 = patrolled, 3 = autopatrolled
				$pageData[$pageId]['patrol_status'] = $row->ptrp_reviewed;
				$pageData[$pageId]['is_redirect'] = $row->page_is_redirect;
				$pageData[$pageId]['ptrp_last_reviewed_by'] = $row->ptrp_last_reviewed_by;
				$pageData[$pageId]['ptrp_reviewed_updated'] = wfTimestamp(
					TS_MW,
					$row->ptrp_reviewed_updated
				);
				$pageData[$pageId]['reviewer'] = $row->reviewer;
				$title = Title::makeTitle( $row->page_namespace, $row->page_title );
				if ( $title ) {
					$pageData[$pageId]['title'] = $title->getPrefixedText();
				}
			}
		}
		return $pageData;
	}

	/**
	 * Get the metadata for a single or list of articles.
	 *
	 * First attempt to load metadata from the cache (memcached backend). If not found, then
	 * attempt to load compiled metadata from the replica. If that fails, recompile the metadata
	 * and either save to DB at end of request (if in a POST context) or add a job to the queue
	 * to save to the DB at a later point in time.
	 *
	 * @return array Map of (page ID => metadata). The cache callback stores false instead
	 *  of a metadata array for any page whose metadata could be neither loaded nor compiled.
	 */
	public function getMetadata() {
		// @TODO: inject this from somewhere
		$wasPosted = RequestContext::getMain()->getRequest()->wasPosted();

		$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
		$metadataByKey = $cache->getMultiWithUnionSetCallback(
			$cache->makeMultiKeys(
				$this->pageIds,
				static function ( $pageId ) use ( $cache ) {
					return $cache->makeKey( self::KEY_COLLECTION, $pageId );
				}
			),
			$cache::TTL_DAY,
			// The callback only touches static members, so it does not need $this.
			static function ( array $pageIds, array &$ttls, array &$setOpts ) use ( $wasPosted ) {
				$dbr = PageTriageUtil::getReplicaConnection();

				$setOpts += Database::getCacheSetOptions( $dbr );

				// Grab metadata from database after cache attempt
				$metadataByPageId = self::getMetadataForArticles( $pageIds );
				$pageIdsCompile = self::getPagesWithoutMetadata( $pageIds, $metadataByPageId );
				// Compile the denormalized metadata for pages that still don't have it
				if ( $pageIdsCompile ) {
					$acp = ArticleCompileProcessor::newFromPageId(
						$pageIdsCompile,
						// NOTE(review): this argument was previously annotated "skip validation".
						// If it mirrors the $validated flag of ArticleMetadata::__construct, false
						// requests validation (against the replica) rather than skipping it.
						// Confirm against ArticleCompileProcessor::newFromPageId.
						false,
						DB_REPLICA
					);
					if ( $acp ) {
						// Update the DB in a POSTSEND deferred update if the context is that
						// of an HTTP POST request. Otherwise, enqueue a job to update the DB.
						$mode = $wasPosted ? $acp::SAVE_DEFERRED : $acp::SAVE_JOB;
						$metadataByPageId += $acp->compileMetadata( $mode );
					}
				}

				$placeholderMetadata = array_fill_keys( array_keys( self::getValidTags() ), '' );

				// Give every page an entry for each valid tag. Array union keeps values
				// that are already present and only adds '' for the missing tags.
				foreach ( $metadataByPageId as &$metadata ) {
					$metadata += $placeholderMetadata;
				}
				// Break the reference so later writes to $metadata cannot alter the last entry.
				unset( $metadata );

				foreach ( $pageIds as $pageId ) {
					if ( !isset( $metadataByPageId[ $pageId ] ) ) {
						LoggerFactory::getInstance( 'PageTriage' )
							->warning( 'Expected metadata to be cached for page ID {pageId}, but no metadata found.',
								[ 'pageId' => $pageId ] );
						// Set an uncacheable value so that WANObjectCache doesn't break (T303092).
						$metadataByPageId[ $pageId ] = false;
					}
				}

				return $metadataByPageId;
			},
			[ 'version' => PageTriage::CACHE_VERSION ]
		);
		return $cache->multiRemap( $this->pageIds, $metadataByKey );
	}

	/**
	 * Get the pages without metadata yet
	 *
	 * @param int[] $articles Page IDs to check
	 * @param array[] $data Map of (page ID => metadata) that is already available
	 * @return int[] The entries of $articles that have no entry in $data, with their
	 *  original array keys preserved
	 */
	private static function getPagesWithoutMetadata( array $articles, array $data ) {
		return array_filter(
			$articles,
			static function ( $pageId ) use ( $data ) {
				return !isset( $data[$pageId] );
			}
		);
	}

	/**
	 * Return a complete list of metadata tag names and IDs in the pagetriage_tags table
	 *
	 * The list is kept in the WAN cache for two days. An empty result is never cached.
	 *
	 * @return string[] Map of tag name to tag ID
	 */
	public static function getValidTags() {
		// __METHOD__ inside the closure would name the closure, so capture it here.
		$fname = __METHOD__;
		$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();

		return $cache->getWithSetCallback(
			$cache->makeKey( 'pagetriage-valid-tags' ),
			2 * $cache::TTL_DAY,
			static function ( $oldValue, &$ttl, &$setOpts ) use ( $fname ) {
				$dbr = PageTriageUtil::getReplicaConnection();
				$setOpts += Database::getCacheSetOptions( $dbr );

				$res = $dbr->newSelectQueryBuilder()
					->select( [ 'ptrt_tag_id', 'ptrt_tag_name' ] )
					->from( 'pagetriage_tags' )
					->caller( $fname )
					->fetchResultSet();

				$tags = [];
				foreach ( $res as $row ) {
					$tags[$row->ptrt_tag_name] = $row->ptrt_tag_id;
				}

				// Only set to cache if the result from db is not empty
				if ( !$tags ) {
					$ttl = WANObjectCache::TTL_UNCACHEABLE;
				}

				return $tags;
			},
			[ 'version' => PageTriage::CACHE_VERSION ]
		);
	}

	/**
	 * Used to clear the cache between tests.
	 *
	 * Resets the memo consulted by validatePageIds().
	 */
	public static function clearStaticCache() {
		self::$cache = [];
	}

	/**
	 * Typecast the value in page id array to int and verify that it's
	 * in page triage queue
	 *
	 * Results are memoized in self::$cache, so an ID is looked up in the database at
	 * most once until clearStaticCache() is called. Values that cast to 0 are dropped.
	 *
	 * @param int[] $pageIds List of page IDs.
	 * @param int $validateDb const DB_PRIMARY/DB_REPLICA
	 * @return int[] The valid page IDs, as a de-duplicated, sequentially indexed list of ints.
	 */
	public static function validatePageIds( array $pageIds, $validateDb = DB_PRIMARY ) {
		$cleanUp = [];
		foreach ( $pageIds as $key => $val ) {
			$casted = (int)$val;
			if ( $casted ) {
				if ( isset( self::$cache[$casted] ) ) {
					// Already looked up earlier: reuse the answer and skip the query.
					if ( self::$cache[$casted] ) {
						$cleanUp[] = $casted;
					}
					unset( $pageIds[$key] );
				} else {
					$pageIds[$key] = $casted;
					// Assume "not in the queue" until the query below says otherwise.
					self::$cache[$casted] = false;
				}
			} else {
				unset( $pageIds[$key] );
			}
		}

		if ( $pageIds ) {
			if ( $validateDb == DB_PRIMARY ) {
				$db = PageTriageUtil::getPrimaryConnection();
			} else {
				$db = PageTriageUtil::getReplicaConnection();
			}

			$res = $db->newSelectQueryBuilder()
				->select( [ 'ptrp_page_id' ] )
				->from( 'pagetriage_page' )
				->where( [ 'ptrp_page_id' => $pageIds ] )
				->caller( __METHOD__ )
				->fetchResultSet();

			foreach ( $res as $row ) {
				// Cast so the result holds ints only, matching the IDs taken from the memo.
				$foundId = (int)$row->ptrp_page_id;
				$cleanUp[] = $foundId;
				self::$cache[$foundId] = true;
			}
		}

		// array_unique() keeps the first key of each value; re-index to return a plain list.
		return array_values( array_unique( $cleanUp ) );
	}

	/**
	 * Check if required metadata generated by ArticleMetadata#getMetadata is set.
	 *
	 * This is intended to help prevent the UI from breaking if metadata compilation fails.
	 *
	 * @param array $metadata Metadata of a single page
	 * @return bool False (after logging a debug message) if 'user_name' or 'title' is
	 *  missing, null or an empty string; true otherwise
	 */
	public static function isValidMetadata( array $metadata ) {
		$requiredPopulatedFields = [ 'user_name', 'title' ];
		foreach ( $requiredPopulatedFields as $field ) {
			if ( !isset( $metadata[$field] ) || $metadata[$field] === '' ) {
				LoggerFactory::getInstance( 'PageTriage' )->debug( 'Incomplete metadata for page.',
					[ 'metadata' => json_encode( $metadata ) ] );
				return false;
			}
		}
		return true;
	}

}