Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
85.71% covered (warning)
85.71%
132 / 154
50.00% covered (danger)
50.00%
5 / 10
CRAP
0.00% covered (danger)
0.00%
0 / 1
ArticleMetadata
85.71% covered (warning)
85.71%
132 / 154
50.00% covered (danger)
50.00%
5 / 10
40.99
0.00% covered (danger)
0.00%
0 / 1
 __construct
66.67% covered (warning)
66.67%
2 / 3
0.00% covered (danger)
0.00%
0 / 1
2.15
 deleteMetadata
100.00% covered (success)
100.00%
9 / 9
100.00% covered (success)
100.00%
1 / 1
2
 flushMetadataFromCache
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
3
 getMetadataForArticles
100.00% covered (success)
100.00%
40 / 40
100.00% covered (success)
100.00%
1 / 1
4
 getMetadata
68.42% covered (warning)
68.42%
26 / 38
0.00% covered (danger)
0.00%
0 / 1
8.54
 getPagesWithoutMetadata
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
3
 getValidTags
95.45% covered (success)
95.45%
21 / 22
0.00% covered (danger)
0.00%
0 / 1
3
 clearStaticCache
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 validatePageIds
96.00% covered (success)
96.00%
24 / 25
0.00% covered (danger)
0.00%
0 / 1
8
 isValidMetadata
0.00% covered (danger)
0.00%
0 / 7
0.00% covered (danger)
0.00%
0 / 1
20
1<?php
2
3namespace MediaWiki\Extension\PageTriage;
4
5use MediaWiki\Context\RequestContext;
6use MediaWiki\Extension\PageTriage\ArticleCompile\ArticleCompileProcessor;
7use MediaWiki\Logger\LoggerFactory;
8use MediaWiki\MediaWikiServices;
9use MediaWiki\Title\Title;
10use WANObjectCache;
11use Wikimedia\Rdbms\Database;
12
/**
 * Handles article metadata retrieval and saving to cache.
 *
 * Metadata rows are read from / deleted in the pagetriage_page_tags table and cached
 * per page in the main WAN object cache under the KEY_COLLECTION key prefix.
 */
class ArticleMetadata {
    /** @var int[] List of page IDs */
    protected $pageIds;

    /**
     * @var bool[] Map of (page ID => whether the page was found in the pagetriage_page
     *  table). Filled by validatePageIds(); negative results are stored as false.
     */
    private static $cache = [];

    /** @var string Key prefix of the per-page metadata entries in the WAN object cache */
    private const KEY_COLLECTION = 'pagetriage-article-metadata';

    /**
     * @param int[] $pageIds List of page IDs.
     * @param bool $validated Whether the page IDs have already been validated. When false,
     *  they are passed through validatePageIds() first.
     * @param int $validateDb const DB_PRIMARY/DB_REPLICA, only used when $validated is false
     */
    public function __construct( array $pageIds, $validated = true, $validateDb = DB_PRIMARY ) {
        if ( $validated ) {
            $this->pageIds = $pageIds;
        } else {
            $this->pageIds = self::validatePageIds( $pageIds, $validateDb );
        }
    }

    /**
     * Delete all the metadata for the articles this object was constructed with.
     *
     * Does nothing when the list of page IDs is empty.
     *
     * @return bool Always true
     */
    public function deleteMetadata() {
        if ( $this->pageIds ) {
            $dbw = PageTriageUtil::getPrimaryConnection();
            $dbw->newDeleteQueryBuilder()
                ->deleteFrom( 'pagetriage_page_tags' )
                ->where( [ 'ptrpt_page_id' => $this->pageIds ] )
                ->caller( __METHOD__ )
                ->execute();
            // also remove it from the cache
            $this->flushMetadataFromCache();
        }

        return true;
    }

    /**
     * Flush the metadata in cache
     *
     * @param int|null $pageId Page ID to be flushed. If null is provided, all page IDs
     *  in $this->pageIds will be flushed.
     */
    public function flushMetadataFromCache( $pageId = null ) {
        $cache = MediaWikiServices::getInstance()->getMainWANObjectCache();

        $pageIdsPurge = ( $pageId === null ) ? $this->pageIds : [ $pageId ];
        foreach ( $pageIdsPurge as $pageIdPurge ) {
            $cache->delete( $cache->makeKey( self::KEY_COLLECTION, $pageIdPurge ) );
            // For Hooks::isNewEnoughToNoIndex
            $cache->delete( $cache->makeKey( 'pagetriage-page-created', $pageIdPurge ) );
        }
    }

    /**
     * Get metadata from the replica for an array of article IDs.
     *
     * @param int[] $pageIds
     * @return array[] Map of (page ID => article metadata). Empty when $pageIds is empty.
     */
    public static function getMetadataForArticles( array $pageIds ) {
        // The rdbms layer rejects an empty value list in a condition, so there is
        // nothing to query (and nothing to find) for an empty list of page IDs.
        if ( !$pageIds ) {
            return [];
        }

        $dbr = PageTriageUtil::getReplicaConnection();

        $res = $dbr->newSelectQueryBuilder()
            ->select( [
                'ptrpt_page_id',
                'ptrt_tag_name',
                'ptrpt_value',
                'ptrp_reviewed',
                'ptrp_created',
                'page_title',
                'page_namespace',
                'page_is_redirect',
                'ptrp_last_reviewed_by',
                'ptrp_reviewed_updated',
                'reviewer' => 'user_name'
            ] )
            ->from( 'pagetriage_page_tags' )
            ->join( 'pagetriage_tags', null, 'ptrpt_tag_id = ptrt_tag_id' )
            ->join( 'pagetriage_page', null, 'ptrpt_page_id = ptrp_page_id' )
            ->join( 'page', null, 'page_id = ptrp_page_id' )
            ->leftJoin( 'user', 'user', 'user_id = ptrp_last_reviewed_by' )
            ->where( [ 'ptrpt_page_id' => $pageIds ] )
            ->caller( __METHOD__ )
            ->fetchResultSet();

        $pageData = [];
        // One row per tag per page. So 2 pages with 3 tags each will generate 6 rows.
        foreach ( $res as $row ) {
            $pageId = $row->ptrpt_page_id;

            // Set the tag
            $pageData[$pageId][$row->ptrt_tag_name] = $row->ptrpt_value;

            // The basic page data is identical on every row of a page, so it is
            // only filled in once per page.
            if ( isset( $pageData[$pageId]['creation_date'] ) ) {
                continue;
            }

            $pageData[$pageId]['creation_date'] = wfTimestamp( TS_MW, $row->ptrp_created );
            // The patrol_status has 4 possible values:
            // 0 = unreviewed, 1 = reviewed, 2 = patrolled, 3 = autopatrolled
            $pageData[$pageId]['patrol_status'] = $row->ptrp_reviewed;
            $pageData[$pageId]['is_redirect'] = $row->page_is_redirect;
            $pageData[$pageId]['ptrp_last_reviewed_by'] = $row->ptrp_last_reviewed_by;
            $pageData[$pageId]['ptrp_reviewed_updated'] = wfTimestamp(
                TS_MW,
                $row->ptrp_reviewed_updated
            );
            $pageData[$pageId]['reviewer'] = $row->reviewer;
            $title = Title::makeTitle( $row->page_namespace, $row->page_title );
            if ( $title ) {
                $pageData[$pageId]['title'] = $title->getPrefixedText();
            }
        }
        return $pageData;
    }

    /**
     * Get the metadata for a single or list of articles.
     *
     * First attempt to load metadata from the cache (memcached backend). If not found, then
     * attempt to load compiled metadata from the replica. If that fails, recompile the metadata
     * and either save to DB at end of request (if in a POST context) or add a job to the queue
     * to save to the DB at a later point in time.
     *
     * A page for which no metadata could be loaded or compiled is given the value false
     * by the regeneration callback (and a warning is logged).
     *
     * @return array $metadata: key (page Ids) => value (metadata) pairs
     */
    public function getMetadata() {
        // @TODO: inject this from somewhere
        $wasPosted = RequestContext::getMain()->getRequest()->wasPosted();

        $cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
        $metadataByKey = $cache->getMultiWithUnionSetCallback(
            $cache->makeMultiKeys(
                $this->pageIds,
                static function ( $pageId ) use ( $cache ) {
                    return $cache->makeKey( self::KEY_COLLECTION, $pageId );
                }
            ),
            $cache::TTL_DAY,
            static function ( array $pageIds, array &$ttls, array &$setOpts ) use ( $wasPosted ) {
                $dbr = PageTriageUtil::getReplicaConnection();

                $setOpts += Database::getCacheSetOptions( $dbr );

                // Grab metadata from database after cache attempt
                $metadataByPageId = self::getMetadataForArticles( $pageIds );
                $pageIdsCompile = self::getPagesWithoutMetadata( $pageIds, $metadataByPageId );
                // Compile the denormalized metadata for pages that still don't have it
                if ( $pageIdsCompile ) {
                    $acp = ArticleCompileProcessor::newFromPageId(
                        $pageIdsCompile,
                        // skip validation
                        false,
                        DB_REPLICA
                    );
                    if ( $acp ) {
                        // Update the DB in a POSTSEND deferred update if the context is that
                        // of an HTTP POST request. Otherwise, enqueue a job to update the DB.
                        $mode = $wasPosted ? $acp::SAVE_DEFERRED : $acp::SAVE_JOB;
                        $metadataByPageId += $acp->compileMetadata( $mode );
                    }
                }

                // Every valid tag gets an empty-string default so that each page's
                // metadata has a value for all known tag names.
                $placeholderMetadata = array_fill_keys( array_keys( self::getValidTags() ), '' );

                foreach ( $metadataByPageId as &$metadata ) {
                    $metadata += $placeholderMetadata;
                }
                // Break the reference so a later write to $metadata cannot silently
                // overwrite the last element of $metadataByPageId.
                unset( $metadata );

                foreach ( $pageIds as $pageId ) {
                    if ( !isset( $metadataByPageId[ $pageId ] ) ) {
                        LoggerFactory::getInstance( 'PageTriage' )
                            ->warning( 'Expected metadata to be cached for page ID {pageId}, but no metadata found.',
                                [ 'pageId' => $pageId ] );
                        // Set an uncacheable value so that WANObjectCache doesn't break (T303092).
                        $metadataByPageId[ $pageId ] = false;
                    }
                }

                return $metadataByPageId;
            },
            [ 'version' => PageTriage::CACHE_VERSION ]
        );
        return $cache->multiRemap( $this->pageIds, $metadataByKey );
    }

    /**
     * Get the pages without metadata yet
     *
     * @param int[] $articles Page IDs to check
     * @param array[] $data Map of (page ID => metadata) that is already available
     * @return array The entries of $articles that have no entry in $data, with their
     *  original keys preserved
     */
    private static function getPagesWithoutMetadata( array $articles, array $data ) {
        foreach ( $articles as $key => $pageId ) {
            if ( isset( $data[$pageId] ) ) {
                unset( $articles[$key] );
            }
        }
        return $articles;
    }

    /**
     * Return a complete list of metadata tag names and IDs in the pagetriage_tags table
     *
     * The result is cached in the WAN object cache for two days; an empty result is
     * not cached.
     *
     * @return string[] Map of tag name to tag ID
     */
    public static function getValidTags() {
        $fname = __METHOD__;
        $cache = MediaWikiServices::getInstance()->getMainWANObjectCache();

        return $cache->getWithSetCallback(
            $cache->makeKey( 'pagetriage-valid-tags' ),
            2 * $cache::TTL_DAY,
            static function ( $oldValue, &$ttl, &$setOpts ) use ( $fname ) {
                $dbr = PageTriageUtil::getReplicaConnection();
                $setOpts += Database::getCacheSetOptions( $dbr );

                $res = $dbr->newSelectQueryBuilder()
                    ->select( [ 'ptrt_tag_id', 'ptrt_tag_name' ] )
                    ->from( 'pagetriage_tags' )
                    ->caller( $fname )
                    ->fetchResultSet();

                $tags = [];
                foreach ( $res as $row ) {
                    $tags[$row->ptrt_tag_name] = $row->ptrt_tag_id;
                }

                // Only set to cache if the result from db is not empty
                if ( !$tags ) {
                    $ttl = WANObjectCache::TTL_UNCACHEABLE;
                }

                return $tags;
            },
            [ 'version' => PageTriage::CACHE_VERSION ]
        );
    }

    /**
     * Used to clear the cache between tests.
     */
    public static function clearStaticCache() {
        self::$cache = [];
    }

    /**
     * Typecast the value in page id array to int and verify that it's
     * in page triage queue
     *
     * Results (positive and negative) are remembered in a static per-process cache,
     * so an ID that was looked up before is not queried again.
     *
     * @param int[] $pageIds List of page IDs.
     * @param int $validateDb const DB_PRIMARY/DB_REPLICA
     * @return int[] The valid page IDs.
     */
    public static function validatePageIds( array $pageIds, $validateDb = DB_PRIMARY ) {
        $cleanUp = [];
        foreach ( $pageIds as $key => $val ) {
            $casted = (int)$val;
            if ( !$casted ) {
                // Not a usable page ID
                unset( $pageIds[$key] );
                continue;
            }

            if ( isset( self::$cache[$casted] ) ) {
                // Already looked up earlier; reuse the remembered answer
                if ( self::$cache[$casted] ) {
                    $cleanUp[] = $casted;
                }
                unset( $pageIds[$key] );
            } else {
                $pageIds[$key] = $casted;
                // Assume "not in the queue" until the query below says otherwise
                self::$cache[$casted] = false;
            }
        }

        if ( $pageIds ) {
            if ( $validateDb == DB_PRIMARY ) {
                $db = PageTriageUtil::getPrimaryConnection();
            } else {
                $db = PageTriageUtil::getReplicaConnection();
            }

            $res = $db->newSelectQueryBuilder()
                ->select( [ 'ptrp_page_id' ] )
                ->from( 'pagetriage_page' )
                ->where( [ 'ptrp_page_id' => $pageIds ] )
                ->caller( __METHOD__ )
                ->fetchResultSet();

            foreach ( $res as $row ) {
                // Cast so the returned list holds ints only, as documented
                $foundId = (int)$row->ptrp_page_id;
                $cleanUp[] = $foundId;
                self::$cache[$foundId] = true;
            }
        }

        return array_unique( $cleanUp );
    }

    /**
     * Check if required metadata generated by ArticleMetadata#getMetadata is set.
     *
     * This is intended to help prevent the UI from breaking if metadata compilation fails.
     *
     * @param array $metadata
     * @return bool False (after logging a debug message) if 'user_name' or 'title' is
     *  missing, null or an empty string; true otherwise
     */
    public static function isValidMetadata( array $metadata ) {
        $requiredPopulatedFields = [ 'user_name', 'title' ];
        foreach ( $requiredPopulatedFields as $field ) {
            if ( !isset( $metadata[$field] ) || $metadata[$field] === '' ) {
                LoggerFactory::getInstance( 'PageTriage' )->debug( 'Incomplete metadata for page.',
                    [ 'metadata' => json_encode( $metadata ) ] );
                return false;
            }
        }
        return true;
    }

}