Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
85.71% |
132 / 154 |
|
50.00% |
5 / 10 |
CRAP | |
0.00% |
0 / 1 |
| ArticleMetadata | |
85.71% |
132 / 154 |
|
50.00% |
5 / 10 |
40.99 | |
0.00% |
0 / 1 |
| __construct | |
66.67% |
2 / 3 |
|
0.00% |
0 / 1 |
2.15 | |||
| deleteMetadata | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
2 | |||
| flushMetadataFromCache | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
3 | |||
| getMetadataForArticles | |
100.00% |
40 / 40 |
|
100.00% |
1 / 1 |
4 | |||
| getMetadata | |
68.42% |
26 / 38 |
|
0.00% |
0 / 1 |
8.54 | |||
| getPagesWithoutMetadata | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
3 | |||
| getValidTags | |
95.45% |
21 / 22 |
|
0.00% |
0 / 1 |
3 | |||
| clearStaticCache | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| validatePageIds | |
96.00% |
24 / 25 |
|
0.00% |
0 / 1 |
8 | |||
| isValidMetadata | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
20 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace MediaWiki\Extension\PageTriage; |
| 4 | |
| 5 | use MediaWiki\Context\RequestContext; |
| 6 | use MediaWiki\Extension\PageTriage\ArticleCompile\ArticleCompileProcessor; |
| 7 | use MediaWiki\Logger\LoggerFactory; |
| 8 | use MediaWiki\MediaWikiServices; |
| 9 | use MediaWiki\Title\Title; |
| 10 | use Wikimedia\ObjectCache\WANObjectCache; |
| 11 | use Wikimedia\Rdbms\Database; |
| 12 | |
/**
 * Handles article metadata retrieval and saving to cache.
 *
 * Metadata lives in the pagetriage_page_tags table (one row per tag per page) and is
 * cached per page in the main WAN object cache under the KEY_COLLECTION key collection.
 */
class ArticleMetadata {
	/** @var int[] List of page IDs */
	protected $pageIds;

	/**
	 * @var bool[] Per-process map of (page ID => whether the page was found in the
	 *  pagetriage_page table). An ID is recorded as false as soon as it is looked up and
	 *  flipped to true once the lookup query returns it.
	 */
	private static $cache = [];

	/** @var string */
	private const KEY_COLLECTION = 'pagetriage-article-metadata';

	/**
	 * @param int[] $pageIds List of page IDs.
	 * @param bool $validated Whether the page IDs have already been validated. When false,
	 *  they are run through self::validatePageIds() first.
	 * @param int $validateDb const DB_PRIMARY/DB_REPLICA, only used when $validated is false
	 */
	public function __construct( array $pageIds, $validated = true, $validateDb = DB_PRIMARY ) {
		$this->pageIds = $validated
			? $pageIds
			: self::validatePageIds( $pageIds, $validateDb );
	}

	/**
	 * Delete all the stored metadata (rows and cache entries) for the articles of this instance.
	 *
	 * Does nothing when the instance holds no page IDs.
	 *
	 * @return bool Always true
	 */
	public function deleteMetadata() {
		if ( !$this->pageIds ) {
			return true;
		}

		PageTriageUtil::getPrimaryConnection()
			->newDeleteQueryBuilder()
			->deleteFrom( 'pagetriage_page_tags' )
			->where( [ 'ptrpt_page_id' => $this->pageIds ] )
			->caller( __METHOD__ )
			->execute();
		// also remove it from the cache
		$this->flushMetadataFromCache();

		return true;
	}

	/**
	 * Flush the metadata in cache
	 *
	 * @param int|null $pageId Page ID to be flushed. If null is provided, every page ID
	 *  in $this->pageIds will be flushed.
	 */
	public function flushMetadataFromCache( $pageId = null ) {
		$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();

		$pageIdsPurge = ( $pageId === null ) ? $this->pageIds : [ $pageId ];
		foreach ( $pageIdsPurge as $pageIdPurge ) {
			$cache->delete( $cache->makeKey( self::KEY_COLLECTION, $pageIdPurge ) );
			// For Hooks::isNewEnoughToNoIndex
			$cache->delete( $cache->makeKey( 'pagetriage-page-created', $pageIdPurge ) );
		}
	}

	/**
	 * Get metadata from the replica for an array of article IDs.
	 *
	 * @param int[] $pageIds
	 * @return array[] Map of (page ID => article metadata). Pages without any tag rows
	 *  are absent from the result; an empty $pageIds yields an empty array.
	 */
	public static function getMetadataForArticles( array $pageIds ) {
		// An empty ID list cannot be turned into a valid IN() condition, so answer it
		// without querying (same guard as deleteMetadata() and validatePageIds()).
		if ( !$pageIds ) {
			return [];
		}

		$dbr = PageTriageUtil::getReplicaConnection();

		$res = $dbr->newSelectQueryBuilder()
			->select( [
				'ptrpt_page_id',
				'ptrt_tag_name',
				'ptrpt_value',
				'ptrp_reviewed',
				'ptrp_created',
				'page_title',
				'page_namespace',
				'page_is_redirect',
				'ptrp_last_reviewed_by',
				'ptrp_reviewed_updated',
				'reviewer' => 'user_name'
			] )
			->from( 'pagetriage_page_tags' )
			->join( 'pagetriage_tags', null, 'ptrpt_tag_id = ptrt_tag_id' )
			->join( 'pagetriage_page', null, 'ptrpt_page_id = ptrp_page_id' )
			->join( 'page', null, 'page_id = ptrp_page_id' )
			->leftJoin( 'user', 'user', 'user_id = ptrp_last_reviewed_by' )
			->where( [ 'ptrpt_page_id' => $pageIds ] )
			->caller( __METHOD__ )
			->fetchResultSet();

		$pageData = [];
		// One row per tag per page. So 2 pages with 3 tags each will generate 6 rows.
		foreach ( $res as $row ) {
			$pageId = $row->ptrpt_page_id;

			// Set the tag
			$pageData[$pageId][$row->ptrt_tag_name] = $row->ptrpt_value;

			// The page-level fields are identical on every row of a page, so they
			// only need to be copied from the first row seen for that page.
			if ( isset( $pageData[$pageId]['creation_date'] ) ) {
				continue;
			}

			$pageData[$pageId]['creation_date'] = wfTimestamp( TS_MW, $row->ptrp_created );
			// The patrol_status has 4 possible values:
			// 0 = unreviewed, 1 = reviewed, 2 = patrolled, 3 = autopatrolled
			$pageData[$pageId]['patrol_status'] = $row->ptrp_reviewed;
			$pageData[$pageId]['is_redirect'] = $row->page_is_redirect;
			$pageData[$pageId]['ptrp_last_reviewed_by'] = $row->ptrp_last_reviewed_by;
			$pageData[$pageId]['ptrp_reviewed_updated'] = wfTimestamp(
				TS_MW,
				$row->ptrp_reviewed_updated
			);
			$pageData[$pageId]['reviewer'] = $row->reviewer;
			$title = Title::makeTitle( $row->page_namespace, $row->page_title );
			if ( $title ) {
				$pageData[$pageId]['title'] = $title->getPrefixedText();
			}
		}

		return $pageData;
	}

	/**
	 * Get the metadata for a single or list of articles.
	 *
	 * First attempt to load metadata from the cache (memcached backend). If not found, then
	 * attempt to load compiled metadata from the replica. If that fails, recompile the metadata
	 * and either save to DB at end of request (if in a POST context) or add a job to the queue
	 * to save to the DB at a later point in time.
	 *
	 * @return array $metadata: key (page Ids) => value (metadata) pairs
	 */
	public function getMetadata() {
		// @TODO: inject this from somewhere
		$wasPosted = RequestContext::getMain()->getRequest()->wasPosted();

		$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
		$cacheKeys = $cache->makeMultiKeys(
			$this->pageIds,
			static function ( $pageId ) use ( $cache ) {
				return $cache->makeKey( self::KEY_COLLECTION, $pageId );
			}
		);

		$metadataByKey = $cache->getMultiWithUnionSetCallback(
			$cacheKeys,
			$cache::TTL_DAY,
			// Regenerates the values for the page IDs handed to it by the cache
			static function ( array $pageIds, array &$ttls, array &$setOpts ) use ( $wasPosted ) {
				$dbr = PageTriageUtil::getReplicaConnection();

				$setOpts += Database::getCacheSetOptions( $dbr );

				// Grab metadata from database after cache attempt
				$metadataByPageId = self::getMetadataForArticles( $pageIds );
				$pageIdsCompile = self::getPagesWithoutMetadata( $pageIds, $metadataByPageId );
				// Compile the denormalized metadata for pages that still don't have it
				if ( $pageIdsCompile ) {
					// NOTE(review): the second argument was previously annotated
					// "skip validation". If it mirrors the $validated flag of
					// ArticleMetadata::__construct(), false requests validation instead;
					// confirm against ArticleCompileProcessor::newFromPageId().
					$acp = ArticleCompileProcessor::newFromPageId(
						$pageIdsCompile,
						false,
						DB_REPLICA
					);
					if ( $acp ) {
						// Update the DB in a POSTSEND deferred update if the context is that
						// of an HTTP POST request. Otherwise, enqueue a job to update the DB.
						$mode = $wasPosted ? $acp::SAVE_DEFERRED : $acp::SAVE_JOB;
						$metadataByPageId += $acp->compileMetadata( $mode );
					}
				}

				// Give every known tag an empty-string default so that each page
				// exposes the same set of keys.
				$placeholderMetadata = array_fill_keys( array_keys( self::getValidTags() ), '' );

				foreach ( $metadataByPageId as &$metadata ) {
					$metadata += $placeholderMetadata;
				}
				// Break the reference so later writes cannot alias the last element
				unset( $metadata );

				foreach ( $pageIds as $pageId ) {
					if ( !isset( $metadataByPageId[ $pageId ] ) ) {
						LoggerFactory::getInstance( 'PageTriage' )
							->warning( 'Expected metadata to be cached for page ID {pageId}, but no metadata found.',
								[ 'pageId' => $pageId ] );
						// Set an uncacheable value so that WANObjectCache doesn't break (T303092).
						$metadataByPageId[ $pageId ] = false;
					}
				}

				return $metadataByPageId;
			},
			[ 'version' => PageTriage::CACHE_VERSION ]
		);

		return $cache->multiRemap( $this->pageIds, $metadataByKey );
	}

	/**
	 * Get the pages without metadata yet
	 *
	 * @param int[] $articles Page IDs to check
	 * @param array[] $data Map of (page ID => metadata) that is already available
	 * @return array The entries of $articles (original keys preserved) with no entry in $data
	 */
	private static function getPagesWithoutMetadata( array $articles, array $data ) {
		$missing = [];
		foreach ( $articles as $key => $pageId ) {
			if ( !isset( $data[$pageId] ) ) {
				$missing[$key] = $pageId;
			}
		}
		return $missing;
	}

	/**
	 * Return a complete list of metadata tag names and IDs in the pagetriage_tags table
	 *
	 * The list is cached for two days; an empty result is never cached.
	 *
	 * @return string[] Map of tag name to tag ID
	 */
	public static function getValidTags() {
		$fname = __METHOD__;
		$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();

		return $cache->getWithSetCallback(
			$cache->makeKey( 'pagetriage-valid-tags' ),
			2 * $cache::TTL_DAY,
			static function ( $oldValue, &$ttl, &$setOpts ) use ( $fname ) {
				$dbr = PageTriageUtil::getReplicaConnection();
				$setOpts += Database::getCacheSetOptions( $dbr );

				$res = $dbr->newSelectQueryBuilder()
					->select( [ 'ptrt_tag_id', 'ptrt_tag_name' ] )
					->from( 'pagetriage_tags' )
					->caller( $fname )
					->fetchResultSet();

				$tags = [];
				foreach ( $res as $row ) {
					$tags[$row->ptrt_tag_name] = $row->ptrt_tag_id;
				}

				// Only set to cache if the result from db is not empty
				if ( !$tags ) {
					$ttl = WANObjectCache::TTL_UNCACHEABLE;
				}

				return $tags;
			},
			[ 'version' => PageTriage::CACHE_VERSION ]
		);
	}

	/**
	 * Used to clear the cache between tests.
	 */
	public static function clearStaticCache() {
		self::$cache = [];
	}

	/**
	 * Typecast the value in page id array to int and verify that it's
	 * in page triage queue
	 *
	 * IDs already present in the per-process cache are answered from it; the remaining
	 * ones are looked up in pagetriage_page and the outcome is remembered.
	 *
	 * @param int[] $pageIds List of page IDs.
	 * @param int $validateDb const DB_PRIMARY/DB_REPLICA
	 * @return int[] The valid page IDs.
	 */
	public static function validatePageIds( array $pageIds, $validateDb = DB_PRIMARY ) {
		$cleanUp = [];
		foreach ( $pageIds as $key => $val ) {
			$casted = (int)$val;

			// Anything that casts to 0 can never be a page ID
			if ( !$casted ) {
				unset( $pageIds[$key] );
				continue;
			}

			// Already looked up during this process: reuse the cached outcome
			if ( isset( self::$cache[$casted] ) ) {
				if ( self::$cache[$casted] ) {
					$cleanUp[] = $casted;
				}
				unset( $pageIds[$key] );
				continue;
			}

			// Unknown so far: keep it for the query below and assume "not in the
			// queue" until the query proves otherwise.
			$pageIds[$key] = $casted;
			self::$cache[$casted] = false;
		}

		if ( $pageIds ) {
			$db = ( $validateDb == DB_PRIMARY )
				? PageTriageUtil::getPrimaryConnection()
				: PageTriageUtil::getReplicaConnection();

			$res = $db->newSelectQueryBuilder()
				->select( [ 'ptrp_page_id' ] )
				->from( 'pagetriage_page' )
				->where( [ 'ptrp_page_id' => $pageIds ] )
				->caller( __METHOD__ )
				->fetchResultSet();

			foreach ( $res as $row ) {
				// Cast the row value so that freshly queried IDs have the same
				// type as the ones served from the cache above.
				$foundId = (int)$row->ptrp_page_id;
				$cleanUp[] = $foundId;
				self::$cache[$foundId] = true;
			}
		}

		return array_unique( $cleanUp );
	}

	/**
	 * Check if required metadata generated by ArticleMetadata#getMetadata is set.
	 *
	 * This is intended to help prevent the UI from breaking if metadata compilation fails.
	 *
	 * @param array $metadata
	 * @return bool False (after logging a debug message) as soon as one of the required
	 *  fields is missing or is an empty string, true otherwise
	 */
	public static function isValidMetadata( array $metadata ) {
		$requiredFields = [ 'user_name', 'title' ];
		foreach ( $requiredFields as $field ) {
			if ( !isset( $metadata[$field] ) || $metadata[$field] === '' ) {
				LoggerFactory::getInstance( 'PageTriage' )->debug( 'Incomplete metadata for page.',
					[ 'metadata' => json_encode( $metadata ) ] );
				return false;
			}
		}
		return true;
	}

}
| 335 | } |