Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
26.94% |
59 / 219 |
|
35.29% |
6 / 17 |
CRAP | |
0.00% |
0 / 1 |
| PageAssessmentsDAO | |
26.94% |
59 / 219 |
|
35.29% |
6 / 17 |
1234.65 | |
0.00% |
0 / 1 |
| getReplicaDBConnection | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| getPrimaryDBConnection | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| doUpdates | |
0.00% |
0 / 57 |
|
0.00% |
0 / 1 |
462 | |||
| updateSearchIndex | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
56 | |||
| importanceToWeight | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
2 | |||
| getProjectName | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
12 | |||
| extractParentProjectId | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
12 | |||
| getProjectId | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
2 | |||
| insertProject | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
6 | |||
| cleanProjectTitle | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
| updateRecord | |
100.00% |
23 / 23 |
|
100.00% |
1 / 1 |
4 | |||
| insertRecord | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
1 | |||
| getAllProjects | |
92.31% |
12 / 13 |
|
0.00% |
0 / 1 |
3.00 | |||
| getAllAssessments | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
6 | |||
| deleteRecord | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
1 | |||
| deleteRecordsForPage | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
2 | |||
| cacheAssessment | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
| 1 | <?php |
| 2 | |
| 3 | /** |
| 4 | * This program is free software; you can redistribute it and/or modify |
| 5 | * it under the terms of the GNU General Public License as published by |
| 6 | * the Free Software Foundation; either version 2 of the License, or |
| 7 | * (at your option) any later version. |
| 8 | * |
| 9 | * This program is distributed in the hope that it will be useful, |
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 12 | * GNU General Public License for more details. |
| 13 | * |
| 14 | * You should have received a copy of the GNU General Public License along |
| 15 | * with this program; if not, write to the Free Software Foundation, Inc., |
| 16 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
| 17 | * http://www.gnu.org/copyleft/gpl.html |
| 18 | * |
| 19 | * PageAssessments extension body |
| 20 | * |
| 21 | * @file |
| 22 | * @ingroup Extensions |
| 23 | */ |
| 24 | |
| 25 | namespace MediaWiki\Extension\PageAssessments; |
| 26 | |
| 27 | use CirrusSearch\WeightedTagsUpdater; |
| 28 | use MediaWiki\MediaWikiServices; |
| 29 | use MediaWiki\Parser\Parser; |
| 30 | use MediaWiki\Registration\ExtensionRegistry; |
| 31 | use MediaWiki\Title\Title; |
| 32 | use Wikimedia\Rdbms\IDatabase; |
| 33 | use Wikimedia\Rdbms\IDBAccessObject; |
| 34 | use Wikimedia\Rdbms\IReadableDatabase; |
| 35 | |
| 36 | class PageAssessmentsDAO { |
| 37 | |
| 38 | /** @var array Instance cache associating project IDs with project names */ |
| 39 | protected static $projectNames = []; |
| 40 | |
/**
 * Obtain a read-only handle on a replica database.
 *
 * @return IReadableDatabase
 */
private static function getReplicaDBConnection(): IReadableDatabase {
	$connectionProvider = MediaWikiServices::getInstance()->getConnectionProvider();
	return $connectionProvider->getReplicaDatabase();
}
| 44 | |
/**
 * Obtain a read/write handle on the primary database.
 *
 * @return IDatabase
 */
private static function getPrimaryDBConnection(): IDatabase {
	$connectionProvider = MediaWikiServices::getInstance()->getConnectionProvider();
	return $connectionProvider->getPrimaryDatabase();
}
| 48 | |
/**
 * Synchronise the stored assessments of a page with freshly parsed data.
 *
 * Works in three passes: resolve (or create) a project ID for every named
 * project in the parsed data, insert/update one assessment row per entry,
 * then delete rows for projects that are no longer present on the page.
 * The search index is refreshed only when something actually changed.
 *
 * @param Title $titleObj Title object of the subject page
 * @param array $assessmentData List of [ project, class, importance ] entries
 * @param mixed|null $ticket Transaction ticket; an empty one is requested when omitted
 */
public static function doUpdates( $titleObj, $assessmentData, $ticket = null ) {
	global $wgUpdateRowsPerQuery, $wgPageAssessmentsSubprojects;

	$dbProvider = MediaWikiServices::getInstance()->getConnectionProvider();
	if ( !$ticket ) {
		$ticket = $dbProvider->getEmptyTransactionTicket( __METHOD__ );
	}

	$pageId = $titleObj->getArticleID();
	$revisionId = $titleObj->getLatestRevID();

	// Pass 1: map every (cleaned) project name found in the parsed data to
	// its project ID, creating the project row when it does not exist yet.
	$projects = [];
	foreach ( $assessmentData as $index => $entry ) {
		// Entries without a project name are ignored.
		if ( !isset( $entry[0] ) || $entry[0] === '' ) {
			continue;
		}
		$cleanName = self::cleanProjectTitle( $entry[0] );
		// Store the cleaned name back so the later passes can match on it.
		$assessmentData[$index][0] = $cleanName;

		$id = self::getProjectId( $cleanName );
		if ( $id === false ) {
			// Unknown project: register it, linking it to its parent project
			// when subproject support is enabled.
			$parentId = $wgPageAssessmentsSubprojects
				? self::extractParentProjectId( $cleanName )
				: null;
			$id = self::insertProject( $cleanName, $parentId );
		}
		$projects[$cleanName] = $id;
	}

	// Projects currently recorded for the page, read from the primary database.
	$storedProjects = self::getAllProjects( $pageId, IDBAccessObject::READ_LATEST );

	$toInsert = array_diff( $projects, $storedProjects );
	$toDelete = array_diff( $storedProjects, $projects );
	$toUpdate = array_intersect( $projects, $storedProjects );

	$changed = false;
	// Number of rows handled so far; drives the periodic replication wait.
	$processed = 0;

	// Pass 2: insert new assessment rows and update existing ones.
	foreach ( $assessmentData as $entry ) {
		if ( !isset( $entry[0] ) || $entry[0] == '' ) {
			continue;
		}
		$id = $projects[$entry[0]];
		if ( !$id || !$pageId ) {
			continue;
		}
		$row = [
			'pa_page_id' => $pageId,
			'pa_project_id' => $id,
			'pa_class' => $entry[1],
			'pa_importance' => $entry[2],
			'pa_page_revision' => $revisionId
		];
		if ( in_array( $id, $toInsert ) ) {
			self::insertRecord( $row );
			$changed = true;
		} elseif ( in_array( $id, $toUpdate ) && self::updateRecord( $row ) ) {
			$changed = true;
		}
		// Check for database lag if there's a huge number of assessments
		if ( $processed > 0 && $processed % $wgUpdateRowsPerQuery == 0 ) {
			$dbProvider->commitAndWaitForReplication( __METHOD__, $ticket );
		}
		$processed++;
	}

	// Pass 3: remove rows for projects that disappeared from the page.
	foreach ( $toDelete as $staleProjectId ) {
		self::deleteRecord( [
			'pa_page_id' => $pageId,
			'pa_project_id' => $staleProjectId
		] );
		$changed = true;
		// Check for database lag if there's a huge number of deleted assessments
		if ( $processed > 0 && $processed % $wgUpdateRowsPerQuery == 0 ) {
			$dbProvider->commitAndWaitForReplication( __METHOD__, $ticket );
		}
		$processed++;
	}

	if ( $changed ) {
		self::updateSearchIndex( $titleObj, $assessmentData );
	}
}
| 153 | |
/**
 * Push the page's project tags to the CirrusSearch index.
 *
 * Does nothing when the CirrusSearch extension is not loaded. Each usable
 * project name becomes a weighted tag whose weight is derived from the
 * importance rating; when no usable project remains, the tags are reset.
 *
 * @param Title $titleObj
 * @param array $assessmentData List of [ project, class, importance ] entries
 */
public static function updateSearchIndex( Title $titleObj, array $assessmentData ) {
	$cirrusLoaded = ExtensionRegistry::getInstance()->isLoaded( 'CirrusSearch' );
	if ( !$cirrusLoaded ) {
		return;
	}
	/** @var WeightedTagsUpdater $updater */
	$updater = MediaWikiServices::getInstance()->getService( WeightedTagsUpdater::SERVICE );

	$weightByProject = [];
	foreach ( $assessmentData as $entry ) {
		// Only keep non-empty names without a pipe character, which is not
		// allowed in weighted_tags. Names were already cleaned in doUpdates().
		if ( isset( $entry[0] ) && $entry[0] != '' && !str_contains( $entry[0], '|' ) ) {
			$weightByProject[ $entry[0] ] = self::importanceToWeight( $entry[ 2 ] );
		}
	}

	if ( $weightByProject !== [] ) {
		$updater->updateWeightedTags(
			$titleObj->toPageIdentity(),
			'ext.pageassessments.project',
			$weightByProject,
			'page-assessment-update'
		);
		return;
	}

	$updater->resetWeightedTags(
		$titleObj->toPageIdentity(),
		[ 'ext.pageassessments.project' ],
		'page-assessment-update'
	);
}
| 193 | |
/**
 * Translate an importance rating into a search tag weight.
 *
 * The rating is matched case-insensitively; anything unrecognised gets the
 * same weight as "na".
 *
 * @param string $importance Importance rating, e.g. "Top" or "low"
 * @return int Weight between 10 and 100
 */
private static function importanceToWeight( string $importance ): int {
	// TODO: Read from local JSON page in MediaWiki namespace?
	return match ( strtolower( $importance ) ) {
		'top' => 100,
		'high' => 80,
		'mid' => 60,
		// Consider unknown as low-importance
		'low', 'unknown' => 40,
		// "na" and every unrecognised rating
		default => 10,
	};
}
| 207 | |
/**
 * Look up the name of a wikiproject by its ID.
 *
 * Results (including misses) are remembered in the static
 * self::$projectNames cache, so each ID is queried at most once.
 *
 * @param int $projectId The ID of the project
 * @return string|false The name of the project or false if not found
 */
public static function getProjectName( $projectId ) {
	// Reject anything that is not a positive project ID.
	if ( !( $projectId > 0 ) ) {
		return false;
	}
	// Serve from the cache when possible.
	if ( isset( self::$projectNames[$projectId] ) ) {
		return self::$projectNames[$projectId];
	}
	$title = self::getReplicaDBConnection()->newSelectQueryBuilder()
		->select( 'pap_project_title' )
		->from( 'page_assessments_projects' )
		->where( [ 'pap_project_id' => $projectId ] )
		->caller( __METHOD__ )
		->fetchField();
	// Remember the answer for subsequent calls.
	self::$projectNames[$projectId] = $title;
	return $title;
}
| 234 | |
/**
 * Resolve the parent project of a subproject name.
 *
 * The parent is the part before the first slash: for
 * "Novels/Crime task force" the parent is "Novels". Names without a slash,
 * or starting with one, have no parent.
 *
 * @param string $projectName Project title
 * @return int|false project ID or false if not found
 */
protected static function extractParentProjectId( $projectName ) {
	$segments = explode( '/', $projectName );
	$hasParent = count( $segments ) > 1 && $segments[0] !== '';
	return $hasParent ? self::getProjectId( $segments[0] ) : false;
}
| 250 | |
/**
 * Look up the ID of a wikiproject by its title (read from a replica).
 *
 * @param string $project Project title
 * @return int|false project ID or false if not found
 */
public static function getProjectId( $project ) {
	$query = self::getReplicaDBConnection()->newSelectQueryBuilder()
		->select( 'pap_project_id' )
		->from( 'page_assessments_projects' )
		->where( [ 'pap_project_title' => $project ] )
		->caller( __METHOD__ );
	return $query->fetchField();
}
| 265 | |
/**
 * Insert a new wikiproject into the projects table.
 *
 * The insert uses IGNORE, so a project whose title already exists is left
 * untouched. In that case no insert ID is generated; the ID of the existing
 * row is read back from the primary database instead, so callers always get
 * a usable project ID rather than 0.
 *
 * @param string $project Wikiproject title
 * @param int|null $parentId ID of the parent project (for subprojects) (optional)
 * @return int ID of the newly inserted project, or of the already existing
 *  project with the same title; 0 only if neither could be determined
 */
public static function insertProject( $project, $parentId = null ) {
	$dbw = self::getPrimaryDBConnection();
	$values = [ 'pap_project_title' => $project ];
	if ( $parentId ) {
		$values[ 'pap_parent_id' ] = (int)$parentId;
	}
	$dbw->newInsertQueryBuilder()
		->insertInto( 'page_assessments_projects' )
		// Use ignore() in case two projects with the same name are added at once.
		// This normally shouldn't happen, but is possible perhaps from clicking
		// 'Publish changes' twice in very quick succession. (See T286671)
		->ignore()
		->row( $values )
		->caller( __METHOD__ )
		->execute();

	if ( $dbw->affectedRows() > 0 ) {
		return $dbw->insertId();
	}

	// The row was ignored because the title already exists (e.g. it was
	// created concurrently, or the replica used for the earlier lookup was
	// lagging). Fetch the existing ID from the primary so the caller does not
	// receive 0 and silently drop the assessment.
	$existingId = $dbw->newSelectQueryBuilder()
		->select( 'pap_project_id' )
		->from( 'page_assessments_projects' )
		->where( [ 'pap_project_title' => $project ] )
		->caller( __METHOD__ )
		->fetchField();
	return (int)$existingId;
}
| 290 | |
/**
 * Clean up the title of the project (or subproject)
 *
 * Since the project title comes from a template parameter, it can basically
 * be anything. This function accounts for common cases where editors put
 * extra stuff into the parameter besides just the name of the project.
 *
 * The result is limited to 255 bytes. Truncation never splits a multibyte
 * UTF-8 character, so the returned title is always valid UTF-8.
 *
 * @param string $project WikiProject title
 * @return string Cleaned-up WikiProject title, at most 255 bytes long
 */
public static function cleanProjectTitle( $project ) {
	// Remove any bold formatting.
	$project = str_replace( "'''", "", $project );
	// Remove "the" prefix for subprojects (common on English Wikipedia).
	// This is case-sensitive on purpose, as there are some legitimate
	// subproject titles starting with "The", e.g. "The Canterbury Tales".
	$project = str_replace( "/the ", "/", $project );
	// Truncate to 255 bytes to avoid DB warnings. mb_strcut() counts bytes
	// like substr() but backs up to the previous character boundary instead
	// of cutting a multibyte character in half.
	return mb_strcut( $project, 0, 255, 'UTF-8' );
}
| 310 | |
/**
 * Update an assessment row, but only when class or importance changed.
 *
 * The current values are read from a replica; if a stored row for the same
 * page and project already carries the given class and importance, nothing
 * is written.
 *
 * @param array $values New values to be entered into the DB
 * @return bool true if an update was performed false otherwise
 */
public static function updateRecord( $values ) {
	// A record is identified by its page and project.
	$conds = [
		'pa_page_id' => $values['pa_page_id'],
		'pa_project_id' => $values['pa_project_id']
	];

	$existingRows = self::getReplicaDBConnection()->newSelectQueryBuilder()
		->select( [ 'pa_class', 'pa_importance', 'pa_project_id', 'pa_page_id' ] )
		->from( 'page_assessments' )
		->where( $conds )
		->caller( __METHOD__ )
		->fetchResultSet();
	foreach ( $existingRows as $existing ) {
		if ( $existing->pa_importance == $values['pa_importance'] &&
			$existing->pa_class == $values['pa_class']
		) {
			// Stored data is already up to date.
			return false;
		}
	}

	// Something differs (or no row was found): write the new values.
	self::getPrimaryDBConnection()->newUpdateQueryBuilder()
		->update( 'page_assessments' )
		->set( $values )
		->where( $conds )
		->caller( __METHOD__ )
		->execute();
	return true;
}
| 347 | |
/**
 * Add a new assessment row to the page_assessments table.
 *
 * @param array $values Column => value map of the row to insert
 * @return bool true
 */
public static function insertRecord( $values ) {
	$insert = self::getPrimaryDBConnection()->newInsertQueryBuilder()
		->insertInto( 'page_assessments' )
		// Use IGNORE in case 2 records for the same project are added at once.
		// This normally shouldn't happen, but is possible. (See T152080)
		->ignore()
		->row( $values )
		->caller( __METHOD__ );
	$insert->execute();
	return true;
}
| 365 | |
/**
 * List the IDs of all projects that have an assessment for the given page.
 *
 * @param int $pageId Page ID
 * @param int $flags IDBAccessObject::READ_* constant. Passing a value that
 *  includes READ_LATEST makes the query run against the primary database.
 *  See docs at IDBAccessObject.php.
 * @return array Project IDs associated with the given page
 */
public static function getAllProjects( $pageId, $flags = IDBAccessObject::READ_NORMAL ) {
	$wantsLatest = ( $flags & IDBAccessObject::READ_LATEST ) == IDBAccessObject::READ_LATEST;
	$db = $wantsLatest
		? self::getPrimaryDBConnection()
		: self::getReplicaDBConnection();

	$rows = $db->newSelectQueryBuilder()
		->select( 'pa_project_id' )
		->from( 'page_assessments' )
		->where( [ 'pa_page_id' => $pageId ] )
		->recency( $flags )
		->caller( __METHOD__ )
		->fetchResultSet();

	$projectIds = [];
	foreach ( $rows as $row ) {
		$projectIds[] = $row->pa_project_id;
	}
	return $projectIds;
}
| 391 | |
/**
 * Fetch every assessment of a page together with its project name.
 *
 * @param int $pageId Page ID
 * @return array List of [ 'name' => ..., 'class' => ..., 'importance' => ... ]
 *  entries, one per project the page is assessed for
 */
public static function getAllAssessments( int $pageId ): array {
	$rows = self::getReplicaDBConnection()->newSelectQueryBuilder()
		->select( [ 'pap_project_title', 'pa_class', 'pa_importance' ] )
		->from( 'page_assessments' )
		->join( 'page_assessments_projects', null, [ 'pap_project_id = pa_project_id' ] )
		->where( [ 'pa_page_id' => $pageId ] )
		->caller( __METHOD__ )
		->fetchResultSet();

	$assessments = [];
	foreach ( $rows as $row ) {
		$assessment = [];
		$assessment['name'] = $row->pap_project_title;
		$assessment['class'] = $row->pa_class;
		$assessment['importance'] = $row->pa_importance;
		$assessments[] = $assessment;
	}
	return $assessments;
}
| 418 | |
/**
 * Remove the assessment of one page for one project.
 *
 * Only the 'pa_page_id' and 'pa_project_id' entries of $values are used.
 *
 * @param array $values Conditions for looking up records to delete
 * @return bool true
 */
public static function deleteRecord( $values ) {
	$delete = self::getPrimaryDBConnection()->newDeleteQueryBuilder()
		->deleteFrom( 'page_assessments' )
		->where( [
			'pa_page_id' => $values['pa_page_id'],
			'pa_project_id' => $values['pa_project_id']
		] )
		->caller( __METHOD__ );
	$delete->execute();
	return true;
}
| 437 | |
/**
 * Remove every assessment row of a page, used when the page is deleted.
 *
 * Note: We don't take care of undeletions explicitly, the records are restored
 * when the page is parsed again.
 *
 * @param int $id Page ID of deleted page
 * @return bool true
 */
public static function deleteRecordsForPage( $id ) {
	$delete = self::getPrimaryDBConnection()->newDeleteQueryBuilder()
		->deleteFrom( 'page_assessments' )
		->where( [ 'pa_page_id' => $id ] )
		->caller( __METHOD__ );
	$delete->execute();
	return true;
}
| 457 | |
/**
 * Record one assessment in the parser output.
 *
 * Appends a [ project, class, importance ] entry to the list stored under
 * the 'ext-pageassessment-assessmentdata' extension data key, creating the
 * list when nothing has been stored yet.
 *
 * @param Parser $parser Parser object
 * @param string $project Wikiproject name
 * @param string $class Class of article
 * @param string $importance Importance of article
 */
public static function cacheAssessment(
	Parser $parser,
	$project = '',
	$class = '',
	$importance = ''
) {
	$collected = $parser->getOutput()->getExtensionData( 'ext-pageassessment-assessmentdata' );
	// Start a fresh list when no assessment has been recorded so far.
	$collected = $collected == null ? [] : $collected;
	$collected[] = [ $project, $class, $importance ];
	$parser->getOutput()->setExtensionData( 'ext-pageassessment-assessmentdata', $collected );
}
| 479 | |
| 480 | } |