Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
26.94% |
59 / 219 |
|
35.29% |
6 / 17 |
CRAP | |
0.00% |
0 / 1 |
PageAssessmentsDAO | |
26.94% |
59 / 219 |
|
35.29% |
6 / 17 |
1234.65 | |
0.00% |
0 / 1 |
getReplicaDBConnection | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getPrimaryDBConnection | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
doUpdates | |
0.00% |
0 / 57 |
|
0.00% |
0 / 1 |
462 | |||
updateSearchIndex | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
56 | |||
importanceToWeight | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
2 | |||
getProjectName | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
12 | |||
extractParentProjectId | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
12 | |||
getProjectId | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
2 | |||
insertProject | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
6 | |||
cleanProjectTitle | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
updateRecord | |
100.00% |
23 / 23 |
|
100.00% |
1 / 1 |
4 | |||
insertRecord | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
1 | |||
getAllProjects | |
92.31% |
12 / 13 |
|
0.00% |
0 / 1 |
3.00 | |||
getAllAssessments | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
6 | |||
deleteRecord | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
1 | |||
deleteRecordsForPage | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
2 | |||
cacheAssessment | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 |
1 | <?php |
2 | |
3 | /** |
4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by |
6 | * the Free Software Foundation; either version 2 of the License, or |
7 | * (at your option) any later version. |
8 | * |
9 | * This program is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | * GNU General Public License for more details. |
13 | * |
14 | * You should have received a copy of the GNU General Public License along |
15 | * with this program; if not, write to the Free Software Foundation, Inc., |
16 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
17 | * http://www.gnu.org/copyleft/gpl.html |
18 | * |
19 | * PageAssessments extension body |
20 | * |
21 | * @file |
22 | * @ingroup Extensions |
23 | */ |
24 | |
25 | namespace MediaWiki\Extension\PageAssessments; |
26 | |
27 | use CirrusSearch\WeightedTagsUpdater; |
28 | use MediaWiki\MediaWikiServices; |
29 | use MediaWiki\Parser\Parser; |
30 | use MediaWiki\Registration\ExtensionRegistry; |
31 | use MediaWiki\Title\Title; |
32 | use Wikimedia\Rdbms\IDatabase; |
33 | use Wikimedia\Rdbms\IDBAccessObject; |
34 | use Wikimedia\Rdbms\IReadableDatabase; |
35 | |
/**
 * Static data-access helpers for the PageAssessments extension.
 *
 * Reads and writes the `page_assessments` and `page_assessments_projects`
 * tables and, when CirrusSearch is loaded, pushes project tags to the
 * search index.
 */
class PageAssessmentsDAO {

	/** @var array Static cache associating project IDs with project names */
	protected static $projectNames = [];

	/**
	 * @return IReadableDatabase Replica connection obtained from the service container
	 */
	private static function getReplicaDBConnection(): IReadableDatabase {
		return MediaWikiServices::getInstance()->getConnectionProvider()->getReplicaDatabase();
	}

	/**
	 * @return IDatabase Primary connection obtained from the service container
	 */
	private static function getPrimaryDBConnection(): IDatabase {
		return MediaWikiServices::getInstance()->getConnectionProvider()->getPrimaryDatabase();
	}

	/**
	 * Driver function that handles updating assessment data in database
	 *
	 * Compares the assessments found by the parser with the rows already stored
	 * for the page, then inserts, updates and deletes rows as needed. If
	 * anything changed, the search index is updated as well.
	 *
	 * @param Title $titleObj Title object of the subject page
	 * @param array $assessmentData Data for all assessments compiled; each entry
	 *  is a list of [ project name, class, importance ]
	 * @param mixed|null $ticket Transaction ticket
	 */
	public static function doUpdates( $titleObj, $assessmentData, $ticket = null ) {
		global $wgUpdateRowsPerQuery, $wgPageAssessmentsSubprojects;

		$dbProvider = MediaWikiServices::getInstance()->getConnectionProvider();
		$ticket = $ticket ?: $dbProvider->getEmptyTransactionTicket( __METHOD__ );

		$changed = false;
		$pageId = $titleObj->getArticleID();
		$revisionId = $titleObj->getLatestRevID();
		// Compile a list of projects found in the parserData to find out which
		// assessment records need to be inserted, deleted, or updated.
		$projects = [];
		foreach ( $assessmentData as $key => $parserData ) {
			// Only entries with a project name are of interest.
			if ( !isset( $parserData[0] ) || $parserData[0] === '' ) {
				continue;
			}
			// Clean the project name and write it back into the assessment
			// data, since the second pass (and the search index update) looks
			// projects up by the cleaned name.
			$projectName = self::cleanProjectTitle( $parserData[0] );
			$assessmentData[$key][0] = $projectName;
			if ( $projectName === '' ) {
				// The name consisted only of markup that cleaning removed.
				// The second pass skips empty names, so creating a project
				// row here would only leave a nameless, unused project behind.
				continue;
			}
			// Get the corresponding ID from page_assessments_projects table.
			$projectId = self::getProjectId( $projectName );
			// If there is no existing project by that name, add it to the table.
			if ( $projectId === false ) {
				if ( $wgPageAssessmentsSubprojects ) {
					// Extract possible parent from the project name.
					$parentId = self::extractParentProjectId( $projectName );
					$projectId = self::insertProject( $projectName, $parentId );
				} else {
					$projectId = self::insertProject( $projectName );
				}
			}
			$projects[$projectName] = $projectId;
		}
		// Get a list of all the projects previously assigned to the page.
		$projectsInDb = self::getAllProjects( $pageId, IDBAccessObject::READ_LATEST );

		// array_diff()/array_intersect() compare values by their string
		// representation, so integer and string IDs match each other.
		$toInsert = array_diff( $projects, $projectsInDb );
		$toDelete = array_diff( $projectsInDb, $projects );
		$toUpdate = array_intersect( $projects, $projectsInDb );

		// Counts processed rows across both loops below for the lag check.
		$i = 0;

		// Add and update assessment records to the database
		foreach ( $assessmentData as $parserData ) {
			// Make sure the name of the project is set.
			if ( !isset( $parserData[0] ) || $parserData[0] == '' ) {
				continue;
			}
			$projectId = $projects[$parserData[0]];
			if ( $projectId && $pageId ) {
				$values = [
					'pa_page_id' => $pageId,
					'pa_project_id' => $projectId,
					// Missing class/importance is stored as null, without
					// raising an undefined-index warning.
					'pa_class' => $parserData[1] ?? null,
					'pa_importance' => $parserData[2] ?? null,
					'pa_page_revision' => $revisionId
				];
				// Loose in_array() on purpose: IDs may be int or string.
				if ( in_array( $projectId, $toInsert ) ) {
					self::insertRecord( $values );
					$changed = true;
				} elseif ( in_array( $projectId, $toUpdate ) ) {
					if ( self::updateRecord( $values ) ) {
						$changed = true;
					}
				}
				// Check for database lag if there's a huge number of assessments
				if ( $i > 0 && $i % $wgUpdateRowsPerQuery == 0 ) {
					$dbProvider->commitAndWaitForReplication( __METHOD__, $ticket );
				}
				$i++;
			}
		}

		// Delete records from the database
		foreach ( $toDelete as $project ) {
			$values = [
				'pa_page_id' => $pageId,
				'pa_project_id' => $project
			];
			self::deleteRecord( $values );
			$changed = true;
			// Check for database lag if there's a huge number of deleted assessments
			if ( $i > 0 && $i % $wgUpdateRowsPerQuery == 0 ) {
				$dbProvider->commitAndWaitForReplication( __METHOD__, $ticket );
			}
			$i++;
		}

		if ( $changed ) {
			self::updateSearchIndex( $titleObj, $assessmentData );
		}
	}

	/**
	 * Update projects in the CirrusSearch index.
	 *
	 * Does nothing when the CirrusSearch extension is not loaded. Otherwise
	 * sets one weighted tag per project (weight derived from the importance),
	 * or resets the tag group when no usable project remains.
	 *
	 * @param Title $titleObj
	 * @param array $assessmentData Entries of [ project name, class, importance ];
	 *  project names are expected to be cleaned already
	 */
	public static function updateSearchIndex( Title $titleObj, array $assessmentData ) {
		if ( !ExtensionRegistry::getInstance()->isLoaded( 'CirrusSearch' ) ) {
			return;
		}
		/** @var WeightedTagsUpdater $updater */
		$updater = MediaWikiServices::getInstance()->getService( WeightedTagsUpdater::SERVICE );
		$tags = [];
		foreach ( $assessmentData as $parserData ) {
			if ( !isset( $parserData[0] ) || $parserData[0] == '' || str_contains( $parserData[0], '|' ) ) {
				// Ignore empty or invalid project names. Pipe character is not allowed in weighted_tags.
				continue;
			}
			// Name already cleaned in doUpdates()
			$name = $parserData[0];
			// A missing importance is treated like an unrecognised one.
			$tags[ $name ] = self::importanceToWeight( (string)( $parserData[2] ?? '' ) );
		}

		if ( $tags === [] ) {
			$updater->resetWeightedTags(
				$titleObj->toPageIdentity(),
				[ 'ext.pageassessments.project' ],
				'page-assessment-update'
			);
		} else {
			$updater->updateWeightedTags(
				$titleObj->toPageIdentity(),
				'ext.pageassessments.project',
				$tags,
				'page-assessment-update'
			);
		}
	}

	/**
	 * Map an importance rating to a search-tag weight.
	 *
	 * @param string $importance Importance rating, matched case-insensitively
	 * @return int Weight for the rating; 10 for anything not in the map
	 */
	private static function importanceToWeight( string $importance ): int {
		// TODO: Read from local JSON page in MediaWiki namespace?
		$importanceMap = [
			'top' => 100,
			'high' => 80,
			'mid' => 60,
			'low' => 40,
			// Consider unknown as low-importance
			'unknown' => 40,
			'na' => 10
		];
		return $importanceMap[ strtolower( $importance ) ] ?? 10;
	}

	/**
	 * Get name for the given wikiproject
	 *
	 * Names that were found are kept in a static cache. Misses are not cached,
	 * so a project created later in the same process can still be found.
	 *
	 * @param int $projectId The ID of the project
	 * @return string|false The name of the project or false if not found
	 */
	public static function getProjectName( $projectId ) {
		// Check for a valid project ID
		if ( !( $projectId > 0 ) ) {
			return false;
		}
		// See if the project name is already in the cache
		if ( isset( self::$projectNames[$projectId] ) ) {
			return self::$projectNames[$projectId];
		}
		$dbr = self::getReplicaDBConnection();
		$projectName = $dbr->newSelectQueryBuilder()
			->select( 'pap_project_title' )
			->from( 'page_assessments_projects' )
			->where( [ 'pap_project_id' => $projectId ] )
			->caller( __METHOD__ )
			->fetchField();
		if ( $projectName !== false ) {
			self::$projectNames[$projectId] = $projectName;
		}
		return $projectName;
	}

	/**
	 * Extract parent from a project name and return the ID. For example, if the
	 * project name is "Novels/Crime task force", the parent will be "Novels",
	 * i.e. WikiProject Novels.
	 *
	 * @param string $projectName Project title
	 * @return int|false project ID or false if not found
	 */
	protected static function extractParentProjectId( $projectName ) {
		$projectNameParts = explode( '/', $projectName );
		if ( count( $projectNameParts ) > 1 && $projectNameParts[0] !== '' ) {
			return self::getProjectId( $projectNameParts[0] );
		}
		return false;
	}

	/**
	 * Get project ID for a given wikiproject title
	 *
	 * Reads from the replica database.
	 *
	 * @param string $project Project title
	 * @return int|false project ID or false if not found
	 */
	public static function getProjectId( $project ) {
		$dbr = self::getReplicaDBConnection();
		return $dbr->newSelectQueryBuilder()
			->select( 'pap_project_id' )
			->from( 'page_assessments_projects' )
			->where( [ 'pap_project_title' => $project ] )
			->caller( __METHOD__ )
			->fetchField();
	}

	/**
	 * Insert a new wikiproject into the projects table
	 *
	 * If a project with the same title already exists, the insert is ignored
	 * and the ID of the existing project is returned instead.
	 *
	 * @param string $project Wikiproject title
	 * @param int|null $parentId ID of the parent project (for subprojects) (optional)
	 * @return int ID of the new (or already existing) project; 0 if it could
	 *  not be determined
	 */
	public static function insertProject( $project, $parentId = null ) {
		$dbw = self::getPrimaryDBConnection();
		$values = [ 'pap_project_title' => $project ];
		if ( $parentId ) {
			$values[ 'pap_parent_id' ] = (int)$parentId;
		}
		$dbw->newInsertQueryBuilder()
			->insertInto( 'page_assessments_projects' )
			// Use ignore() in case two projects with the same name are added at once.
			// This normally shouldn't happen, but is possible perhaps from clicking
			// 'Publish changes' twice in very quick succession. (See T286671)
			->ignore()
			->row( $values )
			->caller( __METHOD__ )
			->execute();
		$id = $dbw->insertId();
		if ( !$id ) {
			// The insert was ignored, so the project already exists (e.g. it
			// was created concurrently or the replica lookup was stale).
			// Fetch its ID from the primary; the locking read is meant to see
			// the committed row even if this transaction has an older snapshot.
			$id = $dbw->newSelectQueryBuilder()
				->select( 'pap_project_id' )
				->from( 'page_assessments_projects' )
				->where( [ 'pap_project_title' => $project ] )
				->lockInShareMode()
				->caller( __METHOD__ )
				->fetchField();
		}
		return (int)$id;
	}

	/**
	 * Clean up the title of the project (or subproject)
	 *
	 * Since the project title comes from a template parameter, it can basically
	 * be anything. This function accounts for common cases where editors put
	 * extra stuff into the parameter besides just the name of the project.
	 *
	 * @param string $project WikiProject title
	 * @return string Cleaned-up WikiProject title, at most 255 bytes long
	 */
	public static function cleanProjectTitle( $project ) {
		// Remove any bold formatting.
		$project = str_replace( "'''", "", $project );
		// Remove "the" prefix for subprojects (common on English Wikipedia).
		// This is case-sensitive on purpose, as there are some legitimate
		// subproject titles starting with "The", e.g. "The Canterbury Tales".
		$project = str_replace( "/the ", "/", $project );
		// Truncate to 255 bytes to avoid DB warnings. mb_strcut() cuts on a
		// character boundary, so a multibyte character is never split in half.
		return mb_strcut( $project, 0, 255, 'UTF-8' );
	}

	/**
	 * Update record in DB if there are new values
	 *
	 * The existing row is read from the replica; the update is skipped when its
	 * class and importance already equal the new values.
	 *
	 * @param array $values New values to be entered into the DB
	 * @return bool true if an update was performed false otherwise
	 */
	public static function updateRecord( $values ) {
		$dbr = self::getReplicaDBConnection();
		$conds = [
			'pa_page_id' => $values['pa_page_id'],
			'pa_project_id' => $values['pa_project_id']
		];
		// Check if there are no updates to be done
		$record = $dbr->newSelectQueryBuilder()
			->select( [ 'pa_class', 'pa_importance', 'pa_project_id', 'pa_page_id' ] )
			->from( 'page_assessments' )
			->where( $conds )
			->caller( __METHOD__ )
			->fetchResultSet();
		foreach ( $record as $row ) {
			if ( $row->pa_importance == $values['pa_importance'] &&
				$row->pa_class == $values['pa_class']
			) {
				// Return if no update is needed
				return false;
			}
		}
		// Make updates if there are changes
		$dbw = self::getPrimaryDBConnection();
		$dbw->newUpdateQueryBuilder()
			->update( 'page_assessments' )
			->set( $values )
			->where( $conds )
			->caller( __METHOD__ )
			->execute();
		return true;
	}

	/**
	 * Insert a new record in DB
	 * @param array $values New values to be entered into the DB
	 * @return bool true
	 */
	public static function insertRecord( $values ) {
		$dbw = self::getPrimaryDBConnection();
		// Use IGNORE in case 2 records for the same project are added at once.
		// This normally shouldn't happen, but is possible. (See T152080)
		$dbw->newInsertQueryBuilder()
			->insertInto( 'page_assessments' )
			->ignore()
			->row( $values )
			->caller( __METHOD__ )
			->execute();
		return true;
	}

	/**
	 * Get all projects associated with a given page (as project IDs)
	 * @param int $pageId Page ID
	 * @param int $flags IDBAccessObject::READ_* constant. This can be used to
	 *  force reading from the primary database. See docs at IDBAccessObject.php.
	 * @return array $results All projects associated with given page
	 */
	public static function getAllProjects( $pageId, $flags = IDBAccessObject::READ_NORMAL ) {
		if ( ( $flags & IDBAccessObject::READ_LATEST ) == IDBAccessObject::READ_LATEST ) {
			$db = self::getPrimaryDBConnection();
		} else {
			$db = self::getReplicaDBConnection();
		}
		$res = $db->newSelectQueryBuilder()
			->select( 'pa_project_id' )
			->from( 'page_assessments' )
			->where( [ 'pa_page_id' => $pageId ] )
			->recency( $flags )
			->caller( __METHOD__ )
			->fetchResultSet();
		$results = [];
		foreach ( $res as $row ) {
			$results[] = $row->pa_project_id;
		}
		return $results;
	}

	/**
	 * Get all assessment data associated with the given page
	 *
	 * @param int $pageId Page ID
	 * @return array $results One entry per assessment with the keys 'name',
	 *  'class' and 'importance'
	 */
	public static function getAllAssessments( int $pageId ): array {
		$db = self::getReplicaDBConnection();
		$res = $db->newSelectQueryBuilder()
			->select( [ 'pap_project_title', 'pa_class', 'pa_importance' ] )
			->from( 'page_assessments' )
			->join( 'page_assessments_projects', null, [ 'pap_project_id = pa_project_id' ] )
			->where( [ 'pa_page_id' => $pageId ] )
			->caller( __METHOD__ )
			->fetchResultSet();

		$results = [];
		foreach ( $res as $row ) {
			$results[] = [
				'name' => $row->pap_project_title,
				'class' => $row->pa_class,
				'importance' => $row->pa_importance
			];
		}
		return $results;
	}

	/**
	 * Delete a record from DB
	 * @param array $values Conditions for looking up records to delete; the
	 *  keys 'pa_page_id' and 'pa_project_id' are used
	 * @return bool true
	 */
	public static function deleteRecord( $values ) {
		$dbw = self::getPrimaryDBConnection();
		$conds = [
			'pa_page_id' => $values['pa_page_id'],
			'pa_project_id' => $values['pa_project_id']
		];
		$dbw->newDeleteQueryBuilder()
			->deleteFrom( 'page_assessments' )
			->where( $conds )
			->caller( __METHOD__ )
			->execute();
		return true;
	}

	/**
	 * Delete all records for a given page when page is deleted
	 * Note: We don't take care of undeletions explicitly, the records are restored
	 * when the page is parsed again.
	 * @param int $id Page ID of deleted page
	 * @return bool true
	 */
	public static function deleteRecordsForPage( $id ) {
		$dbw = self::getPrimaryDBConnection();
		$conds = [
			'pa_page_id' => $id,
		];
		$dbw->newDeleteQueryBuilder()
			->deleteFrom( 'page_assessments' )
			->where( $conds )
			->caller( __METHOD__ )
			->execute();
		return true;
	}

	/**
	 * Function called on parser init
	 *
	 * Appends [ project, class, importance ] to the list stored in the parser
	 * output's extension data under 'ext-pageassessment-assessmentdata'.
	 *
	 * @param Parser $parser Parser object
	 * @param string $project Wikiproject name
	 * @param string $class Class of article
	 * @param string $importance Importance of article
	 */
	public static function cacheAssessment(
		Parser $parser,
		$project = '',
		$class = '',
		$importance = ''
	) {
		$parserData = $parser->getOutput()->getExtensionData( 'ext-pageassessment-assessmentdata' );
		$values = [ $project, $class, $importance ];
		if ( $parserData == null ) {
			$parserData = [];
		}
		$parserData[] = $values;
		$parser->getOutput()->setExtensionData( 'ext-pageassessment-assessmentdata', $parserData );
	}

}