Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
29.29% |
58 / 198 |
|
31.25% |
5 / 16 |
CRAP | |
0.00% |
0 / 1 |
PageAssessmentsDAO | |
29.29% |
58 / 198 |
|
31.25% |
5 / 16 |
970.45 | |
0.00% |
0 / 1 |
getReplicaDBConnection | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getPrimaryDBConnection | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
doUpdates | |
0.00% |
0 / 52 |
|
0.00% |
0 / 1 |
380 | |||
updateSearchIndex | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
56 | |||
importanceToWeight | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
2 | |||
getProjectName | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
12 | |||
extractParentProjectId | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
12 | |||
getProjectId | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
2 | |||
insertProject | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
6 | |||
cleanProjectTitle | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
updateRecord | |
95.65% |
22 / 23 |
|
0.00% |
0 / 1 |
4 | |||
insertRecord | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
1 | |||
getAllProjects | |
92.31% |
12 / 13 |
|
0.00% |
0 / 1 |
3.00 | |||
deleteRecord | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
1 | |||
deleteRecordsForPage | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
2 | |||
cacheAssessment | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 |
1 | <?php |
2 | |
3 | /** |
4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by |
6 | * the Free Software Foundation; either version 2 of the License, or |
7 | * (at your option) any later version. |
8 | * |
9 | * This program is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | * GNU General Public License for more details. |
13 | * |
14 | * You should have received a copy of the GNU General Public License along |
15 | * with this program; if not, write to the Free Software Foundation, Inc., |
16 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
17 | * http://www.gnu.org/copyleft/gpl.html |
18 | * |
19 | * PageAssessments extension body |
20 | * |
21 | * @file |
22 | * @ingroup Extensions |
23 | */ |
24 | |
25 | namespace MediaWiki\Extension\PageAssessments; |
26 | |
27 | use CirrusSearch\WeightedTagsUpdater; |
28 | use MediaWiki\MediaWikiServices; |
29 | use MediaWiki\Parser\Parser; |
30 | use MediaWiki\Registration\ExtensionRegistry; |
31 | use MediaWiki\Title\Title; |
32 | use Wikimedia\Rdbms\IDatabase; |
33 | use Wikimedia\Rdbms\IDBAccessObject; |
34 | use Wikimedia\Rdbms\IReadableDatabase; |
35 | |
36 | class PageAssessmentsDAO { |
37 | |
/**
 * @var array Static (class-level) cache keyed by project ID. getProjectName()
 *  stores whatever its database lookup returned for that ID, so the value is
 *  the project title, or presumably false when no such project exists.
 */
protected static $projectNames = [];
40 | |
/**
 * Obtain the replica database handle from the MediaWiki connection provider.
 *
 * @return IReadableDatabase
 */
private static function getReplicaDBConnection(): IReadableDatabase {
	$connectionProvider = MediaWikiServices::getInstance()->getConnectionProvider();
	return $connectionProvider->getReplicaDatabase();
}
44 | |
/**
 * Obtain the primary database handle from the MediaWiki connection provider.
 *
 * @return IDatabase
 */
private static function getPrimaryDBConnection(): IDatabase {
	$connectionProvider = MediaWikiServices::getInstance()->getConnectionProvider();
	return $connectionProvider->getPrimaryDatabase();
}
48 | |
/**
 * Driver function that handles updating assessment data in database.
 *
 * Compares the assessments compiled for the page against the project IDs
 * already stored for it, then inserts, updates, or deletes rows in
 * page_assessments accordingly and finally refreshes the search index tags.
 * Projects that do not exist yet are added to page_assessments_projects.
 *
 * Entries without a project name are ignored. The same applies to entries
 * whose name becomes empty once cleaned (e.g. a name consisting solely of
 * bold markup), so that no project with an empty title is ever created.
 *
 * @param Title $titleObj Title object of the subject page
 * @param array $assessmentData Data for all assessments compiled. Each entry
 *  is read as [ 0 => project name, 1 => class, 2 => importance ].
 * @param mixed|null $ticket Transaction ticket; a new empty ticket is
 *  requested from the connection provider when none is given
 */
public static function doUpdates( $titleObj, $assessmentData, $ticket = null ) {
	global $wgUpdateRowsPerQuery, $wgPageAssessmentsSubprojects;

	$dbProvider = MediaWikiServices::getInstance()->getConnectionProvider();
	$ticket = $ticket ?: $dbProvider->getEmptyTransactionTicket( __METHOD__ );

	$pageId = $titleObj->getArticleID();
	$revisionId = $titleObj->getLatestRevID();
	// Compile a list of projects found in the parserData to find out which
	// assessment records need to be inserted, deleted, or updated.
	// Maps cleaned project name => project ID.
	$projects = [];
	foreach ( $assessmentData as $key => $parserData ) {
		// Skip entries where the name of the project is not set.
		if ( !isset( $parserData[0] ) || $parserData[0] === '' ) {
			continue;
		}
		// Clean the project name.
		$projectName = self::cleanProjectTitle( $parserData[0] );
		// Replace the original project name with the cleaned project
		// name in the assessment data, since we'll need it to match later.
		$assessmentData[$key][0] = $projectName;
		// Cleaning can strip the name down to nothing. Such an entry must not
		// create a project; the loops below skip it because its name is now ''.
		if ( $projectName === '' ) {
			continue;
		}
		// Get the corresponding ID from page_assessments_projects table.
		$projectId = self::getProjectId( $projectName );
		// If there is no existing project by that name, add it to the table.
		if ( $projectId === false ) {
			// Only try to resolve a parent project when subprojects are enabled.
			$parentId = $wgPageAssessmentsSubprojects
				? self::extractParentProjectId( $projectName )
				: null;
			// Insert project data into the database table.
			$projectId = self::insertProject( $projectName, $parentId );
		}
		// Add the project's ID to the array.
		$projects[$projectName] = $projectId;
	}
	// Get a list of all the projects previously assigned to the page.
	$projectsInDb = self::getAllProjects( $pageId, IDBAccessObject::READ_LATEST );

	$toInsert = array_diff( $projects, $projectsInDb );
	$toDelete = array_diff( $projectsInDb, $projects );
	$toUpdate = array_intersect( $projects, $projectsInDb );

	// Number of records handled so far, shared by both loops below.
	$i = 0;

	// Add and update assessment records to the database
	foreach ( $assessmentData as $parserData ) {
		// Make sure the name of the project is set.
		if ( !isset( $parserData[0] ) || $parserData[0] === '' ) {
			continue;
		}
		$projectId = $projects[$parserData[0]];
		// A falsy project ID (e.g. the insert produced no ID) or a page
		// without an ID cannot be recorded.
		if ( $projectId && $pageId ) {
			$class = $parserData[1];
			$importance = $parserData[2];
			$values = [
				'pa_page_id' => $pageId,
				'pa_project_id' => $projectId,
				'pa_class' => $class,
				'pa_importance' => $importance,
				'pa_page_revision' => $revisionId
			];
			if ( in_array( $projectId, $toInsert ) ) {
				self::insertRecord( $values );
			} elseif ( in_array( $projectId, $toUpdate ) ) {
				self::updateRecord( $values );
			}
			// Check for database lag if there's a huge number of assessments
			if ( $i > 0 && $i % $wgUpdateRowsPerQuery == 0 ) {
				$dbProvider->commitAndWaitForReplication( __METHOD__, $ticket );
			}
			$i++;
		}
	}

	// Delete records from the database
	foreach ( $toDelete as $project ) {
		$values = [
			'pa_page_id' => $pageId,
			'pa_project_id' => $project
		];
		self::deleteRecord( $values );
		// Check for database lag if there's a huge number of deleted assessments
		if ( $i > 0 && $i % $wgUpdateRowsPerQuery == 0 ) {
			$dbProvider->commitAndWaitForReplication( __METHOD__, $ticket );
		}
		$i++;
	}

	// TODO: Do this only if any projects were actually changed
	self::updateSearchIndex( $titleObj, $assessmentData );
}
147 | |
/**
 * Push the page's project assessments to the CirrusSearch index as
 * weighted tags. Does nothing when CirrusSearch is not loaded.
 *
 * When no usable project remains, the 'ext.pageassessments.project' tags
 * of the page are reset instead of updated.
 *
 * @param Title $titleObj
 * @param array $assessmentData Entries are read as
 *  [ 0 => project name, 2 => importance ]
 */
public static function updateSearchIndex( Title $titleObj, array $assessmentData ) {
	$cirrusLoaded = ExtensionRegistry::getInstance()->isLoaded( 'CirrusSearch' );
	if ( !$cirrusLoaded ) {
		return;
	}
	/** @var WeightedTagsUpdater $tagsUpdater */
	$tagsUpdater = MediaWikiServices::getInstance()->getService( WeightedTagsUpdater::SERVICE );

	// Project name => weight derived from the importance rating
	$weightsByProject = [];
	foreach ( $assessmentData as $entry ) {
		// Keep only non-empty project names without a pipe character, which
		// is not allowed in weighted_tags. Names were already cleaned in doUpdates().
		$usable = isset( $entry[0] ) && $entry[0] != '' && !str_contains( $entry[0], '|' );
		if ( $usable ) {
			$weightsByProject[ $entry[0] ] = self::importanceToWeight( $entry[ 2 ] );
		}
	}

	if ( $weightsByProject !== [] ) {
		$tagsUpdater->updateWeightedTags(
			$titleObj->toPageIdentity(),
			'ext.pageassessments.project',
			$weightsByProject,
			'page-assessment-update'
		);
		return;
	}

	$tagsUpdater->resetWeightedTags(
		$titleObj->toPageIdentity(),
		[ 'ext.pageassessments.project' ],
		'page-assessment-update'
	);
}
187 | |
/**
 * Translate an importance rating into a numeric weight.
 *
 * The rating is matched case-insensitively; anything unrecognised gets
 * the lowest weight (10).
 *
 * @param string $importance Importance rating, e.g. "Top" or "low"
 * @return int Weight for the rating
 */
private static function importanceToWeight( string $importance ): int {
	// TODO: Read from local JSON page in MediaWiki namespace?
	return match ( strtolower( $importance ) ) {
		'top' => 100,
		'high' => 80,
		'mid' => 60,
		// Consider unknown as low-importance
		'low', 'unknown' => 40,
		'na' => 10,
		default => 10,
	};
}
201 | |
/**
 * Look up the name of a wikiproject by its ID.
 *
 * Results are remembered in the static $projectNames cache, so the replica
 * database is only queried for IDs without a cached (non-null) value.
 *
 * @param int $projectId The ID of the project
 * @return string|false The name of the project or false if not found
 */
public static function getProjectName( $projectId ) {
	// Anything that is not a positive ID cannot match a project
	if ( !( $projectId > 0 ) ) {
		return false;
	}

	// Serve from the cache when this ID was looked up before
	if ( isset( self::$projectNames[$projectId] ) ) {
		return self::$projectNames[$projectId];
	}

	$title = self::getReplicaDBConnection()->newSelectQueryBuilder()
		->select( 'pap_project_title' )
		->from( 'page_assessments_projects' )
		->where( [ 'pap_project_id' => $projectId ] )
		->caller( __METHOD__ )
		->fetchField();

	// Remember the lookup result for subsequent calls
	self::$projectNames[$projectId] = $title;

	return $title;
}
228 | |
/**
 * Determine the parent of a subproject from its name and return the
 * parent's ID. The parent is the part before the first slash: for
 * "Novels/Crime task force" the parent is "Novels", i.e. WikiProject Novels.
 *
 * @param string $projectName Project title
 * @return int|false project ID or false if not found
 */
protected static function extractParentProjectId( $projectName ) {
	$slashPos = strpos( $projectName, '/' );
	// No slash at all, or nothing in front of it: there is no parent
	if ( $slashPos === false || $slashPos === 0 ) {
		return false;
	}
	$parentName = substr( $projectName, 0, $slashPos );
	return self::getProjectId( $parentName );
}
244 | |
/**
 * Look up the ID of a wikiproject by its title, reading from a replica.
 *
 * @param string $project Project title
 * @return int|false project ID or false if not found
 */
public static function getProjectId( $project ) {
	$lookup = self::getReplicaDBConnection()->newSelectQueryBuilder();
	$lookup->select( 'pap_project_id' )
		->from( 'page_assessments_projects' )
		->where( [ 'pap_project_title' => $project ] )
		->caller( __METHOD__ );
	return $lookup->fetchField();
}
259 | |
/**
 * Insert a new wikiproject into the projects table.
 *
 * The insert uses IGNORE, so a project that already exists is left alone.
 * In that case no new insert ID is available; the ID of the existing row is
 * then read from the primary database so that callers still get a usable
 * project ID rather than an empty one.
 *
 * @param string $project Wikiproject title
 * @param int|null $parentId ID of the parent project (for subprojects) (optional)
 * @return int ID of the new project, or of the already existing project with
 *  that title. Falls back to the raw insert ID if neither could be determined.
 */
public static function insertProject( $project, $parentId = null ) {
	$dbw = self::getPrimaryDBConnection();
	$values = [ 'pap_project_title' => $project ];
	// Only record a parent when a truthy ID was passed (null/false/0 mean "none")
	if ( $parentId ) {
		$values[ 'pap_parent_id' ] = (int)$parentId;
	}
	$dbw->newInsertQueryBuilder()
		->insertInto( 'page_assessments_projects' )
		// Use ignore() in case two projects with the same name are added at once.
		// This normally shouldn't happen, but is possible perhaps from clicking
		// 'Publish changes' twice in very quick succession. (See T286671)
		->ignore()
		->row( $values )
		->caller( __METHOD__ )
		->execute();
	$id = $dbw->insertId();
	if ( $id ) {
		return $id;
	}

	// The insert was ignored (the title already exists, possibly not yet
	// visible on the replica). Resolve the existing row on the primary.
	$existingId = $dbw->newSelectQueryBuilder()
		->select( 'pap_project_id' )
		->from( 'page_assessments_projects' )
		->where( [ 'pap_project_title' => $project ] )
		->caller( __METHOD__ )
		->fetchField();

	return $existingId !== false ? (int)$existingId : $id;
}
284 | |
/**
 * Clean up the title of the project (or subproject)
 *
 * Since the project title comes from a template parameter, it can basically
 * be anything. This function accounts for common cases where editors put
 * extra stuff into the parameter besides just the name of the project.
 *
 * The result is limited to 255 bytes. Truncation never splits a multi-byte
 * UTF-8 character, so the returned title remains valid UTF-8.
 *
 * @param string $project WikiProject title
 * @return string Cleaned-up WikiProject title (may be empty)
 */
public static function cleanProjectTitle( $project ) {
	// Remove any bold formatting.
	$project = str_replace( "'''", "", $project );
	// Remove "the" prefix for subprojects (common on English Wikipedia).
	// This is case-sensitive on purpose, as there are some legitimate
	// subproject titles starting with "The", e.g. "The Canterbury Tales".
	$project = str_replace( "/the ", "/", $project );
	// Truncate to 255 bytes to avoid DB warnings. mb_strcut() counts bytes
	// like substr() does, but cuts on a character boundary instead of
	// leaving a partial multi-byte sequence at the end.
	return mb_strcut( $project, 0, 255, 'UTF-8' );
}
304 | |
/**
 * Bring the stored assessment of a page/project pair up to date.
 *
 * The current class and importance are first read from a replica; the
 * primary is only written to when no stored row already carries both new
 * values.
 *
 * @param array $values New values to be entered into the DB; must contain
 *  'pa_page_id', 'pa_project_id', 'pa_class' and 'pa_importance'
 * @return bool true
 */
public static function updateRecord( $values ) {
	$replica = self::getReplicaDBConnection();
	// The row is identified by its page and project
	$rowConds = [
		'pa_page_id' => $values['pa_page_id'],
		'pa_project_id' => $values['pa_project_id']
	];

	$storedRows = $replica->newSelectQueryBuilder()
		->select( [ 'pa_class', 'pa_importance', 'pa_project_id', 'pa_page_id' ] )
		->from( 'page_assessments' )
		->where( $rowConds )
		->caller( __METHOD__ )
		->fetchResultSet();

	foreach ( $storedRows as $stored ) {
		$sameImportance = $stored->pa_importance == $values['pa_importance'];
		if ( $sameImportance && $stored->pa_class == $values['pa_class'] ) {
			// Nothing changed, so there is nothing to write
			return true;
		}
	}

	// Something differs (or no row was found): write the new values
	self::getPrimaryDBConnection()->newUpdateQueryBuilder()
		->update( 'page_assessments' )
		->set( $values )
		->where( $rowConds )
		->caller( __METHOD__ )
		->execute();

	return true;
}
341 | |
/**
 * Add a new assessment row to page_assessments.
 *
 * @param array $values New values to be entered into the DB
 * @return bool true
 */
public static function insertRecord( $values ) {
	$insert = self::getPrimaryDBConnection()->newInsertQueryBuilder();
	$insert->insertInto( 'page_assessments' )
		// IGNORE guards against 2 records for the same project being added
		// at once. This normally shouldn't happen, but is possible. (See T152080)
		->ignore()
		->row( $values )
		->caller( __METHOD__ );
	$insert->execute();

	return true;
}
359 | |
/**
 * List the IDs of all projects that have an assessment for the given page.
 *
 * @param int $pageId Page ID
 * @param int $flags IDBAccessObject::READ_* constant. This can be used to
 *  force reading from the primary database. See docs at IDBAccessObject.php.
 * @return array Project IDs associated with the given page
 */
public static function getAllProjects( $pageId, $flags = IDBAccessObject::READ_NORMAL ) {
	// READ_LATEST switches the query over to the primary database
	$useLatest = ( $flags & IDBAccessObject::READ_LATEST ) == IDBAccessObject::READ_LATEST;
	$db = $useLatest
		? self::getPrimaryDBConnection()
		: self::getReplicaDBConnection();

	$rows = $db->newSelectQueryBuilder()
		->select( 'pa_project_id' )
		->from( 'page_assessments' )
		->where( [ 'pa_page_id' => $pageId ] )
		->recency( $flags )
		->caller( __METHOD__ )
		->fetchResultSet();

	$projectIds = [];
	foreach ( $rows as $assessment ) {
		$projectIds[] = $assessment->pa_project_id;
	}

	return $projectIds;
}
385 | |
/**
 * Remove the assessment of one page for one project.
 *
 * @param array $values Conditions for looking up records to delete; only
 *  'pa_page_id' and 'pa_project_id' are used
 * @return bool true
 */
public static function deleteRecord( $values ) {
	$primary = self::getPrimaryDBConnection();
	$primary->newDeleteQueryBuilder()
		->deleteFrom( 'page_assessments' )
		->where( [
			'pa_page_id' => $values['pa_page_id'],
			'pa_project_id' => $values['pa_project_id']
		] )
		->caller( __METHOD__ )
		->execute();

	return true;
}
404 | |
/**
 * Remove every assessment stored for a page, used when the page is deleted.
 * Note: Undeletions are not handled explicitly; the records are restored
 * when the page is parsed again.
 *
 * @param int $id Page ID of deleted page
 * @return bool true
 */
public static function deleteRecordsForPage( $id ) {
	$primary = self::getPrimaryDBConnection();
	$primary->newDeleteQueryBuilder()
		->deleteFrom( 'page_assessments' )
		->where( [ 'pa_page_id' => $id ] )
		->caller( __METHOD__ )
		->execute();

	return true;
}
424 | |
/**
 * Record one assessment in the parser output.
 *
 * Appends [ project, class, importance ] to the list kept in the parser
 * output's extension data under 'ext-pageassessment-assessmentdata',
 * starting a new list when nothing has been stored yet.
 *
 * @param Parser $parser Parser object
 * @param string $project Wikiproject name
 * @param string $class Class of article
 * @param string $importance Importance of article
 */
public static function cacheAssessment(
	Parser $parser,
	$project = '',
	$class = '',
	$importance = ''
) {
	$stored = $parser->getOutput()->getExtensionData( 'ext-pageassessment-assessmentdata' );
	// Nothing stored so far: begin with an empty list
	$assessments = ( $stored == null ) ? [] : $stored;
	$assessments[] = [ $project, $class, $importance ];
	$parser->getOutput()->setExtensionData( 'ext-pageassessment-assessmentdata', $assessments );
}
446 | |
447 | } |