Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
97.44% |
76 / 78 |
|
83.33% |
5 / 6 |
CRAP | |
0.00% |
0 / 1 |
RemoveOldRows | |
97.44% |
76 / 78 |
|
83.33% |
5 / 6 |
12 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
init | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
execute | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
1 | |||
cleanReviewedPagesAndUnusedNamespaces | |
100.00% |
19 / 19 |
|
100.00% |
1 / 1 |
2 | |||
cleanRedirects | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
cleanPageTriagePageTable | |
95.00% |
38 / 40 |
|
0.00% |
0 / 1 |
6 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Extension\PageTriage\Maintenance; |
4 | |
5 | use Maintenance; |
6 | use MediaWiki\Extension\PageTriage\PageTriageServices; |
7 | use MediaWiki\Extension\PageTriage\PageTriageUtil; |
8 | use MediaWiki\MediaWikiServices; |
9 | use Wikimedia\Rdbms\IDatabase; |
10 | use Wikimedia\Rdbms\SelectQueryBuilder; |
11 | |
/**
 * Maintenance script that removes old rows from the PageTriage queue tables
 * (pagetriage_page and pagetriage_page_tags).
 *
 * Two clean-up passes are run, see cleanReviewedPagesAndUnusedNamespaces()
 * and cleanRedirects().
 */
class RemoveOldRows extends Maintenance {

	/** Number of seconds in a day, used to convert an age in days to a UNIX-time offset. */
	private const SECONDS_PER_DAY = 86400;

	/** Minimum age, in days, before cleanReviewedPagesAndUnusedNamespaces() removes a row. */
	private const REVIEWED_MAX_AGE_DAYS = 30;

	/**
	 * Replica connection used for all SELECT queries; set by init().
	 *
	 * @var IDatabase|null
	 */
	protected $dbr;

	/**
	 * Primary connection; set by init().
	 *
	 * @var IDatabase|null
	 */
	protected $dbw;

	public function __construct() {
		parent::__construct();
		$this->addDescription( "Remove reviewed pages from pagetriage queue if they"
			. " are older then 30 days" );
		$this->requireExtension( 'PageTriage' );
		$this->setBatchSize( 100 );
	}

	/**
	 * Acquires the replica and primary database connections used by this script.
	 */
	protected function init() {
		$this->dbr = PageTriageUtil::getReplicaConnection();
		$this->dbw = PageTriageUtil::getPrimaryConnection();
	}

	/**
	 * Entry point: runs both clean-up passes in order, reporting progress on output.
	 */
	public function execute() {
		$this->init();
		$this->output( "Started processing... \n" );

		$this->output( "cleanReviewedPagesAndUnusedNamespaces()... \n" );
		$this->cleanReviewedPagesAndUnusedNamespaces();

		$this->output( "cleanRedirects()... \n" );
		$this->cleanRedirects();

		$this->output( "Completed \n" );
	}

	/**
	 * Removes pages from the SQL tables pagetriage_page and pagetriage_page_tags
	 * if they meet certain criteria.
	 *
	 * Remove pages older than 30 days, if
	 * 1. the page is in the article namespace and has been reviewed, or
	 * 2. the page is in any namespace listed in $wgPageTriageNamespaces other
	 *    than main (0), regardless of its review status.
	 *
	 * NOTE(review): earlier documentation described case 2 as "not in main, user,
	 * or draft"; the query below matches pages that ARE in the non-main configured
	 * namespaces. Confirm which of the two is intended.
	 *
	 * This is to help keep the number of rows in the tables pagetriage_page and
	 * pagetriage_page_tags reasonable. Pages not in these tables will be
	 * treated as reviewed, and the Page Curation toolbar will not show.
	 */
	private function cleanReviewedPagesAndUnusedNamespaces(): void {
		global $wgPageTriageNamespaces;

		// Every configured namespace except main (0); main is covered by the
		// reviewed-only condition below. Only 0 is filtered out here.
		$secondaryNamespaces = array_filter(
			$wgPageTriageNamespaces,
			static function ( $ns ) {
				return $ns !== 0;
			}
		);
		$startTime = (int)wfTimestamp( TS_UNIX )
			- self::REVIEWED_MAX_AGE_DAYS * self::SECONDS_PER_DAY;

		// The page is in the article namespace and has been reviewed.
		$reviewedMainspaceWhere = $this->dbr->makeList( [
			'page_namespace' => NS_MAIN,
			'ptrp_reviewed > 0'
		], LIST_AND );

		$sqlWhere = $reviewedMainspaceWhere;
		if ( $secondaryNamespaces !== [] ) {
			// ... OR the page is in one of the non-main configured namespaces.
			$sqlWhere = $this->dbr->makeList( [
				$reviewedMainspaceWhere,
				'page_namespace' => $secondaryNamespaces,
			], LIST_OR );
		}

		$this->cleanPageTriagePageTable( $startTime, $sqlWhere );
	}

	/**
	 * Removes pages from the SQL tables pagetriage_page and pagetriage_page_tags
	 * if they meet certain criteria.
	 *
	 * Remove pages older than $wgPageTriageRedirectAutoreviewAge days, if the
	 * page is a redirect. This is regardless of its patrol status.
	 *
	 * This is to help keep the number of rows in the tables pagetriage_page and
	 * pagetriage_page_tags reasonable. Pages not in these tables will be
	 * treated as reviewed, and the Page Curation toolbar will not show.
	 */
	private function cleanRedirects(): void {
		global $wgPageTriageRedirectAutoreviewAge;

		$startTime = (int)wfTimestamp( TS_UNIX )
			- $wgPageTriageRedirectAutoreviewAge * self::SECONDS_PER_DAY;
		// Single condition, so the list mode does not affect the generated SQL.
		$sqlWhere = $this->dbr->makeList( [
			'page_is_redirect' => 1,
		], LIST_AND );

		$this->cleanPageTriagePageTable( $startTime, $sqlWhere );
	}

	/**
	 * Deletes data from the pagetriage_page and pagetriage_page_tags tables that
	 * is older than $startTime and that meets the criteria in $sqlWhere.
	 *
	 * Rows are read from the replica in batches of getBatchSize(), newest first,
	 * and handed to the queue manager for deletion. Each batch continues from the
	 * (ptrp_created, ptrp_page_id) position of the last row of the previous one.
	 *
	 * @param int $startTime a UNIX timestamp of the cutoff date
	 * @param string $sqlWhere SQL to be injected into the WHERE clause of an SQL query
	 */
	private function cleanPageTriagePageTable( $startTime, $sqlWhere ): void {
		$idRow = $this->dbr->newSelectQueryBuilder()
			->select( [ 'max_id' => 'MAX(ptrp_page_id)' ] )
			->from( 'pagetriage_page' )
			->caller( __METHOD__ )
			->fetchRow();

		// No data to process, exit. An aggregate query without GROUP BY returns one
		// row even for an empty table, with max_id NULL, so check that as well.
		if ( $idRow === false || $idRow->max_id === null ) {
			$this->output( "No data to process \n" );
			return;
		}

		// Start just above the highest existing ID so the first batch can match any row.
		$startId = (int)$idRow->max_id + 1;
		$queueManager = PageTriageServices::wrap( MediaWikiServices::getInstance() )
			->getQueueManager();
		$batchSize = $this->getBatchSize();

		// Scan for rows with ptrp_created before the $startTime cutoff. A batch
		// shorter than $batchSize means there is nothing left to fetch.
		do {
			$res = $this->dbr->newSelectQueryBuilder()
				->select( [ 'ptrp_page_id', 'ptrp_created', 'page_namespace', 'ptrp_reviewed' ] )
				->from( 'pagetriage_page' )
				->join( 'page', 'page', 'ptrp_page_id = page_id' )
				->where( [
					$this->dbr->buildComparison( '<', [
						'ptrp_created' => $this->dbr->timestamp( $startTime ),
						'ptrp_page_id' => $startId,
					] ),
					$sqlWhere,
				] )
				->limit( $batchSize )
				->orderBy( [ 'ptrp_created', 'ptrp_page_id' ], SelectQueryBuilder::SORT_DESC )
				->caller( __METHOD__ )
				->fetchResultSet();

			$pageIds = [];
			$lastRow = null;
			foreach ( $res as $row ) {
				$pageIds[] = $row->ptrp_page_id;
				$lastRow = $row;
			}
			$count = count( $pageIds );

			if ( $lastRow !== null ) {
				// Continue the next batch from the position of the last row seen.
				if ( $lastRow->ptrp_created ) {
					$startTime = wfTimestamp( TS_UNIX, $lastRow->ptrp_created );
				}
				$startId = (int)$lastRow->ptrp_page_id;
				$queueManager->deleteByPageIds( $pageIds );
			}

			$this->output( "processed $count \n" );
			$this->waitForReplication();
		} while ( $count === $batchSize );
	}
}