Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
97.44% covered (success)
97.44%
76 / 78
83.33% covered (warning)
83.33%
5 / 6
CRAP
0.00% covered (danger)
0.00%
0 / 1
RemoveOldRows
97.44% covered (success)
97.44%
76 / 78
83.33% covered (warning)
83.33%
5 / 6
12
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
1
 init
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 execute
100.00% covered (success)
100.00%
7 / 7
100.00% covered (success)
100.00%
1 / 1
1
 cleanReviewedPagesAndUnusedNamespaces
100.00% covered (success)
100.00%
19 / 19
100.00% covered (success)
100.00%
1 / 1
2
 cleanRedirects
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
1
 cleanPageTriagePageTable
95.00% covered (success)
95.00%
38 / 40
0.00% covered (danger)
0.00%
0 / 1
6
1<?php
2
3namespace MediaWiki\Extension\PageTriage\Maintenance;
4
5use Maintenance;
6use MediaWiki\Extension\PageTriage\PageTriageServices;
7use MediaWiki\Extension\PageTriage\PageTriageUtil;
8use MediaWiki\MediaWikiServices;
9use Wikimedia\Rdbms\IDatabase;
10use Wikimedia\Rdbms\SelectQueryBuilder;
11
/**
 * A maintenance script that removes old rows from the PageTriage queue.
 *
 * Two clean-up passes are run, each deleting rows whose ptrp_created is older
 * than a cutoff:
 *  - cleanReviewedPagesAndUnusedNamespaces(): 30 day cutoff
 *  - cleanRedirects(): cutoff taken from $wgPageTriageRedirectAutoreviewAge
 */
class RemoveOldRows extends Maintenance {

    /**
     * Replica connection, used for all SELECTs. Set by init().
     *
     * @var IDatabase|null
     */
    protected $dbr;
    /**
     * Primary connection. Set by init(); not otherwise used by the methods of this class.
     *
     * @var IDatabase|null
     */
    protected $dbw;

    public function __construct() {
        parent::__construct();
        $this->addDescription( "Remove reviewed pages from pagetriage queue if they"
            . " are older than 30 days" );
        $this->requireExtension( 'PageTriage' );
        $this->setBatchSize( 100 );
    }

    /**
     * Fetches the database connections used by this script.
     */
    protected function init() {
        $this->dbr = PageTriageUtil::getReplicaConnection();
        $this->dbw = PageTriageUtil::getPrimaryConnection();
    }

    /**
     * Entry point: runs both clean-up passes and reports progress.
     */
    public function execute() {
        $this->init();
        $this->output( "Started processing... \n" );

        $this->output( "cleanReviewedPagesAndUnusedNamespaces()... \n" );
        $this->cleanReviewedPagesAndUnusedNamespaces();

        $this->output( "cleanRedirects()... \n" );
        $this->cleanRedirects();

        $this->output( "Completed \n" );
    }

    /**
     * Removes pages from the SQL tables pagetriage_page and pagetriage_page_tags
     * if they meet certain criteria.
     *
     * Remove pages older than 30 days, if
     * 1. the page is in the article namespace and has been reviewed, or
     * 2. the page is in any other namespace listed in $wgPageTriageNamespaces
     * (regardless of its review status).
     *
     * NOTE(review): this docblock previously described criterion 2 as "the page
     * is not in a namespace that PageTriage patrols". The query below matches
     * pages that ARE in the non-main entries of $wgPageTriageNamespaces; confirm
     * which of the two is intended.
     *
     * This is to help keep the number of rows in the tables pagetriage_page and
     * pagetriage_page_tags tables reasonable. Pages not in these tables will be
     * treated as reviewed, and the Page Curation toolbar will not show.
     */
    private function cleanReviewedPagesAndUnusedNamespaces() {
        global $wgPageTriageNamespaces;

        $maxAgeInDays = 30;

        // Every configured namespace except the article namespace, which has
        // special handling below. (Draft is presumably not part of
        // $wgPageTriageNamespaces - confirm.)
        $secondaryNamespaces = array_filter(
            $wgPageTriageNamespaces,
            static function ( $ns ) {
                return $ns !== NS_MAIN;
            }
        );
        $startTime = (int)wfTimestamp( TS_UNIX ) - $maxAgeInDays * 60 * 60 * 24;

        // the page is in the article namespace and has been reviewed.
        $reviewedMainspaceWhere = $this->dbr->makeList( [
            'page_namespace' => NS_MAIN,
            'ptrp_reviewed > 0'
        ], LIST_AND );
        $sqlWhere = $reviewedMainspaceWhere;
        if ( count( $secondaryNamespaces ) ) {
            $sqlWhere = $this->dbr->makeList( [
                $reviewedMainspaceWhere,
                // ... or the page is in one of the other configured namespaces
                'page_namespace' => $secondaryNamespaces,
            ], LIST_OR );
        }

        $this->cleanPageTriagePageTable( $startTime, $sqlWhere );
    }

    /**
     * Removes pages from the SQL tables pagetriage_page and pagetriage_page_tags
     * if they meet certain criteria.
     *
     * Remove redirects older than $wgPageTriageRedirectAutoreviewAge days. This
     * is regardless of their patrol status.
     *
     * This is to help keep the number of rows in the tables pagetriage_page and
     * pagetriage_page_tags tables reasonable. Pages not in these tables will be
     * treated as reviewed, and the Page Curation toolbar will not show.
     */
    private function cleanRedirects() {
        global $wgPageTriageRedirectAutoreviewAge;

        $startTime = (int)wfTimestamp( TS_UNIX ) - $wgPageTriageRedirectAutoreviewAge * 60 * 60 * 24;
        // Only one condition is listed, so the list mode does not affect the SQL.
        $sqlWhere = $this->dbr->makeList( [
            'page_is_redirect' => 1,
        ], LIST_OR );

        $this->cleanPageTriagePageTable( $startTime, $sqlWhere );
    }

    /**
     * Deletes data from the pagetriage_page and pagetriage_page_tags tables that
     * is older than $startTime and that meets the criteria in $sqlWhere.
     *
     * Rows are read from the replica in batches of getBatchSize(), newest first,
     * paging on ( ptrp_created, ptrp_page_id ). Each batch is handed to the
     * queue manager for deletion, then the script waits for replication.
     *
     * @param int $startTime a UNIX timestamp of the cutoff date
     * @param string $sqlWhere SQL to be injected into the WHERE clause of an SQL query
     */
    private function cleanPageTriagePageTable( $startTime, $sqlWhere ) {
        $batchSize = $this->getBatchSize();

        $idRow = $this->dbr->newSelectQueryBuilder()
            ->select( [ 'max_id' => 'MAX(ptrp_page_id)' ] )
            ->from( 'pagetriage_page' )
            ->caller( __METHOD__ )
            ->fetchRow();

        // No data to process, exit. An aggregate without GROUP BY still yields
        // one row on an empty table, with max_id being NULL, so checking for
        // false alone would never detect the empty case.
        if ( $idRow === false || $idRow->max_id === null ) {
            $this->output( "No data to process \n" );
            return;
        }

        // Start just above the highest existing ID so the first batch can include it.
        $startId = (int)$idRow->max_id + 1;
        $queueManager = PageTriageServices::wrap( MediaWikiServices::getInstance() )
            ->getQueueManager();

        // A batch smaller than the batch size means there is nothing left to scan.
        do {
            $res = $this->dbr->newSelectQueryBuilder()
                ->select( [ 'ptrp_page_id', 'ptrp_created', 'page_namespace', 'ptrp_reviewed' ] )
                ->from( 'pagetriage_page' )
                ->join( 'page', 'page', 'ptrp_page_id = page_id' )
                ->where( [
                    // Rows strictly before the ( created, page id ) paging position
                    $this->dbr->buildComparison( '<', [
                        'ptrp_created' => $this->dbr->timestamp( $startTime ),
                        'ptrp_page_id' => $startId,
                    ] ),
                    $sqlWhere,
                ] )
                ->limit( $batchSize )
                ->orderBy( [ 'ptrp_created', 'ptrp_page_id' ], SelectQueryBuilder::SORT_DESC )
                ->caller( __METHOD__ )
                ->fetchResultSet();

            $pageIds = [];
            $lastRow = null;
            foreach ( $res as $row ) {
                $pageIds[] = $row->ptrp_page_id;
                $lastRow = $row;
            }
            $count = count( $pageIds );

            if ( $lastRow !== null ) {
                // Move the paging position to the last (oldest) row of this batch
                if ( $lastRow->ptrp_created ) {
                    $startTime = wfTimestamp( TS_UNIX, $lastRow->ptrp_created );
                }
                $startId = (int)$lastRow->ptrp_page_id;
                $queueManager->deleteByPageIds( $pageIds );
            }

            $this->output( "processed $count \n" );
            $this->waitForReplication();
        } while ( $count === $batchSize );
    }
}