Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
98.44% |
63 / 64 |
|
50.00% |
1 / 2 |
CRAP | |
0.00% |
0 / 1 |
ResetPageRandom | |
98.44% |
63 / 64 |
|
50.00% |
1 / 2 |
10 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
1 | |||
execute | |
98.08% |
51 / 52 |
|
0.00% |
0 / 1 |
9 |
1 | <?php |
2 | /** |
3 | * Resets the page_random field for articles in the provided time range. |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License along |
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
18 | * http://www.gnu.org/copyleft/gpl.html |
19 | * |
20 | * @file |
21 | * @ingroup Maintenance |
22 | */ |
23 | |
24 | use MediaWiki\Maintenance\Maintenance; |
25 | |
26 | // @codeCoverageIgnoreStart |
27 | require_once __DIR__ . '/Maintenance.php'; |
28 | // @codeCoverageIgnoreEnd |
29 | |
30 | /** |
31 | * Maintenance script that resets page_random over a time range. |
32 | * |
33 | * @ingroup Maintenance |
34 | */ |
class ResetPageRandom extends Maintenance {
	/**
	 * Declare the script description, its command-line options and the
	 * default batch size.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Reset the page_random for articles within given date range' );
		$this->addOption( 'from',
			'From date range selector to select articles to update, ex: 20041011000000', true, true );
		$this->addOption( 'to',
			'To date range selector to select articles to update, ex: 20050708000000', true, true );
		$this->addOption( 'dry', 'Do not update column' );
		// execute() reads this option as a page ID offset, so it has to be declared as
		// optional (required = false) but taking an argument (withArg = true).
		$this->addOption( 'batch-start',
			'Optional: Use when you need to restart the reset process from a given page ID offset'
			. ' in case a previous reset failed or was stopped',
			false, true
		);
		// Initialize batch size to a good default value and enable the batch size option.
		$this->setBatchSize( 200 );
	}

	/**
	 * Assign a fresh page_random to every page whose oldest revision timestamp lies
	 * between --from and --to (both bounds included), walking the page table in
	 * batches ordered by page_id.
	 *
	 * With --dry no row is written; matching pages are only counted.
	 *
	 * @return bool False when --from or --to is converted to null by
	 *  wfTimestampOrNull() or when --from is not smaller than --to; true once
	 *  all batches have been processed.
	 */
	public function execute() {
		$batchSize = $this->getBatchSize();
		$dbw = $this->getPrimaryDB();
		$dbr = $this->getReplicaDB();
		$from = wfTimestampOrNull( TS_MW, $this->getOption( 'from' ) );
		$to = wfTimestampOrNull( TS_MW, $this->getOption( 'to' ) );

		if ( $from === null || $to === null ) {
			$this->output( "--from and --to have to be provided" . PHP_EOL );
			return false;
		}
		if ( $from >= $to ) {
			$this->output( "--from has to be smaller than --to" . PHP_EOL );
			return false;
		}
		$batchStart = (int)$this->getOption( 'batch-start', 0 );
		$changed = 0;
		$dry = (bool)$this->getOption( 'dry' );

		$message = "Resetting page_random column within date range from $from to $to";
		if ( $batchStart > 0 ) {
			$message .= " starting from page ID $batchStart";
		}
		$message .= $dry ? ". dry run" : '.';

		$this->output( $message . PHP_EOL );
		do {
			$this->output( " ...doing chunk of $batchSize from $batchStart " . PHP_EOL );

			// Find the oldest page revision associated with each page_id. Iff it falls in the given
			// time range AND it's greater than $batchStart, yield the page ID. If it falls outside the
			// time range, it was created before or after the occurrence of T208909 and its page_random
			// is considered valid. The replica is used for this read since page_id and the rev_timestamp
			// will not change between queries.
			$queryBuilder = $dbr->newSelectQueryBuilder()
				->select( 'page_id' )
				->from( 'page' )
				->where( $dbr->expr( 'page_id', '>', $batchStart ) )
				->limit( $batchSize )
				->orderBy( 'page_id' );
			$subquery = $queryBuilder->newSubquery()
				->select( 'MIN(rev_timestamp)' )
				->from( 'revision' )
				->where( 'rev_page=page_id' );
			$queryBuilder->andWhere(
				'(' . $subquery->getSQL() . ') BETWEEN ' .
				$dbr->addQuotes( $dbr->timestamp( $from ) ) . ' AND ' . $dbr->addQuotes( $dbr->timestamp( $to ) )
			);

			$res = $queryBuilder->caller( __METHOD__ )->fetchResultSet();
			foreach ( $res as $row ) {
				if ( $dry ) {
					// Dry run: count the page that would have been updated.
					$changed++;
				} else {
					# Update the row...
					$dbw->newUpdateQueryBuilder()
						->update( 'page' )
						->set( [ 'page_random' => wfRandom() ] )
						->where( [ 'page_id' => $row->page_id ] )
						->caller( __METHOD__ )
						->execute();
					$changed += $dbw->affectedRows();
				}
				// Rows arrive ordered by page_id, so after the loop this holds the last page ID
				// of the batch, which is the offset for the next one. An empty result leaves
				// the offset untouched and the loop condition below ends the run.
				$batchStart = $row->page_id;
			}

			$this->waitForReplication();
			// A batch shorter than the limit means there is nothing left to fetch.
		} while ( $res->numRows() === $batchSize );
		$this->output( "page_random reset complete ... changed $changed rows" . PHP_EOL );

		return true;
	}
}
133 | |
134 | // @codeCoverageIgnoreStart |
// Store this script's class name in $maintClass, then include (once) the file named by the
// RUN_MAINTENANCE_IF_MAIN constant; presumably that file runs the class when this script is
// the entry point - confirm against the Maintenance framework, which is not visible here.
135 | $maintClass = ResetPageRandom::class; |
136 | require_once RUN_MAINTENANCE_IF_MAIN; |
137 | // @codeCoverageIgnoreEnd |