Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 160 |
|
0.00% |
0 / 4 |
CRAP | |
0.00% |
0 / 1 |
RefreshImageMetadata | |
0.00% |
0 / 157 |
|
0.00% |
0 / 4 |
870 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 59 |
|
0.00% |
0 / 1 |
2 | |||
execute | |
0.00% |
0 / 65 |
|
0.00% |
0 / 1 |
210 | |||
setConditions | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
42 | |||
newLocalRepo | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
72 |
1 | <?php |
2 | /** |
3 | * Refresh image metadata fields. See also rebuildImages.php |
4 | * |
5 | * Usage: php refreshImageMetadata.php |
6 | * |
7 | * Copyright © 2011 Brian Wolff |
8 | * https://www.mediawiki.org/ |
9 | * |
10 | * This program is free software; you can redistribute it and/or modify |
11 | * it under the terms of the GNU General Public License as published by |
12 | * the Free Software Foundation; either version 2 of the License, or |
13 | * (at your option) any later version. |
14 | * |
15 | * This program is distributed in the hope that it will be useful, |
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
18 | * GNU General Public License for more details. |
19 | * |
20 | * You should have received a copy of the GNU General Public License along |
21 | * with this program; if not, write to the Free Software Foundation, Inc., |
22 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
23 | * http://www.gnu.org/copyleft/gpl.html |
24 | * |
25 | * @file |
26 | * @author Brian Wolff |
27 | * @ingroup Maintenance |
28 | */ |
29 | |
30 | require_once __DIR__ . '/Maintenance.php'; |
31 | |
32 | use MediaWiki\FileRepo\File\FileSelectQueryBuilder; |
33 | use Wikimedia\Rdbms\IMaintainableDatabase; |
34 | use Wikimedia\Rdbms\IReadableDatabase; |
35 | use Wikimedia\Rdbms\SelectQueryBuilder; |
36 | |
/**
 * Maintenance script to refresh image metadata fields.
 *
 * Walks the image table (or the oldimage table when --oldimage is given) in
 * batches ordered by file name, builds a file object for every row through a
 * custom local repo and lets that object upgrade its own row.
 *
 * @ingroup Maintenance
 */
class RefreshImageMetadata extends Maintenance {

	/**
	 * Not assigned anywhere in this class; execute() works with a local handle.
	 * Left in place because it is protected and subclasses might reference it.
	 *
	 * @var IMaintainableDatabase
	 */
	protected $dbw;

	/**
	 * Registers the script description, the default batch size (200) and
	 * every command line option understood by the script.
	 */
	public function __construct() {
		parent::__construct();

		$this->addDescription( 'Script to update image metadata records' );
		$this->setBatchSize( 200 );

		// Flags controlling which records get refreshed and how.
		$this->addOption(
			'force',
			'Reload metadata from file even if the metadata looks ok',
			false,
			false,
			'f'
		);
		$this->addOption(
			'broken-only',
			'Only fix really broken records, leave old but still compatible records alone.'
		);
		$this->addOption(
			'convert-to-json',
			'Fix records with an out of date serialization format.'
		);
		$this->addOption(
			'split',
			'Enable splitting out large metadata items to the text table. Implies --convert-to-json.'
		);
		$this->addOption(
			'verbose',
			'Output extra information about each upgraded/non-upgraded file.',
			false,
			false,
			'v'
		);

		// Inclusive file name range to process.
		$this->addOption( 'start', 'Name of file to start with', false, true );
		$this->addOption( 'end', 'Name of file to end with', false, true );

		// Optional row filters, turned into WHERE conditions by setConditions().
		$this->addOption(
			'mediatype',
			'Only refresh files with this media type, e.g. BITMAP, UNKNOWN etc.',
			false,
			true
		);
		$this->addOption(
			'mime',
			"Only refresh files with this MIME type. Can accept wild-card 'image/*'. "
				. "Potentially inefficient unless 'mediatype' is also specified",
			false,
			true
		);
		$this->addOption(
			'metadata-contains',
			'(Inefficient!) Only refresh files where the img_metadata field '
				. 'contains this string. Can be used if its known a specific '
				. 'property was being extracted incorrectly.',
			false,
			true
		);
		$this->addOption(
			'sleep',
			'Time to sleep between each batch (in seconds). Default: 0',
			false,
			true
		);
		$this->addOption( 'oldimage', 'Run and refresh on oldimage table.' );
	}

	/**
	 * Runs the refresh.
	 *
	 * Rows are fetched in batches of the configured batch size, ordered by
	 * name; after each batch the query continues with names greater than the
	 * last one seen. The loop stops once a batch comes back smaller than the
	 * batch size. A summary with the number of refreshed, untouched and failed
	 * files is printed at the end.
	 */
	public function execute() {
		$force = $this->hasOption( 'force' );
		$brokenOnly = $this->hasOption( 'broken-only' );
		$verbose = $this->hasOption( 'verbose' );
		$start = $this->getOption( 'start', false );
		$split = $this->hasOption( 'split' );
		$sleep = (int)$this->getOption( 'sleep', 0 );
		$reserialize = $this->hasOption( 'convert-to-json' );
		$oldimage = $this->hasOption( 'oldimage' );

		$dbw = $this->getPrimaryDB();
		if ( $oldimage ) {
			$fieldPrefix = 'oi_';
			$queryBuilderTemplate = FileSelectQueryBuilder::newForOldFile( $dbw );
		} else {
			$fieldPrefix = 'img_';
			$queryBuilderTemplate = FileSelectQueryBuilder::newForFile( $dbw );
		}
		// Name column of the selected table; used for ordering, paging and output.
		$nameField = $fieldPrefix . 'name';

		$upgraded = 0;
		$leftAlone = 0;
		$error = 0;
		$batchSize = (int)$this->getBatchSize();
		if ( $batchSize <= 0 ) {
			$this->fatalError( "Batch size is too low...", 12 );
		}
		$repo = $this->newLocalRepo( $force, $brokenOnly, $reserialize, $split );
		$this->setConditions( $dbw, $queryBuilderTemplate, $fieldPrefix );
		$queryBuilderTemplate
			->orderBy( $nameField, SelectQueryBuilder::SORT_ASC )
			->limit( $batchSize );

		// Paging condition: ">= start" for the first batch (if --start was given),
		// "> last processed name" for every following batch.
		$batchCondition = [];
		if ( $start !== false ) {
			$batchCondition[] = $dbw->expr( $nameField, '>=', $start );
		}

		do {
			// Clone so that the paging condition of one batch does not leak into the next.
			$queryBuilder = clone $queryBuilderTemplate;
			$res = $queryBuilder->andWhere( $batchCondition )
				->caller( __METHOD__ )->fetchResultSet();
			$numRows = $res->numRows();

			if ( $numRows > 0 ) {
				$firstRow = $res->current();
				$this->output( "Processing next {$numRows} row(s) starting with {$firstRow->$nameField}.\n" );
				$res->rewind();
			}

			$lastName = null;
			foreach ( $res as $row ) {
				$lastName = $row->$nameField;
				try {
					// LocalFile will upgrade immediately here if obsolete
					$file = $repo->newFileFromRow( $row );
					$file->maybeUpgradeRow();
					if ( $file->getUpgraded() ) {
						// File was upgraded.
						$upgraded++;
						$this->output( "Refreshed File:{$lastName}.\n" );
					} elseif ( $force ) {
						$file->upgradeRow();
						// Count only after the forced upgrade succeeded, so a failure
						// is not reported both as "left alone" and as an error.
						$leftAlone++;
						if ( $verbose ) {
							$this->output( "Forcibly refreshed File:{$lastName}.\n" );
						}
					} else {
						$leftAlone++;
						if ( $verbose ) {
							$this->output( "Skipping File:{$lastName}.\n" );
						}
					}
				} catch ( Exception $e ) {
					// Keep going with the remaining rows, but make the failure
					// visible in the final summary.
					$error++;
					$this->output( "{$lastName} failed. {$e->getMessage()}\n" );
				}
			}

			if ( $lastName !== null ) {
				$batchCondition = [ $dbw->expr( $nameField, '>', $lastName ) ];
			}
			$this->waitForReplication();
			// sleep() rejects negative values, so only positive delays are honoured.
			if ( $sleep > 0 ) {
				sleep( $sleep );
			}
		} while ( $numRows === $batchSize );

		$total = $upgraded + $leftAlone;
		if ( $force ) {
			$this->output( "\nFinished refreshing file metadata for $total files. "
				. "$upgraded needed to be refreshed, $leftAlone did not need to "
				. "be but were refreshed anyways, and $error failed.\n" );
		} else {
			$this->output( "\nFinished refreshing file metadata for $total files. "
				. "$upgraded were refreshed, $leftAlone were already up to date, "
				. "and $error failed.\n" );
		}
	}

	/**
	 * Adds the WHERE conditions derived from the --end, --mime, --mediatype
	 * and --metadata-contains options to the given query builder.
	 *
	 * @param IReadableDatabase $dbw Used to build the expressions
	 * @param SelectQueryBuilder $queryBuilder Builder that is modified in place
	 * @param string $fieldPrefix like img_ or oi_
	 * @return void
	 */
	private function setConditions(
		IReadableDatabase $dbw,
		SelectQueryBuilder $queryBuilder,
		string $fieldPrefix
	): void {
		$end = $this->getOption( 'end', false );
		$mime = $this->getOption( 'mime', false );
		$mediatype = $this->getOption( 'mediatype', false );
		$like = $this->getOption( 'metadata-contains', false );

		if ( $end !== false ) {
			$queryBuilder->andWhere( $dbw->expr( $fieldPrefix . 'name', '<=', $end ) );
		}
		if ( $mime !== false ) {
			[ $major, $minor ] = File::splitMime( $mime );
			$queryBuilder->andWhere( [ $fieldPrefix . 'major_mime' => $major ] );
			// A minor type of '*' is the wild-card: match on the major type only.
			if ( $minor !== '*' ) {
				$queryBuilder->andWhere( [ $fieldPrefix . 'minor_mime' => $minor ] );
			}
		}
		if ( $mediatype !== false ) {
			$queryBuilder->andWhere( [ $fieldPrefix . 'media_type' => $mediatype ] );
		}
		// Compare strictly: a plain truthiness check would drop the search
		// string "0" and refresh every file instead of the requested subset.
		if ( $like !== false && $like !== '' ) {
			$queryBuilder->andWhere(
				$fieldPrefix . 'metadata ' . $dbw->buildLike( $dbw->anyString(), $like, $dbw->anyString() )
			);
		}
	}

	/**
	 * Creates the local repo used to build the file objects, with metadata
	 * settings overridden according to the command line flags.
	 *
	 * Aborts the script (exit code 2) when --broken-only is combined with
	 * --force, --convert-to-json or --split.
	 *
	 * @param bool $force Whether --force was given
	 * @param bool $brokenOnly Whether --broken-only was given
	 * @param bool $reserialize Whether --convert-to-json was given
	 * @param bool $split Whether --split was given; implies $reserialize
	 *
	 * @return LocalRepo
	 */
	private function newLocalRepo( bool $force, bool $brokenOnly, bool $reserialize, bool $split ): LocalRepo {
		if ( $brokenOnly && $force ) {
			$this->fatalError( 'Cannot use --broken-only and --force together. ', 2 );
		}
		$reserialize = $reserialize || $split;
		if ( $brokenOnly && $reserialize ) {
			$this->fatalError( 'Cannot use --broken-only with --convert-to-json or --split. ',
				2 );
		}

		$overrides = [
			'updateCompatibleMetadata' => !$brokenOnly,
		];
		if ( $reserialize ) {
			$overrides['reserializeMetadata'] = true;
			$overrides['useJsonMetadata'] = true;
		}
		if ( $split ) {
			$overrides['useSplitMetadata'] = true;
		}

		return $this->getServiceContainer()->getRepoGroup()
			->newCustomLocalRepo( $overrides );
	}
}
275 | |
276 | $maintClass = RefreshImageMetadata::class; |
277 | require_once RUN_MAINTENANCE_IF_MAIN; |