Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 157 |
|
0.00% |
0 / 9 |
CRAP | |
0.00% |
0 / 1 |
| CleanupImages | |
0.00% |
0 / 157 |
|
0.00% |
0 / 9 |
1122 | |
0.00% |
0 / 1 |
| __construct | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
6 | |||
| processRow | |
0.00% |
0 / 30 |
|
0.00% |
0 / 1 |
42 | |||
| killRow | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
20 | |||
| filePath | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
| imageExists | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
6 | |||
| pageExists | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
2 | |||
| pokeFile | |
0.00% |
0 / 57 |
|
0.00% |
0 / 1 |
156 | |||
| appendTitle | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
| buildSafeTitle | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
12 | |||
| 1 | <?php |
| 2 | /** |
| 3 | * Clean up broken, unparseable upload filenames. |
| 4 | * |
| 5 | * Copyright © 2005-2006 Brooke Vibber <bvibber@wikimedia.org> |
| 6 | * https://www.mediawiki.org/ |
| 7 | * |
| 8 | * @license GPL-2.0-or-later |
| 9 | * @file |
| 10 | * @author Brooke Vibber <bvibber@wikimedia.org> |
| 11 | * @ingroup Maintenance |
| 12 | */ |
| 13 | |
| 14 | use MediaWiki\FileRepo\LocalRepo; |
| 15 | use MediaWiki\MainConfigNames; |
| 16 | use MediaWiki\Parser\Sanitizer; |
| 17 | use MediaWiki\Title\Title; |
| 18 | use Wikimedia\Rdbms\IReadableDatabase; |
| 19 | |
| 20 | // @codeCoverageIgnoreStart |
| 21 | require_once __DIR__ . '/TableCleanup.php'; |
| 22 | // @codeCoverageIgnoreEnd |
| 23 | |
| 24 | /** |
| 25 | * Maintenance script to clean up broken, unparseable upload filenames. |
| 26 | * |
| 27 | * @ingroup Maintenance |
| 28 | */ |
/**
 * Maintenance script to clean up broken, unparseable upload filenames.
 *
 * Each row of the image table (or the file table, depending on the schema
 * migration stage) is passed to processRow(). The stored name is decoded and
 * normalized; when the result differs from what is stored, or cannot form a
 * valid File: title, the database rows and the file on disk are renamed.
 *
 * @ingroup Maintenance
 */
class CleanupImages extends TableCleanup {
	/** @inheritDoc */
	protected $defaultParams;

	/** @var LocalRepo|null Local file repository, fetched lazily by filePath() */
	private $repo;

	/** @var int file table schema migration stage */
	private $migrationStage;

	/**
	 * Read the file schema migration stage and choose which table to walk:
	 * 'image' when the old schema is readable, 'file' otherwise.
	 */
	public function __construct() {
		parent::__construct();

		$this->migrationStage = $this->getServiceContainer()->getMainConfig()->get(
			MainConfigNames::FileSchemaMigrationStage
		);

		if ( $this->migrationStage & SCHEMA_COMPAT_READ_OLD ) {
			$this->defaultParams = [
				'table' => 'image',
				'conds' => [],
				'index' => 'img_name',
				'callback' => 'processRow',
			];
		} else {
			$this->defaultParams = [
				'table' => 'file',
				'conds' => [],
				'index' => 'file_name',
				'callback' => 'processRow',
			];
		}

		$this->addDescription( 'Script to clean up broken, unparseable upload filenames' );
	}

	/**
	 * Inspect one table row and repair its name if needed.
	 *
	 * Empty names are deleted via killRow(). Otherwise the name is URL-decoded,
	 * entity-decoded, passed through the content language's encoding check and
	 * normalization, and turned into a File: title. If no valid title results,
	 * a safe replacement name is built; if the title's DB key differs from the
	 * stored name, that DB key is used. In both cases pokeFile() performs the
	 * rename. progress() is called with 1 when the row was treated as broken
	 * and with 0 when it was left alone.
	 *
	 * @param \stdClass $row Row carrying img_name or file_name, depending on
	 *  the schema migration stage
	 */
	protected function processRow( \stdClass $row ) {
		if ( $this->migrationStage & SCHEMA_COMPAT_READ_OLD ) {
			$source = $row->img_name;
		} else {
			$source = $row->file_name;
		}
		if ( $source == '' ) {
			// Ye olde empty rows. Just kill them.
			$this->killRow( $source );

			$this->progress( 1 );
			return;
		}

		$cleaned = $source;

		// About half of old bad image names have percent-codes
		$cleaned = rawurldecode( $cleaned );

		// We also have some HTML entities there
		$cleaned = Sanitizer::decodeCharReferences( $cleaned );

		$contLang = $this->getServiceContainer()->getContentLanguage();

		// Some are old latin-1
		$cleaned = $contLang->checkTitleEncoding( $cleaned );

		// Many of remainder look like non-normalized unicode
		$cleaned = $contLang->normalize( $cleaned );

		$title = Title::makeTitleSafe( NS_FILE, $cleaned );

		if ( $title === null ) {
			$this->output( "page $source ($cleaned) is illegal.\n" );
			$safe = $this->buildSafeTitle( $cleaned );
			if ( $safe === false ) {
				// No usable replacement name; leave the row untouched.
				$this->progress( 0 );
				return;
			}
			$this->pokeFile( $source, $safe );

			$this->progress( 1 );
			return;
		}

		if ( $title->getDBkey() !== $source ) {
			$munged = $title->getDBkey();
			$this->output( "page $source ($munged) doesn't match self.\n" );
			$this->pokeFile( $source, $munged );

			$this->progress( 1 );
			return;
		}

		// Name is already in canonical form.
		$this->progress( 0 );
	}

	/**
	 * Delete the row(s) with the given name from every table the migration
	 * stage says is written to. In dry-run mode only a message is printed.
	 *
	 * @param string $name
	 */
	private function killRow( $name ) {
		if ( $this->dryrun ) {
			$this->output( "DRY RUN: would delete bogus row '$name'\n" );
		} else {
			$this->output( "deleting bogus row '$name'\n" );
			$db = $this->getPrimaryDB();
			if ( $this->migrationStage & SCHEMA_COMPAT_WRITE_OLD ) {
				$db->newDeleteQueryBuilder()
					->deleteFrom( 'image' )
					->where( [ 'img_name' => $name ] )
					->caller( __METHOD__ )
					->execute();
			}
			if ( $this->migrationStage & SCHEMA_COMPAT_WRITE_NEW ) {
				$db->newDeleteQueryBuilder()
					->deleteFrom( 'file' )
					->where( [ 'file_name' => $name ] )
					->caller( __METHOD__ )
					->execute();
			}
		}
	}

	/**
	 * Build the on-disk path for a file name: the local repo's root directory,
	 * the repo's hash path for the name, then the name itself.
	 *
	 * @param string $name
	 * @return string
	 */
	private function filePath( $name ) {
		// Fetch the local repo once and reuse it for later calls.
		$this->repo ??= $this->getServiceContainer()->getRepoGroup()->getLocalRepo();

		return $this->repo->getRootDirectory() . '/' . $this->repo->getHashPath( $name ) . $name;
	}

	/**
	 * Check whether a row with this name exists in the image table (old schema
	 * readable) or the file table (otherwise).
	 *
	 * @param string $name
	 * @param IReadableDatabase $db
	 * @return bool
	 */
	private function imageExists( string $name, IReadableDatabase $db ): bool {
		if ( $this->migrationStage & SCHEMA_COMPAT_READ_OLD ) {
			return (bool)$db->newSelectQueryBuilder()
				->select( '1' )
				->from( 'image' )
				->where( [ 'img_name' => $name ] )
				->caller( __METHOD__ )
				->fetchField();
		}
		return (bool)$db->newSelectQueryBuilder()
			->select( '1' )
			->from( 'file' )
			->where( [ 'file_name' => $name ] )
			->caller( __METHOD__ )
			->fetchField();
	}

	/**
	 * Check whether a page with this title exists in the File: namespace.
	 *
	 * @param string $name
	 * @param IReadableDatabase $db
	 * @return bool
	 */
	private function pageExists( string $name, IReadableDatabase $db ): bool {
		return (bool)$db->newSelectQueryBuilder()
			->select( '1' )
			->from( 'page' )
			->where( [
				'page_namespace' => NS_FILE,
				'page_title' => $name,
			] )
			->caller( __METHOD__ )
			->fetchField();
	}

	/**
	 * Rename a file from $orig to $new, in the database and on disk.
	 *
	 * If the file for $orig is missing on disk, its row is deleted instead.
	 * If $new collides with an existing name, "_1", "_2", ... is inserted
	 * before the extension until a free name is found. In dry-run mode only
	 * the intended rename is printed. Otherwise the table updates run inside
	 * a transaction round that is committed only when the file rename
	 * succeeds, and rolled back when the target directory cannot be created
	 * or the rename fails.
	 *
	 * @param string $orig Name currently stored in the database
	 * @param string $new Desired replacement name
	 */
	private function pokeFile( string $orig, string $new ) {
		$path = $this->filePath( $orig );
		if ( !file_exists( $path ) ) {
			$this->output( "missing file: $path\n" );
			$this->killRow( $orig );

			return;
		}

		$db = $this->getPrimaryDB();

		/*
		 * To prevent key collisions in the update() statements below,
		 * if the target title exists in the image table, or if both the
		 * original and target titles exist in the page table, append
		 * increasing version numbers until the target title exists in
		 * neither. (See also T18916.)
		 */
		$version = 0;
		$final = $new;
		$conflict = ( $this->imageExists( $final, $db ) ||
			( $this->pageExists( $orig, $db ) && $this->pageExists( $final, $db ) ) );

		while ( $conflict ) {
			$this->output( "Rename conflicts with '$final'...\n" );
			$version++;
			// Always derived from $new, so the suffixes do not pile up.
			$final = $this->appendTitle( $new, "_$version" );
			$conflict = ( $this->imageExists( $final, $db ) || $this->pageExists( $final, $db ) );
		}

		$finalPath = $this->filePath( $final );

		if ( $this->dryrun ) {
			$this->output( "DRY RUN: would rename $path to $finalPath\n" );
		} else {
			$this->output( "renaming $path to $finalPath\n" );
			// @todo FIXME: Should this use File::move()?
			$this->beginTransactionRound( __METHOD__ );
			if ( $this->migrationStage & SCHEMA_COMPAT_WRITE_OLD ) {
				$db->newUpdateQueryBuilder()
					->update( 'image' )
					->set( [ 'img_name' => $final ] )
					->where( [ 'img_name' => $orig ] )
					->caller( __METHOD__ )
					->execute();
				$db->newUpdateQueryBuilder()
					->update( 'oldimage' )
					->set( [ 'oi_name' => $final ] )
					->where( [ 'oi_name' => $orig ] )
					->caller( __METHOD__ )
					->execute();
			}
			if ( $this->migrationStage & SCHEMA_COMPAT_WRITE_NEW ) {
				$db->newUpdateQueryBuilder()
					->update( 'file' )
					->set( [ 'file_name' => $final ] )
					->where( [ 'file_name' => $orig ] )
					->caller( __METHOD__ )
					->execute();
			}
			$update = $db->newUpdateQueryBuilder()
				->update( 'page' )
				->set( [ 'page_title' => $final ] )
				->where( [ 'page_title' => $orig, 'page_namespace' => NS_FILE ] )
				->caller( __METHOD__ );
			$update->execute();
			// The same page update is also handed to the link write duplicator.
			$this->getServiceContainer()->getLinkWriteDuplicator()->duplicate( $update );
			$dir = dirname( $finalPath );
			if ( !file_exists( $dir ) ) {
				if ( !wfMkdirParents( $dir, null, __METHOD__ ) ) {
					// Reported through error(), like the rename failure below.
					$this->error( "RENAME FAILED, COULD NOT CREATE $dir" );
					$this->rollbackTransactionRound( __METHOD__ );

					return;
				}
			}
			if ( rename( $path, $finalPath ) ) {
				$this->commitTransactionRound( __METHOD__ );
			} else {
				$this->error( "RENAME FAILED" );
				$this->rollbackTransactionRound( __METHOD__ );
			}
		}
	}

	/**
	 * Insert $suffix before the extension of $name, i.e. before its last dot.
	 *
	 * When $name contains no dot, the suffix is appended to the end. The
	 * result therefore always differs from $name for a non-empty suffix,
	 * which the conflict loop in pokeFile() relies on to terminate. Plain
	 * string functions are used so that no character of $suffix is
	 * interpreted as a regex backreference.
	 *
	 * @param string $name
	 * @param string $suffix
	 * @return string
	 */
	private function appendTitle( string $name, string $suffix ): string {
		$dot = strrpos( $name, '.' );
		if ( $dot === false ) {
			return $name . $suffix;
		}

		return substr( $name, 0, $dot ) . $suffix . substr( $name, $dot );
	}

	/**
	 * Build a replacement name by passing every character outside
	 * Title::legalChars(), and every '~', through hexChar().
	 *
	 * hexChar() is not defined in this class; presumably it is inherited from
	 * TableCleanup -- confirm there. The result is accepted only if it forms a
	 * valid File: title whose DB key equals the result exactly.
	 *
	 * @param string $name
	 * @return string|false The safe name, or false if none could be generated
	 */
	private function buildSafeTitle( string $name ) {
		$x = preg_replace_callback(
			'/([^' . Title::legalChars() . ']|~)/',
			$this->hexChar( ... ),
			$name );

		$test = Title::makeTitleSafe( NS_FILE, $x );
		if ( $test === null || $test->getDBkey() !== $x ) {
			$this->error( "Unable to generate safe title from '$name', got '$x'" );

			return false;
		}

		return $x;
	}
}
| 296 | |
// @codeCoverageIgnoreStart
// Name the maintenance class defined in this file, then include the file
// named by RUN_MAINTENANCE_IF_MAIN (presumably it runs the script when this
// file is the entry point -- confirm in the maintenance framework).
$maintClass = CleanupImages::class;
require_once RUN_MAINTENANCE_IF_MAIN;
// @codeCoverageIgnoreEnd