MediaWiki  master
cleanupImages.php
Go to the documentation of this file.
1 <?php
29 
30 require_once __DIR__ . '/TableCleanup.php';
31 
/**
 * Maintenance script to clean up broken, unparseable upload filenames.
 *
 * Each row of the `image` table is checked: its name is decoded and
 * normalized, and when the result is not a valid File-namespace title, or does
 * not round-trip to the stored name, the file on disk and the matching
 * `image`, `oldimage` and `page` rows are renamed to a usable title.
 */
class CleanupImages extends TableCleanup {
	/**
	 * Parameters for the table walk: table to scan, extra conditions, index
	 * column, and the name of the per-row callback method.
	 * NOTE(review): consumed by the parent TableCleanup, which is not visible
	 * in this file.
	 *
	 * @var array
	 */
	protected $defaultParams = [
		'table' => 'image',
		'conds' => [],
		'index' => 'img_name',
		'callback' => 'processRow',
	];

	/**
	 * Local file repo, fetched lazily on the first filePath() call.
	 *
	 * @var mixed|null Whatever RepoGroup::getLocalRepo() returns; null until first use
	 */
	private $repo;

	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Script to clean up broken, unparseable upload filenames' );
	}

	/**
	 * Examine one `image` row and repair its name if necessary.
	 *
	 * This is the method named by the 'callback' entry of $defaultParams.
	 *
	 * @param stdClass $row Row object; only the img_name field is read
	 * @return mixed Result of progress(), which is called with 1 when the row
	 *  was acted upon (delete or rename attempted) and 0 otherwise
	 */
	protected function processRow( $row ) {
		$source = $row->img_name;
		if ( (string)$source === '' ) {
			// Ye olde empty rows. Just kill them.
			$this->killRow( $source );

			return $this->progress( 1 );
		}

		// About half of old bad image names have percent-codes
		$cleaned = rawurldecode( $source );

		// We also have some HTML entities there
		$cleaned = Sanitizer::decodeCharReferences( $cleaned );

		$contLang = MediaWikiServices::getInstance()->getContentLanguage();

		// Some are old latin-1
		$cleaned = $contLang->checkTitleEncoding( $cleaned );

		// Many of remainder look like non-normalized unicode
		$cleaned = $contLang->normalize( $cleaned );

		$title = Title::makeTitleSafe( NS_FILE, $cleaned );

		if ( $title === null ) {
			// Even the decoded name is not a valid title: try to build an
			// escaped replacement name instead.
			$this->output( "page $source ($cleaned) is illegal.\n" );
			$safe = $this->buildSafeTitle( $cleaned );
			if ( $safe === false ) {
				return $this->progress( 0 );
			}
			$this->pokeFile( $source, $safe );

			return $this->progress( 1 );
		}

		$munged = $title->getDBkey();
		if ( $munged !== $source ) {
			// The name is valid but is not stored in its canonical DB-key form.
			$this->output( "page $source ($munged) doesn't match self.\n" );
			$this->pokeFile( $source, $munged );

			return $this->progress( 1 );
		}

		return $this->progress( 0 );
	}

	/**
	 * Delete the `image` row with the given name, or only report what would
	 * be deleted when $this->dryrun is set.
	 *
	 * @param string $name Value of img_name to delete
	 */
	private function killRow( $name ) {
		if ( $this->dryrun ) {
			$this->output( "DRY RUN: would delete bogus row '$name'\n" );

			return;
		}

		$this->output( "deleting bogus row '$name'\n" );
		$db = $this->getDB( DB_PRIMARY );
		$db->delete(
			'image',
			[ 'img_name' => $name ],
			__METHOD__
		);
	}

	/**
	 * Build the path of a file inside the local repo: root directory, hash
	 * path for the name, then the name itself.
	 *
	 * @param string $name File name (DB key)
	 * @return string
	 */
	private function filePath( $name ) {
		if ( $this->repo === null ) {
			$this->repo = MediaWikiServices::getInstance()->getRepoGroup()->getLocalRepo();
		}

		return $this->repo->getRootDirectory() . '/' . $this->repo->getHashPath( $name ) . $name;
	}

	/**
	 * Check whether an `image` row with this name exists.
	 *
	 * @param string $name Value to look up in img_name
	 * @param mixed $db Database handle offering newSelectQueryBuilder()
	 * @return bool
	 */
	private function imageExists( $name, $db ) {
		return (bool)$db->newSelectQueryBuilder()
			->select( '1' )
			->from( 'image' )
			->where( [ 'img_name' => $name ] )
			->caller( __METHOD__ )
			->fetchField();
	}

	/**
	 * Check whether a `page` row with this title exists in the File namespace.
	 *
	 * @param string $name Value to look up in page_title
	 * @param mixed $db Database handle offering newSelectQueryBuilder()
	 * @return bool
	 */
	private function pageExists( $name, $db ) {
		return (bool)$db->newSelectQueryBuilder()
			->select( '1' )
			->from( 'page' )
			->where( [
				'page_namespace' => NS_FILE,
				'page_title' => $name,
			] )
			->caller( __METHOD__ )
			->fetchField();
	}

	/**
	 * Rename a file on disk together with its `image`, `oldimage` and `page`
	 * rows, picking a non-conflicting target name first.
	 *
	 * If the original file does not exist on disk, the `image` row is removed
	 * via killRow() instead. With $this->dryrun set, the rename is only
	 * reported.
	 *
	 * @param string $orig Current (broken) name
	 * @param string $new Desired replacement name
	 */
	private function pokeFile( $orig, $new ) {
		$path = $this->filePath( $orig );
		if ( !file_exists( $path ) ) {
			$this->output( "missing file: $path\n" );
			$this->killRow( $orig );

			return;
		}

		$db = $this->getDB( DB_PRIMARY );

		/*
		 * To prevent key collisions in the update() statements below,
		 * if the target title exists in the image table, or if both the
		 * original and target titles exist in the page table, append
		 * increasing version numbers until the target title exists in
		 * neither. (See also T18916.)
		 */
		$version = 0;
		$final = $new;
		$conflict = ( $this->imageExists( $final, $db ) ||
			( $this->pageExists( $orig, $db ) && $this->pageExists( $final, $db ) ) );

		while ( $conflict ) {
			$this->output( "Rename conflicts with '$final'...\n" );
			$version++;
			// appendTitle() always yields a name different from $new, so every
			// iteration tests a fresh candidate.
			$final = $this->appendTitle( $new, "_$version" );
			$conflict = ( $this->imageExists( $final, $db ) || $this->pageExists( $final, $db ) );
		}

		$finalPath = $this->filePath( $final );

		if ( $this->dryrun ) {
			$this->output( "DRY RUN: would rename $path to $finalPath\n" );

			return;
		}

		$this->output( "renaming $path to $finalPath\n" );
		// @todo FIXME: Should this use File::move()?
		// The row updates are made inside a transaction so that they can be
		// rolled back if the directory creation or the file rename fails.
		$this->beginTransaction( $db, __METHOD__ );
		$db->update(
			'image',
			[ 'img_name' => $final ],
			[ 'img_name' => $orig ],
			__METHOD__
		);
		$db->update(
			'oldimage',
			[ 'oi_name' => $final ],
			[ 'oi_name' => $orig ],
			__METHOD__
		);
		$db->update(
			'page',
			[ 'page_title' => $final ],
			[ 'page_title' => $orig, 'page_namespace' => NS_FILE ],
			__METHOD__
		);

		$dir = dirname( $finalPath );
		if ( !file_exists( $dir ) && !wfMkdirParents( $dir, null, __METHOD__ ) ) {
			// Reported through error(), like the rename failure below.
			$this->error( "RENAME FAILED, COULD NOT CREATE $dir" );
			$this->rollbackTransaction( $db, __METHOD__ );

			return;
		}

		if ( rename( $path, $finalPath ) ) {
			$this->commitTransaction( $db, __METHOD__ );
		} else {
			$this->error( "RENAME FAILED" );
			$this->rollbackTransaction( $db, __METHOD__ );
		}
	}

	/**
	 * Insert a suffix before the file extension, i.e. before the last dot in
	 * the name. A name without any dot gets the suffix appended at the end,
	 * so the result always differs from the input for a non-empty suffix.
	 *
	 * @param string $name File name
	 * @param string $suffix Text to insert, e.g. "_1"
	 * @return string
	 */
	private function appendTitle( $name, $suffix ) {
		$name = (string)$name;
		$dot = strrpos( $name, '.' );
		if ( $dot === false ) {
			// No extension to preserve.
			return $name . $suffix;
		}

		return substr( $name, 0, $dot ) . $suffix . substr( $name, $dot );
	}

	/**
	 * Try to derive a legal File-namespace title from an illegal name by
	 * passing every character outside Title::legalChars(), and every "~",
	 * through the hexChar() callback.
	 * NOTE(review): hexChar() is not defined in this class; presumably it is
	 * inherited from TableCleanup.
	 *
	 * @param string $name Illegal name
	 * @return string|false The replacement name, or false when the result is
	 *  still not a valid title or does not round-trip to itself
	 */
	private function buildSafeTitle( $name ) {
		$x = preg_replace_callback(
			'/([^' . Title::legalChars() . ']|~)/',
			[ $this, 'hexChar' ],
			$name
		);

		$test = Title::makeTitleSafe( NS_FILE, $x );
		if ( $test === null || $test->getDBkey() !== $x ) {
			$this->error( "Unable to generate safe title from '$name', got '$x'" );

			return false;
		}

		return $x;
	}
}
239 
// Name the class for this script, then include the file referenced by
// RUN_MAINTENANCE_IF_MAIN (presumably the runner that instantiates $maintClass
// when this file is the entry point — confirm against Maintenance.php).
240 $maintClass = CleanupImages::class;
241 require_once RUN_MAINTENANCE_IF_MAIN;
const NS_FILE
Definition: Defines.php:70
wfMkdirParents( $dir, $mode=null, $caller=null)
Make directory, and make all parent directories if they don't exist.
Maintenance script to clean up broken, unparseable upload filenames.
__construct()
Default constructor.
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
error( $err, $die=0)
Throw an error to the user.
beginTransaction(IDatabase $dbw, $fname)
Begin a transaction on a DB.
commitTransaction(IDatabase $dbw, $fname)
Commit the transaction on a DB handle and wait for replica DBs to catch up.
output( $out, $channel=null)
Throw some output to the user.
addDescription( $text)
Set the description text.
rollbackTransaction(IDatabase $dbw, $fname)
Rollback the transaction on a DB handle.
Service locator for MediaWiki core services.
static decodeCharReferences( $text)
Decode any character references, numeric or named entities, in the text and return a UTF-8 string.
Definition: Sanitizer.php:1362
Generic class to cleanup a database table.
progress( $updated)
static legalChars()
Get a regex character class describing the legal characters in a link.
Definition: Title.php:734
static makeTitleSafe( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:664
$maintClass
$source
const DB_PRIMARY
Definition: defines.php:28