MediaWiki  master
cleanupImages.php
Go to the documentation of this file.
1 <?php
29 
30 require_once __DIR__ . '/cleanupTable.inc';
31 
/**
 * Maintenance script to clean up broken, unparseable upload filenames.
 *
 * Each row of the `image` table is passed to processRow(). The stored name is
 * decoded and normalised; when the result differs from what is stored, the
 * file on disk and the matching `image`, `oldimage` and `page` rows are
 * renamed together. Rows with an empty name, or whose file is missing on
 * disk, are deleted instead.
 */
class CleanupImages extends TableCleanup {
	/**
	 * @var array Settings for the table walk: which table to read, extra
	 *  conditions, the column named as the index, and the name of the
	 *  per-row callback method.
	 */
	protected $defaultParams = [
		'table' => 'image',
		'conds' => [],
		'index' => 'img_name',
		'callback' => 'processRow',
	];

	/**
	 * @var LocalRepo|null Local file repository, fetched on first use by filePath()
	 */
	private $repo;

	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Script to clean up broken, unparseable upload filenames' );
	}

	/**
	 * Inspect one `image` row and repair its name if it is not canonical.
	 *
	 * @param stdClass $row Row object; only $row->img_name is read
	 * @return mixed Whatever $this->progress() returns. progress() is given 1
	 *  when the row was handed to killRow()/pokeFile() and 0 when the row was
	 *  left alone (already canonical, or no safe title could be built).
	 */
	protected function processRow( $row ) {
		$source = $row->img_name;
		if ( $source == '' ) {
			// Ye olde empty rows. Just kill them.
			$this->killRow( $source );

			return $this->progress( 1 );
		}

		// About half of old bad image names have percent-codes
		$cleaned = rawurldecode( $source );

		// We also have some HTML entities there
		$cleaned = Sanitizer::decodeCharReferences( $cleaned );

		$contLang = MediaWikiServices::getInstance()->getContentLanguage();

		// Some are old latin-1
		$cleaned = $contLang->checkTitleEncoding( $cleaned );

		// Many of remainder look like non-normalized unicode
		$cleaned = $contLang->normalize( $cleaned );

		$title = Title::makeTitleSafe( NS_FILE, $cleaned );

		if ( $title === null ) {
			// Even the decoded name is not a legal title: fall back to a
			// version with the offending characters escaped.
			$this->output( "page $source ($cleaned) is illegal.\n" );
			$safe = $this->buildSafeTitle( $cleaned );
			if ( $safe === false ) {
				return $this->progress( 0 );
			}
			$this->pokeFile( $source, $safe );

			return $this->progress( 1 );
		}

		$munged = $title->getDBkey();
		if ( $munged !== $source ) {
			// The name is legal but is not stored in its canonical DB-key form.
			$this->output( "page $source ($munged) doesn't match self.\n" );
			$this->pokeFile( $source, $munged );

			return $this->progress( 1 );
		}

		return $this->progress( 0 );
	}

	/**
	 * Delete the `image` row with the given name, or only report it when
	 * $this->dryrun is set.
	 *
	 * @param string $name Value of img_name to delete
	 */
	private function killRow( $name ) {
		if ( $this->dryrun ) {
			$this->output( "DRY RUN: would delete bogus row '$name'\n" );

			return;
		}

		$this->output( "deleting bogus row '$name'\n" );
		$db = $this->getDB( DB_MASTER );
		$db->delete( 'image',
			[ 'img_name' => $name ],
			__METHOD__ );
	}

	/**
	 * Build the on-disk path for a file name: the local repo's root
	 * directory, then the repo's hash path for the name, then the name.
	 *
	 * @param string $name File name
	 * @return string
	 */
	private function filePath( $name ) {
		if ( $this->repo === null ) {
			$this->repo = RepoGroup::singleton()->getLocalRepo();
		}

		return $this->repo->getRootDirectory() . '/' . $this->repo->getHashPath( $name ) . $name;
	}

	/**
	 * Look up whether an `image` row with this name exists.
	 *
	 * @param string $name
	 * @param object $db Database handle as returned by getDB()
	 * @return mixed Raw result of selectField(); callers use it as a boolean
	 */
	private function imageExists( $name, $db ) {
		return $db->selectField( 'image', '1', [ 'img_name' => $name ], __METHOD__ );
	}

	/**
	 * Look up whether a `page` row with this title exists in NS_FILE.
	 *
	 * @param string $name
	 * @param object $db Database handle as returned by getDB()
	 * @return mixed Raw result of selectField(); callers use it as a boolean
	 */
	private function pageExists( $name, $db ) {
		return $db->selectField(
			'page',
			'1',
			[ 'page_namespace' => NS_FILE, 'page_title' => $name ],
			__METHOD__
		);
	}

	/**
	 * Rename a file from $orig to $new, on disk and in the database.
	 *
	 * If the file for $orig is missing on disk, its row is removed through
	 * killRow() instead. If $new collides with existing rows, "_1", "_2", ...
	 * is appended until a free name is found. In dry-run mode the planned
	 * rename is only reported.
	 *
	 * @param string $orig Name currently stored in the database
	 * @param string $new Desired replacement name
	 */
	private function pokeFile( $orig, $new ) {
		$path = $this->filePath( $orig );
		if ( !file_exists( $path ) ) {
			$this->output( "missing file: $path\n" );
			$this->killRow( $orig );

			return;
		}

		$db = $this->getDB( DB_MASTER );

		/*
		 * To prevent key collisions in the update() statements below,
		 * if the target title exists in the image table, or if both the
		 * original and target titles exist in the page table, append
		 * increasing version numbers until the target title exists in
		 * neither. (See also T18916.)
		 */
		$version = 0;
		$final = $new;
		$conflict = ( $this->imageExists( $final, $db ) ||
			( $this->pageExists( $orig, $db ) && $this->pageExists( $final, $db ) ) );

		while ( $conflict ) {
			$this->output( "Rename conflicts with '$final'...\n" );
			$version++;
			// appendTitle() always yields a name that differs from $new, so
			// every iteration tests a fresh candidate.
			$final = $this->appendTitle( $new, "_$version" );
			$conflict = ( $this->imageExists( $final, $db ) || $this->pageExists( $final, $db ) );
		}

		$finalPath = $this->filePath( $final );

		if ( $this->dryrun ) {
			$this->output( "DRY RUN: would rename $path to $finalPath\n" );

			return;
		}

		$this->output( "renaming $path to $finalPath\n" );
		// @todo FIXME: Should this use File::move()?
		// The row updates are made inside a transaction so that they can be
		// rolled back if the filesystem side of the rename fails.
		$this->beginTransaction( $db, __METHOD__ );
		$db->update( 'image',
			[ 'img_name' => $final ],
			[ 'img_name' => $orig ],
			__METHOD__ );
		$db->update( 'oldimage',
			[ 'oi_name' => $final ],
			[ 'oi_name' => $orig ],
			__METHOD__ );
		$db->update( 'page',
			[ 'page_title' => $final ],
			[ 'page_title' => $orig, 'page_namespace' => NS_FILE ],
			__METHOD__ );

		$dir = dirname( $finalPath );
		if ( !file_exists( $dir ) && !wfMkdirParents( $dir, null, __METHOD__ ) ) {
			// Reported through error(), like the rename failure below.
			$this->error( "RENAME FAILED, COULD NOT CREATE $dir" );
			$this->rollbackTransaction( $db, __METHOD__ );

			return;
		}

		if ( rename( $path, $finalPath ) ) {
			$this->commitTransaction( $db, __METHOD__ );
		} else {
			$this->error( "RENAME FAILED" );
			$this->rollbackTransaction( $db, __METHOD__ );
		}
	}

	/**
	 * Insert $suffix in front of the last ".extension" of $name.
	 *
	 * A name without any dot gets the suffix appended at the end, so the
	 * result always differs from $name when $suffix is non-empty. pokeFile()
	 * relies on that to make progress in its conflict loop.
	 *
	 * @param string $name e.g. "Foo.bar.jpg"
	 * @param string $suffix e.g. "_1"
	 * @return string e.g. "Foo.bar_1.jpg"
	 */
	private function appendTitle( $name, $suffix ) {
		$dot = strrpos( $name, '.' );
		if ( $dot === false ) {
			// No extension to preserve.
			return $name . $suffix;
		}

		return substr( $name, 0, $dot ) . $suffix . substr( $name, $dot );
	}

	/**
	 * Build a legal title from $name by passing every "~" and every
	 * character outside Title::legalChars() through the hexChar callback.
	 *
	 * NOTE(review): hexChar() is not defined in this class; presumably it is
	 * inherited from TableCleanup - confirm.
	 *
	 * @param string $name
	 * @return string|false The escaped name, or false when the result still
	 *  is not a valid NS_FILE title or does not round-trip through Title
	 *  unchanged
	 */
	private function buildSafeTitle( $name ) {
		$x = preg_replace_callback(
			'/([^' . Title::legalChars() . ']|~)/',
			[ $this, 'hexChar' ],
			$name );

		$test = Title::makeTitleSafe( NS_FILE, $x );
		if ( $test === null || $test->getDBkey() !== $x ) {
			$this->error( "Unable to generate safe title from '$name', got '$x'" );

			return false;
		}

		return $x;
	}
}
231 
// Name the maintenance class that this script file provides.
232 $maintClass = CleanupImages::class;
// RUN_MAINTENANCE_IF_MAIN is a constant holding a file path (defined in Maintenance.php).
// NOTE(review): presumably the included file runs $maintClass when this script is
// the entry point - confirm.
233 require_once RUN_MAINTENANCE_IF_MAIN;
commitTransaction(IDatabase $dbw, $fname)
Commit the transaction on a DB handle and wait for replica DBs to catch up.
const RUN_MAINTENANCE_IF_MAIN
Definition: Maintenance.php:39
error( $err, $die=0)
Throw an error to the user.
imageExists( $name, $db)
rollbackTransaction(IDatabase $dbw, $fname)
Roll back the transaction on a DB handle.
$source
const DB_MASTER
Definition: defines.php:26
$maintClass
LocalRepo null $repo
Generic class to cleanup a database table.
static singleton()
Definition: RepoGroup.php:60
addDescription( $text)
Set the description text.
pageExists( $name, $db)
Maintenance script to clean up broken, unparseable upload filenames.
pokeFile( $orig, $new)
const NS_FILE
Definition: Defines.php:66
output( $out, $channel=null)
Throw some output to the user.
static makeTitleSafe( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:612
progress( $updated)
buildSafeTitle( $name)
wfMkdirParents( $dir, $mode=null, $caller=null)
Make directory, and make all parent directories if they don't exist.
static decodeCharReferences( $text)
Decode any character references, numeric or named entities, in the text and return a UTF-8 string...
Definition: Sanitizer.php:1669
appendTitle( $name, $suffix)
static legalChars()
Get a regex character class describing the legal characters in a link.
Definition: Title.php:694
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
beginTransaction(IDatabase $dbw, $fname)
Begin a transaction on a DB.