MediaWiki  master
cleanupImages.php
Go to the documentation of this file.
1 <?php
29 
30 require_once __DIR__ . '/TableCleanup.php';
31 
/**
 * Maintenance script to clean up broken, unparseable upload filenames.
 *
 * The inherited table-cleanup machinery is configured through $defaultParams
 * to use processRow() as the callback for the `image` table. Each row whose
 * name does not survive title normalisation is either renamed (file on disk
 * plus the `image`, `oldimage` and `page` rows) or, when the row is empty or
 * its file is missing, deleted.
 */
class CleanupImages extends TableCleanup {
	/** @var array Parameters consumed by the parent class: table, conditions, index and row callback */
	protected $defaultParams = [
		'table' => 'image',
		'conds' => [],
		'index' => 'img_name',
		'callback' => 'processRow',
	];

	/** @var LocalRepo|null Local file repository, fetched lazily by filePath() */
	private $repo;

	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Script to clean up broken, unparseable upload filenames' );
	}

	/**
	 * Inspect one `image` row and repair its name when it is not a valid,
	 * normalised file title.
	 *
	 * @param object $row Row object exposing an img_name property
	 * @return mixed Result of progress(): called with 1 when a fix (delete or
	 *  rename) was attempted for the row, with 0 when nothing was done
	 */
	protected function processRow( $row ) {
		$source = $row->img_name;
		if ( $source == '' ) {
			// Ye olde empty rows. Just kill them.
			$this->killRow( $source );

			return $this->progress( 1 );
		}

		$cleaned = $source;

		// About half of old bad image names have percent-codes
		$cleaned = rawurldecode( $cleaned );

		// We also have some HTML entities there
		$cleaned = Sanitizer::decodeCharReferences( $cleaned );

		$contLang = MediaWikiServices::getInstance()->getContentLanguage();

		// Some are old latin-1
		$cleaned = $contLang->checkTitleEncoding( $cleaned );

		// Many of remainder look like non-normalized unicode
		$cleaned = $contLang->normalize( $cleaned );

		$title = Title::makeTitleSafe( NS_FILE, $cleaned );

		if ( $title === null ) {
			// Even the decoded name is not a legal title: fall back to
			// escaping the offending characters.
			$this->output( "page $source ($cleaned) is illegal.\n" );
			$safe = $this->buildSafeTitle( $cleaned );
			if ( $safe === false ) {
				return $this->progress( 0 );
			}
			$this->pokeFile( $source, $safe );

			return $this->progress( 1 );
		}

		if ( $title->getDBkey() !== $source ) {
			// Legal, but the stored name is not the canonical DB key.
			$munged = $title->getDBkey();
			$this->output( "page $source ($munged) doesn't match self.\n" );
			$this->pokeFile( $source, $munged );

			return $this->progress( 1 );
		}

		return $this->progress( 0 );
	}

	/**
	 * Delete the `image` row with the given name, or only announce the
	 * deletion when running in dry-run mode.
	 *
	 * @param string $name Value of img_name to delete
	 */
	private function killRow( $name ) {
		if ( $this->dryrun ) {
			$this->output( "DRY RUN: would delete bogus row '$name'\n" );
		} else {
			$this->output( "deleting bogus row '$name'\n" );
			$db = $this->getDB( DB_PRIMARY );
			$db->delete( 'image',
				[ 'img_name' => $name ],
				__METHOD__ );
		}
	}

	/**
	 * Build the path of a file inside the local repository: root directory,
	 * hash path for the name, then the name itself.
	 *
	 * @param string $name File name (DB key form)
	 * @return string
	 */
	private function filePath( $name ) {
		if ( $this->repo === null ) {
			$this->repo = MediaWikiServices::getInstance()->getRepoGroup()->getLocalRepo();
		}

		return $this->repo->getRootDirectory() . '/' . $this->repo->getHashPath( $name ) . $name;
	}

	/**
	 * @param string $name
	 * @param object $db Database handle
	 * @return bool Whether an `image` row with this name exists
	 */
	private function imageExists( $name, $db ) {
		return (bool)$db->selectField( 'image', '1', [ 'img_name' => $name ], __METHOD__ );
	}

	/**
	 * @param string $name
	 * @param object $db Database handle
	 * @return bool Whether a `page` row with this title exists in the File namespace
	 */
	private function pageExists( $name, $db ) {
		return (bool)$db->selectField( 'page', '1',
			[ 'page_namespace' => NS_FILE, 'page_title' => $name ],
			__METHOD__
		);
	}

	/**
	 * Rename a file from $orig to $new (or to a numbered variant of $new if
	 * that name is taken), updating the `image`, `oldimage` and `page` tables
	 * and moving the file on disk. If the file is missing on disk the row is
	 * deleted instead. In dry-run mode the rename is only reported.
	 *
	 * @param string $orig Current (broken) name
	 * @param string $new Desired replacement name
	 */
	private function pokeFile( $orig, $new ) {
		$path = $this->filePath( $orig );
		if ( !file_exists( $path ) ) {
			$this->output( "missing file: $path\n" );
			$this->killRow( $orig );

			return;
		}

		$db = $this->getDB( DB_PRIMARY );

		/*
		 * To prevent key collisions in the update() statements below,
		 * if the target title exists in the image table, or if both the
		 * original and target titles exist in the page table, append
		 * increasing version numbers until the target title exists in
		 * neither. (See also T18916.)
		 */
		$version = 0;
		$final = $new;
		$conflict = ( $this->imageExists( $final, $db ) ||
			( $this->pageExists( $orig, $db ) && $this->pageExists( $final, $db ) ) );

		while ( $conflict ) {
			$this->output( "Rename conflicts with '$final'...\n" );
			$version++;
			$final = $this->appendTitle( $new, "_$version" );
			$conflict = ( $this->imageExists( $final, $db ) || $this->pageExists( $final, $db ) );
		}

		$finalPath = $this->filePath( $final );

		if ( $this->dryrun ) {
			$this->output( "DRY RUN: would rename $path to $finalPath\n" );
		} else {
			$this->output( "renaming $path to $finalPath\n" );
			// @todo FIXME: Should this use File::move()?
			$this->beginTransaction( $db, __METHOD__ );
			$db->update( 'image',
				[ 'img_name' => $final ],
				[ 'img_name' => $orig ],
				__METHOD__ );
			$db->update( 'oldimage',
				[ 'oi_name' => $final ],
				[ 'oi_name' => $orig ],
				__METHOD__ );
			$db->update( 'page',
				[ 'page_title' => $final ],
				[ 'page_title' => $orig, 'page_namespace' => NS_FILE ],
				__METHOD__ );
			$dir = dirname( $finalPath );
			if ( !file_exists( $dir ) ) {
				if ( !wfMkdirParents( $dir, null, __METHOD__ ) ) {
					// Reported through error(), like the rename failure
					// below, so both failure paths surface the same way.
					$this->error( "RENAME FAILED, COULD NOT CREATE $dir" );
					$this->rollbackTransaction( $db, __METHOD__ );

					return;
				}
			}
			// The DB changes are committed only once the file has actually moved.
			if ( rename( $path, $finalPath ) ) {
				$this->commitTransaction( $db, __METHOD__ );
			} else {
				$this->error( "RENAME FAILED" );
				$this->rollbackTransaction( $db, __METHOD__ );
			}
		}
	}

	/**
	 * Insert $suffix before the last extension of $name
	 * ("Foo.bar.png" + "_1" gives "Foo.bar_1.png").
	 *
	 * A name without any dot has no extension to insert before; the suffix
	 * is then appended at the end. Returning such a name unchanged would make
	 * the conflict loop in pokeFile() test the same title forever.
	 *
	 * @param string $name
	 * @param string $suffix
	 * @return string
	 */
	private function appendTitle( $name, $suffix ) {
		$count = 0;
		$result = preg_replace( '/^(.*)(\..*?)$/',
			"\\1$suffix\\2", $name, -1, $count );

		if ( $result === null || $count === 0 ) {
			// No extension found (or the regex engine failed): plain append.
			return $name . $suffix;
		}

		return $result;
	}

	/**
	 * Try to derive a legal title from $name by passing every character that
	 * is outside Title::legalChars(), as well as every '~', through the
	 * hexChar callback (not defined in this class; presumably inherited from
	 * the parent).
	 *
	 * @param string $name
	 * @return string|false The escaped name, or false (after reporting an
	 *  error) when the result is still not a title that maps to itself
	 */
	private function buildSafeTitle( $name ) {
		$x = preg_replace_callback(
			'/([^' . Title::legalChars() . ']|~)/',
			[ $this, 'hexChar' ],
			$name );

		$test = Title::makeTitleSafe( NS_FILE, $x );
		if ( $test === null || $test->getDBkey() !== $x ) {
			$this->error( "Unable to generate safe title from '$name', got '$x'" );

			return false;
		}

		return $x;
	}
}
229 
// Name the class implementing this script, then include the file named by
// RUN_MAINTENANCE_IF_MAIN (presumably the runner that executes $maintClass
// when this file is the entry point — confirm against Maintenance.php).
$maintClass = CleanupImages::class;
require_once RUN_MAINTENANCE_IF_MAIN;
wfMkdirParents
wfMkdirParents( $dir, $mode=null, $caller=null)
Make directory, and make all parent directories if they don't exist.
Definition: GlobalFunctions.php:1750
MediaWiki\MediaWikiServices
MediaWikiServices is the service locator for the application scope of MediaWiki.
Definition: MediaWikiServices.php:193
Maintenance\addDescription
addDescription( $text)
Set the description text.
Definition: Maintenance.php:329
CleanupImages\$repo
LocalRepo null $repo
Definition: cleanupImages.php:46
CleanupImages\imageExists
imageExists( $name, $db)
Definition: cleanupImages.php:129
CleanupImages\processRow
processRow( $row)
Definition: cleanupImages.php:53
CleanupImages\$defaultParams
$defaultParams
Definition: cleanupImages.php:38
Maintenance\rollbackTransaction
rollbackTransaction(IDatabase $dbw, $fname)
Rollback the transaction on a DB handle.
Definition: Maintenance.php:1438
Maintenance\beginTransaction
beginTransaction(IDatabase $dbw, $fname)
Begin a transaction on a DB.
Definition: Maintenance.php:1393
CleanupImages\killRow
killRow( $name)
Definition: cleanupImages.php:105
$title
$title
Definition: testCompression.php:38
TableCleanup\progress
progress( $updated)
Definition: TableCleanup.php:76
Title\makeTitleSafe
static makeTitleSafe( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:677
TableCleanup
Generic class to cleanup a database table.
Definition: TableCleanup.php:31
$maintClass
$maintClass
Definition: cleanupImages.php:230
DB_PRIMARY
const DB_PRIMARY
Definition: defines.php:27
CleanupImages
Maintenance script to clean up broken, unparseable upload filenames.
Definition: cleanupImages.php:37
CleanupImages\__construct
__construct()
Default constructor.
Definition: cleanupImages.php:48
Maintenance\commitTransaction
commitTransaction(IDatabase $dbw, $fname)
Commit the transaction on a DB handle and wait for replica DBs to catch up.
Definition: Maintenance.php:1408
Maintenance\getDB
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
Definition: Maintenance.php:1362
CleanupImages\pokeFile
pokeFile( $orig, $new)
Definition: cleanupImages.php:140
$path
$path
Definition: NoLocalSettings.php:25
$source
$source
Definition: mwdoc-filter.php:34
Maintenance\error
error( $err, $die=0)
Throw an error to the user.
Definition: Maintenance.php:464
Maintenance\output
output( $out, $channel=null)
Throw some output to the user.
Definition: Maintenance.php:435
CleanupImages\buildSafeTitle
buildSafeTitle( $name)
Definition: cleanupImages.php:213
Title\legalChars
static legalChars()
Get a regex character class describing the legal characters in a link.
Definition: Title.php:737
Sanitizer\decodeCharReferences
static decodeCharReferences( $text)
Decode any character references, numeric or named entities, in the text and return a UTF-8 string.
Definition: Sanitizer.php:1229
CleanupImages\pageExists
pageExists( $name, $db)
Definition: cleanupImages.php:133
NS_FILE
const NS_FILE
Definition: Defines.php:70
CleanupImages\appendTitle
appendTitle( $name, $suffix)
Definition: cleanupImages.php:208
LocalRepo
A repository that stores files in the local filesystem and registers them in the wiki's own database.
Definition: LocalRepo.php:41
CleanupImages\filePath
filePath( $name)
Definition: cleanupImages.php:121