MediaWiki master
findOrphanedFiles.php
Go to the documentation of this file.
1<?php
11
12// @codeCoverageIgnoreStart
13require_once __DIR__ . '/Maintenance.php';
14// @codeCoverageIgnoreEnd
15
17
/**
 * Register the script description, its command-line options and the
 * default batch size.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( "Find unregistered files in the 'public' repo zone." );

	// --subdir is optional (required=false) and takes a value (withArg=true).
	$subdirHelp = 'Only scan files in this subdirectory (e.g. "a/a0")';
	$this->addOption( 'subdir', $subdirHelp, false, true );

	// --verbose is a plain flag without a value.
	$this->addOption( 'verbose', "Mention file paths checked" );

	// Number of paths handed to checkFiles() per call.
	$this->setBatchSize( 500 );
}
27
/**
 * List the files of the local repo's 'public' zone (optionally restricted
 * to the --subdir option) and pass them to checkFiles() in batches.
 *
 * Aborts through fatalError() when the repo uses SHA-1 storage names or
 * when the backend cannot produce a file listing.
 */
public function execute() {
	$verbose = $this->hasOption( 'verbose' );
	$subdir = $this->getOption( 'subdir', '' );

	$repo = $this->getServiceContainer()->getRepoGroup()->getLocalRepo();
	if ( $repo->hasSha1Storage() ) {
		$this->fatalError( "Local repo uses SHA-1 file storage names; aborting." );
	}

	// Root of the scan: the public zone, narrowed to the requested subdirectory.
	$directory = $repo->getZonePath( 'public' );
	if ( $subdir != '' ) {
		$directory = $directory . "/$subdir/";
	}

	if ( $verbose ) {
		$this->output( "Scanning files under $directory:\n" );
	}

	$list = $repo->getBackend()->getFileList( [ 'dir' => $directory ] );
	if ( $list === null ) {
		$this->fatalError( "Could not get file listing." );
	}

	$pending = [];
	foreach ( $list as $path ) {
		// Ignore thumb/ and deleted/ entries: handle ugly nested containers
		// on stock installs.
		if ( !preg_match( '#^(thumb|deleted)/#', $path ) ) {
			$pending[] = $path;
			// Flush as soon as a full batch has been collected.
			if ( count( $pending ) >= $this->getBatchSize() ) {
				$this->checkFiles( $repo, $pending, $verbose );
				$pending = [];
			}
		}
	}
	// Flush the final, possibly partial batch (checkFiles() ignores an empty one).
	$this->checkFiles( $repo, $pending, $verbose );
}
65
/**
 * Report the files of one batch that have no matching database row.
 *
 * Paths starting with "archive/" are treated as old file versions and are
 * looked up by the pair (oi_name, oi_archive_name); all other paths are
 * treated as current versions and are looked up by img_name. Every file
 * without a matching row is printed together with its canonical URL.
 *
 * @param LocalRepo $repo Repository whose replica DB is queried
 * @param string[] $paths Paths as yielded by the backend file listing
 * @param bool $verbose Whether to print each file name as it is checked
 */
protected function checkFiles( LocalRepo $repo, array $paths, bool $verbose ) {
	if ( !count( $paths ) ) {
		return;
	}

	$dbr = $repo->getReplicaDB();

	$curNames = [];
	$oldNames = [];
	$oiWheres = [];
	foreach ( $paths as $path ) {
		$name = basename( $path );
		if ( preg_match( '#^archive/#', $path ) ) {
			if ( $verbose ) {
				$this->output( "Checking old file $name\n" );
			}

			$oldNames[] = $name;
			$parts = explode( '!', $name, 2 ); // <TS::MW>!<img_name>
			if ( count( $parts ) === 2 ) {
				$oiWheres[] = $dbr->expr( 'oi_name', '=', $parts[1] )
					->and( 'oi_archive_name', '=', $name );
			}
			// A name without '!' gets no lookup condition, so it stays
			// unmatched and is reported by the loop at the end.
		} else {
			if ( $verbose ) {
				$this->output( "Checking current file $name\n" );
			}

			$curNames[] = $name;
		}
	}

	// '1=0' keeps the query valid (and empty) when there is nothing to look up.
	$res1 = FileSelectQueryBuilder::newForFile( $dbr )
		->where( $curNames ? [ 'img_name' => $curNames ] : '1=0' )
		->caller( __METHOD__ )
		->fetchResultSet();
	$res2 = FileSelectQueryBuilder::newForOldFile( $dbr )
		->where( $oiWheres ? $dbr->orExpr( $oiWheres ) : '1=0' )
		->caller( __METHOD__ )
		->fetchResultSet();

	$curNamesFound = [];
	$oldNamesFound = [];

	foreach ( $res1 as $row ) {
		$curNamesFound[] = $row->img_name;
	}
	foreach ( $res2 as $row ) {
		// $oldNames holds archive names ("<TS::MW>!<img_name>"), so the archive
		// name must be collected here; collecting oi_name would never match and
		// every archived file would be reported as orphaned.
		$oldNamesFound[] = $row->oi_archive_name;
	}

	foreach ( array_diff( $curNames, $curNamesFound ) as $name ) {
		$file = $repo->newFile( $name );
		// Print name and public URL to ease recovery
		if ( $file ) {
			$this->output( $name . "\n" . $file->getCanonicalUrl() . "\n\n" );
		} else {
			$this->error( "Cannot get URL for bad file title '$name'" );
		}
	}

	foreach ( array_diff( $oldNames, $oldNamesFound ) as $name ) {
		$parts = explode( '!', $name, 2 ); // <TS::MW>!<img_name>
		if ( count( $parts ) !== 2 ) {
			// No base name can be derived, so no title or URL can be built.
			$this->error( "Cannot get URL for bad archive file name '$name'" );
			continue;
		}
		$file = $repo->newFromArchiveName( Title::makeTitle( NS_FILE, $parts[1] ), $name );
		// Print name and public URL to ease recovery
		$this->output( $name . "\n" . $file->getCanonicalUrl() . "\n\n" );
	}
}
133}
134
135// @codeCoverageIgnoreStart
136$maintClass = FindOrphanedFiles::class;
137require_once RUN_MAINTENANCE_IF_MAIN;
138// @codeCoverageIgnoreEnd
const NS_FILE
Definition Defines.php:57
execute()
Do the actual work.
__construct()
Default constructor.
checkFiles(LocalRepo $repo, array $paths, bool $verbose)
newFile( $title, $time=false)
Create a new File object from the local repository.
Definition FileRepo.php:421
Local repository that stores files in the local filesystem and registers them in the wiki's own database.
Definition LocalRepo.php:45
newFromArchiveName( $title, $archiveName)
getReplicaDB()
Get a connection to the replica DB.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
getBatchSize()
Returns batch size.
output( $out, $channel=null)
Throw some output to the user.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
hasOption( $name)
Checks to see if a particular option was set.
getOption( $name, $default=null)
Get an option, or return the default.
error( $err, $die=0)
Throw an error to the user.
getServiceContainer()
Returns the main service container.
addDescription( $text)
Set the description text.
Represents a title within MediaWiki.
Definition Title.php:69