MediaWiki  master
purgeChangedFiles.php
Go to the documentation of this file.
1 <?php
24 require_once __DIR__ . '/Maintenance.php';
25 
/**
 * Mapping from type option to log type and actions.
 *
 * The outer keys are the values accepted by the --type option. Each one maps
 * a logging-table log_type to the list of log_action values that are scanned
 * for that kind of change.
 */
37  private static $typeMappings = [
38  'created' => [
39  'upload' => [ 'upload' ],
40  'import' => [ 'upload', 'interwiki' ],
41  ],
42  'deleted' => [
43  'delete' => [ 'delete', 'revision' ],
44  'suppress' => [ 'delete', 'revision' ],
45  ],
46  'modified' => [
47  'upload' => [ 'overwrite', 'revert' ],
48  'move' => [ 'move', 'move_redir' ],
49  ],
50  ];
51 
// Lower bound (inclusive) of the log_timestamp range to scan; set in execute()
// from the 'starttime' option after conversion by the replica DB's timestamp().
55  private $startTimestamp;
56 
// Upper bound (inclusive) of the log_timestamp range to scan; set in execute()
// from the 'endtime' option after conversion by the replica DB's timestamp().
60  private $endTimestamp;
61 
/**
 * Register the script description, its command-line options and the
 * default batch size.
 */
public function __construct() {
    parent::__construct();

    // The --type help text enumerates every key of the type mapping plus 'all'
    $knownTypes = implode( ',', array_keys( self::$typeMappings ) );
    $typeHelp = "Comma-separated list of types of changes to send purges for ({$knownTypes},all)";

    $this->addDescription( 'Scan the logging table and purge files and thumbnails.' );

    // Required options (both take an argument)
    $this->addOption( 'starttime', 'Starting timestamp', true, true );
    $this->addOption( 'endtime', 'Ending timestamp', true, true );

    // Optional options
    $this->addOption( 'type', $typeHelp, false, true );
    $this->addOption( 'htcp-dest', 'HTCP announcement destination (IP:port)', false, true );
    $this->addOption( 'dry-run', 'Do not send purge requests' );
    $this->addOption( 'sleep-per-batch', 'Milliseconds to sleep between batches', false, true );
    $this->addOption( 'verbose', 'Show more output', false, false, 'v' );

    $this->setBatchSize( 100 );
}
75 
/**
 * Script entry point.
 *
 * Optionally reroutes HTCP purges to the host given by --htcp-dest, validates
 * the --type list and the --starttime/--endtime range, and then runs
 * purgeFromLogType() once for every requested change type.
 *
 * On an unknown type or an inverted time range an error is printed and
 * maybeHelp( true ) is invoked.
 */
public function execute() {
    global $wgHTCPRouting;

    if ( $this->hasOption( 'htcp-dest' ) ) {
        $parts = explode( ':', $this->getOption( 'htcp-dest' ), 2 );
        if ( count( $parts ) < 2 ) {
            // Add default htcp port
            $parts[] = '4827';
        }

        // Route all HTCP messages to provided host:port
        $wgHTCPRouting = [
            '' => [ 'host' => $parts[0], 'port' => $parts[1] ],
        ];
        $this->verbose( "HTCP broadcasts to {$parts[0]}:{$parts[1]}\n" );
    }

    // Find out which actions we should be concerned with
    $typeOpt = trim( $this->getOption( 'type', 'all' ) );
    $validTypes = array_keys( self::$typeMappings );
    if ( $typeOpt === 'all' ) {
        // Convert 'all' to all registered types
        $typeList = $validTypes;
    } else {
        // Tolerate whitespace around the commas ("created, deleted") and scan
        // each requested type only once even if it is listed repeatedly.
        $typeList = array_values( array_unique( array_map( 'trim', explode( ',', $typeOpt ) ) ) );
    }
    foreach ( $typeList as $type ) {
        if ( !in_array( $type, $validTypes, true ) ) {
            $this->error( "\nERROR: Unknown type: {$type}\n" );
            $this->maybeHelp( true );
        }
    }

    // Validate the timestamps
    $dbr = $this->getDB( DB_REPLICA );
    $this->startTimestamp = $dbr->timestamp( $this->getOption( 'starttime' ) );
    $this->endTimestamp = $dbr->timestamp( $this->getOption( 'endtime' ) );

    if ( $this->startTimestamp > $this->endTimestamp ) {
        $this->error( "\nERROR: starttime after endtime\n" );
        $this->maybeHelp( true );
    }

    // Turn on verbose when dry-run is enabled
    if ( $this->hasOption( 'dry-run' ) ) {
        $this->mOptions['verbose'] = 1;
    }

    $this->verbose( 'Purging files that were: ' . implode( ', ', $typeList ) . "\n" );
    foreach ( $typeList as $type ) {
        $this->verbose( "Checking for {$type} files...\n" );
        $this->purgeFromLogType( $type );
        if ( !$this->hasOption( 'dry-run' ) ) {
            $this->verbose( "...{$type} files purged.\n\n" );
        }
    }
}
132 
/**
 * Purge cache and thumbnails for changes of the given type.
 *
 * For every log type/action pair registered under $type in
 * self::$typeMappings, the logging table is scanned for File-namespace
 * entries inside [startTimestamp, endTimestamp]. Each matching file has its
 * current and old versions purged. 'delete' entries additionally remove
 * orphaned storage files and purge archived versions; 'move' entries also
 * purge the move target.
 *
 * In dry-run mode the matching rows are only listed.
 *
 * @param string $type Type of change to find; must be a key of self::$typeMappings
 */
protected function purgeFromLogType( $type ) {
    $repo = RepoGroup::singleton()->getLocalRepo();
    $dbr = $this->getDB( DB_REPLICA );
    $dryRun = $this->hasOption( 'dry-run' );

    foreach ( self::$typeMappings[$type] as $logType => $logActions ) {
        $this->verbose( "Scanning for {$logType}/" . implode( ',', $logActions ) . "\n" );

        $res = $dbr->select(
            'logging',
            [ 'log_title', 'log_timestamp', 'log_params' ],
            [
                'log_namespace' => NS_FILE,
                'log_type' => $logType,
                'log_action' => $logActions,
                'log_timestamp >= ' . $dbr->addQuotes( $this->startTimestamp ),
                'log_timestamp <= ' . $dbr->addQuotes( $this->endTimestamp ),
            ],
            __METHOD__
        );

        $bSize = 0;
        foreach ( $res as $row ) {
            if ( $dryRun ) {
                // Only report what would be purged; no file object is needed
                $this->verbose( "{$type}[{$row->log_timestamp}]: {$row->log_title}\n" );
                continue;
            }

            $file = $repo->newFile( Title::makeTitle( NS_FILE, $row->log_title ) );

            // Purge current version and its thumbnails
            $file->purgeCache();
            // Purge the old versions and their thumbnails
            foreach ( $file->getHistory() as $oldFile ) {
                $oldFile->purgeCache();
            }

            if ( $logType === 'delete' ) {
                // If there is an orphaned storage file... delete it
                if ( !$file->exists() && $repo->fileExists( $file->getPath() ) ) {
                    $dpath = $this->getDeletedPath( $repo, $file );
                    if ( $repo->fileExists( $dpath ) ) {
                        // Sanity check to avoid data loss
                        $repo->getBackend()->delete( [ 'src' => $file->getPath() ] );
                        $this->verbose( "Deleted orphan file: {$file->getPath()}.\n" );
                    } else {
                        $this->error( "File was not deleted: {$file->getPath()}.\n" );
                    }
                }

                // Purge items from filearchive table (rows are likely here)
                $this->purgeFromArchiveTable( $repo, $file );
            } elseif ( $logType === 'move' ) {
                // Purge the target file as well.
                // log_params may hold data that is not a serialized array; in
                // that case unserialize() does not yield an array and there
                // is no target to purge.
                $params = unserialize( $row->log_params );
                $target = ( is_array( $params ) && isset( $params['4::target'] ) )
                    ? $params['4::target']
                    : null;

                if ( is_string( $target ) && $target !== '' ) {
                    // NOTE(review): move log entries appear to store the target as
                    // prefixed text ("File:Name.ext") rather than a bare DB key -
                    // confirm. Parsing with NS_FILE as the default namespace
                    // handles both forms, whereas Title::makeTitle( NS_FILE, ... )
                    // would treat a "File:" prefix as part of the file name.
                    $targetTitle = Title::newFromText( $target, NS_FILE );
                    if ( $targetTitle && $targetTitle->inNamespace( NS_FILE ) ) {
                        $targetFile = $repo->newFile( $targetTitle );
                        $targetFile->purgeCache();
                        $this->verbose( "Purged file {$target}; move target @{$row->log_timestamp}.\n" );
                    }
                }
            }

            $this->verbose( "Purged file {$row->log_title}; {$type} @{$row->log_timestamp}.\n" );

            // Pause once a full batch has been processed. Using >= keeps the
            // batches at exactly getBatchSize() rows instead of one more.
            if ( $this->hasOption( 'sleep-per-batch' ) && ++$bSize >= $this->getBatchSize() ) {
                $bSize = 0;
                // sleep-per-batch is milliseconds, usleep wants micro seconds.
                usleep( 1000 * (int)$this->getOption( 'sleep-per-batch' ) );
            }
        }
    }
}
211 
/**
 * Walk the filearchive rows recorded under the file's name and, for every
 * archived old version, remove a leftover storage file (if any) and purge
 * the thumbnails of that version.
 *
 * @param LocalRepo $repo Repository the file belongs to
 * @param LocalFile $file File whose archived versions are processed
 */
protected function purgeFromArchiveTable( LocalRepo $repo, LocalFile $file ) {
    $archiveRows = $repo->getReplicaDB()->select(
        'filearchive',
        [ 'fa_archive_name' ],
        [ 'fa_name' => $file->getName() ],
        __METHOD__
    );

    foreach ( $archiveRows as $archiveRow ) {
        $archiveName = $archiveRow->fa_archive_name;

        // Rows without an archive name were not an old version
        // (current version names checked already)
        if ( $archiveName !== null ) {
            $oldVersion = $repo->newFromArchiveName( $file->getTitle(), $archiveName );

            // An orphan is a storage file that is still present although the
            // file itself no longer exists
            $isOrphan = !$file->exists() && $repo->fileExists( $oldVersion->getPath() );
            if ( $isOrphan ) {
                $deletedCopy = $this->getDeletedPath( $repo, $oldVersion );
                // Sanity check to avoid data loss: only remove the orphan
                // when its copy in the deletion archive is present
                if ( !$repo->fileExists( $deletedCopy ) ) {
                    $this->error( "File was not deleted: {$oldVersion->getPath()}.\n" );
                } else {
                    $repo->getBackend()->delete( [ 'src' => $oldVersion->getPath() ] );
                    $this->output( "Deleted orphan file: {$oldVersion->getPath()}.\n" );
                }
            }

            $file->purgeOldThumbnails( $archiveName );
        }
    }
}
241 
/**
 * Build the deletion-archive path for a file: the key "<sha1>.<extension>"
 * appended to the hash path the repo derives from that key.
 *
 * @param LocalRepo $repo Repository used to hash the file and derive the path
 * @param LocalFile $file File whose storage path is hashed
 * @return string
 */
protected function getDeletedPath( LocalRepo $repo, LocalFile $file ) {
    $archiveKey = $repo->getFileSha1( $file->getPath() ) . '.' . $file->getExtension();

    return $repo->getDeletedHashPath( $archiveKey ) . $archiveKey;
}
248 
/**
 * Send an output message iff the 'verbose' option has been provided.
 *
 * @param string $msg Message to output
 */
protected function verbose( $msg ) {
    if ( !$this->hasOption( 'verbose' ) ) {
        return;
    }

    $this->output( $msg );
}
259 }
260 
262 require_once RUN_MAINTENANCE_IF_MAIN;
static array string $startTimestamp
exists()
canRender inherited
Definition: LocalFile.php:986
maybeHelp( $force=false)
Maybe show the help.
error( $err, $die=0)
Throw an error to the user.
newFromArchiveName( $title, $archiveName)
Definition: LocalRepo.php:85
getOption( $name, $default=null)
Get an option, or return the default.
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Definition: router.php:42
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effo...
Definition: maintenance.txt:39
getReplicaDB()
Get a connection to the replica DB.
Definition: LocalRepo.php:468
A repository that stores files in the local filesystem and registers them in the wiki's own database...
Definition: LocalRepo.php:36
getName()
Return the name of this file.
Definition: File.php:307
setBatchSize( $s=0)
Set the batch size.
static array $typeMappings
Mapping from type option to log type and actions.
hasOption( $name)
Checks to see if a particular option exists.
require_once RUN_MAINTENANCE_IF_MAIN
Definition: maintenance.txt:50
getTitle()
Return the associated title object.
Definition: File.php:336
getBackend()
Get the file backend instance.
Definition: FileRepo.php:218
fileExists( $file)
Checks existence of a file.
Definition: FileRepo.php:1371
getPath()
Return the storage path to the file.
Definition: File.php:427
purgeFromArchiveTable(LocalRepo $repo, LocalFile $file)
verbose( $msg)
Send an output message iff the 'verbose' option has been provided.
getFileSha1( $virtualUrl)
Get the sha1 (base 36) of a file with a given virtual URL/storage path.
Definition: FileRepo.php:1604
$res
Definition: database.txt:21
static singleton()
Definition: RepoGroup.php:60
addDescription( $text)
Set the description text.
$params
unserialize( $serialized)
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not null
Definition: hooks.txt:773
const NS_FILE
Definition: Defines.php:66
output( $out, $channel=null)
Throw some output to the user.
purgeOldThumbnails( $archiveName)
Delete cached transformed files for an archived version only.
Definition: LocalFile.php:1060
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
Maintenance script that scans the deletion log and purges affected files within a timeframe...
static makeTitle( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:592
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
you have access to all of the normal MediaWiki so you can get a DB use the etc For full docs on the Maintenance class
Definition: maintenance.txt:52
getBatchSize()
Returns batch size.
purgeFromLogType( $type)
Purge cache and thumbnails for changes of the given type.
Class to represent a local file in the wiki's own database.
Definition: LocalFile.php:56
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
const DB_REPLICA
Definition: defines.php:25
getDeletedHashPath( $key)
Get a relative path for a deletion archive key, e.g.
Definition: FileRepo.php:1500
getDeletedPath(LocalRepo $repo, LocalFile $file)
$wgHTCPRouting
Routing configuration for HTCP multicast purging.
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.