MediaWiki  1.28.0
purgeChangedFiles.php
Go to the documentation of this file.
1 <?php
24 require_once __DIR__ . '/Maintenance.php';
25 
/**
 * Mapping from the values accepted by --type to the log types, and the
 * log actions within each log type, that are scanned for that value.
 *
 * Shape: [ type option => [ log_type => [ log_action, ... ] ] ]
 *
 * @var array
 */
private static $typeMappings = [
	'created' => [
		'upload' => [ 'upload' ],
		'import' => [ 'upload', 'interwiki' ],
	],
	'deleted' => [
		'delete' => [ 'delete', 'revision' ],
		'suppress' => [ 'delete', 'revision' ],
	],
	'modified' => [
		'upload' => [ 'overwrite', 'revert' ],
		'move' => [ 'move', 'move_redir' ],
	],
];

/**
 * Lower bound of the scanned log range: the --starttime option after
 * conversion by the database's timestamp() (set in execute()).
 *
 * @var string
 */
private $startTimestamp;

/**
 * Upper bound of the scanned log range: the --endtime option after
 * conversion by the database's timestamp() (set in execute()).
 *
 * @var string
 */
private $endTimestamp;
61 
/**
 * Register the script description, its command-line options and the
 * default batch size of 100.
 */
public function __construct() {
	parent::__construct();

	// The help text for --type lists every key of the mapping plus "all".
	$knownTypes = implode( ',', array_keys( self::$typeMappings ) );
	$typeHelp = 'Comma-separated list of types of changes to send purges for (' .
		$knownTypes . ',all)';

	$this->addDescription( 'Scan the logging table and purge files and thumbnails.' );

	// addOption( name, description, required, withArg, shortName )
	$this->addOption( 'starttime', 'Starting timestamp', true, true );
	$this->addOption( 'endtime', 'Ending timestamp', true, true );
	$this->addOption( 'type', $typeHelp, false, true );
	$this->addOption( 'htcp-dest', 'HTCP announcement destination (IP:port)', false, true );
	$this->addOption( 'dry-run', 'Do not send purge requests' );
	$this->addOption( 'sleep-per-batch', 'Milliseconds to sleep between batches', false, true );
	$this->addOption( 'verbose', 'Show more output', false, false, 'v' );

	$this->setBatchSize( 100 );
}
75 
/**
 * Validate the command-line options, then purge the files affected by
 * each requested type of change.
 *
 * Steps, in order:
 *  - if --htcp-dest is given, route all HTCP messages to that host:port
 *    (port 4827 is appended when no port is supplied);
 *  - expand --type (default "all") into a list and reject unknown types;
 *  - convert --starttime / --endtime with the replica's timestamp() and
 *    reject a range whose start is after its end;
 *  - force verbose output when --dry-run is set;
 *  - call purgeFromLogType() once per requested type.
 */
public function execute() {
	// Without this declaration the assignment below would only create a
	// function-local variable and the HTCP routing override would be lost.
	global $wgHTCPRouting;

	if ( $this->hasOption( 'htcp-dest' ) ) {
		$parts = explode( ':', $this->getOption( 'htcp-dest' ) );
		if ( count( $parts ) < 2 ) {
			// Add default htcp port
			$parts[] = '4827';
		}

		// Route all HTCP messages to provided host:port
		$wgHTCPRouting = [
			'' => [ 'host' => $parts[0], 'port' => $parts[1] ],
		];
		$this->verbose( "HTCP broadcasts to {$parts[0]}:{$parts[1]}\n" );
	}

	// Find out which actions we should be concerned with
	$typeOpt = $this->getOption( 'type', 'all' );
	$validTypes = array_keys( self::$typeMappings );
	if ( $typeOpt === 'all' ) {
		// Convert 'all' to all registered types
		$typeOpt = implode( ',', $validTypes );
	}
	$typeList = explode( ',', $typeOpt );
	foreach ( $typeList as $type ) {
		// Both sides are strings, so compare strictly.
		if ( !in_array( $type, $validTypes, true ) ) {
			$this->error( "\nERROR: Unknown type: {$type}\n" );
			$this->maybeHelp( true );
		}
	}

	// Validate the timestamps
	$dbr = $this->getDB( DB_REPLICA );
	$this->startTimestamp = $dbr->timestamp( $this->getOption( 'starttime' ) );
	$this->endTimestamp = $dbr->timestamp( $this->getOption( 'endtime' ) );

	if ( $this->startTimestamp > $this->endTimestamp ) {
		$this->error( "\nERROR: starttime after endtime\n" );
		$this->maybeHelp( true );
	}

	// Turn on verbose when dry-run is enabled
	if ( $this->hasOption( 'dry-run' ) ) {
		$this->mOptions['verbose'] = 1;
	}

	$this->verbose( 'Purging files that were: ' . implode( ', ', $typeList ) . "\n" );
	foreach ( $typeList as $type ) {
		$this->verbose( "Checking for {$type} files...\n" );
		$this->purgeFromLogType( $type );
		if ( !$this->hasOption( 'dry-run' ) ) {
			$this->verbose( "...{$type} files purged.\n\n" );
		}
	}
}
132 
/**
 * Purge cache and thumbnails for changes of the given type.
 *
 * Every log type / action list mapped to $type in self::$typeMappings is
 * looked up in the logging table (File namespace only, between the start
 * and end timestamps set by execute()). For each matching row the current
 * file and all of its old versions are purged. In addition:
 *  - for 'delete' log entries, an orphaned storage file is removed when a
 *    copy exists at the deleted path, and the filearchive rows are handled
 *    by purgeFromArchiveTable();
 *  - for 'move' log entries, the move target is purged as well.
 *
 * With --dry-run the matching rows are only listed. With --sleep-per-batch
 * the script pauses for that many milliseconds after every mBatchSize rows.
 *
 * @param string $type Key of self::$typeMappings
 */
protected function purgeFromLogType( $type ) {
	$repo = RepoGroup::singleton()->getLocalRepo();
	$dbr = $this->getDB( DB_REPLICA );

	// These options cannot change while scanning, so read them once.
	$dryRun = $this->hasOption( 'dry-run' );
	$sleepMicroseconds = null;
	if ( $this->hasOption( 'sleep-per-batch' ) ) {
		// sleep-per-batch is milliseconds, usleep wants micro seconds.
		$sleepMicroseconds = 1000 * (int)$this->getOption( 'sleep-per-batch' );
	}

	foreach ( self::$typeMappings[$type] as $logType => $logActions ) {
		$this->verbose( "Scanning for {$logType}/" . implode( ',', $logActions ) . "\n" );

		$res = $dbr->select(
			'logging',
			[ 'log_title', 'log_timestamp', 'log_params' ],
			[
				'log_namespace' => NS_FILE,
				'log_type' => $logType,
				'log_action' => $logActions,
				'log_timestamp >= ' . $dbr->addQuotes( $this->startTimestamp ),
				'log_timestamp <= ' . $dbr->addQuotes( $this->endTimestamp ),
			],
			__METHOD__
		);

		$bSize = 0;
		foreach ( $res as $row ) {
			if ( $dryRun ) {
				$this->verbose( "{$type}[{$row->log_timestamp}]: {$row->log_title}\n" );
				continue;
			}

			$file = $repo->newFile( Title::makeTitle( NS_FILE, $row->log_title ) );

			// Purge current version and its thumbnails
			$file->purgeCache();
			// Purge the old versions and their thumbnails
			foreach ( $file->getHistory() as $oldFile ) {
				$oldFile->purgeCache();
			}

			if ( $logType === 'delete' ) {
				// If there is an orphaned storage file... delete it
				if ( !$file->exists() && $repo->fileExists( $file->getPath() ) ) {
					$path = $file->getPath();
					$dpath = $this->getDeletedPath( $repo, $file );
					if ( $repo->fileExists( $dpath ) ) {
						// Sanity check to avoid data loss: only remove the
						// public copy when the deleted copy is present.
						$repo->getBackend()->delete( [ 'src' => $path ] );
						$this->verbose( "Deleted orphan file: {$path}.\n" );
					} else {
						$this->error( "File was not deleted: {$path}.\n" );
					}
				}

				// Purge items from filearchive table (rows are likely here)
				$this->purgeFromArchiveTable( $repo, $file );
			} elseif ( $logType === 'move' ) {
				// Purge the target file as well
				$params = unserialize( $row->log_params );
				if ( isset( $params['4::target'] ) ) {
					$target = $params['4::target'];
					// NOTE(review): Title::makeTitle() takes a DB key without
					// namespace prefix; confirm that '4::target' is stored in
					// that form and not as prefixed text.
					$targetFile = $repo->newFile( Title::makeTitle( NS_FILE, $target ) );
					$targetFile->purgeCache();
					$this->verbose( "Purged file {$target}; move target @{$row->log_timestamp}.\n" );
				}
			}

			$this->verbose( "Purged file {$row->log_title}; {$type} @{$row->log_timestamp}.\n" );

			// ">=" so that the pause happens after exactly mBatchSize rows;
			// ">" let one extra row through before every pause.
			if ( $sleepMicroseconds !== null && ++$bSize >= $this->mBatchSize ) {
				$bSize = 0;
				usleep( $sleepMicroseconds );
			}
		}
	}
}
211 
/**
 * Clean up the archived old versions recorded in filearchive for a file.
 *
 * For every filearchive row with this file's name and a non-null archive
 * name: if the file no longer exists but the old version is still in
 * storage, that storage file is deleted, provided a copy is found at the
 * deleted path. The thumbnails of the archived version are then purged.
 *
 * @param LocalRepo $repo
 * @param LocalFile $file
 */
protected function purgeFromArchiveTable( LocalRepo $repo, LocalFile $file ) {
	$replica = $repo->getSlaveDB();
	$archiveRows = $replica->select(
		'filearchive',
		[ 'fa_archive_name' ],
		[ 'fa_name' => $file->getName() ],
		__METHOD__
	);

	foreach ( $archiveRows as $archiveRow ) {
		$archiveName = $archiveRow->fa_archive_name;
		if ( $archiveName === null ) {
			// Not an old version; current version names were checked already
			continue;
		}

		$oldVersion = $repo->newFromArchiveName( $file->getTitle(), $archiveName );

		// An orphan is an old version still in storage although the file is gone
		$isOrphan = !$file->exists() && $repo->fileExists( $oldVersion->getPath() );
		if ( $isOrphan ) {
			$deletedCopy = $this->getDeletedPath( $repo, $oldVersion );
			if ( $repo->fileExists( $deletedCopy ) ) {
				// Safe to remove: the deleted copy is present
				$repo->getBackend()->delete( [ 'src' => $oldVersion->getPath() ] );
				$this->output( "Deleted orphan file: {$oldVersion->getPath()}.\n" );
			} else {
				$this->error( "File was not deleted: {$oldVersion->getPath()}.\n" );
			}
		}

		$file->purgeOldThumbnails( $archiveName );
	}
}
241 
/**
 * Build the deleted-archive path matching a file's current content.
 *
 * The archive key is "<sha1>.<extension>", with the SHA-1 looked up by
 * the repo for the file's storage path. The result is the repo's deleted
 * hash path for that key, followed by the key itself.
 *
 * @param LocalRepo $repo
 * @param LocalFile $file
 * @return string
 */
protected function getDeletedPath( LocalRepo $repo, LocalFile $file ) {
	$sha1 = $repo->getFileSha1( $file->getPath() );
	$archiveKey = $sha1 . '.' . $file->getExtension();

	return $repo->getDeletedHashPath( $archiveKey ) . $archiveKey;
}
248 
/**
 * Send an output message iff the 'verbose' option has been provided.
 *
 * @param string $msg Message to output
 */
protected function verbose( $msg ) {
	if ( !$this->hasOption( 'verbose' ) ) {
		return;
	}

	$this->output( $msg );
}
259 }
260 
// Name of this script's class, set before the maintenance bootstrap below is
// included (presumably the bootstrap runs it when this file is the entry
// script; confirm against RUN_MAINTENANCE_IF_MAIN).
$maintClass = 'PurgeChangedFiles';
require_once RUN_MAINTENANCE_IF_MAIN;
static array string $startTimestamp
exists()
canRender inherited
Definition: LocalFile.php:849
newFromArchiveName($title, $archiveName)
Definition: LocalRepo.php:80
getSlaveDB()
Get a connection to the replica DB.
Definition: LocalRepo.php:455
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effo...
Definition: maintenance.txt:39
verbose($msg)
Send an output message iff the 'verbose' option has been provided.
A repository that stores files in the local filesystem and registers them in the wiki's own database...
Definition: LocalRepo.php:31
getDB($db, $groups=[], $wiki=false)
Returns a database to be used by current maintenance script.
hasOption($name)
Checks to see if a particular param exists.
getName()
Return the name of this file.
Definition: File.php:296
static array $typeMappings
Mapping from type option to log type and actions.
require_once RUN_MAINTENANCE_IF_MAIN
Definition: maintenance.txt:50
when a variable name is used in a function, it is silently declared as a new local, masking the global
Definition: design.txt:93
getTitle()
Return the associated title object.
Definition: File.php:325
getBackend()
Get the file backend instance.
Definition: FileRepo.php:215
getPath()
Return the storage path to the file.
Definition: File.php:416
addOption($name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getFileSha1($virtualUrl)
Get the sha1 (base 36) of a file with a given virtual URL/storage path.
Definition: FileRepo.php:1586
purgeFromArchiveTable(LocalRepo $repo, LocalFile $file)
unserialize($serialized)
Definition: ApiMessage.php:102
$res
Definition: database.txt:21
static singleton()
Get a RepoGroup instance.
Definition: RepoGroup.php:59
$params
purgeFromLogType($type)
Purge cache and thumbnails for changes of the given type.
fileExists($file)
Checks existence of a file.
Definition: FileRepo.php:1353
const NS_FILE
Definition: Defines.php:62
addDescription($text)
Set the description text.
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
getOption($name, $default=null)
Get an option, or return the default.
Maintenance script that scans the deletion log and purges affected files within a timeframe...
output($out, $channel=null)
Throw some output to the user.
purgeOldThumbnails($archiveName)
Delete cached transformed files for an archived version only.
Definition: LocalFile.php:922
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
maybeHelp($force=false)
Maybe show the help.
getDeletedHashPath($key)
Get a relative path for a deletion archive key, e.g.
Definition: FileRepo.php:1482
error($err, $die=0)
Throw an error to the user.
Class to represent a local file in the wiki's own database.
Definition: LocalFile.php:43
const DB_REPLICA
Definition: defines.php:22
setBatchSize($s=0)
Set the batch size.
getDeletedPath(LocalRepo $repo, LocalFile $file)
$wgHTCPRouting
Routing configuration for HTCP multicast purging.
do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values before the output is cached one of or reset my talk my contributions etc etc otherwise the built in rate limiting checks are if enabled allows for interception of redirect as a string mapping parameter names to values & $type
Definition: hooks.txt:2491
static makeTitle($ns, $title, $fragment= '', $interwiki= '')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:511