MediaWiki  1.29.1
purgeChangedFiles.php
Go to the documentation of this file.
1 <?php
24 require_once __DIR__ . '/Maintenance.php';
25 
/**
 * Mapping from type option to log type and actions.
 *
 * Shape: [ type option => [ log_type => [ log_action, ... ] ] ].
 * The outer keys are the values accepted by --type; each inner pair is
 * matched against the log_type / log_action columns of the logging table.
 *
 * @var array
 */
37  private static $typeMappings = [
38  'created' => [
39  'upload' => [ 'upload' ],
40  'import' => [ 'upload', 'interwiki' ],
41  ],
42  'deleted' => [
43  'delete' => [ 'delete', 'revision' ],
44  'suppress' => [ 'delete', 'revision' ],
45  ],
46  'modified' => [
47  'upload' => [ 'overwrite', 'revert' ],
48  'move' => [ 'move', 'move_redir' ],
49  ],
50  ];
51 
/**
 * @var string Lower bound (inclusive) of the log_timestamp window, as
 *  returned by the replica connection's timestamp() for --starttime
 */
55  private $startTimestamp;
56 
/**
 * @var string Upper bound (inclusive) of the log_timestamp window, as
 *  returned by the replica connection's timestamp() for --endtime
 */
60  private $endTimestamp;
61 
/**
 * Register the script description, its command-line options and the
 * default batch size.
 */
public function __construct() {
    parent::__construct();

    // Every key of the type table is a legal --type value, plus the 'all' shortcut.
    $knownTypes = implode( ',', array_keys( self::$typeMappings ) );

    $this->addDescription( 'Scan the logging table and purge files and thumbnails.' );

    // Required options, each taking an argument: the time window to scan.
    $this->addOption( 'starttime', 'Starting timestamp', true, true );
    $this->addOption( 'endtime', 'Ending timestamp', true, true );

    // Optional options that take an argument.
    $this->addOption(
        'type',
        'Comma-separated list of types of changes to send purges for (' . $knownTypes . ',all)',
        false,
        true
    );
    $this->addOption( 'htcp-dest', 'HTCP announcement destination (IP:port)', false, true );

    // Plain flag.
    $this->addOption( 'dry-run', 'Do not send purge requests' );

    $this->addOption( 'sleep-per-batch', 'Milliseconds to sleep between batches', false, true );

    // Flag with the short alias -v.
    $this->addOption( 'verbose', 'Show more output', false, false, 'v' );

    $this->setBatchSize( 100 );
}
75 
/**
 * Do the actual work: validate the command-line options, then purge the
 * files behind every matching log entry, one change type at a time.
 *
 * Reads the options htcp-dest, type, starttime, endtime and dry-run.
 * Unknown types and an inverted time window are reported through error()
 * followed by maybeHelp( true ).
 */
public function execute() {
    // Must be declared global: without this the assignment below would only
    // create a local variable and --htcp-dest would never change the HTCP
    // routing configuration.
    global $wgHTCPRouting;

    if ( $this->hasOption( 'htcp-dest' ) ) {
        $parts = explode( ':', $this->getOption( 'htcp-dest' ) );
        if ( count( $parts ) < 2 ) {
            // Add default htcp port
            $parts[] = '4827';
        }

        // Route all HTCP messages to provided host:port
        $wgHTCPRouting = [
            '' => [ 'host' => $parts[0], 'port' => $parts[1] ],
        ];
        $this->verbose( "HTCP broadcasts to {$parts[0]}:{$parts[1]}\n" );
    }

    // Find out which actions we should be concerned with
    $typeOpt = $this->getOption( 'type', 'all' );
    $validTypes = array_keys( self::$typeMappings );
    if ( $typeOpt === 'all' ) {
        // Convert 'all' to all registered types
        $typeOpt = implode( ',', $validTypes );
    }
    $typeList = explode( ',', $typeOpt );
    foreach ( $typeList as $type ) {
        // Strict comparison: both sides are strings, no type juggling wanted.
        if ( !in_array( $type, $validTypes, true ) ) {
            $this->error( "\nERROR: Unknown type: {$type}\n" );
            $this->maybeHelp( true );
        }
    }

    // Validate the timestamps
    $dbr = $this->getDB( DB_REPLICA );
    $this->startTimestamp = $dbr->timestamp( $this->getOption( 'starttime' ) );
    $this->endTimestamp = $dbr->timestamp( $this->getOption( 'endtime' ) );

    if ( $this->startTimestamp > $this->endTimestamp ) {
        $this->error( "\nERROR: starttime after endtime\n" );
        $this->maybeHelp( true );
    }

    // Turn on verbose when dry-run is enabled, so the dry run actually
    // prints what it would have purged.
    if ( $this->hasOption( 'dry-run' ) ) {
        $this->mOptions['verbose'] = 1;
    }

    $this->verbose( 'Purging files that were: ' . implode( ', ', $typeList ) . "\n" );
    foreach ( $typeList as $type ) {
        $this->verbose( "Checking for {$type} files...\n" );
        $this->purgeFromLogType( $type );
        if ( !$this->hasOption( 'dry-run' ) ) {
            $this->verbose( "...{$type} files purged.\n\n" );
        }
    }
}
132 
/**
 * Purge cache and thumbnails for changes of the given type.
 *
 * For every log_type/log_action pair registered under $type, selects the
 * File-namespace log rows inside the configured time window and purges the
 * current and old versions of each file. 'delete' entries additionally
 * remove orphaned storage files and purge archived versions; 'move'
 * entries additionally purge the move target.
 *
 * @param string $type Key of self::$typeMappings
 */
protected function purgeFromLogType( $type ) {
    $repo = RepoGroup::singleton()->getLocalRepo();
    $dbr = $this->getDB( DB_REPLICA );

    // Options do not change while we iterate; look them up once.
    $dryRun = $this->hasOption( 'dry-run' );
    $throttle = $this->hasOption( 'sleep-per-batch' );
    // sleep-per-batch is milliseconds, usleep wants micro seconds.
    $sleepMicros = $throttle ? 1000 * (int)$this->getOption( 'sleep-per-batch' ) : 0;

    foreach ( self::$typeMappings[$type] as $logType => $logActions ) {
        $this->verbose( "Scanning for {$logType}/" . implode( ',', $logActions ) . "\n" );

        $res = $dbr->select(
            'logging',
            [ 'log_title', 'log_timestamp', 'log_params' ],
            [
                'log_namespace' => NS_FILE,
                'log_type' => $logType,
                'log_action' => $logActions,
                'log_timestamp >= ' . $dbr->addQuotes( $this->startTimestamp ),
                'log_timestamp <= ' . $dbr->addQuotes( $this->endTimestamp ),
            ],
            __METHOD__
        );

        $bSize = 0;
        foreach ( $res as $row ) {
            $file = $repo->newFile( Title::makeTitle( NS_FILE, $row->log_title ) );

            if ( $dryRun ) {
                $this->verbose( "{$type}[{$row->log_timestamp}]: {$row->log_title}\n" );
                continue;
            }

            // Purge current version and its thumbnails
            $file->purgeCache();
            // Purge the old versions and their thumbnails
            foreach ( $file->getHistory() as $oldFile ) {
                $oldFile->purgeCache();
            }

            if ( $logType === 'delete' ) {
                // If there is an orphaned storage file... delete it
                if ( !$file->exists() && $repo->fileExists( $file->getPath() ) ) {
                    $dpath = $this->getDeletedPath( $repo, $file );
                    if ( $repo->fileExists( $dpath ) ) {
                        // Sanity check to avoid data loss: only remove the
                        // public copy when the deletion archive has one too.
                        $repo->getBackend()->delete( [ 'src' => $file->getPath() ] );
                        $this->verbose( "Deleted orphan file: {$file->getPath()}.\n" );
                    } else {
                        $this->error( "File was not deleted: {$file->getPath()}.\n" );
                    }
                }

                // Purge items from filearchive table (rows are likely here)
                $this->purgeFromArchiveTable( $repo, $file );
            } elseif ( $logType === 'move' ) {
                // Purge the target file as well

                // NOTE(review): unserialize() on database content; it can emit
                // a notice on rows not stored in serialized form and should
                // never be fed untrusted data. isset() below tolerates a
                // false return value.
                $params = unserialize( $row->log_params );
                if ( isset( $params['4::target'] ) ) {
                    $target = $params['4::target'];
                    // NOTE(review): Title::makeTitle() expects a DB key; confirm
                    // that '4::target' is stored without a namespace prefix.
                    $targetFile = $repo->newFile( Title::makeTitle( NS_FILE, $target ) );
                    $targetFile->purgeCache();
                    $this->verbose( "Purged file {$target}; move target @{$row->log_timestamp}.\n" );
                }
            }

            $this->verbose( "Purged file {$row->log_title}; {$type} @{$row->log_timestamp}.\n" );

            // Pause once a full batch has been processed. Using >= makes the
            // batch exactly mBatchSize files; the previous > comparison only
            // slept after mBatchSize + 1.
            if ( $throttle && ++$bSize >= $this->mBatchSize ) {
                $bSize = 0;
                usleep( $sleepMicros );
            }
        }
    }
}
211 
/**
 * Walk the filearchive rows recorded under the file's name and, for each
 * archived old version, remove any orphaned storage file and purge the
 * thumbnails of that version.
 *
 * @param LocalRepo $repo Repository the file belongs to
 * @param LocalFile $file File whose archived versions are processed
 */
protected function purgeFromArchiveTable( LocalRepo $repo, LocalFile $file ) {
    $archivedRows = $repo->getReplicaDB()->select(
        'filearchive',
        [ 'fa_archive_name' ],
        [ 'fa_name' => $file->getName() ],
        __METHOD__
    );

    foreach ( $archivedRows as $archived ) {
        $archiveName = $archived->fa_archive_name;

        // A null archive name means the row was not an old version
        // (current version names are checked by the caller already).
        if ( $archiveName !== null ) {
            $ofile = $repo->newFromArchiveName( $file->getTitle(), $archiveName );

            // An old version still present in storage although the file
            // itself no longer exists is an orphan.
            if ( !$file->exists() && $repo->fileExists( $ofile->getPath() ) ) {
                if ( !$repo->fileExists( $this->getDeletedPath( $repo, $ofile ) ) ) {
                    // No copy under the deleted path: keep the file to
                    // avoid data loss and report it instead.
                    $this->error( "File was not deleted: {$ofile->getPath()}.\n" );
                } else {
                    $repo->getBackend()->delete( [ 'src' => $ofile->getPath() ] );
                    $this->output( "Deleted orphan file: {$ofile->getPath()}.\n" );
                }
            }

            $file->purgeOldThumbnails( $archiveName );
        }
    }
}
241 
/**
 * Build the path under which the given file would be kept in the deletion
 * archive: the hash path of "<sha1>.<extension>" followed by that key.
 *
 * NOTE(review): getDeletedHashPath() is documented as returning a relative
 * path, so the result is presumably relative too; confirm that callers'
 * fileExists() checks accept it.
 *
 * @param LocalRepo $repo Repository used to hash the file and derive the path
 * @param LocalFile $file File to compute the deleted path for
 * @return string
 */
protected function getDeletedPath( LocalRepo $repo, LocalFile $file ) {
    $sha1 = $repo->getFileSha1( $file->getPath() );
    $storageKey = $sha1 . '.' . $file->getExtension();

    return $repo->getDeletedHashPath( $storageKey ) . $storageKey;
}
248 
/**
 * Send an output message iff the 'verbose' option has been provided.
 *
 * @param string $msg Text passed unchanged to output()
 */
protected function verbose( $msg ) {
    if ( !$this->hasOption( 'verbose' ) ) {
        // Quiet mode: drop the message.
        return;
    }

    $this->output( $msg );
}
259 }
260 
// Name this script's class, then include the file named by
// RUN_MAINTENANCE_IF_MAIN (presumably the runner that instantiates
// $maintClass when the script is invoked directly; confirm in Maintenance.php).
261 $maintClass = "PurgeChangedFiles";
262 require_once RUN_MAINTENANCE_IF_MAIN;
File\getPath
getPath()
Return the storage path to the file.
Definition: File.php:417
RepoGroup\singleton
static singleton()
Get a RepoGroup instance.
Definition: RepoGroup.php:59
PurgeChangedFiles\$endTimestamp
string $endTimestamp
Definition: purgeChangedFiles.php:60
Maintenance\maybeHelp
maybeHelp( $force=false)
Maybe show the help.
Definition: Maintenance.php:952
$maintClass
$maintClass
Definition: purgeChangedFiles.php:261
captcha-old.count
count
Definition: captcha-old.py:225
Maintenance\addDescription
addDescription( $text)
Set the description text.
Definition: Maintenance.php:287
LocalRepo\getReplicaDB
getReplicaDB()
Get a connection to the replica DB.
Definition: LocalRepo.php:459
unserialize
unserialize( $serialized)
Definition: ApiMessage.php:185
NS_FILE
const NS_FILE
Definition: Defines.php:68
RUN_MAINTENANCE_IF_MAIN
require_once RUN_MAINTENANCE_IF_MAIN
Definition: maintenance.txt:50
$params
$params
Definition: styleTest.css.php:40
LocalFile\purgeOldThumbnails
purgeOldThumbnails( $archiveName)
Delete cached transformed files for an archived version only.
Definition: LocalFile.php:933
PurgeChangedFiles\__construct
__construct()
Default constructor.
Definition: purgeChangedFiles.php:62
$res
$res
Definition: database.txt:21
Maintenance
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effo...
Definition: maintenance.txt:39
$type
do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values before the output is cached my talk my contributions etc etc otherwise the built in rate limiting checks are if enabled allows for interception of redirect as a string mapping parameter names to values & $type
Definition: hooks.txt:2536
PurgeChangedFiles\verbose
verbose( $msg)
Send an output message iff the 'verbose' option has been provided.
Definition: purgeChangedFiles.php:254
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
PurgeChangedFiles\$typeMappings
static array $typeMappings
Mapping from type option to log type and actions.
Definition: purgeChangedFiles.php:37
Maintenance\addOption
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
Definition: Maintenance.php:215
PurgeChangedFiles\purgeFromArchiveTable
purgeFromArchiveTable(LocalRepo $repo, LocalFile $file)
Definition: purgeChangedFiles.php:212
$wgHTCPRouting
$wgHTCPRouting
Routing configuration for HTCP multicast purging.
Definition: DefaultSettings.php:2803
PurgeChangedFiles\$startTimestamp
string $startTimestamp
Definition: purgeChangedFiles.php:55
Title\makeTitle
static makeTitle( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:514
global
When a variable name is used in a function, it is silently declared as a new local, masking the global.
Definition: design.txt:93
DB_REPLICA
const DB_REPLICA
Definition: defines.php:25
FileRepo\fileExists
fileExists( $file)
Checks existence of a file.
Definition: FileRepo.php:1353
FileRepo\getFileSha1
getFileSha1( $virtualUrl)
Get the sha1 (base 36) of a file with a given virtual URL/storage path.
Definition: FileRepo.php:1586
LocalFile
Class to represent a local file in the wiki's own database.
Definition: LocalFile.php:45
LocalRepo\newFromArchiveName
newFromArchiveName( $title, $archiveName)
Definition: LocalRepo.php:84
File\getName
getName()
Return the name of this file.
Definition: File.php:297
FileRepo\getBackend
getBackend()
Get the file backend instance.
Definition: FileRepo.php:215
PurgeChangedFiles\execute
execute()
Do the actual work.
Definition: purgeChangedFiles.php:76
PurgeChangedFiles\getDeletedPath
getDeletedPath(LocalRepo $repo, LocalFile $file)
Definition: purgeChangedFiles.php:242
PurgeChangedFiles\purgeFromLogType
purgeFromLogType( $type)
Purge cache and thumbnails for changes of the given type.
Definition: purgeChangedFiles.php:138
File\getTitle
getTitle()
Return the associated title object.
Definition: File.php:326
Maintenance\getOption
getOption( $name, $default=null)
Get an option, or return the default.
Definition: Maintenance.php:250
$dbr
if(! $regexes) $dbr
Definition: cleanup.php:94
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
Maintenance\getDB
getDB( $db, $groups=[], $wiki=false)
Returns a database to be used by current maintenance script.
Definition: Maintenance.php:1251
Maintenance\error
error( $err, $die=0)
Throw an error to the user.
Definition: Maintenance.php:392
Maintenance\output
output( $out, $channel=null)
Throw some output to the user.
Definition: Maintenance.php:373
Maintenance\hasOption
hasOption( $name)
Checks to see if a particular param exists.
Definition: Maintenance.php:236
FileRepo\getDeletedHashPath
getDeletedHashPath( $key)
Get a relative path for a deletion archive key, e.g.
Definition: FileRepo.php:1482
PurgeChangedFiles
Maintenance script that scans the deletion log and purges affected files within a timeframe.
Definition: purgeChangedFiles.php:32
LocalRepo
A repository that stores files in the local filesystem and registers them in the wiki's own database.
Definition: LocalRepo.php:35
LocalFile\exists
exists()
canRender inherited
Definition: LocalFile.php:860
array
the array() calling protocol came about after MediaWiki 1.4rc1.
Maintenance\setBatchSize
setBatchSize( $s=0)
Set the batch size.
Definition: Maintenance.php:314