MediaWiki REL1_32
purgeChangedFiles.php
Go to the documentation of this file.
1<?php
24require_once __DIR__ . '/Maintenance.php';
25
/**
 * Mapping from type option to log type and actions.
 *
 * The outer keys are the values accepted by the --type option. Each one maps
 * a log_type value to the list of log_action values that are scanned for it.
 */
37 private static $typeMappings = [
38 'created' => [
39 'upload' => [ 'upload' ],
40 'import' => [ 'upload', 'interwiki' ],
41 ],
42 'deleted' => [
43 'delete' => [ 'delete', 'revision' ],
44 'suppress' => [ 'delete', 'revision' ],
45 ],
46 'modified' => [
47 'upload' => [ 'overwrite', 'revert' ],
48 'move' => [ 'move', 'move_redir' ],
49 ],
50 ];
51
56
61
/**
 * Register the script description, its command line options and the
 * default batch size.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Scan the logging table and purge files and thumbnails.' );

	// The help text for --type lists every registered change type plus "all".
	$typeChoices = implode( ',', array_keys( self::$typeMappings ) );
	$typeHelp = 'Comma-separated list of types of changes to send purges for (' .
		$typeChoices . ',all)';

	// Argument lists for addOption(), registered in this exact order.
	$optionSpecs = [
		[ 'starttime', 'Starting timestamp', true, true ],
		[ 'endtime', 'Ending timestamp', true, true ],
		[ 'type', $typeHelp, false, true ],
		[ 'htcp-dest', 'HTCP announcement destination (IP:port)', false, true ],
		[ 'dry-run', 'Do not send purge requests' ],
		[ 'sleep-per-batch', 'Milliseconds to sleep between batches', false, true ],
		[ 'verbose', 'Show more output', false, false, 'v' ],
	];
	foreach ( $optionSpecs as $optionArgs ) {
		$this->addOption( ...$optionArgs );
	}

	$this->setBatchSize( 100 );
}
75
/**
 * Do the actual work: validate the options, then purge the files touched by
 * each requested type of change within the given time frame.
 *
 * Unknown types and a start time after the end time are reported with
 * error() followed by maybeHelp( true ).
 */
public function execute() {
	global $wgHTCPRouting;

	if ( $this->hasOption( 'htcp-dest' ) ) {
		$parts = explode( ':', $this->getOption( 'htcp-dest' ) );
		if ( count( $parts ) < 2 ) {
			// Add default htcp port
			$parts[] = '4827';
		}

		// Route all HTCP messages to provided host:port
		$wgHTCPRouting = [
			'' => [ 'host' => $parts[0], 'port' => $parts[1] ],
		];
		$this->verbose( "HTCP broadcasts to {$parts[0]}:{$parts[1]}\n" );
	}

	// Find out which actions we should be concerned with
	$typeOpt = $this->getOption( 'type', 'all' );
	$validTypes = array_keys( self::$typeMappings );
	if ( $typeOpt === 'all' ) {
		// Convert 'all' to all registered types
		$typeOpt = implode( ',', $validTypes );
	}
	$typeList = explode( ',', $typeOpt );
	foreach ( $typeList as $type ) {
		// Both sides are strings, so compare strictly.
		if ( !in_array( $type, $validTypes, true ) ) {
			$this->error( "\nERROR: Unknown type: {$type}\n" );
			$this->maybeHelp( true );
		}
	}

	// Validate the timestamps
	$dbr = $this->getDB( DB_REPLICA );
	$this->startTimestamp = $dbr->timestamp( $this->getOption( 'starttime' ) );
	$this->endTimestamp = $dbr->timestamp( $this->getOption( 'endtime' ) );

	if ( $this->startTimestamp > $this->endTimestamp ) {
		$this->error( "\nERROR: starttime after endtime\n" );
		$this->maybeHelp( true );
	}

	// Turn on verbose when dry-run is enabled, since verbose() output is the
	// only thing a dry run produces.
	if ( $this->hasOption( 'dry-run' ) ) {
		$this->mOptions['verbose'] = 1;
	}

	$this->verbose( 'Purging files that were: ' . implode( ', ', $typeList ) . "\n" );
	foreach ( $typeList as $type ) {
		$this->verbose( "Checking for {$type} files...\n" );
		$this->purgeFromLogType( $type );
		if ( !$this->hasOption( 'dry-run' ) ) {
			$this->verbose( "...{$type} files purged.\n\n" );
		}
	}
}
132
/**
 * Purge cache and thumbnails for changes of the given type.
 *
 * Scans the logging table for NS_FILE entries between the start and end
 * timestamps whose log type/action pairs are registered for $type, and
 * purges each affected file together with its old versions. In dry-run mode
 * the matching entries are only listed.
 *
 * @param string $type Type of change to find; a key of self::$typeMappings
 */
protected function purgeFromLogType( $type ) {
	$repo = RepoGroup::singleton()->getLocalRepo();
	$dbr = $this->getDB( DB_REPLICA );

	// These options cannot change while the scan runs, so read them once.
	$dryRun = $this->hasOption( 'dry-run' );
	$throttle = $this->hasOption( 'sleep-per-batch' );

	foreach ( self::$typeMappings[$type] as $logType => $logActions ) {
		$this->verbose( "Scanning for {$logType}/" . implode( ',', $logActions ) . "\n" );

		$res = $dbr->select(
			'logging',
			[ 'log_title', 'log_timestamp', 'log_params' ],
			[
				'log_namespace' => NS_FILE,
				'log_type' => $logType,
				'log_action' => $logActions,
				'log_timestamp >= ' . $dbr->addQuotes( $this->startTimestamp ),
				'log_timestamp <= ' . $dbr->addQuotes( $this->endTimestamp ),
			],
			__METHOD__
		);

		$bSize = 0;
		foreach ( $res as $row ) {
			if ( $dryRun ) {
				// Only report what would be purged; no file object is needed.
				$this->verbose( "{$type}[{$row->log_timestamp}]: {$row->log_title}\n" );
				continue;
			}

			$file = $repo->newFile( Title::makeTitle( NS_FILE, $row->log_title ) );

			// Purge current version and its thumbnails
			$file->purgeCache();
			// Purge the old versions and their thumbnails
			foreach ( $file->getHistory() as $oldFile ) {
				$oldFile->purgeCache();
			}

			if ( $logType === 'delete' ) {
				// If there is an orphaned storage file... delete it
				if ( !$file->exists() && $repo->fileExists( $file->getPath() ) ) {
					$dpath = $this->getDeletedPath( $repo, $file );
					if ( $repo->fileExists( $dpath ) ) {
						// Sanity check to avoid data loss: only remove the public
						// copy when the deletion archive already holds the file.
						$repo->getBackend()->delete( [ 'src' => $file->getPath() ] );
						$this->verbose( "Deleted orphan file: {$file->getPath()}.\n" );
					} else {
						$this->error( "File was not deleted: {$file->getPath()}.\n" );
					}
				}

				// Purge items from filearchive table (rows are likely here)
				$this->purgeFromArchiveTable( $repo, $file );
			} elseif ( $logType === 'move' ) {
				// Purge the target file as well.
				// unserialize() returns false when log_params is not in
				// serialized form, so make sure an array came back.
				$params = unserialize( $row->log_params );
				if ( is_array( $params ) && isset( $params['4::target'] ) ) {
					$target = $params['4::target'];
					$targetFile = $repo->newFile( Title::makeTitle( NS_FILE, $target ) );
					$targetFile->purgeCache();
					$this->verbose( "Purged file {$target}; move target @{$row->log_timestamp}.\n" );
				}
			}

			$this->verbose( "Purged file {$row->log_title}; {$type} @{$row->log_timestamp}.\n" );

			// Pause once a full batch has been processed. Using >= keeps each
			// batch at exactly getBatchSize() rows instead of one more.
			if ( $throttle && ++$bSize >= $this->getBatchSize() ) {
				$bSize = 0;
				// sleep-per-batch is milliseconds, usleep wants micro seconds.
				usleep( 1000 * (int)$this->getOption( 'sleep-per-batch' ) );
			}
		}
	}
}
211
/**
 * Purge the archived (old) versions of a file that are listed in the
 * filearchive table, removing orphaned storage files along the way.
 *
 * @param LocalRepo $repo Repository the file belongs to
 * @param LocalFile $file File whose archived versions should be purged
 */
protected function purgeFromArchiveTable( LocalRepo $repo, LocalFile $file ) {
	$archiveRows = $repo->getReplicaDB()->select(
		'filearchive',
		[ 'fa_archive_name' ],
		[ 'fa_name' => $file->getName() ],
		__METHOD__
	);

	foreach ( $archiveRows as $archiveRow ) {
		$archiveName = $archiveRow->fa_archive_name;
		// Rows without an archive name were not old versions
		// (current version names are checked already).
		if ( $archiveName !== null ) {
			$oldVersion = $repo->newFromArchiveName( $file->getTitle(), $archiveName );

			// An orphan is a storage file that is still present although
			// the file itself no longer exists.
			$isOrphaned = !$file->exists() && $repo->fileExists( $oldVersion->getPath() );
			if ( $isOrphaned ) {
				// Sanity check to avoid data loss: delete only when the
				// deletion archive also holds a copy.
				if ( $repo->fileExists( $this->getDeletedPath( $repo, $oldVersion ) ) ) {
					$repo->getBackend()->delete( [ 'src' => $oldVersion->getPath() ] );
					$this->output( "Deleted orphan file: {$oldVersion->getPath()}.\n" );
				} else {
					$this->error( "File was not deleted: {$oldVersion->getPath()}.\n" );
				}
			}

			$file->purgeOldThumbnails( $archiveName );
		}
	}
}
241
/**
 * Build the deletion archive path for a file.
 *
 * The archive key is the SHA-1 of the stored file, a dot, and the file
 * extension. The result is the repo's hash path for that key followed by
 * the key itself.
 *
 * @param LocalRepo $repo Repository used to hash the file and resolve the path
 * @param LocalFile $file File to locate in the deletion archive
 * @return string
 */
protected function getDeletedPath( LocalRepo $repo, LocalFile $file ) {
	$sha1 = $repo->getFileSha1( $file->getPath() );
	$archiveKey = $sha1 . '.' . $file->getExtension();

	return $repo->getDeletedHashPath( $archiveKey ) . $archiveKey;
}
248
/**
 * Send an output message iff the 'verbose' option has been provided.
 *
 * @param string $msg Message to output
 */
protected function verbose( $msg ) {
	if ( !$this->hasOption( 'verbose' ) ) {
		return;
	}

	$this->output( $msg );
}
259}
260
// Name the maintenance class defined in this file, then include the runner.
// NOTE(review): presumably the runner executes $maintClass only when this
// file is the script entry point - confirm against Maintenance.php.
261$maintClass = PurgeChangedFiles::class;
262require_once RUN_MAINTENANCE_IF_MAIN;
unserialize( $serialized)
$wgHTCPRouting
Routing configuration for HTCP multicast purging.
fileExists( $file)
Checks existence of a file.
getFileSha1( $virtualUrl)
Get the sha1 (base 36) of a file with a given virtual URL/storage path.
getDeletedHashPath( $key)
Get a relative path for a deletion archive key, e.g.
getBackend()
Get the file backend instance.
Definition FileRepo.php:210
getPath()
Return the storage path to the file.
Definition File.php:417
getName()
Return the name of this file.
Definition File.php:297
getTitle()
Return the associated title object.
Definition File.php:326
Class to represent a local file in the wiki's own database.
Definition LocalFile.php:46
exists()
canRender inherited
purgeOldThumbnails( $archiveName)
Delete cached transformed files for an archived version only.
A repository that stores files in the local filesystem and registers them in the wiki's own database.
Definition LocalRepo.php:36
getReplicaDB()
Get a connection to the replica DB.
newFromArchiveName( $title, $archiveName)
Definition LocalRepo.php:85
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
error( $err, $die=0)
Throw an error to the user.
output( $out, $channel=null)
Throw some output to the user.
getDB( $db, $groups=[], $wiki=false)
Returns a database to be used by current maintenance script.
hasOption( $name)
Checks to see if a particular option exists.
getBatchSize()
Returns batch size.
addDescription( $text)
Set the description text.
maybeHelp( $force=false)
Maybe show the help.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
setBatchSize( $s=0)
Set the batch size.
Maintenance script that scans the deletion log and purges affected files within a timeframe.
execute()
Do the actual work.
__construct()
Default constructor.
purgeFromLogType( $type)
Purge cache and thumbnails for changes of the given type.
getDeletedPath(LocalRepo $repo, LocalFile $file)
purgeFromArchiveTable(LocalRepo $repo, LocalFile $file)
static array $typeMappings
Mapping from type option to log type and actions.
verbose( $msg)
Send an output message iff the 'verbose' option has been provided.
static singleton()
Get a RepoGroup instance.
Definition RepoGroup.php:59
$res
Definition database.txt:21
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
const NS_FILE
Definition Defines.php:70
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition injection.txt:37
require_once RUN_MAINTENANCE_IF_MAIN
The wiki should then use memcached to cache various data To use multiple just add more items to the array To increase the weight of a make its entry a array("192.168.0.1:11211", 2))
$tester verbose
const DB_REPLICA
Definition defines.php:25
$params