MediaWiki REL1_32
purgeChangedFiles.php
Go to the documentation of this file.
1<?php
24require_once __DIR__ . '/Maintenance.php';
25
/**
 * Mapping from the value of the 'type' option to the log types, and for each
 * log type the log actions, that are scanned for that kind of change.
 *
 * Shape: [ type option => [ log_type => [ log_action, ... ] ] ]
 *
 * @var array
 */
private static $typeMappings = [
	'created' => [
		'upload' => [ 'upload' ],
		'import' => [ 'upload', 'interwiki' ],
	],
	'deleted' => [
		'delete' => [ 'delete', 'revision' ],
		'suppress' => [ 'delete', 'revision' ],
	],
	'modified' => [
		'upload' => [ 'overwrite', 'revert' ],
		'move' => [ 'move', 'move_redir' ],
	],
];
51
56
61
/**
 * Register the script description, its command-line options and the
 * default batch size of 100.
 */
public function __construct() {
	parent::__construct();

	// The help text for 'type' lists every registered change type plus 'all'
	$knownTypes = implode( ',', array_keys( self::$typeMappings ) );

	$this->addDescription( 'Scan the logging table and purge files and thumbnails.' );

	// Required options (both take an argument)
	$this->addOption( 'starttime', 'Starting timestamp', true, true );
	$this->addOption( 'endtime', 'Ending timestamp', true, true );

	// Optional options that take an argument
	$this->addOption(
		'type',
		'Comma-separated list of types of changes to send purges for (' . $knownTypes . ',all)',
		false,
		true
	);
	$this->addOption( 'htcp-dest', 'HTCP announcement destination (IP:port)', false, true );

	// Optional flags
	$this->addOption( 'dry-run', 'Do not send purge requests' );
	$this->addOption( 'sleep-per-batch', 'Milliseconds to sleep between batches', false, true );
	$this->addOption( 'verbose', 'Show more output', false, false, 'v' );

	$this->setBatchSize( 100 );
}
75
/**
 * Validate the command-line options and purge files for every requested change type.
 *
 * Steps, in order:
 *  - if 'htcp-dest' is given, route all HTCP purges to that host:port
 *    (port 4827 is appended when none is supplied);
 *  - resolve 'type' (default 'all') into a list of keys of self::$typeMappings;
 *  - pass 'starttime' and 'endtime' through the replica DB's timestamp()
 *    and check that the start is not after the end;
 *  - call purgeFromLogType() once per requested type.
 *
 * An unknown type, or a start time after the end time, is reported through
 * error() followed by maybeHelp( true ).
 */
public function execute() {
	global $wgHTCPRouting;

	if ( $this->hasOption( 'htcp-dest' ) ) {
		$parts = explode( ':', $this->getOption( 'htcp-dest' ) );
		if ( count( $parts ) < 2 ) {
			// Add default htcp port
			$parts[] = '4827';
		}

		// Route all HTCP messages to provided host:port.
		// The empty-string key is the only routing rule installed here.
		$wgHTCPRouting = [
			'' => [ 'host' => $parts[0], 'port' => $parts[1] ],
		];
		$this->verbose( "HTCP broadcasts to {$parts[0]}:{$parts[1]}\n" );
	}

	// Find out which actions we should be concerned with
	$typeOpt = $this->getOption( 'type', 'all' );
	$validTypes = array_keys( self::$typeMappings );
	if ( $typeOpt === 'all' ) {
		// Convert 'all' to all registered types
		$typeOpt = implode( ',', $validTypes );
	}
	$typeList = explode( ',', $typeOpt );
	foreach ( $typeList as $type ) {
		// Strict comparison: both sides are strings, so avoid loose type juggling
		if ( !in_array( $type, $validTypes, true ) ) {
			$this->error( "\nERROR: Unknown type: {$type}\n" );
			$this->maybeHelp( true );
		}
	}

	// Validate the timestamps
	$dbr = $this->getDB( DB_REPLICA );
	$this->startTimestamp = $dbr->timestamp( $this->getOption( 'starttime' ) );
	$this->endTimestamp = $dbr->timestamp( $this->getOption( 'endtime' ) );

	if ( $this->startTimestamp > $this->endTimestamp ) {
		$this->error( "\nERROR: starttime after endtime\n" );
		$this->maybeHelp( true );
	}

	// Turn on verbose when dry-run is enabled, so the dry run actually shows
	// which files would have been purged
	if ( $this->hasOption( 'dry-run' ) ) {
		$this->mOptions['verbose'] = 1;
	}

	$this->verbose( 'Purging files that were: ' . implode( ', ', $typeList ) . "\n" );
	foreach ( $typeList as $type ) {
		$this->verbose( "Checking for {$type} files...\n" );
		$this->purgeFromLogType( $type );
		if ( !$this->hasOption( 'dry-run' ) ) {
			$this->verbose( "...{$type} files purged.\n\n" );
		}
	}
}
132
/**
 * Purge cache and thumbnails for changes of the given type.
 *
 * For every log type / log action pair registered for $type in
 * self::$typeMappings, selects the File-namespace rows of the logging table
 * whose timestamp lies between the start and end timestamps (inclusive) and,
 * for each logged file:
 *  - purges the current version and every version returned by getHistory();
 *  - for 'delete' log rows, removes an orphaned storage file (only when a copy
 *    is found at the deleted path) and purges the archived versions;
 *  - for 'move' log rows, also purges the move target.
 *
 * With the 'dry-run' option the matching rows are only listed.
 *
 * @param string $type Type of change to find; must be a key of self::$typeMappings
 */
protected function purgeFromLogType( $type ) {
	$repo = RepoGroup::singleton()->getLocalRepo();
	$dbr = $this->getDB( DB_REPLICA );
	$dryRun = $this->hasOption( 'dry-run' );

	foreach ( self::$typeMappings[$type] as $logType => $logActions ) {
		$this->verbose( "Scanning for {$logType}/" . implode( ',', $logActions ) . "\n" );

		$res = $dbr->select(
			'logging',
			[ 'log_title', 'log_timestamp', 'log_params' ],
			[
				'log_namespace' => NS_FILE,
				'log_type' => $logType,
				'log_action' => $logActions,
				'log_timestamp >= ' . $dbr->addQuotes( $this->startTimestamp ),
				'log_timestamp <= ' . $dbr->addQuotes( $this->endTimestamp ),
			],
			__METHOD__
		);

		$bSize = 0;
		foreach ( $res as $row ) {
			if ( $dryRun ) {
				// Only report the row; no file object is needed for that
				$this->verbose( "{$type}[{$row->log_timestamp}]: {$row->log_title}\n" );
				continue;
			}

			$file = $repo->newFile( Title::makeTitle( NS_FILE, $row->log_title ) );

			// Purge current version and its thumbnails
			$file->purgeCache();
			// Purge the old versions and their thumbnails
			foreach ( $file->getHistory() as $oldFile ) {
				$oldFile->purgeCache();
			}

			if ( $logType === 'delete' ) {
				// If there is an orphaned storage file... delete it
				if ( !$file->exists() && $repo->fileExists( $file->getPath() ) ) {
					$dpath = $this->getDeletedPath( $repo, $file );
					if ( $repo->fileExists( $dpath ) ) {
						// Sanity check to avoid data loss: the storage file is only
						// removed once a copy is found at the deleted path
						$repo->getBackend()->delete( [ 'src' => $file->getPath() ] );
						$this->verbose( "Deleted orphan file: {$file->getPath()}.\n" );
					} else {
						$this->error( "File was not deleted: {$file->getPath()}.\n" );
					}
				}

				// Purge items from filearchive table (rows are likely here)
				$this->purgeFromArchiveTable( $repo, $file );
			} elseif ( $logType === 'move' ) {
				// Purge the target file as well

				// NOTE(review): unserialize() on a database column; only the
				// '4::target' entry is read. The result is checked to be an array
				// because unserialize() returns false for unparsable input and may
				// return a non-array value.
				$params = unserialize( $row->log_params );
				if ( is_array( $params ) && isset( $params['4::target'] ) ) {
					$target = $params['4::target'];
					// NOTE(review): this treats $target as an unprefixed title key;
					// confirm that move log entries do not store the target with
					// its namespace prefix.
					$targetFile = $repo->newFile( Title::makeTitle( NS_FILE, $target ) );
					$targetFile->purgeCache();
					$this->verbose( "Purged file {$target}; move target @{$row->log_timestamp}.\n" );
				}
			}

			$this->verbose( "Purged file {$row->log_title}; {$type} @{$row->log_timestamp}.\n" );

			// Pause once a full batch has been processed. '>=' makes the batch
			// exactly getBatchSize() rows long ('>' would make it one row longer).
			if ( $this->hasOption( 'sleep-per-batch' ) && ++$bSize >= $this->getBatchSize() ) {
				$bSize = 0;
				// sleep-per-batch is milliseconds, usleep wants micro seconds.
				usleep( 1000 * (int)$this->getOption( 'sleep-per-batch' ) );
			}
		}
	}
}
211
/**
 * Purge the archived (old) versions recorded in the filearchive table for a file.
 *
 * Every filearchive row with a non-null fa_archive_name is treated as an old
 * version: a leftover storage file for it is deleted (only when $file no
 * longer exists and a copy is found at the deleted path), and the thumbnails
 * for that archive name are purged.
 *
 * @param LocalRepo $repo Repository the file belongs to
 * @param LocalFile $file File whose archived versions should be purged
 */
protected function purgeFromArchiveTable( LocalRepo $repo, LocalFile $file ) {
	$archiveRows = $repo->getReplicaDB()->select(
		'filearchive',
		[ 'fa_archive_name' ],
		[ 'fa_name' => $file->getName() ],
		__METHOD__
	);

	foreach ( $archiveRows as $archiveRow ) {
		$archiveName = $archiveRow->fa_archive_name;

		// A null archive name is not an old version
		// (current version names checked already)
		if ( $archiveName !== null ) {
			$oldVersion = $repo->newFromArchiveName( $file->getTitle(), $archiveName );

			// If there is an orphaned storage file still there...delete it
			if ( !$file->exists() ) {
				if ( $repo->fileExists( $oldVersion->getPath() ) ) {
					$deletedPath = $this->getDeletedPath( $repo, $oldVersion );
					if ( !$repo->fileExists( $deletedPath ) ) {
						$this->error( "File was not deleted: {$oldVersion->getPath()}.\n" );
					} else {
						// Sanity check passed: safe to drop the orphan
						$repo->getBackend()->delete( [ 'src' => $oldVersion->getPath() ] );
						$this->output( "Deleted orphan file: {$oldVersion->getPath()}.\n" );
					}
				}
			}

			$file->purgeOldThumbnails( $archiveName );
		}
	}
}
241
/**
 * Build the deletion-archive path for a file from its content hash.
 *
 * The key is "<sha1>.<extension>", where the sha1 is what the repo reports
 * for the file's current path; the result is the repo's deleted hash path
 * for that key followed by the key itself.
 *
 * NOTE(review): getDeletedHashPath() is described as returning a relative
 * path, so the value returned here looks relative too — confirm that the
 * callers' fileExists() checks accept that form.
 *
 * @param LocalRepo $repo Repository used for hashing and path lookup
 * @param LocalFile $file File to compute the deleted path for
 * @return string
 */
protected function getDeletedPath( LocalRepo $repo, LocalFile $file ) {
	$storageKey = $repo->getFileSha1( $file->getPath() ) . '.' . $file->getExtension();

	return $repo->getDeletedHashPath( $storageKey ) . $storageKey;
}
248
/**
 * Send an output message iff the 'verbose' option has been provided.
 *
 * @param string $msg Message to output
 */
protected function verbose( $msg ) {
	if ( !$this->hasOption( 'verbose' ) ) {
		return;
	}

	$this->output( $msg );
}
259}
260
// Name the maintenance class defined in this file, then include the runner.
// NOTE(review): presumably the runner executes $maintClass when this script is
// the entry point — confirm against Maintenance.php.
$maintClass = PurgeChangedFiles::class;
require_once RUN_MAINTENANCE_IF_MAIN;
unserialize( $serialized)
$wgHTCPRouting
Routing configuration for HTCP multicast purging.
fileExists( $file)
Checks existence of a file.
getFileSha1( $virtualUrl)
Get the sha1 (base 36) of a file with a given virtual URL/storage path.
getDeletedHashPath( $key)
Get a relative path for a deletion archive key, e.g. s/z/a/ for sza251lrxrc1jad41h5mgilp8nysje52.jpg.
getBackend()
Get the file backend instance.
Definition FileRepo.php:210
Class to represent a local file in the wiki's own database.
Definition LocalFile.php:46
purgeOldThumbnails( $archiveName)
Delete cached transformed files for an archived version only.
A repository that stores files in the local filesystem and registers them in the wiki's own database.
Definition LocalRepo.php:36
getReplicaDB()
Get a connection to the replica DB.
newFromArchiveName( $title, $archiveName)
Definition LocalRepo.php:85
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
output( $out, $channel=null)
Throw some output to the user.
getDB( $db, $groups=[], $wiki=false)
Returns a database to be used by current maintenance script.
hasOption( $name)
Checks to see if a particular option exists.
getBatchSize()
Returns batch size.
addDescription( $text)
Set the description text.
maybeHelp( $force=false)
Maybe show the help.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
setBatchSize( $s=0)
Set the batch size.
Maintenance script that scans the deletion log and purges affected files within a timeframe.
execute()
Do the actual work.
__construct()
Default constructor.
purgeFromLogType( $type)
Purge cache and thumbnails for changes of the given type.
getDeletedPath(LocalRepo $repo, LocalFile $file)
purgeFromArchiveTable(LocalRepo $repo, LocalFile $file)
static array $typeMappings
Mapping from type option to log type and actions.
verbose( $msg)
Send an output message iff the 'verbose' option has been provided.
static singleton()
Get a RepoGroup instance.
Definition RepoGroup.php:59
$res
Definition database.txt:21
do that in ParserLimitReportFormat instead use this to modify the parameters of the image all existing parser cache entries will be invalid To avoid you ll need to handle that somehow(e.g. with the RejectParserCacheValue hook) because MediaWiki won 't do it for you. & $defaults error
Definition hooks.txt:2683
const NS_FILE
Definition Defines.php:70
require_once RUN_MAINTENANCE_IF_MAIN
The wiki should then use memcached to cache various data To use multiple just add more items to the array To increase the weight of a make its entry a array("192.168.0.1:11211", 2))
$tester verbose
const DB_REPLICA
Definition defines.php:25
$params