MediaWiki  REL1_33
purgeChangedFiles.php
Go to the documentation of this file.
1 <?php
24 require_once __DIR__ . '/Maintenance.php';
25 
/**
 * Mapping from type option to log type and actions.
 *
 * Outer keys are the values accepted by --type. Each one maps a log_type
 * to the list of log_action values that are queried for it.
 *
 * @var array
 */
private static $typeMappings = [
	'created' => [
		'upload' => [ 'upload' ],
		'import' => [ 'upload', 'interwiki' ],
	],
	'deleted' => [
		'delete' => [ 'delete', 'revision' ],
		'suppress' => [ 'delete', 'revision' ],
	],
	'modified' => [
		'upload' => [ 'overwrite', 'revert' ],
		'move' => [ 'move', 'move_redir' ],
	],
];

/**
 * @var string Start of the scanned time range, as formatted by the
 *  replica DB's timestamp() in execute()
 */
private $startTimestamp;

/**
 * @var string End of the scanned time range, as formatted by the
 *  replica DB's timestamp() in execute()
 */
private $endTimestamp;
61 
/**
 * Default constructor.
 *
 * Registers the script description, the command-line options and a
 * batch size of 100.
 */
public function __construct() {
	parent::__construct();

	// Accepted --type values: every key of the mapping, plus 'all'.
	$typeChoices = implode( ',', array_keys( self::$typeMappings ) ) . ',all';

	$this->addDescription( 'Scan the logging table and purge files and thumbnails.' );

	// Required options, each taking an argument.
	$this->addOption( 'starttime', 'Starting timestamp', true, true );
	$this->addOption( 'endtime', 'Ending timestamp', true, true );

	// Optional options.
	$this->addOption(
		'type',
		"Comma-separated list of types of changes to send purges for ({$typeChoices})",
		false,
		true
	);
	$this->addOption( 'htcp-dest', 'HTCP announcement destination (IP:port)', false, true );
	$this->addOption( 'dry-run', 'Do not send purge requests' );
	$this->addOption( 'sleep-per-batch', 'Milliseconds to sleep between batches', false, true );
	$this->addOption( 'verbose', 'Show more output', false, false, 'v' );

	$this->setBatchSize( 100 );
}
75 
/**
 * Do the actual work.
 *
 * Optionally reroutes HTCP purges to --htcp-dest, resolves the list of
 * change types from --type, converts --starttime/--endtime through the
 * replica DB's timestamp() and then purges each requested type in turn.
 *
 * On an unknown type or an inverted time range an error is printed and
 * the help is shown via maybeHelp( true ).
 */
public function execute() {
	global $wgHTCPRouting;

	// A dry run only makes sense if it reports what it would do, so force
	// verbose output before the first verbose() call below.
	$isDryRun = $this->hasOption( 'dry-run' );
	if ( $isDryRun ) {
		$this->mOptions['verbose'] = 1;
	}

	if ( $this->hasOption( 'htcp-dest' ) ) {
		$parts = explode( ':', $this->getOption( 'htcp-dest' ), 2 );
		if ( count( $parts ) < 2 ) {
			// Add default htcp port
			$parts[] = '4827';
		}

		// Route all HTCP messages to provided host:port
		$wgHTCPRouting = [
			'' => [ 'host' => $parts[0], 'port' => $parts[1] ],
		];
		$this->verbose( "HTCP broadcasts to {$parts[0]}:{$parts[1]}\n" );
	}

	// Find out which actions we should be concerned with. 'all' may appear
	// anywhere in the comma-separated list; whitespace around items is ignored.
	$validTypes = array_keys( self::$typeMappings );
	$wantsAll = false;
	$typeList = [];
	foreach ( explode( ',', $this->getOption( 'type', 'all' ) ) as $rawType ) {
		$type = trim( $rawType );
		if ( $type === 'all' ) {
			$wantsAll = true;
			continue;
		}
		if ( !in_array( $type, $validTypes, true ) ) {
			$this->error( "\nERROR: Unknown type: {$type}\n" );
			$this->maybeHelp( true );
		}
		$typeList[] = $type;
	}
	// Convert 'all' to all registered types; otherwise drop duplicates so
	// that no type is scanned twice.
	$typeList = $wantsAll ? $validTypes : array_values( array_unique( $typeList ) );

	// Validate the timestamps
	$dbr = $this->getDB( DB_REPLICA );
	$this->startTimestamp = $dbr->timestamp( $this->getOption( 'starttime' ) );
	$this->endTimestamp = $dbr->timestamp( $this->getOption( 'endtime' ) );

	if ( $this->startTimestamp > $this->endTimestamp ) {
		$this->error( "\nERROR: starttime after endtime\n" );
		$this->maybeHelp( true );
	}

	$this->verbose( 'Purging files that were: ' . implode( ', ', $typeList ) . "\n" );
	foreach ( $typeList as $type ) {
		$this->verbose( "Checking for {$type} files...\n" );
		$this->purgeFromLogType( $type );
		if ( !$isDryRun ) {
			$this->verbose( "...{$type} files purged.\n\n" );
		}
	}
}
132 
/**
 * Purge cache and thumbnails for changes of the given type.
 *
 * For every log type/action pair mapped to $type, selects the matching
 * File-namespace rows of the logging table inside the configured time
 * range and purges the current file and its old versions. For 'delete'
 * log entries it additionally removes orphaned storage files and purges
 * the archived versions; for 'move' log entries it also purges the move
 * target. In dry-run mode the rows are only listed.
 *
 * @param string $type Type of change to find; a key of self::$typeMappings
 */
protected function purgeFromLogType( $type ) {
	$repo = RepoGroup::singleton()->getLocalRepo();
	$dbr = $this->getDB( DB_REPLICA );
	$isDryRun = $this->hasOption( 'dry-run' );

	// sleep-per-batch is milliseconds, usleep wants micro seconds.
	// null means "never sleep"; negative values are clamped to zero.
	$sleepMicros = $this->hasOption( 'sleep-per-batch' )
		? max( 0, 1000 * (int)$this->getOption( 'sleep-per-batch' ) )
		: null;

	foreach ( self::$typeMappings[$type] as $logType => $logActions ) {
		$this->verbose( "Scanning for {$logType}/" . implode( ',', $logActions ) . "\n" );

		$res = $dbr->select(
			'logging',
			[ 'log_title', 'log_timestamp', 'log_params' ],
			[
				'log_namespace' => NS_FILE,
				'log_type' => $logType,
				'log_action' => $logActions,
				'log_timestamp >= ' . $dbr->addQuotes( $this->startTimestamp ),
				'log_timestamp <= ' . $dbr->addQuotes( $this->endTimestamp ),
			],
			__METHOD__
		);

		$bSize = 0;
		foreach ( $res as $row ) {
			if ( $isDryRun ) {
				$this->verbose( "{$type}[{$row->log_timestamp}]: {$row->log_title}\n" );
				continue;
			}

			$file = $repo->newFile( Title::makeTitle( NS_FILE, $row->log_title ) );

			// Purge current version and its thumbnails
			$file->purgeCache();
			// Purge the old versions and their thumbnails
			foreach ( $file->getHistory() as $oldFile ) {
				$oldFile->purgeCache();
			}

			if ( $logType === 'delete' ) {
				// If there is an orphaned storage file... delete it
				if ( !$file->exists() && $repo->fileExists( $file->getPath() ) ) {
					$dpath = $this->getDeletedPath( $repo, $file );
					if ( $repo->fileExists( $dpath ) ) {
						// Sanity check to avoid data loss: only remove the public
						// copy once the copy in the deletion archive was found.
						$status = $repo->getBackend()->delete( [ 'src' => $file->getPath() ] );
						if ( $status->isOK() ) {
							$this->verbose( "Deleted orphan file: {$file->getPath()}.\n" );
						} else {
							// Do not claim success when the backend refused the delete
							$this->error( "Failed to delete orphan file: {$file->getPath()}.\n" );
						}
					} else {
						$this->error( "File was not deleted: {$file->getPath()}.\n" );
					}
				}

				// Purge items from filearchive table (rows are likely here)
				$this->purgeFromArchiveTable( $repo, $file );
			} elseif ( $logType === 'move' ) {
				// Purge the target file as well
				$params = unserialize( $row->log_params );
				if ( is_array( $params )
					&& isset( $params['4::target'] )
					&& is_string( $params['4::target'] )
				) {
					$target = $params['4::target'];
					// The move log stores the target as prefixed title text
					// ("File:Name.ext"), so it has to be parsed rather than being
					// treated as a DB key inside NS_FILE. NS_FILE is the fallback
					// namespace for unprefixed values.
					$targetTitle = Title::newFromText( $target, NS_FILE );
					if ( $targetTitle && $targetTitle->inNamespace( NS_FILE ) ) {
						$targetFile = $repo->newFile( $targetTitle );
						$targetFile->purgeCache();
						$this->verbose( "Purged file {$target}; move target @{$row->log_timestamp}.\n" );
					}
				}
			}

			$this->verbose( "Purged file {$row->log_title}; {$type} @{$row->log_timestamp}.\n" );

			// Pause after every full batch (not after batch size + 1 rows)
			if ( $sleepMicros !== null && ++$bSize >= $this->getBatchSize() ) {
				$bSize = 0;
				usleep( $sleepMicros );
			}
		}
	}
}
211 
/**
 * Purge the archived (old) versions of a file listed in the filearchive table.
 *
 * For every filearchive row of the file that has an archive name, deletes
 * the old version's storage file if it is orphaned and then purges the
 * thumbnails of that archived version.
 *
 * @param LocalRepo $repo Repository the file belongs to
 * @param LocalFile $file File whose archived versions are processed
 */
protected function purgeFromArchiveTable( LocalRepo $repo, LocalFile $file ) {
	$dbr = $repo->getReplicaDB();
	$res = $dbr->select(
		'filearchive',
		[ 'fa_archive_name' ],
		[ 'fa_name' => $file->getName() ],
		__METHOD__
	);

	foreach ( $res as $row ) {
		if ( $row->fa_archive_name === null ) {
			// Was not an old version (current version names checked already)
			continue;
		}
		$ofile = $repo->newFromArchiveName( $file->getTitle(), $row->fa_archive_name );
		// If there is an orphaned storage file still there...delete it
		if ( !$file->exists() && $repo->fileExists( $ofile->getPath() ) ) {
			$dpath = $this->getDeletedPath( $repo, $ofile );
			if ( $repo->fileExists( $dpath ) ) {
				// Sanity check to avoid data loss: only remove the old version
				// once its copy in the deletion archive was found.
				$status = $repo->getBackend()->delete( [ 'src' => $ofile->getPath() ] );
				if ( $status->isOK() ) {
					$this->output( "Deleted orphan file: {$ofile->getPath()}.\n" );
				} else {
					// Do not claim success when the backend refused the delete
					$this->error( "Failed to delete orphan file: {$ofile->getPath()}.\n" );
				}
			} else {
				$this->error( "File was not deleted: {$ofile->getPath()}.\n" );
			}
		}
		$file->purgeOldThumbnails( $row->fa_archive_name );
	}
}
241 
/**
 * Build the deletion-archive path of a file from its content hash.
 *
 * The archive key is "<sha1>.<extension>", where the sha1 is read from the
 * file's current storage path. The key is appended to the hash path the
 * repository derives from it.
 *
 * NOTE(review): getDeletedHashPath() is documented as returning a relative
 * path and no zone root is prepended here — confirm that the callers'
 * fileExists() checks accept such a path.
 *
 * @param LocalRepo $repo
 * @param LocalFile $file
 * @return string
 */
protected function getDeletedPath( LocalRepo $repo, LocalFile $file ) {
	$sha1 = $repo->getFileSha1( $file->getPath() );
	$archiveKey = $sha1 . '.' . $file->getExtension();
	$hashDir = $repo->getDeletedHashPath( $archiveKey );

	return $hashDir . $archiveKey;
}
248 
/**
 * Send an output message iff the 'verbose' option has been provided.
 *
 * @param string $msg Message to output
 */
protected function verbose( $msg ) {
	if ( !$this->hasOption( 'verbose' ) ) {
		// Quiet mode: drop the message
		return;
	}

	$this->output( $msg );
}
259 }
260 
262 require_once RUN_MAINTENANCE_IF_MAIN;
RepoGroup\singleton
static singleton()
Get a RepoGroup instance.
Definition: RepoGroup.php:61
$file
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Definition: router.php:42
PurgeChangedFiles\$endTimestamp
string $endTimestamp
Definition: purgeChangedFiles.php:60
Maintenance\maybeHelp
maybeHelp( $force=false)
Maybe show the help.
Definition: Maintenance.php:1043
$maintClass
$maintClass
Definition: purgeChangedFiles.php:261
Maintenance\addDescription
addDescription( $text)
Set the description text.
Definition: Maintenance.php:329
LocalRepo\getReplicaDB
getReplicaDB()
Get a connection to the replica DB.
Definition: LocalRepo.php:468
NS_FILE
const NS_FILE
Definition: Defines.php:79
RUN_MAINTENANCE_IF_MAIN
require_once RUN_MAINTENANCE_IF_MAIN
Definition: maintenance.txt:50
$params
$params
Definition: styleTest.css.php:44
LocalFile\purgeOldThumbnails
purgeOldThumbnails( $archiveName)
Delete cached transformed files for an archived version only.
Definition: LocalFile.php:1050
PurgeChangedFiles\__construct
__construct()
Default constructor.
Definition: purgeChangedFiles.php:62
$res
$res
Definition: database.txt:21
Maintenance
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effo...
Definition: maintenance.txt:39
PurgeChangedFiles\verbose
verbose( $msg)
Send an output message iff the 'verbose' option has been provided.
Definition: purgeChangedFiles.php:254
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:37
$dbr
$dbr
Definition: testCompression.php:50
PurgeChangedFiles\$typeMappings
static array $typeMappings
Mapping from type option to log type and actions.
Definition: purgeChangedFiles.php:37
Maintenance\addOption
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
Definition: Maintenance.php:248
PurgeChangedFiles\purgeFromArchiveTable
purgeFromArchiveTable(LocalRepo $repo, LocalFile $file)
Definition: purgeChangedFiles.php:212
$wgHTCPRouting
$wgHTCPRouting
Routing configuration for HTCP multicast purging.
Definition: DefaultSettings.php:2878
PurgeChangedFiles\$startTimestamp
string $startTimestamp
Definition: purgeChangedFiles.php:55
Title\makeTitle
static makeTitle( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:576
DB_REPLICA
const DB_REPLICA
Definition: defines.php:25
array
The wiki should then use memcached to cache various data To use multiple just add more items to the array To increase the weight of a make its entry a array("192.168.0.1:11211", 2))
FileRepo\fileExists
fileExists( $file)
Checks existence of a file.
Definition: FileRepo.php:1365
FileRepo\getFileSha1
getFileSha1( $virtualUrl)
Get the sha1 (base 36) of a file with a given virtual URL/storage path.
Definition: FileRepo.php:1598
LocalFile
Class to represent a local file in the wiki's own database.
Definition: LocalFile.php:46
LocalRepo\newFromArchiveName
newFromArchiveName( $title, $archiveName)
Definition: LocalRepo.php:85
FileRepo\getBackend
getBackend()
Get the file backend instance.
Definition: FileRepo.php:215
PurgeChangedFiles\execute
execute()
Do the actual work.
Definition: purgeChangedFiles.php:76
PurgeChangedFiles\getDeletedPath
getDeletedPath(LocalRepo $repo, LocalFile $file)
Definition: purgeChangedFiles.php:242
PurgeChangedFiles\purgeFromLogType
purgeFromLogType( $type)
Purge cache and thumbnails for changes of the given type.
Definition: purgeChangedFiles.php:138
unserialize
unserialize( $serialized)
Definition: ApiMessageTrait.php:142
Maintenance\getOption
getOption( $name, $default=null)
Get an option, or return the default.
Definition: Maintenance.php:283
Maintenance\getBatchSize
getBatchSize()
Returns batch size.
Definition: Maintenance.php:367
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:22
Maintenance\getDB
getDB( $db, $groups=[], $wiki=false)
Returns a database to be used by current maintenance script.
Definition: Maintenance.php:1373
Maintenance\error
error( $err, $die=0)
Throw an error to the user.
Definition: Maintenance.php:462
Maintenance\output
output( $out, $channel=null)
Throw some output to the user.
Definition: Maintenance.php:434
class
you have access to all of the normal MediaWiki so you can get a DB use the etc For full docs on the Maintenance class
Definition: maintenance.txt:56
Maintenance\hasOption
hasOption( $name)
Checks to see if a particular option exists.
Definition: Maintenance.php:269
FileRepo\getDeletedHashPath
getDeletedHashPath( $key)
Get a relative path for a deletion archive key, e.g.
Definition: FileRepo.php:1494
PurgeChangedFiles
Maintenance script that scans the deletion log and purges affected files within a timeframe.
Definition: purgeChangedFiles.php:32
LocalRepo
A repository that stores files in the local filesystem and registers them in the wiki's own database.
Definition: LocalRepo.php:36
Maintenance\setBatchSize
setBatchSize( $s=0)
Set the batch size.
Definition: Maintenance.php:375
$type
$type
Definition: testCompression.php:48