MediaWiki master
cleanupUploadStash.php
Go to the documentation of this file.
1<?php
19use Wikimedia\Timestamp\TimestampFormat as TS;
20
21// @codeCoverageIgnoreStart
22require_once __DIR__ . '/Maintenance.php';
23// @codeCoverageIgnoreEnd
24
32
/**
 * Set up the script: register its description and set the batch size.
 */
public function __construct() {
	parent::__construct();

	$description = 'Clean up abandoned files in temporary uploaded file stash';
	$this->addDescription( $description );

	// Number of file delete operations sent to the backend per batch.
	$this->setBatchSize( 50 );
}
38
/**
 * Do the actual work.
 *
 * Runs three passes, all using the same age cutoff derived from
 * MainConfigNames::UploadStashMaxAge:
 *  1. remove stash entries registered in the 'uploadstash' table,
 *  2. delete old files from the temp repo's 'thumb' zone,
 *  3. delete old files from the temp repo's 'public' zone (stash files
 *     that are not registered in the DB).
 */
public function execute() {
	$repo = $this->getServiceContainer()->getRepoGroup()->getLocalRepo();
	$tempRepo = $repo->getTempRepo();

	$dbr = $repo->getReplicaDB();

	// how far back should this look for files to delete?
	$cutoff = time() - (int)$this->getConfig()->get( MainConfigNames::UploadStashMaxAge );

	$this->output( "Getting list of files to clean up...\n" );
	$keys = $dbr->newSelectQueryBuilder()
		->select( 'us_key' )
		->from( 'uploadstash' )
		->where( $dbr->expr( 'us_timestamp', '<', $dbr->timestamp( $cutoff ) ) )
		->caller( __METHOD__ )
		->fetchFieldValues();

	// Delete all registered stash files...
	if ( !$keys ) {
		$this->output( "No stashed files to cleanup according to the DB.\n" );
	} else {
		$this->output( 'Removing ' . count( $keys ) . " file(s)...\n" );
		// this could be done some other, more direct/efficient way, but using
		// UploadStash's own methods means it's less likely to fall accidentally
		// out-of-date someday
		$stash = new UploadStash( $repo );

		$i = 0;
		foreach ( $keys as $key ) {
			$i++;
			try {
				$stash->getFile( $key, true );
				$stash->removeFileNoAuth( $key );
			} catch ( UploadStashException $ex ) {
				// Best effort: report the failure and carry on with the next key.
				$type = get_class( $ex );
				$this->output( "Failed removing stashed upload with key: $key ($type)\n" );
			}
			// Every 100 keys, let replica DB servers catch up and report progress.
			if ( $i % 100 == 0 ) {
				$this->waitForReplication();
				$this->output( "$i\n" );
			}
		}
		$this->output( "$i done\n" );
	}

	// Delete all the corresponding thumbnails...
	$dir = $tempRepo->getZonePath( 'thumb' );
	$iterator = $this->listStashZoneFiles( $tempRepo, $dir );
	$this->output( "Deleting old thumbnails...\n" );
	$this->deleteStashFilesOlderThan( $tempRepo, $dir, $iterator, $cutoff );

	// Apparently lots of stash files are not registered in the DB...
	$dir = $tempRepo->getZonePath( 'public' );
	$iterator = $this->listStashZoneFiles( $tempRepo, $dir );
	$this->output( "Deleting orphaned temp files...\n" );
	if ( !str_contains( $dir, '/local-temp' ) ) {
		$this->output( "Temp repo might be misconfigured. It points to directory: '$dir' \n" );
	}
	$this->deleteStashFilesOlderThan( $tempRepo, $dir, $iterator, $cutoff );
}

/**
 * Get the file listing of a temp repo directory, aborting the script
 * with a fatal error if the backend returns no listing.
 *
 * @param FileRepo $tempRepo Temp repo whose backend is queried
 * @param string $dir Storage path of the directory to list
 * @return iterable File names, as returned by the backend's getFileList()
 */
private function listStashZoneFiles( FileRepo $tempRepo, string $dir ): iterable {
	$iterator = $tempRepo->getBackend()->getFileList( [ 'dir' => $dir, 'adviseStat' => 1 ] );
	if ( $iterator === null ) {
		$this->fatalError( "Could not get file listing." );
	}

	return $iterator;
}

/**
 * Delete every listed file whose timestamp is older than the cutoff.
 *
 * Deletions are sent to the backend in batches of getBatchSize()
 * operations; a running count is printed after each full batch and a
 * final "<count> done" line is printed at the end.
 *
 * @param FileRepo $tempRepo Temp repo that owns the files
 * @param string $dir Storage path of the directory the files live in
 * @param iterable $files File names relative to $dir
 * @param int $cutoff UNIX timestamp; files older than this are deleted
 */
private function deleteStashFilesOlderThan(
	FileRepo $tempRepo,
	string $dir,
	iterable $files,
	int $cutoff
): void {
	$i = 0;
	$batch = [];
	foreach ( $files as $file ) {
		if ( wfTimestamp( TS::UNIX, $tempRepo->getFileTimestamp( "$dir/$file" ) ) < $cutoff ) {
			$batch[] = [ 'op' => 'delete', 'src' => "$dir/$file" ];
			if ( count( $batch ) >= $this->getBatchSize() ) {
				$this->doOperations( $tempRepo, $batch );
				$i += count( $batch );
				$batch = [];
				$this->output( "$i\n" );
			}
		}
	}
	// Flush the final, partially filled batch.
	if ( count( $batch ) ) {
		$this->doOperations( $tempRepo, $batch );
		$i += count( $batch );
	}
	$this->output( "$i done\n" );
}
140
/**
 * Run a batch of file operations on the temp repo's backend and report
 * the resulting status through error() when it is not OK.
 *
 * @param FileRepo $tempRepo Repo whose backend performs the operations
 * @param array $ops Operations to pass to doQuickOperations()
 */
protected function doOperations( FileRepo $tempRepo, array $ops ) {
	$backend = $tempRepo->getBackend();
	$result = $backend->doQuickOperations( $ops );

	if ( $result->isOK() ) {
		return;
	}

	$this->error( $result );
}
147}
148
// @codeCoverageIgnoreStart
// Name this script's class, then include the file named by RUN_MAINTENANCE_IF_MAIN
// (presumably the runner that executes it when invoked directly — confirm).
$maintClass = CleanupUploadStash::class;
require_once RUN_MAINTENANCE_IF_MAIN;
// @codeCoverageIgnoreEnd
wfTimestamp( $outputtype=TS::UNIX, $ts=0)
Get a timestamp string in one of various formats.
Maintenance script to remove old or broken uploads from temporary uploaded file storage and clean up associated database records.
execute()
Do the actual work.
doOperations(FileRepo $tempRepo, array $ops)
__construct()
Default constructor.
Base class for file repositories.
Definition FileRepo.php:51
getBackend()
Get the file backend instance.
Definition FileRepo.php:253
A class containing constants representing the names of configuration variables.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
getBatchSize()
Returns batch size.
output( $out, $channel=null)
Throw some output to the user.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
waitForReplication()
Wait for replica DB servers to catch up.
error( $err, $die=0)
Throw an error to the user.
getServiceContainer()
Returns the main service container.
addDescription( $text)
Set the description text.
UploadStash is intended to accomplish a few things: