Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 122 |
|
0.00% |
0 / 7 |
CRAP | |
0.00% |
0 / 1 |
CountOrphanedFiles | |
0.00% |
0 / 116 |
|
0.00% |
0 / 7 |
756 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
2 | |||
execute | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
20 | |||
getSites | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
42 | |||
isExtensionInstalled | |
0.00% |
0 / 27 |
|
0.00% |
0 / 1 |
42 | |||
fetchUsedFiles | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
2 | |||
reportUnusedFiles | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
30 | |||
deleteFiles | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
20 |
1 | <?php |
2 | |
3 | use MediaWiki\Extension\Phonos\Engine\Engine; |
4 | use MediaWiki\Http\HttpRequestFactory; |
5 | use MediaWiki\Site\MediaWikiSite; |
6 | use MediaWiki\Site\SiteList; |
7 | use MediaWiki\Site\SiteStore; |
8 | use MediaWiki\Status\Status; |
9 | use MediaWiki\WikiMap\WikiMap; |
10 | use Wikimedia\Rdbms\LBFactory; |
11 | |
// Locate the MediaWiki installation: use the MW_INSTALL_PATH environment variable when it is
// set, otherwise fall back to the directory three levels above this script.
$IP = getenv( 'MW_INSTALL_PATH' );
$IP = $IP === false ? __DIR__ . '/../../..' : $IP;
require_once $IP . '/maintenance/Maintenance.php';
17 | |
/**
 * Maintenance script to find and optionally delete orphaned Phonos files.
 *
 * On wiki farms, you can use the '--wikis' flag to specify which wikis to process, passing
 * in the global IDs (database names). Each wiki given this way is only processed if it is found
 * in the 'sites' table and Phonos is installed on it. If the flag is not provided, the script
 * loops through all wikis listed in the 'sites' table; wikis that cannot be queried are skipped
 * with an error message. If the 'sites' table is not set up, the script will act only on the
 * current wiki.
 *
 * @see https://www.mediawiki.org/wiki/Manual:AddSite.php
 *
 * @ingroup Maintenance
 */
class CountOrphanedFiles extends Maintenance {

	/** Number of delete operations sent to the file backend per batch. */
	private const DELETE_BATCH_SIZE = 1000;

	/** @var HttpRequestFactory */
	private $requestFactory;

	/** @var LBFactory */
	private $lbFactory;

	/** @var SiteStore */
	private $siteStore;

	/** @var FileBackend */
	private $backend;

	/** @var string */
	private $apiProxy;

	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Find and optionally delete orphaned Phonos files across all wikis.' );
		$this->addOption( 'delete', 'Delete the orphaned files in addition to reporting how many there are.' );
		$this->addOption(
			'wikis',
			'Comma-separated list of db names. Only these wikis will be processed.',
			false,
			true
		);
		$this->requireExtension( 'Phonos' );
	}

	/**
	 * Collect the in-use files of every site to process, then report (and optionally delete)
	 * the stored files that none of those sites use.
	 */
	public function execute(): void {
		$services = $this->getServiceContainer();
		$config = $services->getMainConfig();
		$this->requestFactory = $services->getHttpRequestFactory();
		$this->apiProxy = $config->get( 'PhonosApiProxy' );
		$this->lbFactory = $services->getDBLoadBalancerFactory();
		$this->siteStore = $services->getSiteStore();
		$this->backend = Engine::getFileBackend(
			$services->getFileBackendGroup(),
			$config
		);

		$usedFiles = [];
		$skippedSites = 0;
		/** @var MediaWikiSite $site */
		foreach ( $this->getSites() as $site ) {
			try {
				// De-duplicate after every site so the list does not grow with repeated entries.
				$usedFiles = array_unique( array_merge( $usedFiles, $this->fetchUsedFiles( $site ) ) );
			} catch ( Throwable $e ) {
				// Best effort: report the problem and carry on with the remaining sites.
				// NOTE(review): files used only on a skipped site are not in $usedFiles, so they
				// will be counted as unused (and deleted when --delete is given).
				$skippedSites++;
				$this->error( $e->getMessage() . "\n" );
			}
		}

		$msg = count( $usedFiles ) . ' in-use files found.' .
			( $skippedSites > 0 ? " $skippedSites sites skipped due to errors." : '' );
		$this->output( "$msg\n" );

		$this->reportUnusedFiles( $usedFiles );
	}

	/**
	 * Get the list of all the sites we need to query.
	 *
	 * With '--wikis', only the listed wikis that exist in the site store and have Phonos
	 * installed are returned. Without it, every site in the site store is returned. If the
	 * resulting list is empty, a site representing the current wiki is used instead.
	 *
	 * @return SiteList
	 */
	private function getSites(): SiteList {
		$wikisOption = $this->getOption( 'wikis' );
		if ( $wikisOption ) {
			$sites = new SiteList();
			foreach ( explode( ',', $wikisOption ) as $wiki ) {
				// Tolerate whitespace around the separators and stray commas ("a, b,").
				$wiki = trim( $wiki );
				if ( $wiki === '' ) {
					continue;
				}

				/** @var MediaWikiSite $site */
				$site = $this->siteStore->getSite( $wiki );
				// @phan-suppress-next-line PhanTypeMismatchArgumentSuperType
				if ( $site && $this->isExtensionInstalled( $site ) ) {
					$sites->setSite( $site );
				} else {
					$this->output( "Wiki '$wiki' not found or Phonos isn't installed, skipping...\n" );
				}
			}
		} else {
			$sites = $this->siteStore->getSites();
		}

		if ( $sites->isEmpty() ) {
			// 'sites' table is probably not set up.
			// Assume this is a MW installation and act only on the current wiki.
			$id = WikiMap::getCurrentWikiId();
			$this->output( "sites table is empty, processing only $id...\n" );
			$site = new MediaWikiSite();
			$site->setGlobalId( $id );
			$sites->setSite( $site );
		}

		return $sites;
	}

	/**
	 * Query API:Siteinfo to determine if Phonos is installed on the given Site.
	 *
	 * The current wiki is always reported as having Phonos installed, without any request.
	 * The script aborts if the site has no file path configured or the request fails.
	 *
	 * @param MediaWikiSite $site
	 * @return bool
	 */
	private function isExtensionInstalled( MediaWikiSite $site ): bool {
		$wiki = $site->getGlobalId();
		if ( WikiMap::isCurrentWikiId( $wiki ) ) {
			// The API code will error out for local installations since MediaWiki-Docker
			// can't talk to localhost as if it were public. Phonos has to be installed
			// for the script to be run anyway, so there's no need to check for the current wiki.
			return true;
		}

		try {
			$apiRoot = $site->getFileUrl( 'api.php' );
		} catch ( RuntimeException $e ) {
			$this->fatalError( "file_path not specified in the sites table for wiki '$wiki'.\n" );
		}

		$request = $this->requestFactory->create(
			$apiRoot . '?' . http_build_query( [
				'action' => 'query',
				'meta' => 'siteinfo',
				'siprop' => 'extensions',
				'format' => 'json'
			] ),
			[
				'proxy' => $this->apiProxy,
				'followRedirects' => true
			]
		);
		$status = $request->execute();
		if ( !$status->isOK() ) {
			// Render the status as plain wikitext, as deleteFiles() does, instead of relying
			// on the implicit string conversion of a Message object.
			$msg = $status->getWikiText( false, false, 'en' );
			$this->fatalError( "Could not fetch siteinfo for wiki '$wiki': $msg\n" );
		}

		// A response that is not valid JSON or lacks the expected keys counts as "no extensions".
		$extensions = json_decode( $request->getContent(), true )['query']['extensions'] ?? [];
		foreach ( $extensions as $extension ) {
			if ( ( $extension['name'] ?? null ) === 'Phonos' ) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Query for the 'phonos-files' page property to find all Phonos files that are in-use.
	 *
	 * @param MediaWikiSite $site
	 * @return string[] De-duplicated values decoded from the page property. reportUnusedFiles()
	 *  matches them against the name of each stored file without its '.mp3' extension.
	 * @throws UnexpectedValueException If a stored property value does not decode to a JSON list.
	 */
	private function fetchUsedFiles( MediaWikiSite $site ): array {
		$wiki = $site->getGlobalId();
		$dbr = $this->lbFactory->getReplicaDatabase( $wiki );
		$props = $dbr->newSelectQueryBuilder()
			->select( 'pp_value' )
			->from( 'page_props' )
			->where( [ 'pp_propname' => 'phonos-files' ] )
			->caller( __METHOD__ )
			->fetchFieldValues();

		$fileLists = [];
		foreach ( $props as $prop ) {
			$files = json_decode( $prop );
			if ( !is_array( $files ) ) {
				// Fail with a clear message rather than an opaque TypeError from array_merge().
				throw new UnexpectedValueException(
					"Invalid 'phonos-files' page property value found on wiki '$wiki'."
				);
			}
			$fileLists[] = $files;
		}

		// The leading [] keeps the call valid when no page has the property.
		return array_unique( array_merge( [], ...$fileLists ) );
	}

	/**
	 * Reports the number of unused files in storage, optionally deleting them as well.
	 *
	 * A stored file is unused when its name, without the '.mp3' extension, is not among
	 * the given in-use files. The script aborts if the storage directory cannot be listed.
	 *
	 * @param array $usedFiles
	 */
	private function reportUnusedFiles( array $usedFiles ): void {
		$this->output( "Finding unused files in storage...\n" );
		$dir = $this->backend->getRootStoragePath() . '/' . Engine::STORAGE_PREFIX;

		$fileList = $this->backend->getFileList( [ 'dir' => $dir, 'adviseStat' => true ] );
		if ( $fileList === null ) {
			// Without a listing we cannot tell whether anything is orphaned.
			$this->fatalError( "Could not list the files stored in '$dir'.\n" );
		}

		// Index the in-use files by name. Key lookups match names exactly, whereas a loose
		// in_array() treats numeric-looking names such as "0e12" and "0e99" as equal, and
		// each lookup is constant-time instead of a scan of the whole list.
		$usedLookup = [];
		foreach ( $usedFiles as $usedFile ) {
			if ( is_string( $usedFile ) || is_int( $usedFile ) ) {
				$usedLookup[$usedFile] = true;
			}
		}

		$filesToDelete = [];
		foreach ( $fileList as $file ) {
			$slug = basename( $file, '.mp3' );
			if ( !isset( $usedLookup[$slug] ) ) {
				$filesToDelete[] = [ 'op' => 'delete', 'src' => $dir . '/' . $file ];
			}
		}

		$count = count( $filesToDelete );
		if ( $count === 0 ) {
			$this->output( "No unused files found!\n" );
			return;
		}

		$this->output( $count . " unused files found.\n" );
		if ( $this->getOption( 'delete' ) ) {
			$this->deleteFiles( $dir, $filesToDelete );
		}
	}

	/**
	 * Delete the given files within the given directory.
	 * This operation is batched for performance reasons.
	 *
	 * The script aborts on the first batch that fails. Once all batches succeed, empty
	 * directories below $dir are cleaned up.
	 *
	 * @param string $dir
	 * @param array $files List of file backend 'delete' operations.
	 */
	private function deleteFiles( string $dir, array $files ): void {
		$this->output( "Deleting files from storage...\n" );
		$deletedCount = 0;
		foreach ( array_chunk( $files, self::DELETE_BATCH_SIZE ) as $chunk ) {
			$ret = $this->backend->doQuickOperations( $chunk );

			if ( !$ret->isOK() ) {
				$status = Status::wrap( $ret );
				$this->output( "Deleting unused Phonos files errored.\n" );
				$this->fatalError( $status->getWikiText( false, false, 'en' ) );
			}

			// Progress indicator: running total of deleted files.
			$deletedCount += count( $chunk );
			$this->output( "$deletedCount...\n" );
		}

		$this->output( "$deletedCount orphaned Phonos files deleted.\n" );

		// Remove empty directories.
		$ret = $this->backend->clean( [
			'dir' => $dir,
			'recursive' => true,
		] );
		if ( !$ret->isOK() ) {
			$status = Status::wrap( $ret );
			$this->output( "Cleaning empty directories errored.\n" );
			$this->fatalError( $status->getWikiText( false, false, 'en' ) );
		}
	}
}
263 | |
// Name the maintenance class defined in this file, then include the runner script.
// NOTE(review): RUN_MAINTENANCE_IF_MAIN is not defined in this file; presumably it is
// provided by the Maintenance.php included at the top — confirm.
$maintClass = CountOrphanedFiles::class;
require_once RUN_MAINTENANCE_IF_MAIN;