Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 123 |
|
0.00% |
0 / 7 |
CRAP | |
0.00% |
0 / 1 |
CountOrphanedFiles | |
0.00% |
0 / 117 |
|
0.00% |
0 / 7 |
756 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
2 | |||
execute | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
20 | |||
getSites | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
42 | |||
isExtensionInstalled | |
0.00% |
0 / 28 |
|
0.00% |
0 / 1 |
42 | |||
fetchUsedFiles | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
2 | |||
reportUnusedFiles | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
30 | |||
deleteFiles | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
20 |
1 | <?php |
2 | |
3 | use MediaWiki\Extension\Phonos\Engine\Engine; |
4 | use MediaWiki\Http\HttpRequestFactory; |
5 | use MediaWiki\Maintenance\Maintenance; |
6 | use MediaWiki\Site\MediaWikiSite; |
7 | use MediaWiki\Site\SiteList; |
8 | use MediaWiki\Site\SiteStore; |
9 | use MediaWiki\Status\Status; |
10 | use MediaWiki\WikiMap\WikiMap; |
11 | use Wikimedia\FileBackend\FileBackend; |
12 | use Wikimedia\Rdbms\LBFactory; |
13 | |
// Locate the MediaWiki installation: honour MW_INSTALL_PATH when it is set,
// otherwise fall back to the directory three levels above this script.
$IP = getenv( 'MW_INSTALL_PATH' ) !== false
	? getenv( 'MW_INSTALL_PATH' )
	: __DIR__ . '/../../..';
require_once "$IP/maintenance/Maintenance.php";
19 | |
/**
 * Maintenance script to find and optionally delete orphaned Phonos files.
 *
 * On wiki farms, you can use the '--wikis' flag to specify which wikis to process, passing
 * in the global IDs (database names). Each listed wiki is looked up in the site store and
 * is only kept if API:Siteinfo reports that Phonos is installed there. If '--wikis' is not
 * provided, the script queries every MediaWiki site returned by the site store.
 * If no sites result from either path (e.g. the 'sites' table is not set up), the script
 * acts only on the current wiki.
 *
 * Wikis whose query fails are skipped. Because the list of in-use files is then incomplete,
 * '--delete' is refused in that case; the number of unused files is still reported.
 *
 * @see https://www.mediawiki.org/wiki/Manual:AddSite.php
 *
 * @ingroup Maintenance
 */
class CountOrphanedFiles extends Maintenance {

	private HttpRequestFactory $requestFactory;
	private LBFactory $lbFactory;
	private SiteStore $siteStore;
	private FileBackend $backend;

	/** @var string|false Value of the 'PhonosApiProxy' setting, passed as the HTTP proxy option. */
	private $apiProxy;

	/**
	 * Declare the script description, its options, and the dependency on Phonos.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Find and optionally delete orphaned Phonos files across all wikis.' );
		$this->addOption( 'delete', 'Delete the orphaned files in addition to reporting how many there are.' );
		$this->addOption(
			'wikis',
			'Comma-separated list of db names. Only these wikis will be processed.',
			false,
			true
		);
		$this->requireExtension( 'Phonos' );
	}

	/**
	 * Collect the in-use files of every site, then report (and optionally delete)
	 * the files in storage that none of them reference.
	 */
	public function execute(): void {
		$services = $this->getServiceContainer();
		$config = $services->getMainConfig();
		$this->requestFactory = $services->getHttpRequestFactory();
		$this->apiProxy = $config->get( 'PhonosApiProxy' );
		$this->lbFactory = $services->getDBLoadBalancerFactory();
		$this->siteStore = $services->getSiteStore();
		$this->backend = Engine::getFileBackend(
			$services->getFileBackendGroup(),
			$config
		);

		// One list of files per successfully queried site; merged once after the loop
		// instead of re-merging and de-duplicating on every iteration.
		$filesPerSite = [];
		$skippedSites = 0;
		foreach ( $this->getSites() as $site ) {
			if ( !$site instanceof MediaWikiSite ) {
				// The site store may hold other kinds of sites; fetchUsedFiles() only
				// accepts MediaWikiSite, so these are passed over without counting as errors.
				continue;
			}

			try {
				$filesPerSite[] = $this->fetchUsedFiles( $site );
			} catch ( Throwable $e ) {
				$skippedSites++;
				$this->error( $e->getMessage() . "\n" );
			}
		}
		$usedFiles = array_values( array_unique( array_merge( [], ...$filesPerSite ) ) );

		$msg = count( $usedFiles ) . ' in-use files found.' .
			( $skippedSites > 0 ? " $skippedSites sites skipped due to errors." : '' );
		$this->output( "$msg\n" );

		// Deleting is only allowed when every site could be queried; otherwise files
		// that are in use on a skipped site would look orphaned.
		$this->reportUnusedFiles( $usedFiles, $skippedSites === 0 );
	}

	/**
	 * Get the list of all the sites we need to query.
	 *
	 * With '--wikis', only the listed wikis that exist in the site store as MediaWiki
	 * sites and have Phonos installed are returned. Without it, everything in the site
	 * store is returned. If the result is empty, a site for the current wiki is added.
	 *
	 * @return SiteList
	 */
	private function getSites(): SiteList {
		$wikisOption = $this->getOption( 'wikis' );
		if ( $wikisOption ) {
			// Tolerate whitespace and stray commas, e.g. "--wikis 'enwiki, frwiki,'".
			$wikis = array_filter(
				array_map( 'trim', explode( ',', $wikisOption ) ),
				static fn ( string $wiki ): bool => $wiki !== ''
			);
			$sites = new SiteList();
			foreach ( $wikis as $wiki ) {
				$site = $this->siteStore->getSite( $wiki );
				// getSite() may return null or a non-MediaWiki site; only a MediaWikiSite
				// can be handed to isExtensionInstalled().
				if ( $site instanceof MediaWikiSite && $this->isExtensionInstalled( $site ) ) {
					$sites->setSite( $site );
				} else {
					$this->output( "Wiki '$wiki' not found or Phonos isn't installed, skipping...\n" );
				}
			}
		} else {
			$sites = $this->siteStore->getSites();
		}

		if ( $sites->isEmpty() ) {
			// 'sites' table is probably not set up.
			// Assume this is a MW installation and act only on the current wiki.
			$id = WikiMap::getCurrentWikiId();
			$this->output( "sites table is empty, processing only $id...\n" );
			$site = new MediaWikiSite();
			$site->setGlobalId( $id );
			$sites->setSite( $site );
		}

		return $sites;
	}

	/**
	 * Query API:Siteinfo to determine if Phonos is installed on the given Site.
	 *
	 * The current wiki is always reported as having Phonos. For any other wiki the
	 * script exits with a fatal error if the API URL cannot be built or the request fails.
	 *
	 * @param MediaWikiSite $site
	 * @return bool
	 */
	private function isExtensionInstalled( MediaWikiSite $site ): bool {
		$wiki = $site->getGlobalId();
		if ( WikiMap::isCurrentWikiId( $wiki ) ) {
			// The API code will error out for local installations since MediaWiki-Docker
			// can't talk to localhost as if it were public. Phonos has to be installed
			// for the script to be ran anyway, so there's no need to check for the current wiki.
			return true;
		}

		try {
			$apiRoot = $site->getFileUrl( 'api.php' );
		} catch ( RuntimeException $e ) {
			$this->fatalError( "file_path not specified in the sites table for wiki '$wiki'.\n" );
		}

		$query = http_build_query( [
			'action' => 'query',
			'meta' => 'siteinfo',
			'siprop' => 'extensions',
			'format' => 'json'
		] );
		$request = $this->requestFactory->create(
			$apiRoot . '?' . $query,
			[
				'proxy' => $this->apiProxy,
				'followRedirects' => true
			],
			__METHOD__
		);
		$status = $request->execute();
		if ( !$status->isOK() ) {
			// Render the error the same way deleteFiles() does, as English wikitext,
			// rather than relying on string conversion of a Message object.
			$msg = Status::wrap( $status )->getWikiText( false, false, 'en' );
			$this->fatalError( "Could not fetch siteinfo for wiki '$wiki': $msg\n" );
		}

		$response = json_decode( $request->getContent(), true );
		$extensions = is_array( $response ) ? ( $response['query']['extensions'] ?? [] ) : [];
		if ( !is_array( $extensions ) ) {
			return false;
		}

		foreach ( $extensions as $extension ) {
			// Entries without a 'name' key are ignored instead of raising a warning.
			if ( ( $extension['name'] ?? null ) === 'Phonos' ) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Query for the 'phonos-files' page property to find all Phonos files that are in-use.
	 *
	 * Each property value is expected to be a JSON-encoded list. The returned entries are
	 * compared by reportUnusedFiles() against the base name (without '.mp3') of each
	 * stored file.
	 *
	 * @param MediaWikiSite $site
	 * @return array De-duplicated list of the decoded entries.
	 * @throws RuntimeException If a property value does not decode to an array. The caller
	 *   then skips the site, which is safer than treating its files as orphaned.
	 */
	private function fetchUsedFiles( MediaWikiSite $site ): array {
		$wiki = $site->getGlobalId();
		$dbr = $this->lbFactory->getReplicaDatabase( $wiki );
		$props = $dbr->newSelectQueryBuilder()
			->select( 'pp_value' )
			->from( 'page_props' )
			->where( [ 'pp_propname' => 'phonos-files' ] )
			->caller( __METHOD__ )
			->fetchFieldValues();

		$files = [];
		foreach ( $props as $prop ) {
			$decoded = json_decode( $prop, true );
			if ( !is_array( $decoded ) ) {
				throw new RuntimeException(
					"Malformed 'phonos-files' page property found on wiki '$wiki'."
				);
			}
			foreach ( $decoded as $file ) {
				$files[] = $file;
			}
		}

		return array_values( array_unique( $files ) );
	}

	/**
	 * Reports the number of unused files in storage, optionally deleting them as well.
	 *
	 * @param array $usedFiles Entries that are in use, as returned by fetchUsedFiles().
	 * @param bool $allowDelete Whether the '--delete' option may be honoured. Pass false
	 *   when $usedFiles is known to be incomplete; the script then stops with a fatal
	 *   error instead of deleting.
	 */
	private function reportUnusedFiles( array $usedFiles, bool $allowDelete = true ): void {
		$this->output( "Finding unused files in storage...\n" );
		$dir = $this->backend->getRootStoragePath() . '/' . Engine::STORAGE_PREFIX;

		$storedFiles = $this->backend->getFileList( [ 'dir' => $dir, 'adviseStat' => true ] );
		if ( $storedFiles === null ) {
			// A failed listing must not be mistaken for an empty directory.
			$this->fatalError( "Could not list the files stored in '$dir'.\n" );
		}

		// Keyed lookup: constant-time membership test, and no loose comparison between
		// numeric-looking names (e.g. '1e3' and '1000' are different keys).
		$usedLookup = array_fill_keys( $usedFiles, true );

		$filesToDelete = [];
		foreach ( $storedFiles as $file ) {
			$slug = basename( $file, '.mp3' );
			if ( !isset( $usedLookup[$slug] ) ) {
				$filesToDelete[] = [ 'op' => 'delete', 'src' => $dir . '/' . $file ];
			}
		}

		$count = count( $filesToDelete );
		if ( !$count ) {
			$this->output( "No unused files found!\n" );
			return;
		}

		$this->output( $count . " unused files found.\n" );
		if ( !$this->getOption( 'delete' ) ) {
			return;
		}

		if ( !$allowDelete ) {
			$this->fatalError(
				"Refusing to delete files: some sites were skipped, " .
				"so the list of in-use files is incomplete.\n"
			);
		}
		$this->deleteFiles( $dir, $filesToDelete );
	}

	/**
	 * Delete the given files within the given directory.
	 * This operation is batched for performance reasons.
	 *
	 * The script exits with a fatal error as soon as a batch, or the final cleanup of
	 * empty directories, fails.
	 *
	 * @param string $dir Storage directory to clean of empty sub-directories afterwards.
	 * @param array $files List of 'delete' operations as accepted by doQuickOperations().
	 */
	private function deleteFiles( string $dir, array $files ): void {
		$this->output( "Deleting files from storage...\n" );
		$deletedCount = 0;
		foreach ( array_chunk( $files, 1000 ) as $chunk ) {
			$ret = $this->backend->doQuickOperations( $chunk );

			if ( !$ret->isOK() ) {
				$status = Status::wrap( $ret );
				$this->output( "Deleting unused Phonos files errored.\n" );
				$this->fatalError( $status->getWikiText( false, false, 'en' ) );
			}

			// Running total, printed after each batch as a progress indicator.
			$deletedCount += count( $chunk );
			$this->output( "$deletedCount...\n" );
		}

		$this->output( "$deletedCount orphaned Phonos files deleted.\n" );

		// Remove empty directories.
		$ret = $this->backend->clean( [
			'dir' => $dir,
			'recursive' => true,
		] );
		if ( !$ret->isOK() ) {
			$status = Status::wrap( $ret );
			$this->output( "Cleaning empty directories errored.\n" );
			$this->fatalError( $status->getWikiText( false, false, 'en' ) );
		}
	}
}
259 | |
// Name the class the maintenance runner should instantiate, then include the
// runner entry point named by the RUN_MAINTENANCE_IF_MAIN constant.
$maintClass = CountOrphanedFiles::class;
require_once RUN_MAINTENANCE_IF_MAIN;