MediaWiki master
ForeignResourceManager.php
Go to the documentation of this file.
1<?php
22
23use Composer\Spdx\SpdxLicenses;
24use LogicException;
28use PharData;
29use RecursiveDirectoryIterator;
30use RecursiveIteratorIterator;
31use SplFileInfo;
32use Symfony\Component\Yaml\Yaml;
34
/** @var string Hash algorithm used by fetch() to compute an integrity value when none is declared */
44 private $defaultAlgo = 'sha384';
45
/** @var bool Set to true by error(); run() returns false when it is set */
47 private $hasErrors = false;
48
/** @var string Path of the YAML registry file that run() parses */
50 private $registryFile;
51
/** @var string Base directory; each module is placed in a sub-directory named after it */
53 private $libDir;
54
/** @var string Scratch directory, chosen by setupTempDir() depending on the action */
56 private $tmpParentDir;
57
/** @var string Directory holding cached downloads, chosen by the constructor */
59 private $cacheDir;
60
/** @var callable Receives regular output, see output() */
65 private $infoPrinter;
66
/** @var callable Receives error messages, see error() */
71 private $errorPrinter;
/** @var callable Receives verbose output, see verbose() */
76 private $verbosePrinter;
77
/** @var string Action currently being run, set by run() */
79 private $action;
80
/** @var array Parsed contents of the registry file, set by run() */
82 private $registry;
83
/** Used by generateCdx() to create the UUID for the BOM serial number */
84 private GlobalIdGenerator $globalIdGenerator;
85
/**
 * Store the paths and printers, and choose the directory used to cache downloads.
 *
 * The cache directory is, in order of preference: "mw-foreign" under XDG_CACHE_HOME,
 * "ForeignResourceManager" under the configured CacheDirectory, or ".foreign/cache"
 * under $libDir.
 *
 * @param string $registryFile Path to the YAML registry file (parsed by run())
 * @param string $libDir Directory under which each module gets its own sub-directory
 * @param callable|null $infoPrinter Receives regular output; defaults to a no-op
 * @param callable|null $errorPrinter Receives error output; defaults to $infoPrinter
 * @param callable|null $verbosePrinter Receives verbose output; defaults to a no-op
 */
public function __construct(
	$registryFile,
	$libDir,
	?callable $infoPrinter = null,
	?callable $errorPrinter = null,
	?callable $verbosePrinter = null
) {
	$this->globalIdGenerator = MediaWikiServices::getInstance()->getGlobalIdGenerator();
	$this->registryFile = $registryFile;
	$this->libDir = $libDir;
	$this->infoPrinter = $infoPrinter ?? static function ( $_ ) {
	};
	$this->errorPrinter = $errorPrinter ?? $this->infoPrinter;
	$this->verbosePrinter = $verbosePrinter ?? static function ( $_ ) {
	};

	// Support XDG_CACHE_HOME to speed up CI by avoiding repeated downloads.
	$conf = MediaWikiServices::getInstance()->getMainConfig();
	$cacheHome = getenv( 'XDG_CACHE_HOME' );
	if ( $cacheHome !== false ) {
		// realpath() returns false for a path that does not exist (yet). Fall back to
		// the value as given instead of silently pointing the cache at "/mw-foreign";
		// cacheSet() creates missing directories recursively.
		$resolvedHome = realpath( $cacheHome );
		if ( $resolvedHome === false ) {
			$resolvedHome = rtrim( $cacheHome, '/' );
		}
		$this->cacheDir = $resolvedHome . '/mw-foreign';
	} elseif ( ( $cacheConf = $conf->get( MainConfigNames::CacheDirectory ) ) !== false ) {
		$this->cacheDir = "$cacheConf/ForeignResourceManager";
	} else {
		$this->cacheDir = "{$this->libDir}/.foreign/cache";
	}
}
120
/**
 * Run one action for a single module, or for every module in the registry.
 *
 * @param string $action One of 'update', 'verify', 'make-sri' or 'make-cdx'
 * @param string $module A module name from the registry, or 'all'
 * @return bool False if the action or module name is invalid, or if any error was
 *  reported while processing; true otherwise
 * @throws LogicException If a module definition is unusable (missing or unknown 'type',
 *  missing license) or the temporary directory cannot be created; the type handlers
 *  may throw it as well
 */
public function run( $action, $module ) {
	$actions = [ 'update', 'verify', 'make-sri', 'make-cdx' ];
	// Strict comparison: with a loose in_array(), a non-string such as integer 0
	// matches every action name on PHP < 8 and would slip through as "valid".
	if ( !in_array( $action, $actions, true ) ) {
		$this->error( "Invalid action.\n\nMust be one of " . implode( ', ', $actions ) . '.' );
		return false;
	}
	$this->action = $action;
	$this->setupTempDir( $action );

	$this->registry = Yaml::parseFile( $this->registryFile );
	if ( $module === 'all' ) {
		$modules = $this->registry;
	} elseif ( isset( $this->registry[$module] ) ) {
		$modules = [ $module => $this->registry[$module] ];
	} else {
		$this->error( "Unknown module name.\n\nMust be one of:\n" .
			wordwrap( implode( ', ', array_keys( $this->registry ) ), 80 ) .
			'.'
		);
		return false;
	}

	if ( $this->action === 'make-cdx' ) {
		// Only the registry contents are needed here; nothing is downloaded.
		$this->output( json_encode(
			$this->generateCdx( $modules ),
			JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE | JSON_THROW_ON_ERROR
		) );
		return true;
	}

	// Progress verb per action; any other action is reported as "checking".
	$progressVerbs = [ 'update' => 'updating', 'verify' => 'verifying' ];
	$progressVerb = $progressVerbs[$this->action] ?? 'checking';

	foreach ( $modules as $moduleName => $info ) {
		$this->verbose( "\n### {$moduleName}\n\n" );
		$this->output( "... {$progressVerb} '{$moduleName}'\n" );

		// Do checks on yaml content (such as license existence, validity and type keys)
		// before doing any potentially destructive actions (potentially deleting
		// directories, depending on action).

		if ( !isset( $info['type'] ) ) {
			throw new LogicException( "Module '$moduleName' must have a 'type' key." );
		}

		$this->validateLicense( $moduleName, $info );

		if ( $info['type'] === 'doc-only' ) {
			$this->output( "... {$moduleName} is documentation-only, skipping integrity checks.\n" );
			continue;
		}

		$destDir = "{$this->libDir}/$moduleName";

		if ( $this->action === 'update' ) {
			$this->verbose( "... emptying directory for $moduleName\n" );
			wfRecursiveRemoveDir( $destDir );
		}

		// Start every module with a fresh, empty scratch directory.
		$this->verbose( "... preparing {$this->tmpParentDir}\n" );
		wfRecursiveRemoveDir( $this->tmpParentDir );
		if ( !wfMkdirParents( $this->tmpParentDir ) ) {
			throw new LogicException( "Unable to create {$this->tmpParentDir}" );
		}

		switch ( $info['type'] ) {
			case 'tar':
			case 'zip':
				$this->handleTypeTar( $moduleName, $destDir, $info, $info['type'] );
				break;
			case 'file':
				$this->handleTypeFile( $moduleName, $destDir, $info );
				break;
			case 'multi-file':
				$this->handleTypeMultiFile( $moduleName, $destDir, $info );
				break;
			default:
				throw new LogicException( "Unknown type '{$info['type']}' for '$moduleName'" );
		}
	}

	$this->cleanUp();

	// The "verify" action should check all modules and files and fail after, not during.
	// We don't throw on the first issue so that developers enjoy access to all actionable
	// information at once (given we can't have cascading errors).
	// The "verify" action prints errors along the way and simply exits here.
	return !$this->hasErrors;
}
223
/**
 * Choose the scratch directory for the given action.
 *
 * @param string $action
 */
private function setupTempDir( $action ) {
	// Everything except "verify" works in a directory under the destination directory
	// instead of wfTempDir(), because PHP's rename() does not work across file systems,
	// and the user's /tmp and $IP may be on different filesystems.
	$this->tmpParentDir = $action === 'verify'
		? wfTempDir() . '/ForeignResourceManager'
		: "{$this->libDir}/.foreign/tmp";
}
239
/**
 * Build the cache key (a file name without extension) for a downloaded resource.
 *
 * @param string $src URL of the resource
 * @param string $integrity Integrity value declared for the resource
 * @param string $moduleName
 * @return string
 */
private function cacheKey( $src, $integrity, $moduleName ) {
	$segments = [
		$moduleName,
		hash( 'fnv132', $integrity ),
		hash( 'fnv132', $src ),
		// Readable file name last, to aid cache inspection and debugging
		basename( $src ),
	];
	// Collapse every run of separator-like characters into a single underscore
	$normalised = preg_replace( '/[.\/+?=_-]+/', '_', implode( '_', $segments ) );
	return rtrim( $normalised, '_' );
}
255
/**
 * Read a cached download.
 *
 * @param string $key Key as produced by cacheKey()
 * @return string|false File contents, or false if the cache file could not be read
 */
private function cacheGet( $key ) {
	$cacheFile = "{$this->cacheDir}/$key.data";
	// A missing file is an ordinary cache miss, so the warning is silenced.
	// phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
	return @file_get_contents( $cacheFile );
}
264
/**
 * Store a download in the cache, creating the cache directory if needed.
 *
 * @param string $key Key as produced by cacheKey()
 * @param mixed $data Content to store
 */
private function cacheSet( $key, $data ) {
	$cacheFile = "{$this->cacheDir}/$key.data";
	// The directory usually exists already; the resulting warning is silenced.
	// phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
	@mkdir( $this->cacheDir, 0777, true );
	file_put_contents( $cacheFile, $data, LOCK_EX );
}
274
/**
 * Get the content of a remote resource, from the cache when possible.
 *
 * Content is written to the cache only when its computed integrity equals the
 * declared one. On a mismatch the "make-sri" action prints the computed integrity
 * and still returns the content; every other action fails.
 *
 * @param string $src URL to download
 * @param string|null $integrity Declared integrity ("<algo>-<base64 digest>"), if any
 * @param string $moduleName
 * @return string Content of the resource
 * @throws LogicException If the download fails, the response is not HTTP 200, or the
 *  integrity check fails for an action other than "make-sri"
 */
private function fetch( string $src, $integrity, string $moduleName ) {
	if ( $integrity !== null ) {
		$cached = $this->cacheGet( $this->cacheKey( $src, $integrity, $moduleName ) );
		if ( $cached ) {
			return $cached;
		}
	}

	$request = MediaWikiServices::getInstance()
		->getHttpRequestFactory()
		->create( $src, [ 'method' => 'GET', 'followRedirects' => false ], __METHOD__ );
	$status = $request->execute();
	if ( !$status->isOK() ) {
		$failure = "Failed to download resource at {$src}";
		$firstError = $status->getMessages( 'error' )[0] ?? null;
		if ( $firstError !== null ) {
			$failure .= ': ' . Message::newFromSpecifier( $firstError )->inLanguage( 'en' )->plain();
		}
		throw new LogicException( $failure );
	}
	$httpStatus = $request->getStatus();
	if ( $httpStatus !== 200 ) {
		throw new LogicException( "Unexpected HTTP {$httpStatus} response from {$src}" );
	}

	$content = $request->getContent();
	// Without a declared integrity, use the default algorithm to compute one.
	if ( $integrity === null ) {
		$algo = $this->defaultAlgo;
	} else {
		$algo = explode( '-', $integrity )[0];
	}
	$actualIntegrity = $algo . '-' . base64_encode( hash( $algo, $content, true ) );

	if ( $integrity !== $actualIntegrity ) {
		if ( $this->action !== 'make-sri' ) {
			$expectedIntegrity = $integrity ?? 'null';
			throw new LogicException( "Integrity check failed for {$src}\n" .
				"\tExpected: {$expectedIntegrity}\n" .
				"\tActual: {$actualIntegrity}"
			);
		}
		$this->output( "Integrity for {$src}\n\tintegrity: {$actualIntegrity}\n" );
		return $content;
	}

	$this->verbose( "... passed integrity check for {$src}\n" );
	$this->cacheSet( $this->cacheKey( $src, $actualIntegrity, $moduleName ), $content );
	return $content;
}
323
/**
 * Handle a module of type "file": a single remote file.
 *
 * For "verify" the local copy is compared with the fetched content; for "update"
 * the fetched content is written to the destination.
 *
 * @param string $moduleName
 * @param string $destDir Directory of the module
 * @param array $info Module definition; needs 'src', may have 'integrity' and 'dest'
 * @throws LogicException If 'src' is missing or the file cannot be written
 */
private function handleTypeFile( $moduleName, $destDir, array $info ) {
	if ( !isset( $info['src'] ) ) {
		throw new LogicException( "Module '$moduleName' must have a 'src' key." );
	}
	$data = $this->fetch( $info['src'], $info['integrity'] ?? null, $moduleName );
	$dest = $info['dest'] ?? basename( $info['src'] );
	$path = "$destDir/$dest";
	if ( $this->action === 'verify' ) {
		// A missing local file counts as different; checking first avoids the
		// warning sha1_file() emits for a path it cannot read.
		if ( !is_file( $path ) || sha1_file( $path ) !== sha1( $data ) ) {
			$this->error( "File for '$moduleName' is different.\n" );
		}
	} elseif ( $this->action === 'update' ) {
		// 'dest' may name a file in a sub-directory, so create the parent of the
		// final path rather than only $destDir.
		wfMkdirParents( dirname( $path ) );
		if ( file_put_contents( $path, $data ) === false ) {
			throw new LogicException( "Could not write $path." );
		}
	}
}
344
/**
 * Handle a module of type "multi-file": several remote files, each with its own source.
 *
 * For "verify" each local copy is compared with the fetched content; for "update"
 * the fetched content is written to the destination.
 *
 * @param string $moduleName
 * @param string $destDir Directory of the module
 * @param array $info Module definition; needs 'files', a map of destination path
 *  (relative to $destDir) to an array with 'src' and optionally 'integrity'
 * @throws LogicException If 'files' or a file's 'src' is missing, or a file cannot
 *  be written
 */
private function handleTypeMultiFile( $moduleName, $destDir, array $info ) {
	if ( !isset( $info['files'] ) ) {
		throw new LogicException( "Module '$moduleName' must have a 'files' key." );
	}
	foreach ( $info['files'] as $dest => $file ) {
		if ( !isset( $file['src'] ) ) {
			throw new LogicException( "Module '$moduleName' file '$dest' must have a 'src' key." );
		}
		$data = $this->fetch( $file['src'], $file['integrity'] ?? null, $moduleName );
		$path = "$destDir/$dest";
		if ( $this->action === 'verify' ) {
			// A missing local file counts as different; checking first avoids the
			// warning sha1_file() emits for a path it cannot read.
			if ( !is_file( $path ) || sha1_file( $path ) !== sha1( $data ) ) {
				$this->error( "File '$dest' for '$moduleName' is different.\n" );
			}
		} elseif ( $this->action === 'update' ) {
			// Destination keys may contain sub-directories, so create the parent
			// of the final path rather than only $destDir.
			wfMkdirParents( dirname( $path ) );
			if ( file_put_contents( $path, $data ) === false ) {
				throw new LogicException( "Could not write $path." );
			}
		}
	}
}
368
/**
 * Handle a module distributed as an archive ("tar" or "zip").
 *
 * The archive is fetched, written to the scratch directory and extracted there.
 * Either the whole extracted tree or the paths selected by 'dest' are then compared
 * with ("verify") or moved to ("update") the module directory.
 *
 * @param string $moduleName
 * @param string $destDir Directory of the module
 * @param array $info Module definition; needs 'src', may have 'integrity' and 'dest'.
 *  'dest' maps a path inside the archive (wildcards are expanded with glob()) to a
 *  sub-directory of $destDir, or to null for $destDir itself.
 * @param string $fileType Used as the extension of the temporary archive file
 * @throws LogicException If 'src' is missing, the archive cannot be written, a 'dest'
 *  path matches nothing, or a path cannot be moved
 */
private function handleTypeTar( $moduleName, $destDir, array $info, string $fileType ) {
	$info += [ 'src' => null, 'integrity' => null, 'dest' => null ];
	if ( $info['src'] === null ) {
		throw new LogicException( "Module '$moduleName' must have a 'src' key." );
	}
	// Download the resource to a temporary file and open it
	$data = $this->fetch( $info['src'], $info['integrity'], $moduleName );
	$tmpFile = "{$this->tmpParentDir}/$moduleName." . $fileType;
	$this->verbose( "... writing '$moduleName' src to $tmpFile\n" );
	if ( file_put_contents( $tmpFile, $data ) === false ) {
		throw new LogicException( "Could not write $tmpFile." );
	}
	$p = new PharData( $tmpFile );
	$tmpDir = "{$this->tmpParentDir}/$moduleName";
	$p->extractTo( $tmpDir );
	unset( $data, $p );

	if ( $info['dest'] === null ) {
		// Default: Replace the entire directory
		$toCopy = [ $tmpDir => $destDir ];
	} else {
		// Expand and normalise the 'dest' entries
		$toCopy = [];
		foreach ( $info['dest'] as $fromSubPath => $toSubPath ) {
			// Use glob() to expand wildcards and check existence
			$fromPaths = glob( "{$tmpDir}/{$fromSubPath}", GLOB_BRACE );
			if ( !$fromPaths ) {
				throw new LogicException( "Path '$fromSubPath' of '$moduleName' not found." );
			}
			foreach ( $fromPaths as $fromPath ) {
				$toCopy[$fromPath] = $toSubPath === null
					? "$destDir/" . basename( $fromPath )
					: "$destDir/$toSubPath/" . basename( $fromPath );
			}
		}
	}
	foreach ( $toCopy as $from => $to ) {
		if ( $this->action === 'verify' ) {
			$this->verbose( "... verifying $to\n" );
			if ( is_dir( $from ) ) {
				$rii = new RecursiveIteratorIterator( new RecursiveDirectoryIterator(
					$from,
					RecursiveDirectoryIterator::SKIP_DOTS
				) );
				/** @var SplFileInfo $file */
				foreach ( $rii as $file ) {
					$remote = $file->getPathname();
					// Swap only the leading $from for $to. A plain string replacement
					// would also rewrite later occurrences of $from inside the path.
					$local = $to . substr( $remote, strlen( $from ) );
					// A missing local file counts as different; checking first avoids
					// the warning sha1_file() emits for a path it cannot read.
					if ( !is_file( $local ) || sha1_file( $remote ) !== sha1_file( $local ) ) {
						$this->error( "File '$local' is different.\n" );
					}
				}
			} elseif ( !is_file( $to ) || sha1_file( $from ) !== sha1_file( $to ) ) {
				$this->error( "File '$to' is different.\n" );
			}
		} elseif ( $this->action === 'update' ) {
			$this->verbose( "... moving $from to $to\n" );
			wfMkdirParents( dirname( $to ) );
			if ( !rename( $from, $to ) ) {
				throw new LogicException( "Could not move $from to $to." );
			}
		}
	}
}
437
/**
 * Pass text to the verbose printer.
 *
 * @param string $text
 */
private function verbose( $text ) {
	$printer = $this->verbosePrinter;
	$printer( $text );
}
444
/**
 * Pass text to the info printer.
 *
 * @param string $text
 */
private function output( $text ) {
	$printer = $this->infoPrinter;
	$printer( $text );
}
451
/**
 * Record that an error happened and pass text to the error printer.
 *
 * @param string $text
 */
private function error( $text ) {
	// Remembered so that run() can report failure once all modules were processed.
	$this->hasErrors = true;
	$printer = $this->errorPrinter;
	$printer( $text );
}
459
/**
 * Remove the scratch directory and prune cache files that no registry entry accounts for.
 */
private function cleanUp() {
	wfRecursiveRemoveDir( $this->tmpParentDir );

	// Prune the cache of files we don't recognise.
	// Keys are collected as array keys so that membership is a strict, constant-time test.
	$knownKeys = [];
	foreach ( $this->registry as $module => $info ) {
		// This walks the whole registry, including modules that run() did not
		// validate, so do not assume any key is present.
		$type = $info['type'] ?? null;
		if ( $type === 'file' || $type === 'tar' || $type === 'zip' ) {
			// 'zip' modules are fetched and cached exactly like 'tar' modules.
			$sources = [ $info ];
		} elseif ( $type === 'multi-file' ) {
			$sources = $info['files'] ?? [];
		} else {
			continue;
		}
		foreach ( $sources as $source ) {
			// fetch() only caches content whose integrity matched a declared value,
			// so an entry without 'integrity' cannot have a cache file to keep.
			if ( isset( $source['src'], $source['integrity'] ) ) {
				$knownKeys[$this->cacheKey( $source['src'], $source['integrity'], $module )] = true;
			}
		}
	}
	// glob() may return false on error; treat that as "nothing to prune".
	$cacheFiles = glob( "{$this->cacheDir}/*" ) ?: [];
	foreach ( $cacheFiles as $cacheFile ) {
		if ( !isset( $knownKeys[basename( $cacheFile, '.data' )] ) ) {
			unlink( $cacheFile );
		}
	}
}
480
/**
 * Check that a module declares a license, and that it is a valid SPDX identifier.
 *
 * A missing license is fatal; an invalid one is reported through error().
 *
 * @param string $moduleName
 * @param array $info Module definition
 * @throws LogicException If 'license' is absent or not a string
 */
private function validateLicense( $moduleName, $info ) {
	$license = $info['license'] ?? null;
	if ( !is_string( $license ) ) {
		throw new LogicException(
			"Module '$moduleName' needs a valid SPDX license; no license is currently present"
		);
	}

	$spdx = new SpdxLicenses();
	if ( $spdx->validate( $license ) ) {
		return;
	}
	$this->error(
		"Module '$moduleName' has an invalid SPDX license identifier '{$license}', "
			. "see <https://spdx.org/licenses/>.\n"
	);
}
499
/**
 * Build a CycloneDX 1.6 bill of materials describing the given modules.
 *
 * Optional module keys ('version', 'license', 'purl', 'authors', 'homepage') are
 * only included in a component when the registry provides them.
 *
 * @param array $modules Map of module name to module definition from the registry
 * @return array Structure ready to be encoded as JSON
 */
private function generateCdx( array $modules ): array {
	$cdx = [
		'$schema' => 'http://cyclonedx.org/schema/bom-1.6.schema.json',
		'bomFormat' => 'CycloneDX',
		'specVersion' => '1.6',
		'serialNumber' => 'urn:uuid:' . $this->globalIdGenerator->newUUIDv4(),
		'version' => 1,
		'components' => [],
	];
	foreach ( $modules as $moduleName => $module ) {
		$moduleCdx = [
			'type' => 'library',
			'name' => $moduleName,
		];
		// Set the version only when there is one, instead of reading a possibly
		// undefined key and emitting a null version.
		if ( isset( $module['version'] ) ) {
			$moduleCdx['version'] = $module['version'];
		}
		// The "make-cdx" action returns from run() before validateLicense() is
		// called, so the license cannot be assumed to be a string here.
		$license = $module['license'] ?? null;
		if ( is_string( $license ) && $license !== '' ) {
			if ( preg_match( '/ (AND|OR|WITH) /', $license ) ) {
				// Compound SPDX expression rather than a single license identifier
				$moduleCdx['licenses'][] = [ 'expression' => $license ];
			} else {
				$moduleCdx['licenses'][] = [ 'license' => [ 'id' => $license ] ];
			}
		}
		if ( $module['purl'] ?? false ) {
			$moduleCdx['purl'] = $module['purl'];
		}
		if ( $module['authors'] ?? false ) {
			// Authors are one string, separated by ", " or ", and "
			$moduleCdx['authors'] = array_map(
				static fn ( $author ) => [ 'name' => $author ],
				preg_split( '/,( and)? /', $module['authors'] )
			);
		}
		if ( $module['homepage'] ?? false ) {
			$moduleCdx['externalReferences'] = [ [ 'url' => $module['homepage'], 'type' => 'website' ] ];
		}
		$cdx['components'][] = $moduleCdx;
	}
	return $cdx;
}
539}
540
542class_alias( ForeignResourceManager::class, 'ForeignResourceManager' );
wfTempDir()
Tries to get the system directory for temporary files.
wfRecursiveRemoveDir( $dir)
Remove a directory and all its content.
wfMkdirParents( $dir, $mode=null, $caller=null)
Make directory, and make all parent directories if they don't exist.
A class containing constants representing the names of configuration variables.
const CacheDirectory
Name constant for the CacheDirectory setting, for use with Config::get()
Service locator for MediaWiki core services.
static getInstance()
Returns the global default instance of the top level service locator.
The Message class deals with fetching and processing of interface messages into a variety of formats.
Definition Message.php:158
static newFromSpecifier( $value)
Transform a MessageSpecifier or a primitive value used interchangeably with specifiers (a message key...
Definition Message.php:460
Manage foreign resources registered with ResourceLoader.
__construct( $registryFile, $libDir, callable $infoPrinter=null, callable $errorPrinter=null, callable $verbosePrinter=null)
Class for getting statistically unique IDs without a central coordinator.