26use InvalidArgumentException;
30use Shellbox\Command\BoxedCommand;
33use Wikimedia\AtEase\AtEase;
49use Wikimedia\Timestamp\ConvertibleTimestamp;
89 protected const RES_ABSENT =
false;
91 protected const RES_ERROR =
null;
94 protected const ABSENT_NORMAL =
'FNE-N';
96 protected const ABSENT_LATEST =
'FNE-L';
112 parent::__construct( $config );
113 $this->mimeCallback = $config[
'mimeCallback'] ??
null;
115 $this->memCache = WANObjectCache::newEmpty();
116 $this->cheapCache =
new MapCacheLRU( self::CACHE_CHEAP_SIZE );
117 $this->expensiveCache =
new MapCacheLRU( self::CACHE_EXPENSIVE_SIZE );
128 return min( $this->maxFileSize, PHP_INT_MAX );
166 $status = $this->
newStatus(
'backend-fail-maxsize',
171 if (
$params[
'dstExists'] ??
true ) {
209 $status = $this->
newStatus(
'backend-fail-maxsize',
214 if (
$params[
'dstExists'] ??
true ) {
254 if (
$params[
'dstExists'] ??
true ) {
325 if (
$params[
'dstExists'] ??
true ) {
357 if ( count(
$params[
'headers'] ) ) {
396 $scopeLockS = $this->
getScopedFileLocks( $params[
'srcs'], LockManager::LOCK_UW, $status );
397 if ( $status->isOK() ) {
399 $start_time = microtime(
true );
401 $sec = microtime(
true ) - $start_time;
402 if ( !$status->isOK() ) {
403 $this->logger->error( static::class .
"-{$this->name}" .
404 " failed to concatenate " . count(
$params[
'srcs'] ) .
" file(s) [$sec sec]" );
423 AtEase::suppressWarnings();
424 $ok = ( is_file( $tmpPath ) && filesize( $tmpPath ) == 0 );
425 AtEase::restoreWarnings();
427 $status->fatal(
'backend-fail-opentemp', $tmpPath );
434 foreach ( $fsFiles as
$path => &$fsFile ) {
439 $fsFile === self::RES_ERROR ?
'backend-fail-read' :
'backend-fail-notexists',
450 $tmpHandle = fopen( $tmpPath,
'ab' );
451 if ( $tmpHandle ===
false ) {
452 $status->fatal(
'backend-fail-opentemp', $tmpPath );
458 foreach ( $fsFiles as $virtualSource => $fsFile ) {
460 $sourceHandle = fopen( $fsFile->getPath(),
'rb' );
461 if ( $sourceHandle ===
false ) {
462 fclose( $tmpHandle );
463 $status->fatal(
'backend-fail-read', $virtualSource );
468 if ( !stream_copy_to_stream( $sourceHandle, $tmpHandle ) ) {
469 fclose( $sourceHandle );
470 fclose( $tmpHandle );
471 $status->fatal(
'backend-fail-writetemp', $tmpPath );
475 fclose( $sourceHandle );
477 if ( !fclose( $tmpHandle ) ) {
478 $status->fatal(
'backend-fail-closetemp', $tmpPath );
497 if ( $dir ===
null ) {
498 $status->fatal(
'backend-fail-invalidpath',
$params[
'dir'] );
503 if ( $shard !==
null ) {
506 $this->logger->debug( __METHOD__ .
": iterating over all container shards." );
534 if ( $dir ===
null ) {
535 $status->fatal(
'backend-fail-invalidpath',
$params[
'dir'] );
540 if ( $shard !==
null ) {
543 $this->logger->debug( __METHOD__ .
": iterating over all container shards." );
571 if ( $dir ===
null ) {
572 $status->fatal(
'backend-fail-invalidpath',
$params[
'dir'] );
577 if ( $shard !==
null ) {
580 $this->logger->debug( __METHOD__ .
": iterating over all container shards." );
610 if ( $subDirsRel !==
null ) {
611 foreach ( $subDirsRel as $subDirRel ) {
612 $subDir =
$params[
'dir'] .
"/{$subDirRel}";
613 $status->merge( $this->
doClean( [
'dir' => $subDir ] +
$params ) );
615 unset( $subDirsRel );
620 if ( $dir ===
null ) {
621 $status->fatal(
'backend-fail-invalidpath',
$params[
'dir'] );
627 $filesLockEx = [
$params[
'dir'] ];
629 $scopedLockE = $this->
getScopedFileLocks( $filesLockEx, LockManager::LOCK_EX, $status );
630 if ( !$status->isOK() ) {
634 if ( $shard !==
null ) {
638 $this->logger->debug( __METHOD__ .
": iterating over all container shards." );
666 if ( is_array( $stat ) ) {
670 return $stat === self::RES_ABSENT ? false : self::EXISTENCE_ERROR;
678 if ( is_array( $stat ) ) {
679 return $stat[
'mtime'];
682 return self::TIMESTAMP_FAIL;
690 if ( is_array( $stat ) ) {
691 return $stat[
'size'];
694 return self::SIZE_FAIL;
702 if (
$path ===
null ) {
703 return self::STAT_ERROR;
708 $latest = !empty(
$params[
'latest'] );
710 $requireSHA1 = !empty(
$params[
'requireSHA1'] );
712 $stat = $this->cheapCache->getField(
$path,
'stat', self::CACHE_TTL );
721 ( $requireSHA1 && is_array( $stat ) && !isset( $stat[
'sha1'] ) )
725 $stat = $this->cheapCache->getField(
$path,
'stat', self::CACHE_TTL );
729 if ( is_array( $stat ) ) {
731 ( !$latest || !empty( $stat[
'latest'] ) ) &&
732 ( !$requireSHA1 || isset( $stat[
'sha1'] ) )
736 } elseif ( $stat === self::ABSENT_LATEST ) {
737 return self::STAT_ABSENT;
738 } elseif ( $stat === self::ABSENT_NORMAL ) {
740 return self::STAT_ABSENT;
748 if ( is_array( $stat ) ) {
752 return $stat === self::RES_ERROR ? self::STAT_ERROR : self::STAT_ABSENT;
765 foreach ( $stats as
$path => $stat ) {
766 if ( is_array( $stat ) ) {
768 $stat[
'latest'] ??= $latest;
770 $this->cheapCache->setField(
$path,
'stat', $stat );
771 if ( isset( $stat[
'sha1'] ) ) {
773 $this->cheapCache->setField(
776 [
'hash' => $stat[
'sha1'],
'latest' => $latest ]
779 if ( isset( $stat[
'xattr'] ) ) {
782 $this->cheapCache->setField(
785 [
'map' => $stat[
'xattr'],
'latest' => $latest ]
790 } elseif ( $stat === self::RES_ABSENT ) {
791 $this->cheapCache->setField(
794 $latest ? self::ABSENT_LATEST : self::ABSENT_NORMAL
796 $this->cheapCache->setField(
799 [
'map' => self::XATTRS_FAIL,
'latest' => $latest ]
801 $this->cheapCache->setField(
804 [
'hash' => self::SHA1_FAIL,
'latest' => $latest ]
806 $this->logger->debug(
807 __METHOD__ .
': File {path} does not exist',
812 $this->logger->error(
813 __METHOD__ .
': Could not stat file {path}',
835 foreach ( $contents as
$path => $content ) {
836 if ( !is_string( $content ) ) {
837 $contents[
$path] = self::CONTENT_FAIL;
853 if ( $fsFile instanceof
FSFile ) {
854 AtEase::suppressWarnings();
855 $content = file_get_contents( $fsFile->getPath() );
856 AtEase::restoreWarnings();
857 $contents[
$path] = is_string( $content ) ? $content : self::RES_ERROR;
860 $contents[
$path] = $fsFile;
872 if (
$path ===
null ) {
873 return self::XATTRS_FAIL;
875 $latest = !empty(
$params[
'latest'] );
876 if ( $this->cheapCache->hasField(
$path,
'xattr', self::CACHE_TTL ) ) {
877 $stat = $this->cheapCache->getField(
$path,
'xattr' );
880 if ( !$latest || $stat[
'latest'] ) {
885 if ( is_array( $fields ) ) {
887 $this->cheapCache->setField(
890 [
'map' => $fields,
'latest' => $latest ]
892 } elseif ( $fields === self::RES_ABSENT ) {
893 $this->cheapCache->setField(
896 [
'map' => self::XATTRS_FAIL,
'latest' => $latest ]
899 $fields = self::XATTRS_FAIL;
912 return [
'headers' => [],
'metadata' => [] ];
920 if (
$path ===
null ) {
921 return self::SHA1_FAIL;
923 $latest = !empty(
$params[
'latest'] );
924 if ( $this->cheapCache->hasField(
$path,
'sha1', self::CACHE_TTL ) ) {
925 $stat = $this->cheapCache->getField(
$path,
'sha1' );
928 if ( !$latest || $stat[
'latest'] ) {
929 return $stat[
'hash'];
933 if ( is_string( $sha1 ) ) {
934 $this->cheapCache->setField(
937 [
'hash' => $sha1,
'latest' => $latest ]
939 } elseif ( $sha1 === self::RES_ABSENT ) {
940 $this->cheapCache->setField(
943 [
'hash' => self::SHA1_FAIL,
'latest' => $latest ]
946 $sha1 = self::SHA1_FAIL;
960 if ( $fsFile instanceof
FSFile ) {
961 $sha1 = $fsFile->getSha1Base36();
963 return is_string( $sha1 ) ? $sha1 : self::RES_ERROR;
966 return $fsFile === self::RES_ERROR ? self::RES_ERROR : self::RES_ABSENT;
975 return $fsFile ? $fsFile->getProps() : FSFile::placeholderProps();
985 $latest = !empty(
$params[
'latest'] );
987 foreach (
$params[
'srcs'] as $src ) {
989 if (
$path ===
null ) {
990 $fsFiles[$src] = self::RES_ERROR;
991 } elseif ( $this->expensiveCache->hasField(
$path,
'localRef' ) ) {
992 $val = $this->expensiveCache->getField(
$path,
'localRef' );
995 if ( !$latest || $val[
'latest'] ) {
996 $fsFiles[$src] = $val[
'object'];
1001 $params[
'srcs'] = array_diff(
$params[
'srcs'], array_keys( $fsFiles ) );
1003 if ( $fsFile instanceof
FSFile ) {
1004 $fsFiles[
$path] = $fsFile;
1005 $this->expensiveCache->setField(
1008 [
'object' => $fsFile,
'latest' => $latest ]
1012 $fsFiles[
$path] = $fsFile;
1052 return self::TEMPURL_ERROR;
1059 if ( $ref ===
false ) {
1060 return $this->
newStatus(
'backend-fail-notexists', $params[
'src'] );
1061 } elseif ( $ref ===
null ) {
1062 return $this->
newStatus(
'backend-fail-read', $params[
'src'] );
1064 $file = $command->newInputFileFromFile( $ref->getPath() )
1065 ->userData( __CLASS__, $ref );
1066 $command->inputFile( $boxedName, $file );
1077 $params[
'options'] ??= [];
1081 if ( ( empty(
$params[
'headless'] ) ||
$params[
'headers'] ) && headers_sent() ) {
1082 print
"Headers already sent, terminating.\n";
1083 $status->fatal(
'backend-fail-stream',
$params[
'src'] );
1109 $this->getStreamerOptions()
1111 $res = $streamer->stream(
$params[
'headers'],
true,
$params[
'options'], $flags );
1118 $status->fatal(
'backend-fail-stream',
$params[
'src'] );
1126 if ( $dir ===
null ) {
1127 return self::EXISTENCE_ERROR;
1129 if ( $shard !==
null ) {
1132 $this->logger->debug( __METHOD__ .
": iterating over all container shards." );
1137 if ( $exists ===
true ) {
1140 } elseif ( $exists === self::RES_ERROR ) {
1141 $res = self::EXISTENCE_ERROR;
1161 if ( $dir ===
null ) {
1162 return self::EXISTENCE_ERROR;
1164 if ( $shard !==
null ) {
1168 $this->logger->debug( __METHOD__ .
": iterating over all container shards." );
1191 if ( $dir ===
null ) {
1192 return self::LIST_ERROR;
1194 if ( $shard !==
null ) {
1198 $this->logger->debug( __METHOD__ .
": iterating over all container shards." );
1232 'store' => StoreFileOp::class,
1233 'copy' => CopyFileOp::class,
1234 'move' => MoveFileOp::class,
1235 'delete' => DeleteFileOp::class,
1236 'create' => CreateFileOp::class,
1237 'describe' => DescribeFileOp::class,
1238 'null' => NullFileOp::class
1243 foreach ( $ops as $operation ) {
1244 $opName = $operation[
'op'];
1245 if ( isset( $supportedOps[$opName] ) ) {
1246 $class = $supportedOps[$opName];
1250 $performOps[] =
new $class( $this,
$params, $this->logger );
1271 $paths = [
'sh' => [],
'ex' => [] ];
1272 foreach ( $performOps as $fileOp ) {
1273 $paths[
'sh'] = array_merge( $paths[
'sh'], $fileOp->storagePathsRead() );
1274 $paths[
'ex'] = array_merge( $paths[
'ex'], $fileOp->storagePathsChanged() );
1277 $paths[
'sh'] = array_diff( $paths[
'sh'], $paths[
'ex'] );
1279 $paths[
'sh'] = array_merge( $paths[
'sh'], array_map(
'dirname', $paths[
'ex'] ) );
1282 LockManager::LOCK_UW => $paths[
'sh'],
1283 LockManager::LOCK_EX => $paths[
'ex']
1299 $ops = array_map( [ $this,
'sanitizeOpHeaders' ], $ops );
1303 foreach ( $fileOps as $fileOp ) {
1304 $pathsUsed = array_merge( $pathsUsed, $fileOp->storagePathsReadOrChanged() );
1308 if ( empty( $opts[
'nonLocking'] ) ) {
1312 if ( !$status->isOK() ) {
1318 if ( empty( $opts[
'preserveCache'] ) ) {
1323 $this->cheapCache->setMaxSize( max( 2 * count( $pathsUsed ), self::CACHE_CHEAP_SIZE ) );
1328 $ok = $this->
preloadFileStat( [
'srcs' => $pathsUsed,
'latest' =>
true ] );
1336 $subStatus = $this->
newStatus(
'backend-fail-internal', $this->name );
1337 foreach ( $ops as $i => $op ) {
1338 $subStatus->success[$i] =
false;
1339 ++$subStatus->failCount;
1341 $this->logger->error( static::class .
"-{$this->name} " .
1342 " stat failure; aborted operations: " . FormatJson::encode( $ops ) );
1346 $status->merge( $subStatus );
1347 $status->success = $subStatus->success;
1350 $this->cheapCache->setMaxSize( self::CACHE_CHEAP_SIZE );
1361 $ops = array_map( [ $this,
'sanitizeOpHeaders' ], $ops );
1365 foreach ( $fileOps as $fileOp ) {
1366 $pathsUsed = array_merge( $pathsUsed, $fileOp->storagePathsReadOrChanged() );
1373 $async = ( $this->parallelize ===
'implicit' && count( $ops ) > 1 );
1379 foreach ( $fileOps as $index => $fileOp ) {
1381 ? $fileOp->attemptAsyncQuick()
1382 : $fileOp->attemptQuick();
1384 if ( count( $batch ) >= $maxConcurrency ) {
1390 $batch[$index] = $subStatus->value;
1392 $statuses[$index] = $subStatus;
1395 if ( count( $batch ) ) {
1399 foreach ( $statuses as $index => $subStatus ) {
1400 $status->merge( $subStatus );
1401 if ( $subStatus->isOK() ) {
1402 $status->success[$index] =
true;
1403 ++$status->successCount;
1405 $status->success[$index] =
false;
1406 ++$status->failCount;
1428 foreach ( $fileOpHandles as $fileOpHandle ) {
1430 throw new InvalidArgumentException(
"Expected FileBackendStoreOpHandle object." );
1431 } elseif ( $fileOpHandle->backend->getName() !== $this->getName() ) {
1432 throw new InvalidArgumentException(
"Expected handle for this file backend." );
1437 foreach ( $fileOpHandles as $fileOpHandle ) {
1438 $fileOpHandle->closeResources();
1454 if ( count( $fileOpHandles ) ) {
1455 throw new FileBackendError(
"Backend does not support asynchronous operations." );
1473 static $longs = [
'content-disposition' ];
1475 if ( isset( $op[
'headers'] ) ) {
1477 foreach ( $op[
'headers'] as
$name => $value ) {
1479 $maxHVLen = in_array(
$name, $longs ) ? INF : 255;
1480 if ( strlen(
$name ) > 255 || strlen( $value ) > $maxHVLen ) {
1481 $this->logger->error(
"Header '{header}' is too long.", [
1482 'filebackend' => $this->name,
1483 'header' =>
"$name: $value",
1486 $newHeaders[
$name] = strlen( $value ) ? $value :
'';
1489 $op[
'headers'] = $newHeaders;
1497 foreach ( $paths as
$path ) {
1499 $fullConts[] = $fullCont;
1507 if ( is_array( $paths ) ) {
1508 $paths = array_map( [ FileBackend::class,
'normalizeStoragePath' ], $paths );
1509 $paths = array_filter( $paths,
'strlen' );
1511 if ( $paths ===
null ) {
1512 $this->cheapCache->clear();
1513 $this->expensiveCache->clear();
1515 foreach ( $paths as
$path ) {
1516 $this->cheapCache->clear(
$path );
1517 $this->expensiveCache->clear(
$path );
1538 $params[
'concurrency'] = ( $this->parallelize !==
'off' ) ? $this->concurrency : 1;
1540 if ( $stats ===
null ) {
1545 $latest = !empty(
$params[
'latest'] );
1608 return (
bool)preg_match(
'/^[a-z0-9][a-z0-9-_.]{0,199}$/i', $container );
1626 if ( $backend === $this->name ) {
1628 if ( $relPath !==
null && self::isValidShortContainerName( $shortCont ) ) {
1633 if ( $relPath !==
null ) {
1636 if ( self::isValidContainerName( $container ) ) {
1639 if ( $container !==
null ) {
1640 return [ $container, $relPath, $cShard ];
1647 return [
null,
null, null ];
1667 if ( $cShard !==
null && substr( $relPath, -1 ) !==
'/' ) {
1668 return [ $container, $relPath ];
1671 return [
null, null ];
1684 if ( $levels == 1 || $levels == 2 ) {
1686 $char = ( $base == 36 ) ?
'[0-9a-z]' :
'[0-9a-f]';
1689 if ( $levels === 1 ) {
1690 $hashDirRegex =
'(' . $char .
')';
1693 $hashDirRegex = $char .
'/(' . $char .
'{2})';
1695 $hashDirRegex =
'(' . $char .
')/(' . $char .
')';
1702 if ( preg_match(
"!^(?:[^/]{2,}/)*$hashDirRegex(?:/|$)!", $relPath, $m ) ) {
1703 return '.' . implode(
'', array_slice( $m, 1 ) );
1723 return ( $shard !==
null );
1735 if ( isset( $this->shardViaHashLevels[$container] ) ) {
1736 $config = $this->shardViaHashLevels[$container];
1737 $hashLevels = (int)$config[
'levels'];
1738 if ( $hashLevels == 1 || $hashLevels == 2 ) {
1739 $hashBase = (int)$config[
'base'];
1740 if ( $hashBase == 16 || $hashBase == 36 ) {
1741 return [ $hashLevels, $hashBase, $config[
'repeat'] ];
1746 return [ 0, 0, false ];
1758 if ( $digits > 0 ) {
1759 $numShards = $base ** $digits;
1760 for ( $index = 0; $index < $numShards; $index++ ) {
1761 $shards[] =
'.' . \Wikimedia\base_convert( (
string)$index, 10, $base, $digits );
1775 if ( $this->domainId !=
'' ) {
1776 return "{$this->domainId}-$container";
1807 return $relStoragePath;
1816 private function containerCacheKey( $container ) {
1817 return "filebackend:{$this->name}:{$this->domainId}:container:{$container}";
1827 if ( !$this->memCache->set( $this->containerCacheKey( $container ), $val, 14 * 86400 ) ) {
1828 $this->logger->warning(
"Unable to set stat cache for container {container}.",
1829 [
'filebackend' => $this->name,
'container' => $container ]
1841 if ( !$this->memCache->delete( $this->containerCacheKey( $container ), 300 ) ) {
1842 $this->logger->warning(
"Unable to delete stat cache for container {container}.",
1843 [
'filebackend' => $this->name,
'container' => $container ]
1862 foreach ( $items as $item ) {
1863 if ( self::isStoragePath( $item ) ) {
1865 } elseif ( is_string( $item ) ) {
1866 $contNames[$this->containerCacheKey( $item )] = $item;
1870 foreach ( $paths as
$path ) {
1872 if ( $fullCont !==
null ) {
1873 $contNames[$this->containerCacheKey( $fullCont )] = $fullCont;
1879 $values = $this->memCache->getMulti( array_keys( $contNames ) );
1880 foreach ( $values as $cacheKey => $val ) {
1881 $contInfo[$contNames[$cacheKey]] = $val;
1905 private function fileCacheKey(
$path ) {
1906 return "filebackend:{$this->name}:{$this->domainId}:file:" . sha1(
$path );
1919 if (
$path ===
null ) {
1922 $mtime = (int)ConvertibleTimestamp::convert( TS_UNIX, $val[
'mtime'] );
1923 $ttl = $this->memCache->adaptiveTTL( $mtime, 7 * 86400, 300, 0.1 );
1924 $key = $this->fileCacheKey(
$path );
1926 if ( !$this->memCache->set( $key, $val, $ttl ) ) {
1927 $this->logger->warning(
"Unable to set stat cache for file {path}.",
1928 [
'filebackend' => $this->name,
'path' =>
$path ]
1943 if (
$path ===
null ) {
1946 if ( !$this->memCache->delete( $this->fileCacheKey(
$path ), 300 ) ) {
1947 $this->logger->warning(
"Unable to delete stat cache for file {path}.",
1948 [
'filebackend' => $this->name,
'path' =>
$path ]
1967 foreach ( $items as $item ) {
1968 if ( self::isStoragePath( $item ) ) {
1970 if (
$path !==
null ) {
1976 foreach ( $paths as
$path ) {
1978 if ( $rel !==
null ) {
1979 $pathNames[$this->fileCacheKey(
$path )] =
$path;
1984 $values = $this->memCache->getMulti( array_keys( $pathNames ) );
1986 foreach ( array_filter( $values,
'is_array' ) as $cacheKey => $stat ) {
1987 $path = $pathNames[$cacheKey];
1990 unset( $stat[
'latest'] );
1992 $this->cheapCache->setField(
$path,
'stat', $stat );
1993 if ( isset( $stat[
'sha1'] ) && strlen( $stat[
'sha1'] ) == 31 ) {
1995 $this->cheapCache->setField(
1998 [
'hash' => $stat[
'sha1'],
'latest' =>
false ]
2001 if ( isset( $stat[
'xattr'] ) && is_array( $stat[
'xattr'] ) ) {
2004 $this->cheapCache->setField(
2007 [
'map' => $stat[
'xattr'],
'latest' =>
false ]
2021 $newXAttr = [
'headers' => [],
'metadata' => [] ];
2023 foreach ( $xattr[
'headers'] as
$name => $value ) {
2024 $newXAttr[
'headers'][strtolower(
$name )] = $value;
2027 foreach ( $xattr[
'metadata'] as
$name => $value ) {
2028 $newXAttr[
'metadata'][strtolower(
$name )] = $value;
2041 $opts[
'concurrency'] = 1;
2042 if ( $this->parallelize ===
'implicit' ) {
2043 if ( $opts[
'parallelize'] ??
true ) {
2046 } elseif ( $this->parallelize ===
'explicit' ) {
2047 if ( !empty( $opts[
'parallelize'] ) ) {
2065 if ( $this->mimeCallback ) {
2066 return call_user_func_array( $this->mimeCallback, func_get_args() );
2069 $mime = ( $fsPath !== null ) ? mime_content_type( $fsPath ) :
false;
2070 return $mime ?:
'unknown/unknown';
2075class_alias( FileBackendStore::class,
'FileBackendStore' );
array $params
The job parameters.
Resource locking handling.
Store key-value entries in a size-limited in-memory LRU cache.
Generic operation result class. Has a warning/error list, a boolean status, and an arbitrary value.