Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
36.47% |
62 / 170 |
|
17.39% |
4 / 23 |
CRAP | |
0.00% |
0 / 1 |
| DjVuHandler | |
36.69% |
62 / 169 |
|
17.39% |
4 / 23 |
1037.61 | |
0.00% |
0 / 1 |
| isEnabled | |
66.67% |
4 / 6 |
|
0.00% |
0 / 1 |
3.33 | |||
| mustRender | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| isExpensiveToThumbnail | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| isMultiPage | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| getParamMap | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
| validateParam | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
20 | |||
| makeParamString | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
| parseParamString | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
| getScriptParams | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
| doTransform | |
0.00% |
0 / 62 |
|
0.00% |
0 / 1 |
90 | |||
| getDjVuImage | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
| getMetadataInternal | |
77.78% |
7 / 9 |
|
0.00% |
0 / 1 |
4.18 | |||
| getMetaTree | |
70.00% |
7 / 10 |
|
0.00% |
0 / 1 |
8.32 | |||
| getThumbType | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
6 | |||
| getSizeAndMetadata | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
2 | |||
| getMetadataType | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| isFileMetadataValid | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 | |||
| pageCount | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
| getPageDimensions | |
80.00% |
4 / 5 |
|
0.00% |
0 / 1 |
3.07 | |||
| getDimensionInfo | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
1 | |||
| getDimensionInfoFromMetaTree | |
81.25% |
13 / 16 |
|
0.00% |
0 / 1 |
6.24 | |||
| getPageText | |
66.67% |
4 / 6 |
|
0.00% |
0 / 1 |
4.59 | |||
| useSplitMetadata | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| 1 | <?php |
| 2 | /** |
| 3 | * Handler for DjVu images. |
| 4 | * |
| 5 | * @license GPL-2.0-or-later |
| 6 | * @file |
| 7 | * @ingroup Media |
| 8 | */ |
| 9 | |
| 10 | namespace MediaWiki\Media; |
| 11 | |
| 12 | use MediaWiki\FileRepo\File\File; |
| 13 | use MediaWiki\MainConfigNames; |
| 14 | use MediaWiki\MediaWikiServices; |
| 15 | use MediaWiki\PoolCounter\PoolCounterWorkViaCallback; |
| 16 | use MediaWiki\Shell\Shell; |
| 17 | |
| 18 | /** |
| 19 | * Handler for DjVu images |
| 20 | * |
| 21 | * @ingroup Media |
| 22 | */ |
| 23 | class DjVuHandler extends ImageHandler { |
| 24 | private const EXPENSIVE_SIZE_LIMIT = 10_485_760; // 10MiB |
| 25 | |
| 26 | // Constants for getHandlerState |
| 27 | private const STATE_DJVU_IMAGE = 'djvuImage'; |
| 28 | private const STATE_TEXT_TREE = 'djvuTextTree'; |
| 29 | private const STATE_META_TREE = 'djvuMetaTree'; |
| 30 | private const CACHE_VERSION = 'v2'; |
| 31 | |
/**
 * Report whether DjVu handling is configured.
 *
 * Both the DjvuRenderer and DjvuDump settings must be non-empty; when
 * either one is missing a debug message is logged and false is returned.
 *
 * @return bool
 */
public function isEnabled() {
	$config = MediaWikiServices::getInstance()->getMainConfig();
	$renderer = $config->get( MainConfigNames::DjvuRenderer );
	$dumper = $config->get( MainConfigNames::DjvuDump );

	if ( $renderer && $dumper ) {
		return true;
	}

	wfDebug( "DjVu is disabled, please set \$wgDjvuRenderer and \$wgDjvuDump" );

	return false;
}
| 45 | |
/**
 * Always reports that the file has to be rendered, whichever file is given.
 *
 * @param File $file Not inspected.
 * @return bool Always true.
 */
public function mustRender( $file ) {
	return true;
}
| 53 | |
/**
 * Whether thumbnailing the file counts as expensive, judged by file size.
 *
 * The threshold is read with self:: rather than static:: because
 * EXPENSIVE_SIZE_LIMIT is a private constant: private constants are not
 * inherited, so a late-static-bound lookup would fail if this method ran
 * on an instance of a subclass.
 *
 * @param File $file
 * @return bool True if the file size is strictly greater than EXPENSIVE_SIZE_LIMIT.
 */
public function isExpensiveToThumbnail( $file ) {
	return $file->getSize() > self::EXPENSIVE_SIZE_LIMIT;
}
| 62 | |
/**
 * Always reports the file as multi-page, whichever file is given.
 *
 * @param File $file Not inspected.
 * @return bool Always true.
 */
public function isMultiPage( $file ) {
	return true;
}
| 70 | |
/**
 * Map the `img_*` parameter keys to the parameter names used by this
 * handler ('width' and 'page').
 *
 * @return array<string,string>
 */
public function getParamMap() {
	$paramMap = [];
	$paramMap['img_width'] = 'width';
	$paramMap['img_page'] = 'page';

	return $paramMap;
}
| 80 | |
/**
 * Validate a single transform parameter.
 *
 * Only 'width', 'height' and 'page' are accepted, and the value must be
 * greater than zero. A 'page' value must additionally be a plain integer
 * once surrounding whitespace is trimmed.
 *
 * @param string $name Parameter name
 * @param mixed $value Parameter value
 * @return bool True if the parameter is known and its value is acceptable
 */
public function validateParam( $name, $value ) {
	if ( $name === 'page' && trim( $value ) !== (string)intval( $value ) ) {
		// Extra junk on the end of page, probably actually a caption
		// e.g. [[File:Foo.djvu|thumb|Page 3 of the document shows foo]]
		return false;
	}

	// Strict comparison: a non-string name (e.g. boolean true) must not
	// loosely match one of the known parameter names.
	return in_array( $name, [ 'width', 'height', 'page' ], true ) && $value > 0;
}
| 94 | |
/**
 * Build the parameter string of the form "page<page>-<width>px".
 *
 * The page defaults to 1 when not given. Without a width no string can
 * be built and false is returned.
 *
 * @param array $params May contain 'page'; must contain 'width'
 * @return string|false
 */
public function makeParamString( $params ) {
	if ( !isset( $params['width'] ) ) {
		return false;
	}

	$pageNumber = $params['page'] ?? 1;

	return 'page' . $pageNumber . '-' . $params['width'] . 'px';
}
| 107 | |
/**
 * Parse a parameter string of the form "page<digits>-<digits>px".
 *
 * @param string $str
 * @return array|false Array with 'width' and 'page' (both as digit
 *  strings), or false if the string does not have the expected form
 */
public function parseParamString( $str ) {
	$matches = [];
	if ( !preg_match( '/^page(\d+)-(\d+)px$/', $str, $matches ) ) {
		return false;
	}

	[ , $pageNumber, $widthPx ] = $matches;

	return [ 'width' => $widthPx, 'page' => $pageNumber ];
}
| 119 | |
/**
 * Reduce the transform parameters to just 'width' and 'page'.
 *
 * @param array $params Must contain 'width' and 'page'
 * @return array Array with exactly the keys 'width' and 'page'
 */
protected function getScriptParams( $params ) {
	$scriptParams = [];
	foreach ( [ 'width', 'page' ] as $key ) {
		$scriptParams[$key] = $params[$key];
	}

	return $scriptParams;
}
| 130 | |
/**
 * Render one page of a DjVu file to a thumbnail via the configured renderer.
 *
 * Steps, in order:
 *  - normalise the parameters (TransformParameterError on failure);
 *  - with TRANSFORM_LATER set, return a ThumbnailImage without rendering;
 *  - create the destination directory;
 *  - obtain a local copy of the source, going through a pool counter for
 *    large files;
 *  - run the renderer (optionally piped through the post-processor) and
 *    write the output to $dstPath;
 *  - on a non-zero exit status or a removed bad output file, log and
 *    return a MediaTransformError.
 *
 * @param File $image
 * @param string $dstPath Filesystem path the thumbnail is written to
 * @param string $dstUrl URL handed to the resulting ThumbnailImage
 * @param array $params Transform parameters, normalised by normaliseParams()
 * @param int $flags Bitfield; only self::TRANSFORM_LATER is checked here
 * @return MediaTransformError|ThumbnailImage|TransformParameterError
 */
public function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) {
	$config = MediaWikiServices::getInstance()->getMainConfig();
	$renderer = $config->get( MainConfigNames::DjvuRenderer );
	$postProcessor = $config->get( MainConfigNames::DjvuPostProcessor );

	if ( !$this->normaliseParams( $image, $params ) ) {
		return new TransformParameterError( $params );
	}

	$width = $params['width'];
	$height = $params['height'];
	$page = $params['page'];
	// Parameters handed to the resulting ThumbnailImage, deferred or not.
	$thumbParams = [
		'width' => $width,
		'height' => $height,
		'page' => $page
	];

	if ( $flags & self::TRANSFORM_LATER ) {
		return new ThumbnailImage( $image, $dstUrl, $dstPath, $thumbParams );
	}

	$dstDirReady = wfMkdirParents( dirname( $dstPath ), null, __METHOD__ );
	if ( !$dstDirReady ) {
		$dirError = wfMessage( 'thumbnail_dest_directory' );

		return new MediaTransformError( 'thumbnail_error', $width, $height, $dirError );
	}

	// Get local copy source for shell scripts
	// Thumbnail extraction is very inefficient for large files.
	// Provide a way to pool count limit the number of downloaders.
	if ( $image->getSize() < 1e7 ) { // below 10 MB: fetch directly
		$srcPath = $image->getLocalRefPath();
	} else {
		$work = new PoolCounterWorkViaCallback(
			'GetLocalFileCopy',
			sha1( $image->getName() ),
			[ 'doWork' => static fn () => $image->getLocalRefPath() ]
		);
		$srcPath = $work->execute();
	}

	if ( $srcPath === false ) { // Failed to get local copy
		$logLine = sprintf(
			'Thumbnail failed on %s: could not get local copy of "%s"',
			wfHostname(),
			$image->getName()
		);
		wfDebugLog( 'thumbnail', $logLine );

		return new MediaTransformError( 'thumbnail_error', $width, $height, wfMessage( 'filemissing' ) );
	}

	// Use a subshell (brackets) to aggregate stderr from both pipeline commands
	// before redirecting it to the overall stdout. This works in both Linux and Windows XP.
	$sizeArg = "-size={$params['physicalWidth']}x{$params['physicalHeight']}";
	$cmd = '(' . Shell::escape( $renderer, "-format=ppm", "-page={$page}", $sizeArg, $srcPath );
	if ( $postProcessor ) {
		$cmd .= ' | ' . $postProcessor;
	}
	$cmd .= ' > ' . Shell::escape( $dstPath ) . ') 2>&1';
	wfDebug( __METHOD__ . ": $cmd" );

	$retval = 0;
	$err = wfShellExec( $cmd, $retval );
	$removed = $this->removeBadFile( $dstPath, $retval );

	if ( $retval === 0 && !$removed ) {
		return new ThumbnailImage( $image, $dstUrl, $dstPath, $thumbParams );
	}

	$this->logErrorForExternalProcess( $retval, $err, $cmd );

	return new MediaTransformError( 'thumbnail_error', $width, $height, $err );
}
| 225 | |
/**
 * Fetch the DjVuImage stored in the given handler state, creating and
 * storing a new one for $path if none is there yet.
 *
 * @param MediaHandlerState $state
 * @param string $path Passed to the DjVuImage constructor on first use
 * @return DjVuImage
 */
private function getDjVuImage( $state, $path ) {
	$cached = $state->getHandlerState( self::STATE_DJVU_IMAGE );
	if ( $cached ) {
		return $cached;
	}

	$djvuImage = new DjVuImage( $path );
	$state->setHandlerState( self::STATE_DJVU_IMAGE, $djvuImage );

	return $djvuImage;
}
| 242 | |
/**
 * Fetch the metadata items needed by this handler from the file.
 *
 * Requests the 'error', '_error' and 'data' items, plus 'text' when
 * $gettext is set. If either error item is present the metadata is
 * treated as unusable.
 *
 * @param File $file The DjVu file in question
 * @param bool $gettext Whether to request the 'text' item as well
 * @return array|false The fetched items, or false if an error item is set
 */
private function getMetadataInternal( File $file, $gettext ) {
	$wanted = $gettext
		? [ 'error', '_error', 'data', 'text' ]
		: [ 'error', '_error', 'data' ];
	$items = $file->getMetadataItems( $wanted );

	if ( isset( $items['error'] ) || isset( $items['_error'] ) ) {
		return false;
	}

	return $items;
}
| 265 | |
/**
 * Get the metadata tree for a DjVu file.
 *
 * A tree already present in the file's handler state (text tree when
 * $gettext is set, meta tree otherwise) is returned directly. Otherwise
 * the metadata items are fetched from the file; this method itself does
 * not write the result back into the handler state.
 *
 * @param File $image
 * @param bool $gettext Whether the page text should be included (Default: false)
 * @return false|array The metadata array, or false if none is usable
 */
public function getMetaTree( $image, $gettext = false ) {
	$stateKey = $gettext ? self::STATE_TEXT_TREE : self::STATE_META_TREE;
	$storedTree = $image->getHandlerState( $stateKey );
	if ( $storedTree ) {
		return $storedTree;
	}

	$tree = $this->getMetadataInternal( $image, $gettext );
	if ( !$tree ) {
		return false;
	}

	if ( !$gettext ) {
		unset( $tree['text'] );
	}

	return $tree;
}
| 290 | |
/**
 * Thumbnails use the configured DjvuOutputExtension and the MIME type the
 * MIME analyzer associates with that extension; the arguments are ignored.
 *
 * @inheritDoc
 */
public function getThumbType( $ext, $mime, $params = null ) {
	$djvuOutputExtension = MediaWikiServices::getInstance()->getMainConfig()
		->get( MainConfigNames::DjvuOutputExtension );

	// Memoise per extension rather than in a single slot, so that a call
	// made with a different configured extension never gets paired with
	// the MIME type looked up for an earlier one. A null result is not
	// remembered and is looked up again on the next call.
	static $mimeByExtension = [];
	$mimeByExtension[$djvuOutputExtension] ??= MediaWikiServices::getInstance()
		->getMimeAnalyzer()
		->getMimeTypeFromExtensionOrNull( $djvuOutputExtension );

	return [ $djvuOutputExtension, $mimeByExtension[$djvuOutputExtension] ];
}
| 303 | |
/**
 * Returns the extracted metadata under the 'metadata' key, combined with
 * whatever DjVuImage::getImageSize() reports. When extraction fails, an
 * error marker array is stored as the metadata instead.
 *
 * @inheritDoc
 */
public function getSizeAndMetadata( $state, $path ) {
	wfDebug( "Getting DjVu metadata for $path" );

	$djvu = $this->getDjVuImage( $state, $path );
	$extracted = $djvu->retrieveMetaData();

	// Special value so that we don't repetitively try and decode a broken file.
	$result = [
		'metadata' => $extracted === false
			? [ 'error' => 'Error extracting metadata' ]
			: $extracted,
	];

	return $result + $djvu->getImageSize();
}
| 316 | |
/**
 * Always 'djvuxml'; the name is kept for historical reasons.
 *
 * @inheritDoc
 */
public function getMetadataType( $image ) {
	return 'djvuxml';
}
| 322 | |
/**
 * The metadata counts as good whenever the file's metadata array is
 * non-empty, and as bad otherwise.
 *
 * @inheritDoc
 */
public function isFileMetadataValid( $image ) {
	if ( $image->getMetadataArray() ) {
		return self::METADATA_GOOD;
	}

	return self::METADATA_BAD;
}
| 327 | |
/**
 * Page count taken from the dimension info, or false when no dimension
 * info is available.
 *
 * @inheritDoc
 */
public function pageCount( File $image ) {
	$dimensionInfo = $this->getDimensionInfo( $image );
	if ( !$dimensionInfo ) {
		return false;
	}

	return $dimensionInfo['pageCount'];
}
| 334 | |
/**
 * Dimensions recorded for the given 1-based page, or false when there is
 * no dimension info or no entry for that page.
 *
 * @inheritDoc
 */
public function getPageDimensions( File $image, $page ) {
	// MW starts pages at 1, the stored list starts at 0
	$zeroBasedIndex = $page - 1;

	$dimensionInfo = $this->getDimensionInfo( $image );
	if ( !$dimensionInfo ) {
		return false;
	}

	return $dimensionInfo['dimensionsByPage'][$zeroBasedIndex] ?? false;
}
| 346 | |
/**
 * Dimension info for the file, fetched through the main WAN object cache.
 *
 * The cache key is built from CACHE_VERSION and the file's SHA-1. On a
 * miss the value is computed from the file's metadata tree. TTL_INDEFINITE
 * is used both as the TTL and as the `pcTTL` option.
 *
 * @inheritDoc
 */
protected function getDimensionInfo( File $file ) {
	$wanCache = MediaWikiServices::getInstance()->getMainWANObjectCache();
	$cacheKey = $wanCache->makeKey( 'file-djvu', 'dimensions', self::CACHE_VERSION, $file->getSha1() );
	$computeInfo = fn () => $this->getDimensionInfoFromMetaTree( $this->getMetaTree( $file ) );

	return $wanCache->getWithSetCallback(
		$cacheKey,
		$wanCache::TTL_INDEFINITE,
		$computeInfo,
		[ 'pcTTL' => $wanCache::TTL_INDEFINITE ]
	);
}
| 360 | |
/**
 * Given the metadata, returns dimension information about the document.
 *
 * The page list is read from $metatree['data']['pages']. Each non-empty
 * page entry contributes its 'width' and 'height' cast to int; an empty
 * entry contributes false, so list positions stay aligned with the pages.
 *
 * @param false|array $metatree The file's metadata tree
 * @return array|false Array with 'pageCount' and 'dimensionsByPage', or
 *  false if the tree is empty, has no usable 'data', or 'data' has no
 *  'pages' list
 */
protected function getDimensionInfoFromMetaTree( $metatree ) {
	if ( !$metatree || empty( $metatree['data'] ) ) {
		return false;
	}

	// Malformed metadata may carry 'data' without a 'pages' list. Bail out
	// instead of iterating over and counting a non-array, which would
	// raise warnings and, on PHP 8, a TypeError from count().
	$pages = $metatree['data']['pages'] ?? null;
	if ( !is_array( $pages ) ) {
		return false;
	}

	$dimsByPage = [];
	foreach ( $pages as $page ) {
		$dimsByPage[] = $page
			? [
				'width' => (int)$page['width'],
				'height' => (int)$page['height'],
			]
			: false;
	}

	return [
		'pageCount' => count( $pages ),
		'dimensionsByPage' => $dimsByPage
	];
}
| 390 | |
/**
 * Look up the text stored for one page in the file's metadata.
 *
 * @param File $image
 * @param int $page Page number to get information for (1-based; the
 *  stored text list is indexed from 0)
 * @return string|false Page text or false when no text found.
 */
public function getPageText( File $image, $page ) {
	$tree = $this->getMetaTree( $image, true );
	if ( !$tree ) {
		return false;
	}

	$pageTexts = $tree['text'] ?? null;
	if ( $pageTexts === null ) {
		return false;
	}

	return $pageTexts[$page - 1] ?? false;
}
| 406 | |
/**
 * Always true for this handler.
 *
 * @inheritDoc
 */
public function useSplitMetadata() {
	return true;
}
| 411 | } |
| 412 | |
/**
 * Register the un-namespaced name `DjVuHandler` as an alias of the
 * namespaced MediaWiki\Media\DjVuHandler class declared in this file.
 *
 * @deprecated class alias since 1.46
 */
class_alias( DjVuHandler::class, 'DjVuHandler' );