MediaWiki master
DjVuHandler.php
Go to the documentation of this file.
1<?php
10namespace MediaWiki\Media;
11
17
/** File size in bytes (10 MiB) that isExpensiveToThumbnail() compares against. */
private const EXPENSIVE_SIZE_LIMIT = 10 * 1024 * 1024;

// Keys used with getHandlerState() / setHandlerState()
private const STATE_DJVU_IMAGE = 'djvuImage';
private const STATE_TEXT_TREE = 'djvuTextTree';
private const STATE_META_TREE = 'djvuMetaTree';

/** Version component of the dimension-info cache key built in getDimensionInfo(). */
private const CACHE_VERSION = 'v2';
31
/**
 * Report whether DjVu handling is configured.
 *
 * @return bool True only when both the DjvuRenderer and DjvuDump settings are truthy
 */
public function isEnabled() {
	$config = MediaWikiServices::getInstance()->getMainConfig();
	$renderer = $config->get( MainConfigNames::DjvuRenderer );
	$dumper = $config->get( MainConfigNames::DjvuDump );

	if ( $renderer && $dumper ) {
		return true;
	}

	// @codeCoverageIgnoreStart
	wfDebug( "DjVu is disabled, please set \$wgDjvuRenderer and \$wgDjvuDump" );

	return false;
	// @codeCoverageIgnoreEnd
}
47
/**
 * Whether files of this type must be rendered before display.
 *
 * @param mixed $file Not inspected
 * @return bool Always true
 */
public function mustRender( $file ) {
	$alwaysRender = true;

	return $alwaysRender;
}
55
/**
 * Whether thumbnailing this file counts as expensive, judged purely by its size.
 *
 * @param File $file
 * @return bool True when the file size exceeds EXPENSIVE_SIZE_LIMIT bytes
 */
public function isExpensiveToThumbnail( $file ) {
	// EXPENSIVE_SIZE_LIMIT is private, so it must be resolved against this
	// class (self::); static:: would look it up on the runtime subclass,
	// where a private constant is not visible.
	return $file->getSize() > self::EXPENSIVE_SIZE_LIMIT;
}
64
/**
 * Whether this handler treats files as multi-page documents.
 *
 * @param mixed $file Not inspected
 * @return bool Always true
 */
public function isMultiPage( $file ) {
	$multiPage = true;

	return $multiPage;
}
72
/**
 * Map of 'img_*' keys to the handler parameter names they stand for.
 *
 * @return string[] 'img_width' => 'width' and 'img_page' => 'page'
 */
public function getParamMap() {
	$map = [];
	$map['img_width'] = 'width';
	$map['img_page'] = 'page';

	return $map;
}
82
/**
 * Validate a single thumbnail parameter.
 *
 * @param string $name Parameter name
 * @param mixed $value Parameter value
 * @return bool True to accept the parameter, false to reject it
 */
public function validateParam( $name, $value ) {
	if ( $name === 'page' ) {
		if ( is_int( $value ) ) {
			return true;
		}
		// A string only counts as a page number when, apart from surrounding
		// whitespace, it is exactly an integer. Anything with extra junk is
		// probably actually a caption,
		// e.g. [[File:Foo.djvu|thumb|Page 3 of the document shows foo]]
		return is_string( $value ) && trim( $value ) === (string)intval( $value );
	}

	// 'page' is fully handled above, so only the size parameters remain.
	// Strict comparison keeps non-string names (e.g. true) from matching.
	$sizeParams = [ 'width', 'height', 'physicalWidth', 'physicalHeight' ];

	return in_array( $name, $sizeParams, true ) && (int)$value > 0;
}
94
/**
 * Build the "page<N>-<W>px" string for a set of transform parameters.
 *
 * 'physicalWidth' takes precedence over 'width'; the page defaults to 1.
 *
 * @param array $params
 * @return string|false False when no usable width is present
 */
public function makeParamString( $params ) {
	$pixelWidth = $params['physicalWidth'] ?? $params['width'] ?? null;
	if ( !$pixelWidth ) {
		return false;
	}
	$pageNumber = $params['page'] ?? 1;

	return 'page' . $pageNumber . '-' . $pixelWidth . 'px';
}
108
/**
 * Parse a "page<N>-<W>px" string back into its parameters.
 *
 * @param string $str
 * @return array|false [ 'width' => string, 'page' => string ], or false if $str does not match
 */
public function parseParamString( $str ) {
	$matches = [];
	if ( !preg_match( '/^page(\d+)-(\d+)px$/', $str, $matches ) ) {
		return false;
	}
	[ , $pageNumber, $pixelWidth ] = $matches;

	return [ 'width' => $pixelWidth, 'page' => $pageNumber ];
}
120
/**
 * Normalise the transform parameters in place.
 *
 * After the parent's normalisation succeeds, 'physicalWidth' is replaced by
 * the stepped thumbnail width and 'physicalHeight' is recomputed from it.
 *
 * @param File $image
 * @param array &$params
 * @return bool False when the parent rejects the parameters
 */
public function normaliseParams( $image, &$params ) {
	// Copy-paste from TransformationalImageHandler::normaliseParams() and PdfHandler
	// Probably should be trait or subclass.
	if ( !parent::normaliseParams( $image, $params ) ) {
		return false;
	}

	$pageNumber = $params['page'];
	$sourceWidth = $image->getWidth( $pageNumber );
	$sourceHeight = $image->getHeight( $pageNumber );

	$steppedWidth = $this->getSteppedThumbWidth(
		$image, $params['physicalWidth'], $sourceWidth, $sourceHeight
	);
	$params['physicalWidth'] = $steppedWidth;
	$params['physicalHeight'] = File::scaleHeight( $sourceWidth, $sourceHeight, $steppedWidth );

	return true;
}
138
/**
 * Reduce the transform parameters to the 'width' and 'page' entries.
 *
 * @param array $params
 * @return array [ 'width' => ..., 'page' => ... ]
 */
protected function getScriptParams( $params ) {
	$scriptParams = [];
	foreach ( [ 'width', 'page' ] as $key ) {
		$scriptParams[$key] = $params[$key];
	}

	return $scriptParams;
}
149
/**
 * Render one page of a DjVu file to a thumbnail by running the configured
 * renderer (optionally piped through the configured post-processor).
 *
 * @param File $image Source file
 * @param string $dstPath Filesystem path the thumbnail is written to
 * @param string $dstUrl URL passed on to the returned ThumbnailImage
 * @param array $params Transform parameters; normalised via normaliseParams()
 * @param int $flags Bitfield; only self::TRANSFORM_LATER is checked here
 * @return MediaTransformError|ThumbnailImage|TransformParameterError
 */
public function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) {
	$config = MediaWikiServices::getInstance()->getMainConfig();
	$djvuRenderer = $config->get( MainConfigNames::DjvuRenderer );
	$djvuPostProcessor = $config->get( MainConfigNames::DjvuPostProcessor );

	if ( !$this->normaliseParams( $image, $params ) ) {
		return new TransformParameterError( $params );
	}
	$width = $params['width'];
	$height = $params['height'];
	$page = $params['page'];

	if ( $flags & self::TRANSFORM_LATER ) {
		// Deferred rendering: describe the thumbnail without running the renderer.
		$params = [
			'width' => $width,
			'height' => $height,
			'page' => $page
		];

		return new ThumbnailImage( $image, $dstUrl, $dstPath, $params );
	}

	if ( !wfMkdirParents( dirname( $dstPath ), null, __METHOD__ ) ) {
		// @codeCoverageIgnoreStart
		return new MediaTransformError(
			'thumbnail_error',
			$width,
			$height,
			wfMessage( 'thumbnail_dest_directory' )
		);
		// @codeCoverageIgnoreEnd
	}

	// Get local copy source for shell scripts
	// Thumbnail extraction is very inefficient for large files.
	// Provide a way to pool count limit the number of downloaders.
	if ( $image->getSize() >= 1e7 ) { // 10 MB
		$work = new PoolCounterWorkViaCallback( 'GetLocalFileCopy', sha1( $image->getName() ),
			[
				'doWork' => static function () use ( $image ) {
					return $image->getLocalRefPath();
				}
			]
		);
		$srcPath = $work->execute();
	} else {
		$srcPath = $image->getLocalRefPath();
	}

	if ( $srcPath === false ) {
		// Failed to get local copy
		// @codeCoverageIgnoreStart
		wfDebugLog( 'thumbnail',
			sprintf( 'Thumbnail failed on %s: could not get local copy of "%s"',
				wfHostname(), $image->getName() ) );

		return new MediaTransformError( 'thumbnail_error',
			$width, $height,
			wfMessage( 'filemissing' )
		);
		// @codeCoverageIgnoreEnd
	}

	// Use a subshell (brackets) to aggregate stderr from both pipeline commands
	// before redirecting it to the overall stdout. This works in both Linux and Windows XP.
	$cmd = '(' . Shell::escape(
		$djvuRenderer,
		"-format=ppm",
		"-page={$page}",
		"-size={$params['physicalWidth']}x{$params['physicalHeight']}",
		$srcPath );
	if ( $djvuPostProcessor ) {
		// The post-processor setting is appended verbatim (it is a command line,
		// not a single argument), hence unsafeCommand() below.
		$cmd .= " | {$djvuPostProcessor}";
	}
	$cmd .= ' > ' . Shell::escape( $dstPath ) . ') 2>&1';
	wfDebug( __METHOD__ . ": $cmd" );
	$shell = Shell::command()->unsafeCommand( $cmd )->execute();
	$retval = $shell->getExitCode();
	// NOTE(review): the command line already redirects stderr into stdout
	// (2>&1), so getStderr() may be empty here - confirm which stream
	// should feed the error message.
	$err = $shell->getStderr();

	$removed = $this->removeBadFile( $dstPath, $retval );
	if ( ( $retval !== 0 || $removed ) && $retval !== null ) {
		// @codeCoverageIgnoreStart
		$this->logErrorForExternalProcess( $retval, $err, $cmd );
		return new MediaTransformError( 'thumbnail_error', $width, $height, $err );
		// @codeCoverageIgnoreEnd
	}
	$params = [
		'width' => $width,
		'height' => $height,
		'page' => $page
	];

	return new ThumbnailImage( $image, $dstUrl, $dstPath, $params );
}
252
/**
 * Fetch the DjVuImage kept in the handler state, creating and storing one
 * for $path when the state holds none.
 *
 * @param MediaHandlerState $state
 * @param string $path
 * @return DjVuImage
 */
private function getDjVuImage( MediaHandlerState $state, string $path ): DjVuImage {
	$cached = $state->getHandlerState( self::STATE_DJVU_IMAGE );
	if ( $cached ) {
		return $cached;
	}

	$djvuImage = new DjVuImage( $path );
	$state->setHandlerState( self::STATE_DJVU_IMAGE, $djvuImage );

	return $djvuImage;
}
265
/**
 * Load the metadata items this handler needs from the file.
 *
 * @param File $file
 * @param bool $gettext Also request the 'text' item
 * @return array|false The requested items, or false when an 'error' or
 *  '_error' item is present
 */
private function getMetadataInternal( File $file, bool $gettext ) {
	$wanted = $gettext
		? [ 'error', '_error', 'data', 'text' ]
		: [ 'error', '_error', 'data' ];
	$items = $file->getMetadataItems( $wanted );

	if ( isset( $items['error'] ) || isset( $items['_error'] ) ) {
		return false;
	}

	return $items;
}
288
/**
 * Get the metadata tree for a DjVu file.
 *
 * @param File $image
 * @param bool $gettext Include the 'text' item in the result
 * @return array|false False when no usable metadata is available
 */
public function getMetaTree( $image, $gettext = false ) {
	// NOTE(review): nothing in this part of the file writes these state
	// keys, so this lookup may never hit - confirm against the rest of the code.
	$stateKey = $gettext ? self::STATE_TEXT_TREE : self::STATE_META_TREE;
	$cachedTree = $image->getHandlerState( $stateKey );
	if ( $cachedTree ) {
		return $cachedTree;
	}

	$tree = $this->getMetadataInternal( $image, $gettext );
	if ( !$tree ) {
		return false;
	}

	if ( !$gettext ) {
		unset( $tree['text'] );
	}

	return $tree;
}
313
/**
 * Get the thumbnail extension and MIME type.
 *
 * The arguments are not inspected; the result depends only on the
 * DjvuOutputExtension setting.
 *
 * @param string $ext
 * @param string $mime
 * @param array|null $params
 * @return array [ extension, MIME type ]
 */
public function getThumbType( $ext, $mime, $params = null ) {
	$djvuOutputExtension = MediaWikiServices::getInstance()->getMainConfig()
		->get( MainConfigNames::DjvuOutputExtension );
	// Looked up once per process and reused on later calls.
	static $djvuMime = null;
	if ( $djvuMime === null ) {
		$magic = MediaWikiServices::getInstance()->getMimeAnalyzer();
		$djvuMime = $magic->getMimeTypeFromExtensionOrNull( $djvuOutputExtension );
	}

	return [ $djvuOutputExtension, $djvuMime ];
}
326
/**
 * Get image size information and the metadata array for the file at $path.
 *
 * @param MediaHandlerState $state
 * @param string $path
 * @return array A 'metadata' entry merged with whatever DjVuImage::getImageSize() returns
 */
public function getSizeAndMetadata( $state, $path ) {
	wfDebug( "Getting DjVu metadata for $path" );

	$djvu = $this->getDjVuImage( $state, $path );
	$meta = $djvu->retrieveMetaData();
	if ( $meta === false ) {
		// Special value so that we don't repetitively try and decode a broken file.
		$meta = [ 'error' => 'Error extracting metadata' ];
	}

	$result = [ 'metadata' => $meta ];
	$result += $djvu->getImageSize();

	return $result;
}
339
/**
 * Get the string describing this handler's metadata type.
 *
 * @param mixed $image Not inspected
 * @return string Always 'djvuxml'
 */
public function getMetadataType( $image ) {
	// historical reasons
	$type = 'djvuxml';

	return $type;
}
345
/**
 * Check whether the file's stored metadata is usable.
 *
 * @param File $image
 * @return mixed self::METADATA_GOOD when the metadata array is non-empty,
 *  otherwise self::METADATA_BAD
 */
public function isFileMetadataValid( $image ) {
	if ( $image->getMetadataArray() ) {
		return self::METADATA_GOOD;
	}

	return self::METADATA_BAD;
}
350
/**
 * Number of pages in the document.
 *
 * @param File $image
 * @return int|false False when no dimension info is available
 */
public function pageCount( File $image ) {
	$dimensionInfo = $this->getDimensionInfo( $image );
	if ( !$dimensionInfo ) {
		return false;
	}

	return $dimensionInfo['pageCount'];
}
357
/**
 * Get the stored dimensions of one page.
 *
 * @param File $image
 * @param int $page 1-based page number
 * @return array|false The stored entry for the page, or false when there is none
 */
public function getPageDimensions( File $image, $page ) {
	// MW starts pages at 1, the stored list starts at 0
	$zeroBased = $page - 1;

	$dimensionInfo = $this->getDimensionInfo( $image );
	if ( !$dimensionInfo ) {
		return false;
	}

	return $dimensionInfo['dimensionsByPage'][$zeroBased] ?? false;
}
369
/**
 * Get page count and per-page dimensions for a file, via the main WAN object cache.
 *
 * The cache key is built from CACHE_VERSION and the file's SHA-1.
 *
 * @param File $file
 * @return array|false
 */
protected function getDimensionInfo( File $file ) {
	$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
	$key = $cache->makeKey( 'file-djvu', 'dimensions', self::CACHE_VERSION, $file->getSha1() );

	return $cache->getWithSetCallback(
		$key,
		$cache::TTL_INDEFINITE,
		fn () => $this->getDimensionInfoFromMetaTree( $this->getMetaTree( $file ) ),
		[ 'pcTTL' => $cache::TTL_INDEFINITE ]
	);
}
383
/**
 * Given the metadata, returns dimension information about the document.
 *
 * @param array|false $metatree Metadata tree as returned by getMetaTree()
 * @return array|false [ 'pageCount' => int, 'dimensionsByPage' => array ],
 *  where each page entry is [ 'width' => int, 'height' => int ] or false;
 *  false when the tree has no usable page list
 */
protected function getDimensionInfoFromMetaTree( $metatree ) {
	if ( !$metatree || empty( $metatree['data'] ) ) {
		return false;
	}

	// A 'data' item without a page list cannot be iterated or counted;
	// report it as unusable instead of failing in foreach/count().
	$pages = $metatree['data']['pages'] ?? null;
	if ( !is_array( $pages ) ) {
		return false;
	}

	$dimsByPage = [];
	foreach ( $pages as $page ) {
		// An empty page entry is recorded as false so that indexes stay aligned.
		$dimsByPage[] = $page
			? [ 'width' => (int)$page['width'], 'height' => (int)$page['height'] ]
			: false;
	}

	return [
		'pageCount' => count( $pages ),
		'dimensionsByPage' => $dimsByPage
	];
}
413
/**
 * Get the text stored for one page of the document.
 *
 * @param File $image
 * @param int $page 1-based page number
 * @return mixed The stored text entry for the page, or false when the
 *  metadata or the page's text is unavailable
 */
public function getPageText( File $image, $page ) {
	$tree = $this->getMetaTree( $image, true );
	if ( !$tree || !isset( $tree['text'] ) ) {
		return false;
	}

	// Page numbers are 1-based, the stored text list is 0-based
	$zeroBased = $page - 1;

	return $tree['text'][$zeroBased] ?? false;
}
429
/**
 * Whether metadata for this handler may be split up and stored as separate items.
 *
 * @return bool Always true
 */
public function useSplitMetadata() {
	$split = true;

	return $split;
}
434}
435
437class_alias( DjVuHandler::class, 'DjVuHandler' );
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfHostname()
Get host name of the current machine, for use in error reporting.
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not.
wfMkdirParents( $dir, $mode=null, $caller=null)
Make directory, and make all parent directories if they don't exist.
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
Implements some public methods and some protected utility functions which are required by multiple ch...
Definition File.php:80
getSha1()
Get the SHA-1 base 36 hash of the file.
Definition File.php:2397
A class containing constants representing the names of configuration variables.
const DjvuPostProcessor
Name constant for the DjvuPostProcessor setting, for use with Config::get()
const DjvuDump
Name constant for the DjvuDump setting, for use with Config::get()
const DjvuRenderer
Name constant for the DjvuRenderer setting, for use with Config::get()
const DjvuOutputExtension
Name constant for the DjvuOutputExtension setting, for use with Config::get()
Service locator for MediaWiki core services.
static getInstance()
Returns the global default instance of the top level service locator.
Handler for DjVu images.
getPageText(File $image, $page)
normaliseParams( $image, &$params)
Changes the parameter array as necessary, ready for transformation. Should be idempotent....
isExpensiveToThumbnail( $file)
True if creating thumbnails from the file is large or otherwise resource-intensive.
getSizeAndMetadata( $state, $path)
Get image size information and metadata array. If this returns null, the caller will fall back to getI...
useSplitMetadata()
If this returns true, LocalFile may split metadata up and store its constituent items separately....
validateParam( $name, $value)
Validate a thumbnail parameter at parse time. Return true to accept the parameter, and false to reject...
doTransform( $image, $dstPath, $dstUrl, $params, $flags=0)
getPageDimensions(File $image, $page)
Get an associative array of page dimensions. Currently "width" and "height" are understood,...
getMetaTree( $image, $gettext=false)
Cache a document tree for the DjVu metadata.
pageCount(File $image)
Page count for a multi-page document, false if unsupported or unknown. To override. Returns int|false.
getThumbType( $ext, $mime, $params=null)
Get the thumbnail extension and MIME type for a given source MIME type. To override. Returns array: Thumbnail ext...
isFileMetadataValid( $image)
Check if the metadata is valid for this handler. If it returns MediaHandler::METADATA_BAD (or false),...
getMetadataType( $image)
Get a string describing the type of metadata, for display purposes. To override. This method is currentl...
getDimensionInfoFromMetaTree( $metatree)
Given the metadata, returns dimension information about the document.
Support for detecting/validating DjVu image files and getting some basic file metadata (resolution et...
Definition DjVuImage.php:28
Media handler abstract base class for images.
getSteppedThumbWidth(File $image, int $requestWidth, int $srcWidth, int $srcHeight)
Adjust the thumbnail size to fit the width steps defined in config via $wgThumbnailSteps.
removeBadFile( $dstPath, $retval=0)
Check for zero-sized thumbnails.
logErrorForExternalProcess( $retval, $err, $cmd)
Log an error that occurred in an external process.
Basic media transform error class.
Media transform output for images.
Shortcut class for parameter validation errors.
Convenience class for dealing with PoolCounter using callbacks.
execute( $skipcache=false)
Get the result of the work (whatever it is), or the result of the error() function.
Executes shell commands.
Definition Shell.php:32
An interface to support process-local caching of handler data associated with a given file.
setHandlerState(string $key, $value)
Set a value.