MediaWiki master
DjVuHandler.php
Go to the documentation of this file.
1<?php
10namespace MediaWiki\Media;
11
17
/** Byte size (10 MiB) above which a file is reported as expensive to thumbnail. */
private const EXPENSIVE_SIZE_LIMIT = 10 * 1024 * 1024;

// Keys passed to getHandlerState() / setHandlerState()
private const STATE_DJVU_IMAGE = 'djvuImage';
private const STATE_TEXT_TREE = 'djvuTextTree';
private const STATE_META_TREE = 'djvuMetaTree';

/** Version component of the cache key built in getDimensionInfo(). */
private const CACHE_VERSION = 'v2';
31
/**
 * Report whether DjVu support is configured.
 *
 * @return bool True only when both the DjvuRenderer and DjvuDump settings
 *  are truthy; otherwise a debug line is written and false is returned.
 */
public function isEnabled() {
	$config = MediaWikiServices::getInstance()->getMainConfig();
	$renderer = $config->get( MainConfigNames::DjvuRenderer );
	$dumper = $config->get( MainConfigNames::DjvuDump );

	if ( $renderer && $dumper ) {
		return true;
	}

	wfDebug( "DjVu is disabled, please set \$wgDjvuRenderer and \$wgDjvuDump" );

	return false;
}
45
/**
 * @param mixed $file Not inspected by this handler
 * @return bool Always true
 */
public function mustRender( $file ) {
	// The answer does not depend on the file.
	return true;
}
53
/**
 * True if creating thumbnails from the file is large or otherwise
 * resource-intensive.
 *
 * @param mixed $file Object exposing getSize()
 * @return bool Whether the reported size is strictly greater than
 *  EXPENSIVE_SIZE_LIMIT
 */
public function isExpensiveToThumbnail( $file ) {
	// The limit is a private constant, so it is resolved with self:: (as the
	// other constants in this class are) rather than via late static binding.
	return $file->getSize() > self::EXPENSIVE_SIZE_LIMIT;
}
62
/**
 * @param mixed $file Not inspected by this handler
 * @return bool Always true
 */
public function isMultiPage( $file ) {
	// The answer does not depend on the file.
	return true;
}
70
/**
 * Map of 'img_*' names to the handler parameter names they stand for.
 *
 * @return string[] [ 'img_width' => 'width', 'img_page' => 'page' ]
 */
public function getParamMap() {
	$map = [];
	$map['img_width'] = 'width';
	$map['img_page'] = 'page';

	return $map;
}
80
/**
 * Validate a single handler parameter.
 *
 * @param string $name Parameter name; only 'width', 'height' and 'page'
 *  are accepted
 * @param mixed $value Parameter value
 * @return bool True when the name is accepted and the value compares
 *  greater than zero
 */
public function validateParam( $name, $value ) {
	if ( $name === 'page' && trim( $value ) !== (string)intval( $value ) ) {
		// Extra junk on the end of page, probably actually a caption
		// e.g. [[File:Foo.djvu|thumb|Page 3 of the document shows foo]]
		return false;
	}

	// Strict membership test: with loose comparison a non-string name such
	// as boolean true would match 'width' and be accepted.
	return in_array( $name, [ 'width', 'height', 'page' ], true ) && $value > 0;
}
94
/**
 * Build the "page<N>-<W>px" string for a parameter array.
 *
 * @param array $params Must contain 'width'; 'page' defaults to 1
 * @return string|false False when no width is set
 */
public function makeParamString( $params ) {
	if ( !isset( $params['width'] ) ) {
		return false;
	}
	$pageNumber = $params['page'] ?? 1;

	return 'page' . $pageNumber . '-' . $params['width'] . 'px';
}
107
/**
 * Parse a "page<N>-<W>px" string back into a parameter array.
 *
 * @param string $str
 * @return array|false [ 'width' => string, 'page' => string ] holding the
 *  matched digit strings, or false when $str does not match
 */
public function parseParamString( $str ) {
	$matches = [];
	if ( !preg_match( '/^page(\d+)-(\d+)px$/', $str, $matches ) ) {
		return false;
	}

	return [
		'width' => $matches[2],
		'page' => $matches[1],
	];
}
119
/**
 * Reduce a parameter array to its 'width' and 'page' entries.
 *
 * @param array $params Must contain 'width' and 'page'
 * @return array [ 'width' => ..., 'page' => ... ]
 */
protected function getScriptParams( $params ) {
	$scriptParams = [];
	foreach ( [ 'width', 'page' ] as $key ) {
		$scriptParams[$key] = $params[$key];
	}

	return $scriptParams;
}
130
/**
 * Produce a thumbnail of one page by running the configured DjVu renderer
 * (optionally piped through a post-processor) in a shell.
 *
 * @param mixed $image Source file object; getSize(), getName() and
 *  getLocalRefPath() are called on it
 * @param string $dstPath Filesystem path the rendered output is redirected to
 * @param string $dstUrl URL handed to the resulting ThumbnailImage
 * @param array $params Transform parameters; after normaliseParams() they
 *  are read for 'width', 'height', 'page', 'physicalWidth' and
 *  'physicalHeight'
 * @param int $flags Bitfield; when self::TRANSFORM_LATER is set nothing is
 *  rendered and a ThumbnailImage is returned immediately
 * @return ThumbnailImage|MediaTransformError|TransformParameterError
 */
public function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) {
	$config = MediaWikiServices::getInstance()->getMainConfig();
	$djvuRenderer = $config->get( MainConfigNames::DjvuRenderer );
	$djvuPostProcessor = $config->get( MainConfigNames::DjvuPostProcessor );

	if ( !$this->normaliseParams( $image, $params ) ) {
		return new TransformParameterError( $params );
	}
	$width = $params['width'];
	$height = $params['height'];
	$page = $params['page'];

	if ( $flags & self::TRANSFORM_LATER ) {
		$params = [
			'width' => $width,
			'height' => $height,
			'page' => $page
		];

		return new ThumbnailImage( $image, $dstUrl, $dstPath, $params );
	}

	if ( !wfMkdirParents( dirname( $dstPath ), null, __METHOD__ ) ) {
		return new MediaTransformError(
			'thumbnail_error',
			$width,
			$height,
			wfMessage( 'thumbnail_dest_directory' )
		);
	}

	// Get local copy source for shell scripts
	// Thumbnail extraction is very inefficient for large files.
	// Provide a way to pool count limit the number of downloaders.
	if ( $image->getSize() >= 1e7 ) { // 10 MB
		$work = new PoolCounterWorkViaCallback( 'GetLocalFileCopy', sha1( $image->getName() ),
			[
				'doWork' => static function () use ( $image ) {
					return $image->getLocalRefPath();
				}
			]
		);
		$srcPath = $work->execute();
	} else {
		$srcPath = $image->getLocalRefPath();
	}

	if ( $srcPath === false ) { // Failed to get local copy
		wfDebugLog( 'thumbnail',
			sprintf( 'Thumbnail failed on %s: could not get local copy of "%s"',
				wfHostname(), $image->getName() ) );

		return new MediaTransformError( 'thumbnail_error',
			$width, $height,
			wfMessage( 'filemissing' )
		);
	}

	# Use a subshell (brackets) to aggregate stderr from both pipeline commands
	# before redirecting it to the overall stdout. This works in both Linux and Windows XP.
	$cmd = '(' . Shell::escape(
		$djvuRenderer,
		"-format=ppm",
		"-page={$page}",
		"-size={$params['physicalWidth']}x{$params['physicalHeight']}",
		$srcPath );
	if ( $djvuPostProcessor ) {
		// The post-processor setting is appended verbatim, without escaping.
		// NOTE(review): presumably so it can carry its own arguments - confirm.
		$cmd .= " | {$djvuPostProcessor}";
	}
	$cmd .= ' > ' . Shell::escape( $dstPath ) . ') 2>&1';
	wfDebug( __METHOD__ . ": $cmd" );
	$retval = 0;
	$err = wfShellExec( $cmd, $retval );

	// removeBadFile() checks for zero-sized thumbnails; a removal counts as
	// failure even when the command itself exited with 0.
	$removed = $this->removeBadFile( $dstPath, $retval );
	if ( $retval !== 0 || $removed ) {
		$this->logErrorForExternalProcess( $retval, $err, $cmd );
		return new MediaTransformError( 'thumbnail_error', $width, $height, $err );
	}
	$params = [
		'width' => $width,
		'height' => $height,
		'page' => $page
	];

	return new ThumbnailImage( $image, $dstUrl, $dstPath, $params );
}
225
/**
 * Fetch the DjVuImage kept in the handler state, creating and storing one
 * for $path when none is there yet.
 *
 * @param mixed $state Object exposing getHandlerState()/setHandlerState()
 * @param string $path Path handed to the DjVuImage constructor
 * @return DjVuImage
 */
private function getDjVuImage( $state, $path ) {
	$cached = $state->getHandlerState( self::STATE_DJVU_IMAGE );
	if ( $cached ) {
		return $cached;
	}

	$djvuImage = new DjVuImage( $path );
	$state->setHandlerState( self::STATE_DJVU_IMAGE, $djvuImage );

	return $djvuImage;
}
242
/**
 * Load the stored metadata items for a file.
 *
 * @param File $file
 * @param bool $gettext Whether to request the 'text' item as well
 * @return array|false The requested items, or false when either the
 *  'error' or the '_error' item is set
 */
private function getMetadataInternal( File $file, $gettext ) {
	$wanted = $gettext
		? [ 'error', '_error', 'data', 'text' ]
		: [ 'error', '_error', 'data' ];
	$items = $file->getMetadataItems( $wanted );

	$hasError = isset( $items['error'] ) || isset( $items['_error'] );

	return $hasError ? false : $items;
}
265
/**
 * Get the metadata tree for a DjVu file.
 *
 * A truthy value already present in the handler state (text tree or meta
 * tree, depending on $gettext) is returned as is. This method itself never
 * writes to the handler state.
 *
 * @param mixed $image File object exposing getHandlerState()
 * @param bool $gettext Whether the page text should be included
 * @return array|false False when no usable metadata is available
 */
public function getMetaTree( $image, $gettext = false ) {
	$stateKey = $gettext ? self::STATE_TEXT_TREE : self::STATE_META_TREE;
	$cachedTree = $image->getHandlerState( $stateKey );
	if ( $cachedTree ) {
		return $cachedTree;
	}

	$tree = $this->getMetadataInternal( $image, $gettext );
	if ( !$tree ) {
		return false;
	}

	if ( !$gettext ) {
		// Callers that did not ask for text never receive it.
		unset( $tree['text'] );
	}

	return $tree;
}
290
/**
 * Get the thumbnail extension and MIME type.
 *
 * The result depends only on the DjvuOutputExtension setting; $ext, $mime
 * and $params are not consulted.
 *
 * @param string $ext Unused
 * @param string $mime Unused
 * @param array|null $params Unused
 * @return array [ thumbnail extension, MIME type or null ]
 */
public function getThumbType( $ext, $mime, $params = null ) {
	$djvuOutputExtension = MediaWikiServices::getInstance()->getMainConfig()
		->get( MainConfigNames::DjvuOutputExtension );
	// Static local: once a non-null MIME type has been found it is reused by
	// later calls; a null result is looked up again next time.
	static $djvuMime = null;
	if ( $djvuMime === null ) {
		$magic = MediaWikiServices::getInstance()->getMimeAnalyzer();
		$djvuMime = $magic->getMimeTypeFromExtensionOrNull( $djvuOutputExtension );
	}

	return [ $djvuOutputExtension, $djvuMime ];
}
303
/**
 * Get image size information and metadata array.
 *
 * @param mixed $state Handler state object, passed on to getDjVuImage()
 * @param string $path Path of the file to inspect
 * @return array The DjVuImage's getImageSize() result plus a 'metadata'
 *  entry; an existing 'metadata' key in the size info does not override it
 */
public function getSizeAndMetadata( $state, $path ) {
	wfDebug( "Getting DjVu metadata for $path" );

	$djvuImage = $this->getDjVuImage( $state, $path );
	$extracted = $djvuImage->retrieveMetaData();

	$result = [
		// Special value so that we don't repetitively try and decode a broken file.
		'metadata' => $extracted === false
			? [ 'error' => 'Error extracting metadata' ]
			: $extracted,
	];

	return $result + $djvuImage->getImageSize();
}
316
/**
 * Get a string describing the type of metadata.
 *
 * @param mixed $image Not inspected
 * @return string Always 'djvuxml'
 */
public function getMetadataType( $image ) {
	// The name is kept for historical reasons.
	return 'djvuxml';
}
322
/**
 * Check if the metadata is valid for this handler.
 *
 * @param mixed $image File object exposing getMetadataArray()
 * @return mixed self::METADATA_GOOD when the metadata array is truthy,
 *  self::METADATA_BAD otherwise
 */
public function isFileMetadataValid( $image ) {
	if ( $image->getMetadataArray() ) {
		return self::METADATA_GOOD;
	}

	return self::METADATA_BAD;
}
327
/**
 * Page count for a multi-page document.
 *
 * @param File $image
 * @return int|false False when no dimension info is available
 */
public function pageCount( File $image ) {
	$info = $this->getDimensionInfo( $image );
	if ( !$info ) {
		return false;
	}

	return $info['pageCount'];
}
334
/**
 * Get the dimensions recorded for one page.
 *
 * @param File $image
 * @param int $page 1-based page number
 * @return array|false The entry stored for that page, or false when there
 *  is no dimension info or no entry for the page
 */
public function getPageDimensions( File $image, $page ) {
	// MW starts pages at 1
	$zeroBasedIndex = $page - 1;

	$info = $this->getDimensionInfo( $image );
	if ( !$info ) {
		return false;
	}

	return $info['dimensionsByPage'][$zeroBasedIndex] ?? false;
}
346
/**
 * Get the dimension info for a file through the main WAN object cache.
 *
 * The cache key is built from CACHE_VERSION and the file's SHA-1; on a
 * miss the value is computed from the file's meta tree.
 *
 * @param File $file
 * @return array|false Whatever getDimensionInfoFromMetaTree() produced
 */
protected function getDimensionInfo( File $file ) {
	$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
	$key = $cache->makeKey( 'file-djvu', 'dimensions', self::CACHE_VERSION, $file->getSha1() );

	$regenerate = function () use ( $file ) {
		return $this->getDimensionInfoFromMetaTree( $this->getMetaTree( $file ) );
	};

	return $cache->getWithSetCallback(
		$key,
		$cache::TTL_INDEFINITE,
		$regenerate,
		[ 'pcTTL' => $cache::TTL_INDEFINITE ]
	);
}
360
/**
 * Given the metadata, returns dimension information about the document.
 *
 * @param array|false $metatree Tree as returned by getMetaTree()
 * @return array|false [ 'pageCount' => int, 'dimensionsByPage' => array ],
 *  where each page entry is [ 'width' => int, 'height' => int ] or false
 *  for a falsy page. False when the tree is empty, has no 'data', or its
 *  'data' has no 'pages' array.
 */
protected function getDimensionInfoFromMetaTree( $metatree ) {
	if ( !$metatree || empty( $metatree['data'] ) ) {
		return false;
	}

	// Without this guard a 'data' entry lacking a 'pages' array would reach
	// foreach/count() with null.
	$pages = $metatree['data']['pages'] ?? null;
	if ( !is_array( $pages ) ) {
		return false;
	}

	$dimsByPage = [];
	foreach ( $pages as $page ) {
		$dimsByPage[] = $page
			? [
				'width' => (int)$page['width'],
				'height' => (int)$page['height'],
			]
			: false;
	}

	return [
		'pageCount' => count( $pages ),
		'dimensionsByPage' => $dimsByPage
	];
}
390
/**
 * Get the text stored for one page.
 *
 * @param File $image
 * @param int $page 1-based page number
 * @return mixed The 'text' entry for that page, or false when the tree is
 *  unavailable or holds no text for the page
 */
public function getPageText( File $image, $page ) {
	$tree = $this->getMetaTree( $image, true );
	if ( !$tree || !isset( $tree['text'] ) ) {
		return false;
	}

	// Text entries are indexed from 0, pages from 1.
	return $tree['text'][$page - 1] ?? false;
}
406
/**
 * If this returns true, LocalFile may split metadata up and store its
 * constituent items separately.
 *
 * @return bool Always true
 */
public function useSplitMetadata() {
	// Unconditional for this handler.
	return true;
}
411}
412
// Make the class also reachable under the name 'DjVuHandler'.
// NOTE(review): presumably kept for code that predates the namespace - confirm.
414class_alias( DjVuHandler::class, 'DjVuHandler' );
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfShellExec( $cmd, &$retval=null, $environ=[], $limits=[], $options=[])
Execute a shell command, with time and memory limits mirrored from the PHP configuration if supported...
wfHostname()
Get host name of the current machine, for use in error reporting.
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not.
wfMkdirParents( $dir, $mode=null, $caller=null)
Make directory, and make all parent directories if they don't exist.
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
Implements some public methods and some protected utility functions which are required by multiple ch...
Definition File.php:79
getSha1()
Get the SHA-1 base 36 hash of the file.
Definition File.php:2398
A class containing constants representing the names of configuration variables.
const DjvuPostProcessor
Name constant for the DjvuPostProcessor setting, for use with Config::get()
const DjvuDump
Name constant for the DjvuDump setting, for use with Config::get()
const DjvuRenderer
Name constant for the DjvuRenderer setting, for use with Config::get()
const DjvuOutputExtension
Name constant for the DjvuOutputExtension setting, for use with Config::get()
Service locator for MediaWiki core services.
static getInstance()
Returns the global default instance of the top level service locator.
Handler for DjVu images.
getPageText(File $image, $page)
isExpensiveToThumbnail( $file)
True if creating thumbnails from the file is large or otherwise resource-intensive.
getSizeAndMetadata( $state, $path)
Get image size information and metadata array. If this returns null, the caller will fall back to getI...
useSplitMetadata()
If this returns true, LocalFile may split metadata up and store its constituent items separately....
validateParam( $name, $value)
doTransform( $image, $dstPath, $dstUrl, $params, $flags=0)
getPageDimensions(File $image, $page)
Get an associative array of page dimensions. Currently "width" and "height" are understood,...
getMetaTree( $image, $gettext=false)
Cache a document tree for the DjVu metadata.
pageCount(File $image)
Page count for a multi-page document, false if unsupported or unknown. To override. Returns int|false.
getThumbType( $ext, $mime, $params=null)
Get the thumbnail extension and MIME type for a given source MIME type. To override. Returns array: Thumbnail ext...
isFileMetadataValid( $image)
Check if the metadata is valid for this handler. If it returns MediaHandler::METADATA_BAD (or false),...
getMetadataType( $image)
Get a string describing the type of metadata, for display purposes. To override. This method is currentl...
getDimensionInfoFromMetaTree( $metatree)
Given the metadata, returns dimension information about the document.
Support for detecting/validating DjVu image files and getting some basic file metadata (resolution et...
Definition DjVuImage.php:28
Media handler abstract base class for images.
normaliseParams( $image, &$params)
Changes the parameter array as necessary, ready for transformation. Should be idempotent....
removeBadFile( $dstPath, $retval=0)
Check for zero-sized thumbnails.
logErrorForExternalProcess( $retval, $err, $cmd)
Log an error that occurred in an external process.
Basic media transform error class.
Media transform output for images.
Shortcut class for parameter validation errors.
Convenience class for dealing with PoolCounter using callbacks.
execute( $skipcache=false)
Get the result of the work (whatever it is), or the result of the error() function.
Executes shell commands.
Definition Shell.php:32