MediaWiki REL1_34
Go to the documentation of this file.
/**
 * File size in bytes (10 MiB); isExpensiveToThumbnail() reports a file as
 * expensive when its size is strictly greater than this value.
 */
const EXPENSIVE_SIZE_LIMIT = 10 * 1024 * 1024;
37 public function isEnabled() {
39 if ( !$wgDjvuRenderer || ( !$wgDjvuDump && !$wgDjvuToXML ) ) {
40 wfDebug( "DjVu is disabled, please set \$wgDjvuRenderer and \$wgDjvuDump\n" );
42 return false;
43 } else {
44 return true;
45 }
46 }
/**
 * Report whether files of this type must be rendered before display.
 *
 * @param mixed $file Unused by this handler
 * @return bool Always true
 */
public function mustRender( $file ) {
	return true;
}
/**
 * Tell whether the file is larger than EXPENSIVE_SIZE_LIMIT.
 *
 * @param mixed $file Object exposing getSize()
 * @return bool True when the reported size exceeds the limit
 */
public function isExpensiveToThumbnail( $file ) {
	$sizeInBytes = $file->getSize();

	// static:: so that a subclass overriding the constant is honoured.
	return $sizeInBytes > static::EXPENSIVE_SIZE_LIMIT;
}
/**
 * Report whether files of this type are treated as multi-page documents.
 *
 * @param mixed $file Unused by this handler
 * @return bool Always true
 */
public function isMultiPage( $file ) {
	return true;
}
/**
 * Map the 'img_width' and 'img_page' keys to this handler's parameter names.
 *
 * @return string[] Map of 'img_*' key => handler parameter name
 */
public function getParamMap() {
	$map = [];
	$map['img_width'] = 'width';
	$map['img_page'] = 'page';

	return $map;
}
/**
 * Validate a single handler parameter.
 *
 * Only 'width', 'height' and 'page' are accepted, and their value must be
 * greater than zero. A 'page' value must additionally be a plain integer.
 *
 * @param string $name Parameter name
 * @param mixed $value Parameter value
 * @return bool True if the parameter is known and its value is acceptable
 */
public function validateParam( $name, $value ) {
	if ( $name === 'page' && trim( $value ) !== (string)intval( $value ) ) {
		// Extra junk on the end of page, probably actually a caption
		// e.g. [[File:Foo.djvu|thumb|Page 3 of the document shows foo]]
		return false;
	}

	// Strict comparison: without it a non-string name such as `true`
	// would loosely match one of the known parameter names.
	if ( !in_array( $name, [ 'width', 'height', 'page' ], true ) ) {
		return false;
	}

	return $value > 0;
}
/**
 * Build the "page<N>-<W>px" string that identifies a thumbnail.
 *
 * @param array $params Handler parameters; 'width' is required, 'page' defaults to 1
 * @return string|false The parameter string, or false when 'width' is not set
 */
public function makeParamString( $params ) {
	if ( !isset( $params['width'] ) ) {
		return false;
	}

	$pageNumber = $params['page'] ?? 1;

	return 'page' . $pageNumber . '-' . $params['width'] . 'px';
}
/**
 * Parse a "page<N>-<W>px" string back into handler parameters.
 *
 * @param string $str Parameter string to parse
 * @return array|false [ 'width' => ..., 'page' => ... ] with the matched digit
 *  strings, or false when $str does not match the expected format
 */
public function parseParamString( $str ) {
	$matches = [];
	if ( !preg_match( '/^page(\d+)-(\d+)px$/', $str, $matches ) ) {
		return false;
	}

	// Capture group 1 is the page number, group 2 the width.
	return [ 'width' => $matches[2], 'page' => $matches[1] ];
}
/**
 * Reduce the handler parameters to the 'width' and 'page' entries.
 *
 * @param array $params Handler parameters; must contain 'width' and 'page'
 * @return array [ 'width' => ..., 'page' => ... ]
 */
protected function getScriptParams( $params ) {
	$width = $params['width'];
	$page = $params['page'];

	return [ 'width' => $width, 'page' => $page ];
}
150 function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) {
153 if ( !$this->normaliseParams( $image, $params ) ) {
154 return new TransformParameterError( $params );
155 }
156 $width = $params['width'];
157 $height = $params['height'];
158 $page = $params['page'];
160 if ( $flags & self::TRANSFORM_LATER ) {
161 $params = [
162 'width' => $width,
163 'height' => $height,
164 'page' => $page
165 ];
167 return new ThumbnailImage( $image, $dstUrl, $dstPath, $params );
168 }
170 if ( !wfMkdirParents( dirname( $dstPath ), null, __METHOD__ ) ) {
171 return new MediaTransformError(
172 'thumbnail_error',
173 $width,
174 $height,
175 wfMessage( 'thumbnail_dest_directory' )
176 );
177 }
179 // Get local copy source for shell scripts
180 // Thumbnail extraction is very inefficient for large files.
181 // Provide a way to pool count limit the number of downloaders.
182 if ( $image->getSize() >= 1e7 ) { // 10MB
183 $work = new PoolCounterWorkViaCallback( 'GetLocalFileCopy', sha1( $image->getName() ),
184 [
185 'doWork' => function () use ( $image ) {
186 return $image->getLocalRefPath();
187 }
188 ]
189 );
190 $srcPath = $work->execute();
191 } else {
192 $srcPath = $image->getLocalRefPath();
193 }
195 if ( $srcPath === false ) { // Failed to get local copy
196 wfDebugLog( 'thumbnail',
197 sprintf( 'Thumbnail failed on %s: could not get local copy of "%s"',
198 wfHostname(), $image->getName() ) );
200 return new MediaTransformError( 'thumbnail_error',
201 $params['width'], $params['height'],
202 wfMessage( 'filemissing' )
203 );
204 }
206 # Use a subshell (brackets) to aggregate stderr from both pipeline commands
207 # before redirecting it to the overall stdout. This works in both Linux and Windows XP.
208 $cmd = '(' . Shell::escape(
210 "-format=ppm",
211 "-page={$page}",
212 "-size={$params['physicalWidth']}x{$params['physicalHeight']}",
213 $srcPath );
214 if ( $wgDjvuPostProcessor ) {
215 $cmd .= " | {$wgDjvuPostProcessor}";
216 }
217 $cmd .= ' > ' . Shell::escape( $dstPath ) . ') 2>&1';
218 wfDebug( __METHOD__ . ": $cmd\n" );
219 $retval = '';
220 $err = wfShellExec( $cmd, $retval );
222 $removed = $this->removeBadFile( $dstPath, $retval );
223 if ( $retval != 0 || $removed ) {
224 $this->logErrorForExternalProcess( $retval, $err, $cmd );
225 return new MediaTransformError( 'thumbnail_error', $width, $height, $err );
226 } else {
227 $params = [
228 'width' => $width,
229 'height' => $height,
230 'page' => $page
231 ];
233 return new ThumbnailImage( $image, $dstUrl, $dstPath, $params );
234 }
235 }
/**
 * Return a DjVuImage for $path, reusing the instance stored on $image when
 * one is already there.
 *
 * @param mixed $image Object on which the instance is stored in ->dejaImage,
 *  or a falsy value to skip caching
 * @param string $path Path passed to the DjVuImage constructor
 * @return DjVuImage
 */
function getDjVuImage( $image, $path ) {
	// Nothing to cache on: build a throw-away instance.
	if ( !$image ) {
		return new DjVuImage( $path );
	}

	$cached = $image->dejaImage ?? null;
	if ( $cached === null ) {
		$cached = new DjVuImage( $path );
		$image->dejaImage = $cached;
	}

	return $cached;
}
/**
 * Get the file's metadata, unserializing it if necessary.
 *
 * @param File $file
 * @return string|false The XML metadata string; false when the stored
 *  metadata is a serialized array carrying an 'error' entry
 * @throws MWException When the unserialized array has neither an 'error'
 *  nor an 'xml' entry
 */
private function getUnserializedMetadata( File $file ) {
	$metadata = $file->getMetadata();

	// The marker '<?xml' is five bytes long, so five bytes must be compared;
	// a three-byte prefix can never equal it and would leave this branch dead.
	if ( substr( $metadata, 0, 5 ) === '<?xml' ) {
		// Old style. Not serialized but instead just a raw string of XML.
		return $metadata;
	}

	// unserialize() emits a notice on non-serialized input; that case is
	// handled below, so the warning is suppressed.
	Wikimedia\suppressWarnings();
	$unser = unserialize( $metadata );
	Wikimedia\restoreWarnings();

	if ( is_array( $unser ) ) {
		if ( isset( $unser['error'] ) ) {
			return false;
		} elseif ( isset( $unser['xml'] ) ) {
			return $unser['xml'];
		} else {
			// Should never ever reach here.
			throw new MWException( "Error unserializing DjVu metadata." );
		}
	}

	// unserialize failed. Guess it wasn't really serialized after all,
	return $metadata;
}
/**
 * Return the parsed XML tree for the file's DjVu metadata, caching both the
 * metadata tree and the text tree on the $image object.
 *
 * @param mixed $image File object; the trees are stored in its
 *  ->dejaMetaTree and ->djvuTextTree properties
 * @param bool $gettext True to return the text tree instead of the metadata tree
 * @return mixed The requested tree as produced by extractTreesFromMetadata(),
 *  or false when the metadata is not valid
 */
public function getMetaTree( $image, $gettext = false ) {
	// Name of the property holding the tree the caller asked for.
	$wanted = $gettext ? 'djvuTextTree' : 'dejaMetaTree';

	if ( isset( $image->$wanted ) ) {
		return $image->$wanted;
	}

	$metadata = $this->getUnserializedMetadata( $image );
	if ( !$this->isMetadataValid( $image, $metadata ) ) {
		wfDebug( "DjVu XML metadata is invalid or missing, should have been fixed in upgradeRow\n" );

		return false;
	}

	// Both trees are stored, whichever one was requested.
	$trees = $this->extractTreesFromMetadata( $metadata );
	$image->djvuTextTree = $trees['TextTree'];
	$image->dejaMetaTree = $trees['MetaTree'];

	return $image->$wanted;
}
/**
 * Parse metadata XML given as a string into a metadata tree and a text tree.
 *
 * When the root element is <mw-djvu>, its <DjVuXML> child becomes the
 * metadata tree and its <DjVuTxt> child the text tree. Otherwise the whole
 * document is used as the metadata tree.
 *
 * @param string $metadata XML string
 * @return array [ 'MetaTree' => SimpleXMLElement|false, 'TextTree' => SimpleXMLElement|false ]
 */
protected function extractTreesFromMetadata( $metadata ) {
	// Set to false rather than null to avoid further attempts
	$result = [ 'MetaTree' => false, 'TextTree' => false ];

	Wikimedia\suppressWarnings();
	try {
		$root = new SimpleXMLElement( $metadata, LIBXML_PARSEHUGE );
		if ( $root->getName() === 'mw-djvu' ) {
			foreach ( $root->children() as $child ) {
				// @todo File::djvuTextTree and File::dejaMetaTree are declared
				// dynamically. Add a public File::$data to facilitate this?
				switch ( $child->getName() ) {
					case 'DjVuTxt':
						$result['TextTree'] = $child;
						break;
					case 'DjVuXML':
						$result['MetaTree'] = $child;
						break;
				}
			}
		} else {
			$result['MetaTree'] = $root;
		}
	} catch ( Exception $e ) {
		// Parsing failed: log it and return whatever was collected so far.
		wfDebug( "Bogus multipage XML metadata\n" );
	}
	Wikimedia\restoreWarnings();

	return $result;
}
/**
 * Get the image size by delegating to DjVuImage::getImageSize().
 *
 * @param mixed $image Passed to getDjVuImage() for instance caching
 * @param string $path Path of the DjVu file
 * @return mixed Whatever DjVuImage::getImageSize() returns
 */
function getImageSize( $image, $path ) {
	$djvu = $this->getDjVuImage( $image, $path );

	return $djvu->getImageSize();
}
360 public function getThumbType( $ext, $mime, $params = null ) {
362 static $mime;
363 if ( !isset( $mime ) ) {
364 $magic = MediaWiki\MediaWikiServices::getInstance()->getMimeAnalyzer();
365 $mime = $magic->guessTypesForExtension( $wgDjvuOutputExtension );
366 }
368 return [ $wgDjvuOutputExtension, $mime ];
369 }
/**
 * Retrieve the DjVu metadata for a file and wrap it in a serialized array.
 *
 * @param mixed $image Passed to getDjVuImage() for instance caching
 * @param string $path Path of the DjVu file
 * @return string Serialized [ 'xml' => ... ] on success, or serialized
 *  [ 'error' => ... ] when retrieval returned false
 */
public function getMetadata( $image, $path ) {
	wfDebug( "Getting DjVu metadata for $path\n" );

	$xml = $this->getDjVuImage( $image, $path )->retrieveMetaData();

	// Special value so that we don't repetitively try and decode a broken file.
	$payload = ( $xml === false )
		? [ 'error' => 'Error extracting metadata' ]
		: [ 'xml' => $xml ];

	return serialize( $payload );
}
/**
 * Get the string naming this handler's metadata type.
 *
 * @param mixed $image Unused by this handler
 * @return string Always 'djvuxml'
 */
function getMetadataType( $image ) {
	return 'djvuxml';
}
/**
 * Check whether a metadata value is usable: it must be non-empty and must
 * not be the serialization of an empty array.
 *
 * @param mixed $image Unused by this handler
 * @param mixed $metadata Metadata value to check
 * @return bool
 */
public function isMetadataValid( $image, $metadata ) {
	if ( empty( $metadata ) ) {
		return false;
	}

	return $metadata != serialize( [] );
}
/**
 * Get the number of pages in the document.
 *
 * @param File $image
 * @return int|false Page count, or false when no dimension info is available
 */
public function pageCount( File $image ) {
	$info = $this->getDimensionInfo( $image );
	if ( !$info ) {
		return false;
	}

	return $info['pageCount'];
}
/**
 * Get the dimensions of one page of the document.
 *
 * @param File $image
 * @param int $page 1-based page number
 * @return array|false The entry stored for that page, or false when no
 *  dimension info is available or the page has no entry
 */
public function getPageDimensions( File $image, $page ) {
	// MW starts pages at 1, the dimension list is 0-based.
	$zeroBased = $page - 1;

	$info = $this->getDimensionInfo( $image );
	if ( !$info ) {
		return false;
	}

	return $info['dimensionsByPage'][$zeroBased] ?? false;
}
/**
 * Get the page count and per-page dimensions of a file through the main
 * WAN object cache, keyed on the file's SHA-1.
 *
 * @param File $file
 * @return mixed Result of getDimensionInfoFromMetaTree(), as handed back by
 *  the cache's getWithSetCallback()
 */
protected function getDimensionInfo( File $file ) {
	$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
	$key = $cache->makeKey( 'file-djvu', 'dimensions', $file->getSha1() );

	// Callback that computes the value from the file's metadata tree.
	$compute = function () use ( $file ) {
		return $this->getDimensionInfoFromMetaTree( $this->getMetaTree( $file ) );
	};

	return $cache->getWithSetCallback(
		$key,
		$cache::TTL_INDEFINITE,
		$compute,
		[ 'pcTTL' => $cache::TTL_INDEFINITE ]
	);
}
/**
 * Given an XML metadata tree, return the page count and the dimensions of
 * each page.
 *
 * The page count is the number of OBJECT elements found anywhere in the
 * tree; dimensions are read from the OBJECT children of the first BODY.
 *
 * @param SimpleXMLElement|false $metatree
 * @return array|false [ 'pageCount' => int, 'dimensionsByPage' => array ],
 *  where each page entry is [ 'width' => int, 'height' => int ] or false;
 *  false when $metatree is falsy
 */
protected function getDimensionInfoFromMetaTree( $metatree ) {
	if ( !$metatree ) {
		return false;
	}

	$pageCount = count( $metatree->xpath( '//OBJECT' ) );
	$dimensions = [];
	for ( $pageIndex = 0; $pageIndex < $pageCount; $pageIndex++ ) {
		$object = $metatree->BODY[0]->OBJECT[$pageIndex];
		$dimensions[$pageIndex] = $object
			? [
				'width' => (int)$object['width'],
				'height' => (int)$object['height'],
			]
			: false;
	}

	return [ 'pageCount' => $pageCount, 'dimensionsByPage' => $dimensions ];
}
/**
 * Get the text of one page from the file's text tree.
 *
 * @param File $image
 * @param int $page 1-based page number
 * @return mixed The 'value' attribute of the matching PAGE element, or false
 *  when there is no text tree or no such page
 */
function getPageText( File $image, $page ) {
	$textTree = $this->getMetaTree( $image, true );
	if ( !$textTree ) {
		return false;
	}

	// PAGE elements are 0-indexed, page numbers start at 1.
	$pageNode = $textTree->BODY[0]->PAGE[$page - 1];

	return $pageNode ? $pageNode['value'] : false;
}
unserialize( $serialized)
Path of the ddjvu DJVU renderer Enable this and $wgDjvuDump to enable djvu rendering example: $wgDjvu...
File extension for the DJVU post processor output.
Path of the djvutoxml executable This works like djvudump except much, much slower as of version 3....
Shell command for the DJVU post processor Default: pnmtojpeg, since ddjvu generates ppm output Set th...
Path of the djvudump executable Enable this and $wgDjvuRenderer to enable djvu rendering example: $wg...
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfShellExec( $cmd, &$retval=null, $environ=[], $limits=[], $options=[])
Execute a shell command, with time and memory limits mirrored from the PHP configuration if supported...
Get host name of the current machine, for use in error reporting.
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not.
wfMkdirParents( $dir, $mode=null, $caller=null)
Make directory, and make all parent directories if they don't exist.
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
Handler for DjVu images.
getDimensionInfoFromMetaTree( $metatree)
Given an XML metadata tree, returns dimension information about the document.
getDjVuImage( $image, $path)
Cache an instance of DjVuImage in an Image object, return that instance.
makeParamString( $params)
isExpensiveToThumbnail( $file)
True if creating thumbnails from the file is expensive because the file is large or otherwise resource-intensive to process.
validateParam( $name, $value)
isMultiPage( $file)
getDimensionInfo(File $file)
getScriptParams( $params)
doTransform( $image, $dstPath, $dstUrl, $params, $flags=0)
pageCount(File $image)
Page count for a multi-page document, false if unsupported or unknown.
getPageText(File $image, $page)
isMetadataValid( $image, $metadata)
Check if the metadata string is valid for this handler.
getPageDimensions(File $image, $page)
Get an associative array of page dimensions Currently "width" and "height" are understood,...
getMetadataType( $image)
Get a string describing the type of metadata, for display purposes.
getMetaTree( $image, $gettext=false)
Cache a document tree for the DjVu XML metadata.
getThumbType( $ext, $mime, $params=null)
Get the thumbnail extension and MIME type for a given source MIME type.
getUnserializedMetadata(File $file)
Get metadata, unserializing it if necessary.
extractTreesFromMetadata( $metadata)
Extracts metadata and text trees from metadata XML in string form.
mustRender( $file)
parseParamString( $str)
getMetadata( $image, $path)
Get handler-specific metadata which will be saved in the img_metadata field.
getImageSize( $image, $path)
Get an image size array like that returned by getimagesize(), or false if it can't be determined.
Support for detecting/validating DjVu image files and getting some basic file metadata (resolution et...
Definition DjVuImage.php:38
Implements some public methods and some protected utility functions which are required by multiple ch...
Definition File.php:61
Media handler abstract base class for images.
normaliseParams( $image, &$params)
MediaWiki exception.
logErrorForExternalProcess( $retval, $err, $cmd)
Log an error that occurred in an external process.
removeBadFile( $dstPath, $retval=0)
Check for zero-sized thumbnails.
Basic media transform error class.
MediaWikiServices is the service locator for the application scope of MediaWiki.
Executes shell commands.
Definition Shell.php:44
Convenience class for dealing with PoolCounters using callbacks.
execute( $skipcache=false)
Get the result of the work (whatever it is), or the result of the error() function.
Media transform output for images.
Shortcut class for parameter validation errors.
Definition mcc.php:33
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition router.php:42
if(!is_readable( $file)) $ext
Definition router.php:48