MediaWiki REL1_33
DjVuHandler.php
Go to the documentation of this file.
1<?php
25
32 const EXPENSIVE_SIZE_LIMIT = 10485760; // 10MiB
33
/**
 * Report whether DjVu rendering is configured.
 *
 * Rendering needs $wgDjvuRenderer plus at least one of $wgDjvuDump or
 * $wgDjvuToXML to be set to a non-empty value.
 *
 * @return bool True when the required settings are present
 */
public function isEnabled() {
	// These are configuration globals. Without this declaration they are
	// undefined locals and the handler would always report itself disabled.
	global $wgDjvuRenderer, $wgDjvuDump, $wgDjvuToXML;

	if ( $wgDjvuRenderer && ( $wgDjvuDump || $wgDjvuToXML ) ) {
		return true;
	}

	wfDebug( "DjVu is disabled, please set \$wgDjvuRenderer and \$wgDjvuDump\n" );

	return false;
}
47
/**
 * Always reports that files handled here must be rendered.
 *
 * @param mixed $file Ignored
 * @return bool Always true
 */
public function mustRender( $file ) {
	return true;
}
55
/**
 * Tell whether thumbnailing the file counts as expensive, judged purely by
 * its size.
 *
 * @param object $file Must provide getSize()
 * @return bool True when the size is strictly greater than EXPENSIVE_SIZE_LIMIT
 */
public function isExpensiveToThumbnail( $file ) {
	$size = $file->getSize();

	return $size > static::EXPENSIVE_SIZE_LIMIT;
}
64
/**
 * Always reports the file as a multi-page document.
 *
 * @param mixed $file Ignored
 * @return bool Always true
 */
public function isMultiPage( $file ) {
	return true;
}
72
/**
 * Return the mapping from the 'img_width' / 'img_page' keys to the
 * handler parameter names 'width' / 'page'.
 *
 * @return string[]
 */
public function getParamMap() {
	$map = [];
	$map['img_width'] = 'width';
	$map['img_page'] = 'page';

	return $map;
}
82
/**
 * Check a single handler parameter.
 *
 * Only 'width', 'height' and 'page' are accepted, and each must be
 * greater than zero. A 'page' value must additionally be a plain integer
 * (surrounding whitespace is tolerated).
 *
 * @param string $name Parameter name
 * @param mixed $value Parameter value
 * @return bool True when the parameter is acceptable
 */
public function validateParam( $name, $value ) {
	if ( $name === 'page' && trim( $value ) !== (string)intval( $value ) ) {
		// Extra junk on the end of page, probably actually a caption
		// e.g. [[File:Foo.djvu|thumb|Page 3 of the document shows foo]]
		return false;
	}

	// Strict matching: a loose in_array() treats integer 0 as equal to any
	// non-numeric string on PHP 7, so 0 would pass as a known name.
	if ( !in_array( $name, [ 'width', 'height', 'page' ], true ) ) {
		return false;
	}

	return $value > 0;
}
104
/**
 * Build the "page<N>-<W>px" string for a set of parameters.
 *
 * @param array $params Must contain 'width'; 'page' defaults to 1
 * @return string|false False when no width is set
 */
public function makeParamString( $params ) {
	if ( isset( $params['width'] ) ) {
		$pageNumber = $params['page'] ?? 1;

		return 'page' . $pageNumber . '-' . $params['width'] . 'px';
	}

	return false;
}
117
/**
 * Parse a "page<N>-<W>px" string back into its parameters.
 *
 * @param string $str
 * @return array|false [ 'width' => string, 'page' => string ], or false
 *   when the string does not have the expected form
 */
public function parseParamString( $str ) {
	$matches = [];
	if ( !preg_match( '/^page(\d+)-(\d+)px$/', $str, $matches ) ) {
		return false;
	}

	// Capture group 1 is the page number, group 2 the pixel width.
	return [ 'width' => $matches[2], 'page' => $matches[1] ];
}
130
/**
 * Reduce a parameter array to the 'width' and 'page' entries.
 *
 * @param array $params Must contain 'width' and 'page'
 * @return array
 */
protected function getScriptParams( $params ) {
	$scriptParams = [];
	foreach ( [ 'width', 'page' ] as $key ) {
		$scriptParams[$key] = $params[$key];
	}

	return $scriptParams;
}
141
/**
 * Render one page of a DjVu file to a thumbnail by shelling out to the
 * configured renderer.
 *
 * @param File $image Source file; getSize(), getName() and getLocalRefPath() are used
 * @param string $dstPath Filesystem path the thumbnail is written to
 * @param string $dstUrl URL handed to the resulting ThumbnailImage
 * @param array $params Transform parameters, validated and completed by normaliseParams()
 * @param int $flags Bitfield; with self::TRANSFORM_LATER nothing is rendered
 * @return ThumbnailImage|MediaTransformError|TransformParameterError
 */
function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) {
	// Configuration globals: the renderer executable and the optional shell
	// command its output is piped through. Without this declaration both
	// would be undefined locals.
	global $wgDjvuRenderer, $wgDjvuPostProcessor;

	if ( !$this->normaliseParams( $image, $params ) ) {
		return new TransformParameterError( $params );
	}
	$width = $params['width'];
	$height = $params['height'];
	$page = $params['page'];

	if ( $flags & self::TRANSFORM_LATER ) {
		$params = [
			'width' => $width,
			'height' => $height,
			'page' => $page
		];

		return new ThumbnailImage( $image, $dstUrl, $dstPath, $params );
	}

	if ( !wfMkdirParents( dirname( $dstPath ), null, __METHOD__ ) ) {
		return new MediaTransformError(
			'thumbnail_error',
			$width,
			$height,
			wfMessage( 'thumbnail_dest_directory' )
		);
	}

	// Get local copy source for shell scripts
	// Thumbnail extraction is very inefficient for large files.
	// Provide a way to pool count limit the number of downloaders.
	if ( $image->getSize() >= 1e7 ) { // 10MB
		$work = new PoolCounterWorkViaCallback( 'GetLocalFileCopy', sha1( $image->getName() ),
			[
				'doWork' => function () use ( $image ) {
					return $image->getLocalRefPath();
				}
			]
		);
		$srcPath = $work->execute();
	} else {
		$srcPath = $image->getLocalRefPath();
	}

	if ( $srcPath === false ) { // Failed to get local copy
		wfDebugLog( 'thumbnail',
			sprintf( 'Thumbnail failed on %s: could not get local copy of "%s"',
				wfHostname(), $image->getName() ) );

		return new MediaTransformError( 'thumbnail_error',
			$params['width'], $params['height'],
			wfMessage( 'filemissing' )
		);
	}

	# Use a subshell (brackets) to aggregate stderr from both pipeline commands
	# before redirecting it to the overall stdout. This works in both Linux and Windows XP.
	$cmd = '(' . Shell::escape(
		// The renderer executable must come first, otherwise the command
		// would start with the "-format" option and have no program to run.
		$wgDjvuRenderer,
		"-format=ppm",
		"-page={$page}",
		"-size={$params['physicalWidth']}x{$params['physicalHeight']}",
		$srcPath );
	if ( $wgDjvuPostProcessor ) {
		// The post processor is a shell command taken from configuration,
		// so it is appended verbatim rather than escaped.
		$cmd .= " | {$wgDjvuPostProcessor}";
	}
	$cmd .= ' > ' . Shell::escape( $dstPath ) . ') 2>&1';
	wfDebug( __METHOD__ . ": $cmd\n" );
	$retval = '';
	$err = wfShellExec( $cmd, $retval );

	$removed = $this->removeBadFile( $dstPath, $retval );
	if ( $retval != 0 || $removed ) {
		$this->logErrorForExternalProcess( $retval, $err, $cmd );
		return new MediaTransformError( 'thumbnail_error', $width, $height, $err );
	}

	$params = [
		'width' => $width,
		'height' => $height,
		'page' => $page
	];

	return new ThumbnailImage( $image, $dstUrl, $dstPath, $params );
}
236
/**
 * Return a DjVuImage for the given path, caching it on the image object
 * (property dejaImage) when an image object is supplied.
 *
 * @param object|null|false $image Object used as cache holder; a falsy
 *   value disables caching
 * @param string $path Passed to the DjVuImage constructor
 * @return DjVuImage
 */
function getDjVuImage( $image, $path ) {
	if ( !$image ) {
		// Nothing to cache on: build a throwaway instance.
		return new DjVuImage( $path );
	}

	if ( isset( $image->dejaImage ) ) {
		return $image->dejaImage;
	}

	$djvu = new DjVuImage( $path );
	$image->dejaImage = $djvu;

	return $djvu;
}
255
/**
 * Get the file's metadata, unserializing it if necessary.
 *
 * Handles three stored forms: a raw XML string (old style), a serialized
 * array with an 'xml' entry, and a serialized array with an 'error' entry.
 *
 * @param File $file
 * @return string|false The XML string (or the raw stored value when it is
 *   neither XML nor a serialized array); false when the stored array
 *   carries an 'error' entry
 * @throws MWException When the unserialized array has neither 'error' nor 'xml'
 */
private function getUnserializedMetadata( File $file ) {
	$metadata = $file->getMetadata();
	// Compare the full five-character prefix. A three-character slice can
	// never equal '<?xml', which made this shortcut unreachable.
	if ( substr( $metadata, 0, 5 ) === '<?xml' ) {
		// Old style. Not serialized but instead just a raw string of XML.
		return $metadata;
	}

	Wikimedia\suppressWarnings();
	$unser = unserialize( $metadata );
	Wikimedia\restoreWarnings();
	if ( is_array( $unser ) ) {
		if ( isset( $unser['error'] ) ) {
			return false;
		} elseif ( isset( $unser['xml'] ) ) {
			return $unser['xml'];
		} else {
			// Should never ever reach here.
			throw new MWException( "Error unserializing DjVu metadata." );
		}
	}

	// unserialize failed. Guess it wasn't really serialized after all,
	return $metadata;
}
287
/**
 * Return the parsed XML tree for the file's DjVu metadata, caching both
 * trees on the image object (properties djvuTextTree and dejaMetaTree).
 *
 * @param File $image Passed on to getUnserializedMetadata()
 * @param bool $gettext True to get the text tree, false for the metadata tree
 * @return SimpleXMLElement|false False when the metadata is invalid or the
 *   requested tree could not be extracted
 */
public function getMetaTree( $image, $gettext = false ) {
	// Name of the cache property holding the tree that was asked for.
	$cacheProperty = $gettext ? 'djvuTextTree' : 'dejaMetaTree';
	if ( isset( $image->$cacheProperty ) ) {
		return $image->$cacheProperty;
	}

	$metadata = $this->getUnserializedMetadata( $image );
	if ( !$this->isMetadataValid( $image, $metadata ) ) {
		wfDebug( "DjVu XML metadata is invalid or missing, should have been fixed in upgradeRow\n" );

		return false;
	}

	// Populate both caches in one go, whichever tree was requested.
	$trees = $this->extractTreesFromMetadata( $metadata );
	$image->djvuTextTree = $trees['TextTree'];
	$image->dejaMetaTree = $trees['MetaTree'];

	return $image->$cacheProperty;
}
319
/**
 * Extract the metadata and text trees from metadata XML in string form.
 *
 * A root element named 'mw-djvu' is treated as a wrapper whose 'DjVuXML'
 * and 'DjVuTxt' children are the two trees; any other root is taken to be
 * the metadata tree itself.
 *
 * @param string $metadata XML source
 * @return array [ 'MetaTree' => SimpleXMLElement|false, 'TextTree' => SimpleXMLElement|false ]
 */
protected function extractTreesFromMetadata( $metadata ) {
	// Start from false rather than null to avoid further attempts
	$trees = [ 'MetaTree' => false, 'TextTree' => false ];

	Wikimedia\suppressWarnings();
	try {
		$root = new SimpleXMLElement( $metadata, LIBXML_PARSEHUGE );
		if ( $root->getName() != 'mw-djvu' ) {
			$trees['MetaTree'] = $root;
		} else {
			foreach ( $root->children() as $child ) {
				// @todo File::djvuTextTree and File::dejaMetaTree are declared
				// dynamically. Add a public File::$data to facilitate this?
				switch ( $child->getName() ) {
					case 'DjVuTxt':
						$trees['TextTree'] = $child;
						break;
					case 'DjVuXML':
						$trees['MetaTree'] = $child;
						break;
				}
			}
		}
	} catch ( Exception $e ) {
		wfDebug( "Bogus multipage XML metadata\n" );
	}
	Wikimedia\restoreWarnings();

	return $trees;
}
353
/**
 * Return whatever DjVuImage::getImageSize() reports for the file.
 *
 * @param object|null|false $image Cache holder passed to getDjVuImage()
 * @param string $path File path passed to getDjVuImage()
 * @return mixed Result of DjVuImage::getImageSize()
 */
function getImageSize( $image, $path ) {
	$djvu = $this->getDjVuImage( $image, $path );

	return $djvu->getImageSize();
}
357
/**
 * Get the thumbnail extension and MIME type.
 *
 * The result does not depend on the arguments: the extension is
 * $wgDjvuOutputExtension and the MIME type is guessed from it.
 *
 * @param string $ext Unused
 * @param string $mime Unused
 * @param array|null $params Unused
 * @return array [ extension, MIME type ]
 */
public function getThumbType( $ext, $mime, $params = null ) {
	// Configuration global; without this declaration it is an undefined local.
	global $wgDjvuOutputExtension;
	// Per-request memo of the guessed MIME type. Named differently from the
	// $mime parameter so the static no longer shadows it.
	static $outputMime;
	if ( !isset( $outputMime ) ) {
		$magic = MediaWiki\MediaWikiServices::getInstance()->getMimeAnalyzer();
		$outputMime = $magic->guessTypesForExtension( $wgDjvuOutputExtension );
	}

	return [ $wgDjvuOutputExtension, $outputMime ];
}
368
/**
 * Produce the serialized metadata for a DjVu file.
 *
 * @param object|null|false $image Cache holder passed to getDjVuImage()
 * @param string $path File path passed to getDjVuImage()
 * @return string Serialized array: [ 'xml' => ... ] on success, or
 *   [ 'error' => ... ] when retrieveMetaData() returned false
 */
public function getMetadata( $image, $path ) {
	wfDebug( "Getting DjVu metadata for $path\n" );

	$xml = $this->getDjVuImage( $image, $path )->retrieveMetaData();

	// The 'error' form is a special value so that we don't repetitively
	// try and decode a broken file.
	$payload = ( $xml === false )
		? [ 'error' => 'Error extracting metadata' ]
		: [ 'xml' => $xml ];

	return serialize( $payload );
}
380
/**
 * Get a string describing the type of metadata, for display purposes.
 *
 * @param mixed $image Unused
 * @return string Always 'djvuxml'
 */
public function getMetadataType( $image ) {
	return 'djvuxml';
}
384
/**
 * Check if the metadata string is valid for this handler.
 *
 * @param mixed $image Unused
 * @param mixed $metadata Value to check
 * @return bool False for an empty value or one loosely equal to a
 *   serialized empty array; true otherwise
 */
public function isMetadataValid( $image, $metadata ) {
	if ( empty( $metadata ) ) {
		return false;
	}

	return $metadata != serialize( [] );
}
388
/**
 * Page count of the document.
 *
 * @param File $image
 * @return int|false False when no dimension information is available
 */
public function pageCount( File $image ) {
	$dimensionInfo = $this->getDimensionInfo( $image );
	if ( !$dimensionInfo ) {
		return false;
	}

	return $dimensionInfo['pageCount'];
}
394
/**
 * Get the dimensions recorded for one page.
 *
 * @param File $image
 * @param int $page Page number, counted from 1
 * @return array|false The stored entry for that page, or false when
 *   nothing is recorded for it
 */
public function getPageDimensions( File $image, $page ) {
	// Pages are numbered from 1; the stored list is indexed from 0.
	$zeroBased = $page - 1;

	$dimensionInfo = $this->getDimensionInfo( $image );
	if ( !$dimensionInfo ) {
		return false;
	}

	return $dimensionInfo['dimensionsByPage'][$zeroBased] ?? false;
}
405
/**
 * Fetch the dimension information for a file through the main WAN object
 * cache, computing it from the metadata tree on a cache miss.
 *
 * The cache key is built from the file's SHA-1.
 *
 * @param File $file
 * @return array|false Whatever getDimensionInfoFromMetaTree() produced
 */
protected function getDimensionInfo( File $file ) {
	$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
	$key = $cache->makeKey( 'file-djvu', 'dimensions', $file->getSha1() );

	// Invoked by the cache only when it has to (re)generate the value.
	$regenerate = function () use ( $file ) {
		return $this->getDimensionInfoFromMetaTree( $this->getMetaTree( $file ) );
	};

	return $cache->getWithSetCallback(
		$key,
		$cache::TTL_INDEFINITE,
		$regenerate,
		[ 'pcTTL' => $cache::TTL_INDEFINITE ]
	);
}
418
/**
 * Given an XML metadata tree, return dimension information about the
 * document.
 *
 * The page count is the number of OBJECT elements anywhere in the tree;
 * per-page sizes are read from the OBJECT children of the first BODY.
 *
 * @param SimpleXMLElement|false $metatree
 * @return array|false False for a falsy tree, otherwise
 *   [ 'pageCount' => int, 'dimensionsByPage' => array ] where each page
 *   entry is [ 'width' => int, 'height' => int ] or false
 */
protected function getDimensionInfoFromMetaTree( $metatree ) {
	if ( !$metatree ) {
		return false;
	}

	$pageTotal = count( $metatree->xpath( '//OBJECT' ) );
	$dimensions = [];
	$pageIndex = 0;
	while ( $pageIndex < $pageTotal ) {
		$object = $metatree->BODY[0]->OBJECT[$pageIndex];
		$dimensions[$pageIndex] = $object
			? [ 'width' => (int)$object['width'], 'height' => (int)$object['height'] ]
			: false;
		$pageIndex++;
	}

	return [ 'pageCount' => $pageTotal, 'dimensionsByPage' => $dimensions ];
}
445
/**
 * Get the text stored for one page of the document.
 *
 * @param File $image
 * @param int $page Page number, counted from 1
 * @return SimpleXMLElement|false The 'value' attribute of the matching
 *   PAGE element, or false when there is no text tree or no such page
 */
function getPageText( File $image, $page ) {
	$textTree = $this->getMetaTree( $image, true );
	if ( !$textTree ) {
		return false;
	}

	$pageNode = $textTree->BODY[0]->PAGE[$page - 1];

	return $pageNode ? $pageNode['value'] : false;
}
466}
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
serialize()
unserialize( $serialized)
$wgDjvuRenderer
Path of the ddjvu DJVU renderer Enable this and $wgDjvuDump to enable djvu rendering example: $wgDjvu...
$wgDjvuOutputExtension
File extension for the DJVU post processor output.
$wgDjvuToXML
Path of the djvutoxml executable This works like djvudump except much, much slower as of version 3....
$wgDjvuPostProcessor
Shell command for the DJVU post processor Default: pnmtojpeg, since ddjvu generates ppm output Set th...
$wgDjvuDump
Path of the djvudump executable Enable this and $wgDjvuRenderer to enable djvu rendering example: $wg...
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfShellExec( $cmd, &$retval=null, $environ=[], $limits=[], $options=[])
Execute a shell command, with time and memory limits mirrored from the PHP configuration if supported...
wfHostname()
Fetch server name for use in error reporting etc.
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not.
wfMkdirParents( $dir, $mode=null, $caller=null)
Make directory, and make all parent directories if they don't exist.
Handler for DjVu images.
getDimensionInfoFromMetaTree( $metatree)
Given an XML metadata tree, returns dimension information about the document.
getDjVuImage( $image, $path)
Cache an instance of DjVuImage in an Image object, return that instance.
makeParamString( $params)
isExpensiveToThumbnail( $file)
True if creating thumbnails from the file is large or otherwise resource-intensive.
validateParam( $name, $value)
isMultiPage( $file)
getDimensionInfo(File $file)
const EXPENSIVE_SIZE_LIMIT
getScriptParams( $params)
doTransform( $image, $dstPath, $dstUrl, $params, $flags=0)
pageCount(File $image)
Page count for a multi-page document, false if unsupported or unknown.
getPageText(File $image, $page)
isMetadataValid( $image, $metadata)
Check if the metadata string is valid for this handler.
getPageDimensions(File $image, $page)
Get an associative array of page dimensions Currently "width" and "height" are understood,...
getMetadataType( $image)
Get a string describing the type of metadata, for display purposes.
getMetaTree( $image, $gettext=false)
Cache a document tree for the DjVu XML metadata.
getThumbType( $ext, $mime, $params=null)
Get the thumbnail extension and MIME type for a given source MIME type.
getUnserializedMetadata(File $file)
Get metadata, unserializing it if necessary.
extractTreesFromMetadata( $metadata)
Extracts metadata and text trees from metadata XML in string form.
mustRender( $file)
parseParamString( $str)
getMetadata( $image, $path)
Get handler-specific metadata which will be saved in the img_metadata field.
getImageSize( $image, $path)
Get an image size array like that returned by getimagesize(), or false if it can't be determined.
Support for detecting/validating DjVu image files and getting some basic file metadata (resolution et...
Definition DjVuImage.php:38
Implements some public methods and some protected utility functions which are required by multiple ch...
Definition File.php:52
Media handler abstract base class for images.
normaliseParams( $image, &$params)
MediaWiki exception.
logErrorForExternalProcess( $retval, $err, $cmd)
Log an error that occurred in an external process.
removeBadFile( $dstPath, $retval=0)
Check for zero-sized thumbnails.
Basic media transform error class.
MediaWikiServices is the service locator for the application scope of MediaWiki.
Executes shell commands.
Definition Shell.php:44
Convenience class for dealing with PoolCounters using callbacks.
execute( $skipcache=false)
Get the result of the work (whatever it is), or the result of the error() function.
Media transform output for images.
Shortcut class for parameter validation errors.
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users. It's targeted particularly at maintainers for Linux, since it's been observed that distribution packages of MediaWiki often break. We've consistently had to recommend that users seeking support use official tarballs instead of their distribution's, and this often solves whatever problem the user is having. It would be nice if this could such as
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not it can be in the form of< username >< more info > e g for bot passwords intended to be added to log contexts Fields it might only if the login was with a bot password it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable modifiable after all normalizations have been except for the $wgMaxImageArea check $image
Definition hooks.txt:886
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation use $formDescriptor instead default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html(usually something like "&lt;div ...>$1&lt;/div>"). - flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException':Called before an exception(or PHP error) is logged. This is meant for integration with external error aggregation services
Allows to change the fields on the form that will be generated $name
Definition hooks.txt:271
returning false will NOT prevent logging $e
Definition hooks.txt:2175
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback function
Definition injection.txt:30
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition injection.txt:37
$cache
Definition mcc.php:33
if(!is_readable( $file)) $ext
Definition router.php:48
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Definition router.php:42
$params