MediaWiki  1.32.0
DjVuHandler.php
Go to the documentation of this file.
1 <?php
/**
 * Handler for DjVu images.
 *
 * Thumbnails are produced by shelling out to the configured DjVu renderer
 * (optionally piped through a post processor). Page count, page dimensions
 * and page text are read from the XML metadata stored for the file.
 */
class DjVuHandler extends ImageHandler {
	const EXPENSIVE_SIZE_LIMIT = 10485760; // 10MiB

	/**
	 * Report whether DjVu support is configured.
	 *
	 * @return bool True when $wgDjvuRenderer is set and at least one of
	 *  $wgDjvuDump / $wgDjvuToXML is set.
	 */
	public function isEnabled() {
		// These are configuration globals; without this declaration they would be
		// undefined locals and the handler would always report itself as disabled.
		global $wgDjvuRenderer, $wgDjvuDump, $wgDjvuToXML;

		if ( !$wgDjvuRenderer || ( !$wgDjvuDump && !$wgDjvuToXML ) ) {
			wfDebug( "DjVu is disabled, please set \$wgDjvuRenderer and \$wgDjvuDump\n" );

			return false;
		}

		return true;
	}

	/**
	 * @param File $file
	 * @return bool Always true for this handler.
	 */
	public function mustRender( $file ) {
		return true;
	}

	/**
	 * True if creating thumbnails from the file is large or otherwise
	 * resource-intensive.
	 *
	 * @param File $file
	 * @return bool True when the file size exceeds EXPENSIVE_SIZE_LIMIT.
	 */
	public function isExpensiveToThumbnail( $file ) {
		return $file->getSize() > static::EXPENSIVE_SIZE_LIMIT;
	}

	/**
	 * @param File $file
	 * @return bool Always true for this handler.
	 */
	public function isMultiPage( $file ) {
		return true;
	}

	/**
	 * @return array Map of magic word IDs to handler parameter names.
	 */
	public function getParamMap() {
		return [
			'img_width' => 'width',
			'img_page' => 'page',
		];
	}

	/**
	 * Validate a single handler parameter.
	 *
	 * @param string $name Parameter name
	 * @param mixed $value Parameter value
	 * @return bool True only for 'width', 'height' or 'page' with a value
	 *  greater than zero; 'page' must additionally be a plain integer string.
	 */
	public function validateParam( $name, $value ) {
		if ( $name === 'page' && trim( $value ) !== (string)intval( $value ) ) {
			// Extra junk on the end of page, probably actually a caption
			// e.g. [[File:Foo.djvu|thumb|Page 3 of the document shows foo]]
			return false;
		}

		// Strict comparison so a non-string name can never loosely match.
		if ( !in_array( $name, [ 'width', 'height', 'page' ], true ) ) {
			return false;
		}

		if ( $value <= 0 ) {
			return false;
		}

		return true;
	}

	/**
	 * Build the parameter string used to identify a thumbnail.
	 *
	 * @param array $params Must contain 'width'; 'page' defaults to 1.
	 * @return string|bool "page{N}-{W}px", or false when no width is given.
	 */
	public function makeParamString( $params ) {
		$page = $params['page'] ?? 1;
		if ( !isset( $params['width'] ) ) {
			return false;
		}

		return "page{$page}-{$params['width']}px";
	}

	/**
	 * Inverse of makeParamString().
	 *
	 * @param string $str
	 * @return array|bool Array with 'width' and 'page' (as matched digit
	 *  strings), or false when the string does not match the expected form.
	 */
	public function parseParamString( $str ) {
		$m = false;
		if ( preg_match( '/^page(\d+)-(\d+)px$/', $str, $m ) ) {
			return [ 'width' => $m[2], 'page' => $m[1] ];
		}

		return false;
	}

	/**
	 * @param array $params Must contain 'width' and 'page'.
	 * @return array The 'width' and 'page' entries of $params.
	 */
	public function getScriptParams( $params ) {
		return [
			'width' => $params['width'],
			'page' => $params['page'],
		];
	}

	/**
	 * Render one page of a DjVu file to a thumbnail.
	 *
	 * @param File $image
	 * @param string $dstPath Filesystem destination path
	 * @param string $dstUrl Destination URL to use in output
	 * @param array $params Transform parameters; normalised in place by
	 *  normaliseParams() before use
	 * @param int $flags Bitfield; TRANSFORM_LATER skips the actual rendering
	 * @return MediaTransformOutput ThumbnailImage on success (or when rendering
	 *  is deferred), TransformParameterError or MediaTransformError on failure.
	 */
	public function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) {
		// Both are configuration globals and must be declared to be visible here.
		global $wgDjvuRenderer, $wgDjvuPostProcessor;

		if ( !$this->normaliseParams( $image, $params ) ) {
			return new TransformParameterError( $params );
		}
		$width = $params['width'];
		$height = $params['height'];
		$page = $params['page'];

		// Parameters handed to the resulting ThumbnailImage in both success paths.
		$thumbParams = [
			'width' => $width,
			'height' => $height,
			'page' => $page
		];

		if ( $flags & self::TRANSFORM_LATER ) {
			return new ThumbnailImage( $image, $dstUrl, $dstPath, $thumbParams );
		}

		if ( !wfMkdirParents( dirname( $dstPath ), null, __METHOD__ ) ) {
			return new MediaTransformError(
				'thumbnail_error',
				$width,
				$height,
				wfMessage( 'thumbnail_dest_directory' )
			);
		}

		// Get local copy source for shell scripts
		// Thumbnail extraction is very inefficient for large files.
		// Provide a way to pool count limit the number of downloaders.
		// Note: this threshold is 10 MB decimal (1e7 bytes), which is slightly
		// lower than EXPENSIVE_SIZE_LIMIT (10 MiB).
		if ( $image->getSize() >= 1e7 ) { // 10MB
			$work = new PoolCounterWorkViaCallback( 'GetLocalFileCopy', sha1( $image->getName() ),
				[
					'doWork' => function () use ( $image ) {
						return $image->getLocalRefPath();
					}
				]
			);
			$srcPath = $work->execute();
		} else {
			$srcPath = $image->getLocalRefPath();
		}

		if ( $srcPath === false ) { // Failed to get local copy
			wfDebugLog( 'thumbnail',
				sprintf( 'Thumbnail failed on %s: could not get local copy of "%s"',
					wfHostname(), $image->getName() ) );

			return new MediaTransformError( 'thumbnail_error',
				$width, $height,
				wfMessage( 'filemissing' )
			);
		}

		# Use a subshell (brackets) to aggregate stderr from both pipeline commands
		# before redirecting it to the overall stdout. This works in both Linux and Windows XP.
		$cmd = '(' . wfEscapeShellArg(
			// The renderer executable must come first; the rest are its arguments.
			$wgDjvuRenderer,
			"-format=ppm",
			"-page={$page}",
			"-size={$params['physicalWidth']}x{$params['physicalHeight']}",
			$srcPath );
		if ( $wgDjvuPostProcessor ) {
			// The post processor is a configured shell command and is appended as-is.
			$cmd .= " | {$wgDjvuPostProcessor}";
		}
		$cmd .= ' > ' . wfEscapeShellArg( $dstPath ) . ') 2>&1';
		wfDebug( __METHOD__ . ": $cmd\n" );
		$retval = '';
		$err = wfShellExec( $cmd, $retval );

		// removeBadFile() checks for zero-sized thumbnails.
		$removed = $this->removeBadFile( $dstPath, $retval );
		if ( $retval != 0 || $removed ) {
			$this->logErrorForExternalProcess( $retval, $err, $cmd );
			return new MediaTransformError( 'thumbnail_error', $width, $height, $err );
		}

		return new ThumbnailImage( $image, $dstUrl, $dstPath, $thumbParams );
	}

	/**
	 * Cache an instance of DjVuImage in an Image object, return that instance.
	 *
	 * @param File|false|null $image When falsy, a fresh uncached DjVuImage is
	 *  returned; otherwise the instance is stored on $image->dejaImage.
	 * @param string $path
	 * @return DjVuImage
	 */
	public function getDjVuImage( $image, $path ) {
		if ( !$image ) {
			$deja = new DjVuImage( $path );
		} elseif ( !isset( $image->dejaImage ) ) {
			$deja = $image->dejaImage = new DjVuImage( $path );
		} else {
			$deja = $image->dejaImage;
		}

		return $deja;
	}

	/**
	 * Get metadata, unserializing it if necessary.
	 *
	 * @param File $file The DjVu file in question
	 * @return string|bool XML metadata as a string, or false when the stored
	 *  metadata records an extraction error.
	 * @throws MWException When the metadata unserializes to an array that has
	 *  neither an 'error' nor an 'xml' key.
	 */
	private function getUnserializedMetadata( File $file ) {
		$metadata = $file->getMetadata();
		// Compare the first five bytes: '<?xml' is five characters long, so a
		// three-byte prefix could never match it.
		if ( substr( $metadata, 0, 5 ) === '<?xml' ) {
			// Old style. Not serialized but instead just a raw string of XML.
			return $metadata;
		}

		Wikimedia\suppressWarnings();
		$unser = unserialize( $metadata );
		Wikimedia\restoreWarnings();
		if ( is_array( $unser ) ) {
			if ( isset( $unser['error'] ) ) {
				return false;
			} elseif ( isset( $unser['xml'] ) ) {
				return $unser['xml'];
			} else {
				// Should never ever reach here.
				throw new MWException( "Error unserializing DjVu metadata." );
			}
		}

		// unserialize failed. Guess it wasn't really serialized after all,
		return $metadata;
	}

	/**
	 * Cache a document tree for the DjVu XML metadata.
	 *
	 * Both trees are stored on the file object ($image->djvuTextTree and
	 * $image->dejaMetaTree) so later calls do not re-parse the XML.
	 *
	 * @param File $image
	 * @param bool $gettext True to return the text tree instead of the
	 *  metadata tree
	 * @return bool|SimpleXMLElement The requested tree, or false when the
	 *  metadata is invalid or the tree could not be extracted.
	 */
	public function getMetaTree( $image, $gettext = false ) {
		if ( $gettext && isset( $image->djvuTextTree ) ) {
			return $image->djvuTextTree;
		}
		if ( !$gettext && isset( $image->dejaMetaTree ) ) {
			return $image->dejaMetaTree;
		}

		$metadata = $this->getUnserializedMetadata( $image );
		if ( !$this->isMetadataValid( $image, $metadata ) ) {
			wfDebug( "DjVu XML metadata is invalid or missing, should have been fixed in upgradeRow\n" );

			return false;
		}

		$trees = $this->extractTreesFromMetadata( $metadata );
		$image->djvuTextTree = $trees['TextTree'];
		$image->dejaMetaTree = $trees['MetaTree'];

		return $gettext ? $image->djvuTextTree : $image->dejaMetaTree;
	}

	/**
	 * Extracts metadata and text trees from metadata XML in string form.
	 *
	 * @param string $metadata XML metadata as a string
	 * @return array Array with 'MetaTree' and 'TextTree' keys; each value is a
	 *  SimpleXMLElement or false when that tree was not found or parsing failed.
	 */
	protected function extractTreesFromMetadata( $metadata ) {
		// Set to false rather than null to avoid further attempts
		$metaTree = false;
		$textTree = false;

		Wikimedia\suppressWarnings();
		try {
			$tree = new SimpleXMLElement( $metadata, LIBXML_PARSEHUGE );
			if ( $tree->getName() == 'mw-djvu' ) {
				foreach ( $tree->children() as $b ) {
					if ( $b->getName() == 'DjVuTxt' ) {
						// @todo File::djvuTextTree and File::dejaMetaTree are declared
						// dynamically. Add a public File::$data to facilitate this?
						$textTree = $b;
					} elseif ( $b->getName() == 'DjVuXML' ) {
						$metaTree = $b;
					}
				}
			} else {
				// No 'mw-djvu' wrapper: the whole document is the metadata tree.
				$metaTree = $tree;
			}
		} catch ( Exception $e ) {
			wfDebug( "Bogus multipage XML metadata\n" );
		} finally {
			// Always pair suppressWarnings() with restoreWarnings(), even if a
			// throwable that is not caught above propagates out of the try block.
			Wikimedia\restoreWarnings();
		}

		return [ 'MetaTree' => $metaTree, 'TextTree' => $textTree ];
	}

	/**
	 * Get the image size as reported by DjVuImage::getImageSize().
	 *
	 * @param File|false|null $image
	 * @param string $path
	 * @return mixed Whatever DjVuImage::getImageSize() returns.
	 */
	public function getImageSize( $image, $path ) {
		return $this->getDjVuImage( $image, $path )->getImageSize();
	}

	/**
	 * Get the thumbnail extension and MIME type for a given source MIME type.
	 *
	 * The source extension and MIME type are not consulted; the result is
	 * derived from $wgDjvuOutputExtension only.
	 *
	 * @param string $ext Unused
	 * @param string $mime Unused
	 * @param array|null $params Unused
	 * @return array [ output extension, MIME type guessed for that extension ]
	 */
	public function getThumbType( $ext, $mime, $params = null ) {
		global $wgDjvuOutputExtension;
		// Memoised per request. Deliberately not named $mime, which would
		// clash with the parameter of the same name.
		static $djvuMime = null;
		if ( $djvuMime === null ) {
			$magic = MediaWiki\MediaWikiServices::getInstance()->getMimeAnalyzer();
			$djvuMime = $magic->guessTypesForExtension( $wgDjvuOutputExtension );
		}

		return [ $wgDjvuOutputExtension, $djvuMime ];
	}

	/**
	 * Get handler-specific metadata which will be saved in the img_metadata field.
	 *
	 * @param File|false|null $image
	 * @param string $path
	 * @return string Serialized array holding either an 'xml' key with the
	 *  extracted XML or an 'error' key when extraction failed.
	 */
	public function getMetadata( $image, $path ) {
		wfDebug( "Getting DjVu metadata for $path\n" );

		$xml = $this->getDjVuImage( $image, $path )->retrieveMetaData();
		if ( $xml === false ) {
			// Special value so that we don't repetitively try and decode a broken file.
			return serialize( [ 'error' => 'Error extracting metadata' ] );
		}

		return serialize( [ 'xml' => $xml ] );
	}

	/**
	 * Get a string describing the type of metadata, for display purposes.
	 *
	 * @param File $image
	 * @return string Always 'djvuxml'.
	 */
	public function getMetadataType( $image ) {
		return 'djvuxml';
	}

	/**
	 * Check if the metadata string is valid for this handler.
	 *
	 * @param File $image
	 * @param string|bool $metadata
	 * @return bool False when $metadata is empty or equals a serialized empty array.
	 */
	public function isMetadataValid( $image, $metadata ) {
		return !empty( $metadata ) && $metadata != serialize( [] );
	}

	/**
	 * Page count for a multi-page document, false if unsupported or unknown.
	 *
	 * @param File $image
	 * @return int|bool
	 */
	public function pageCount( File $image ) {
		$info = $this->getDimensionInfo( $image );

		return $info ? $info['pageCount'] : false;
	}

	/**
	 * Get an associative array of page dimensions ('width' and 'height').
	 *
	 * @param File $image
	 * @param int $page 1-based page number
	 * @return array|bool Dimensions for the page, or false when unknown.
	 */
	public function getPageDimensions( File $image, $page ) {
		$index = $page - 1; // MW starts pages at 1

		$info = $this->getDimensionInfo( $image );
		if ( $info && isset( $info['dimensionsByPage'][$index] ) ) {
			return $info['dimensionsByPage'][$index];
		}

		return false;
	}

	/**
	 * Get page count and per-page dimensions, cached under a key derived from
	 * the file's SHA-1.
	 *
	 * @param File $file
	 * @return array|bool Result of getDimensionInfoFromMetaTree().
	 */
	protected function getDimensionInfo( File $file ) {
		// The main WAN cache object; it must be fetched before it can be used.
		$cache = ObjectCache::getMainWANInstance();

		return $cache->getWithSetCallback(
			$cache->makeKey( 'file-djvu', 'dimensions', $file->getSha1() ),
			$cache::TTL_INDEFINITE,
			function () use ( $file ) {
				$tree = $this->getMetaTree( $file );
				return $this->getDimensionInfoFromMetaTree( $tree );
			},
			[ 'pcTTL' => $cache::TTL_INDEFINITE ]
		);
	}

	/**
	 * Given an XML metadata tree, returns dimension information about the document.
	 *
	 * @param bool|SimpleXMLElement $metatree The file's metadata tree
	 * @return bool|array False when no tree is given; otherwise an array with
	 *  'pageCount' and 'dimensionsByPage' (0-based list of width/height arrays,
	 *  or false for a page whose OBJECT element is missing).
	 */
	protected function getDimensionInfoFromMetaTree( $metatree ) {
		if ( !$metatree ) {
			return false;
		}

		$dimsByPage = [];
		$count = count( $metatree->xpath( '//OBJECT' ) );
		for ( $i = 0; $i < $count; $i++ ) {
			$o = $metatree->BODY[0]->OBJECT[$i];
			if ( $o ) {
				$dimsByPage[$i] = [
					'width' => (int)$o['width'],
					'height' => (int)$o['height'],
				];
			} else {
				$dimsByPage[$i] = false;
			}
		}

		return [ 'pageCount' => $count, 'dimensionsByPage' => $dimsByPage ];
	}

	/**
	 * Get the text of one page from the text tree.
	 *
	 * @param File $image
	 * @param int $page 1-based page number
	 * @return mixed The 'value' attribute of the matching PAGE element, or
	 *  false when there is no text tree or no such page.
	 */
	public function getPageText( File $image, $page ) {
		$tree = $this->getMetaTree( $image, true );
		if ( !$tree ) {
			return false;
		}

		$o = $tree->BODY[0]->PAGE[$page - 1];
		if ( $o ) {
			return $o['value'];
		}

		return false;
	}
}
MediaHandler\removeBadFile
removeBadFile( $dstPath, $retval=0)
Check for zero-sized thumbnails.
Definition: MediaHandler.php:675
DjVuHandler\isMetadataValid
isMetadataValid( $image, $metadata)
Check if the metadata string is valid for this handler.
Definition: DjVuHandler.php:383
MediaTransformError
Basic media transform error class.
Definition: MediaTransformOutput.php:441
ThumbnailImage
Media transform output for images.
Definition: MediaTransformOutput.php:277
DjVuHandler\getParamMap
getParamMap()
Definition: DjVuHandler.php:74
DjVuHandler\getDimensionInfoFromMetaTree
getDimensionInfoFromMetaTree( $metatree)
Given an XML metadata tree, returns dimension information about the document.
Definition: DjVuHandler.php:422
wfMkdirParents
wfMkdirParents( $dir, $mode=null, $caller=null)
Make directory, and make all parent directories if they don't exist.
Definition: GlobalFunctions.php:2050
File\getMetadata
getMetadata()
Get handler-specific metadata. Overridden by LocalFile, UnregisteredLocalFile. STUB.
Definition: File.php:658
captcha-old.count
count
Definition: captcha-old.py:249
DjVuHandler\isMultiPage
isMultiPage( $file)
Definition: DjVuHandler.php:67
DjVuHandler\parseParamString
parseParamString( $str)
Definition: DjVuHandler.php:120
DjVuHandler\extractTreesFromMetadata
extractTreesFromMetadata( $metadata)
Extracts metadata and text trees from metadata XML in string form.
Definition: DjVuHandler.php:323
$wgDjvuPostProcessor
$wgDjvuPostProcessor
Shell command for the DjVu post processor. Default: pnmtojpeg, since ddjvu generates ppm output. Set th...
Definition: DefaultSettings.php:1616
DjVuHandler\getUnserializedMetadata
getUnserializedMetadata(File $file)
Get metadata, unserializing it if necessary.
Definition: DjVuHandler.php:261
DjVuHandler\pageCount
pageCount(File $image)
Page count for a multi-page document, false if unsupported or unknown.
Definition: DjVuHandler.php:387
DjVuHandler\isEnabled
isEnabled()
Definition: DjVuHandler.php:35
File\getSha1
getSha1()
Get the SHA-1 base 36 hash of the file.
Definition: File.php:2137
$params
$params
Definition: styleTest.css.php:44
wfHostname
wfHostname()
Fetch server name for use in error reporting etc.
Definition: GlobalFunctions.php:1392
PoolCounterWorkViaCallback
Convenience class for dealing with PoolCounters using callbacks.
Definition: PoolCounterWorkViaCallback.php:28
serialize
serialize()
Definition: ApiMessageTrait.php:131
DjVuHandler\makeParamString
makeParamString( $params)
Definition: DjVuHandler.php:107
$wgDjvuRenderer
$wgDjvuRenderer
Path of the ddjvu DjVu renderer. Enable this and $wgDjvuDump to enable DjVu rendering. Example: $wgDjvu...
Definition: DefaultSettings.php:1586
wfDebugLog
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not.
Definition: GlobalFunctions.php:1082
DjVuHandler\EXPENSIVE_SIZE_LIMIT
const EXPENSIVE_SIZE_LIMIT
Definition: DjVuHandler.php:30
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
DjVuHandler\getMetadata
getMetadata( $image, $path)
Get handler-specific metadata which will be saved in the img_metadata field.
Definition: DjVuHandler.php:367
DjVuHandler\getDjVuImage
getDjVuImage( $image, $path)
Cache an instance of DjVuImage in an Image object, return that instance.
Definition: DjVuHandler.php:242
File
Implements some public methods and some protected utility functions which are required by multiple ch...
Definition: File.php:51
MWException
MediaWiki exception.
Definition: MWException.php:26
PoolCounterWork\execute
execute( $skipcache=false)
Get the result of the work (whatever it is), or the result of the error() function.
Definition: PoolCounterWork.php:104
$wgDjvuToXML
$wgDjvuToXML
Path of the djvutoxml executable. This works like djvudump except much, much slower as of version 3....
Definition: DefaultSettings.php:1609
DjVuHandler\getPageDimensions
getPageDimensions(File $image, $page)
Get an associative array of page dimensions Currently "width" and "height" are understood,...
Definition: DjVuHandler.php:393
ImageHandler
Media handler abstract base class for images.
Definition: ImageHandler.php:29
DjVuHandler\isExpensiveToThumbnail
isExpensiveToThumbnail( $file)
True if creating thumbnails from the file is large or otherwise resource-intensive.
Definition: DjVuHandler.php:59
use
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to use
Definition: MIT-LICENSE.txt:10
DjVuHandler\validateParam
validateParam( $name, $value)
Definition: DjVuHandler.php:86
$wgDjvuOutputExtension
$wgDjvuOutputExtension
File extension for the DJVU post processor output.
Definition: DefaultSettings.php:1621
wfDebug
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Definition: GlobalFunctions.php:988
$image
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable modifiable after all normalizations have been except for the $wgMaxImageArea check $image
Definition: hooks.txt:813
ImageHandler\normaliseParams
normaliseParams( $image, &$params)
Definition: ImageHandler.php:86
$name
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:302
$mime
if( $ext=='php'|| $ext=='php5') $mime
Definition: router.php:59
MediaWiki\MediaWikiServices\getInstance
static getInstance()
Returns the global default instance of the top level service locator.
Definition: MediaWikiServices.php:120
DjVuHandler\getImageSize
getImageSize( $image, $path)
Get an image size array like that returned by getimagesize(), or false if it can't be determined.
Definition: DjVuHandler.php:352
$e
div flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException' returning false will NOT prevent logging $e
Definition: hooks.txt:2213
$value
$value
Definition: styleTest.css.php:49
$retval
please add to it if you re going to add events to the MediaWiki code where normally authentication against an external auth plugin would be creating a account incomplete not yet checked for validity & $retval
Definition: hooks.txt:244
TransformParameterError
Shortcut class for parameter validation errors.
Definition: MediaTransformOutput.php:487
DjVuHandler\getMetadataType
getMetadataType( $image)
Get a string describing the type of metadata, for display purposes.
Definition: DjVuHandler.php:379
DjVuHandler\getScriptParams
getScriptParams( $params)
Definition: DjVuHandler.php:133
DjVuHandler
Handler for DjVu images.
Definition: DjVuHandler.php:29
unserialize
unserialize( $serialized)
Definition: ApiMessageTrait.php:139
$cache
$cache
Definition: mcc.php:33
ObjectCache\getMainWANInstance
static getMainWANInstance()
Get the main WAN cache object.
Definition: ObjectCache.php:378
DjVuHandler\getThumbType
getThumbType( $ext, $mime, $params=null)
Get the thumbnail extension and MIME type for a given source MIME type.
Definition: DjVuHandler.php:356
MediaHandler\logErrorForExternalProcess
logErrorForExternalProcess( $retval, $err, $cmd)
Log an error that occurred in an external process.
Definition: MediaHandler.php:753
DjVuHandler\doTransform
doTransform( $image, $dstPath, $dstUrl, $params, $flags=0)
Definition: DjVuHandler.php:148
$path
$path
Definition: NoLocalSettings.php:25
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
wfEscapeShellArg
wfEscapeShellArg(... $args)
Version of escapeshellarg() that works better on Windows.
Definition: GlobalFunctions.php:2183
DjVuHandler\getMetaTree
getMetaTree( $image, $gettext=false)
Cache a document tree for the DjVu XML metadata.
Definition: DjVuHandler.php:292
$wgDjvuDump
$wgDjvuDump
Path of the djvudump executable Enable this and $wgDjvuRenderer to enable djvu rendering example: $wg...
Definition: DefaultSettings.php:1579
DjVuHandler\getPageText
getPageText(File $image, $page)
Definition: DjVuHandler.php:449
DjVuHandler\mustRender
mustRender( $file)
Definition: DjVuHandler.php:50
wfMessage
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation use $formDescriptor instead default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html(usually something like "&lt
DjVuHandler\getDimensionInfo
getDimensionInfo(File $file)
Definition: DjVuHandler.php:404
$ext
$ext
Definition: router.php:55
wfShellExec
wfShellExec( $cmd, &$retval=null, $environ=[], $limits=[], $options=[])
Execute a shell command, with time and memory limits mirrored from the PHP configuration if supported...
Definition: GlobalFunctions.php:2210