MediaWiki  1.33.0
DjVuHandler.php
Go to the documentation of this file.
1 <?php
25 
/**
 * Handler for DjVu images.
 *
 * Thumbnails are produced by shelling out to the renderer configured in
 * $wgDjvuRenderer (optionally piped through $wgDjvuPostProcessor); page
 * count, page dimensions and page text are read from the XML metadata
 * stored for the file.
 */
class DjVuHandler extends ImageHandler {
	const EXPENSIVE_SIZE_LIMIT = 10485760; // 10MiB

	/**
	 * Check whether the configuration needed for DjVu support is present.
	 *
	 * @return bool True when a renderer and at least one of the dump / toXML
	 *  tools are configured
	 */
	public function isEnabled() {
		global $wgDjvuRenderer, $wgDjvuDump, $wgDjvuToXML;

		if ( !$wgDjvuRenderer || ( !$wgDjvuDump && !$wgDjvuToXML ) ) {
			wfDebug( "DjVu is disabled, please set \$wgDjvuRenderer and \$wgDjvuDump\n" );

			return false;
		}

		return true;
	}

	/**
	 * @param File $file
	 * @return bool Always true for this handler
	 */
	public function mustRender( $file ) {
		return true;
	}

	/**
	 * True if creating thumbnails from the file is large or otherwise
	 * resource-intensive.
	 *
	 * @param File $file
	 * @return bool True when the file size exceeds EXPENSIVE_SIZE_LIMIT
	 */
	public function isExpensiveToThumbnail( $file ) {
		return $file->getSize() > static::EXPENSIVE_SIZE_LIMIT;
	}

	/**
	 * @param File $file
	 * @return bool Always true for this handler
	 */
	public function isMultiPage( $file ) {
		return true;
	}

	/**
	 * Map of magic-word style parameter names to handler parameter names.
	 *
	 * @return array
	 */
	public function getParamMap() {
		return [
			'img_width' => 'width',
			'img_page' => 'page',
		];
	}

	/**
	 * Validate a single handler parameter.
	 *
	 * @param string $name Parameter name ('width', 'height' or 'page')
	 * @param mixed $value Parameter value
	 * @return bool True when the name is known and the value is positive
	 */
	public function validateParam( $name, $value ) {
		if ( $name === 'page' && trim( $value ) !== (string)intval( $value ) ) {
			// Extra junk on the end of page, probably actually a caption
			// e.g. [[File:Foo.djvu|thumb|Page 3 of the document shows foo]]
			return false;
		}

		if ( !in_array( $name, [ 'width', 'height', 'page' ] ) ) {
			return false;
		}

		return $value > 0;
	}

	/**
	 * Build the "page<N>-<W>px" string describing a set of parameters.
	 *
	 * @param array $params May contain 'page' (defaults to 1); must contain 'width'
	 * @return string|bool The parameter string, or false when 'width' is not set
	 */
	public function makeParamString( $params ) {
		$page = $params['page'] ?? 1;
		if ( !isset( $params['width'] ) ) {
			return false;
		}

		return "page{$page}-{$params['width']}px";
	}

	/**
	 * Inverse of makeParamString().
	 *
	 * @param string $str
	 * @return array|bool Array with 'width' and 'page', or false when $str
	 *  does not have the "page<N>-<W>px" form
	 */
	public function parseParamString( $str ) {
		$m = false;
		if ( preg_match( '/^page(\d+)-(\d+)px$/', $str, $m ) ) {
			return [ 'width' => $m[2], 'page' => $m[1] ];
		}

		return false;
	}

	/**
	 * @param array $params Must contain 'width' and 'page'
	 * @return array The 'width' and 'page' entries of $params
	 */
	protected function getScriptParams( $params ) {
		return [
			'width' => $params['width'],
			'page' => $params['page'],
		];
	}

	/**
	 * Render one page of the document to $dstPath.
	 *
	 * @param File $image
	 * @param string $dstPath Filesystem path the thumbnail is written to
	 * @param string $dstUrl URL of the thumbnail
	 * @param array $params Transform parameters; normalised in place by
	 *  normaliseParams() before use
	 * @param int $flags Bit field; self::TRANSFORM_LATER skips the rendering
	 * @return MediaTransformOutput ThumbnailImage on success (or when rendering
	 *  is deferred), TransformParameterError / MediaTransformError on failure
	 */
	public function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) {
		global $wgDjvuRenderer, $wgDjvuPostProcessor;

		if ( !$this->normaliseParams( $image, $params ) ) {
			return new TransformParameterError( $params );
		}
		$width = $params['width'];
		$height = $params['height'];
		$page = $params['page'];

		if ( $flags & self::TRANSFORM_LATER ) {
			$params = [
				'width' => $width,
				'height' => $height,
				'page' => $page
			];

			return new ThumbnailImage( $image, $dstUrl, $dstPath, $params );
		}

		if ( !wfMkdirParents( dirname( $dstPath ), null, __METHOD__ ) ) {
			return new MediaTransformError(
				'thumbnail_error',
				$width,
				$height,
				wfMessage( 'thumbnail_dest_directory' )
			);
		}

		// Get local copy source for shell scripts
		// Thumbnail extraction is very inefficient for large files.
		// Provide a way to pool count limit the number of downloaders.
		if ( $image->getSize() >= 1e7 ) { // 10MB
			$work = new PoolCounterWorkViaCallback( 'GetLocalFileCopy', sha1( $image->getName() ),
				[
					'doWork' => function () use ( $image ) {
						return $image->getLocalRefPath();
					}
				]
			);
			$srcPath = $work->execute();
		} else {
			$srcPath = $image->getLocalRefPath();
		}

		if ( $srcPath === false ) { // Failed to get local copy
			wfDebugLog( 'thumbnail',
				sprintf( 'Thumbnail failed on %s: could not get local copy of "%s"',
					wfHostname(), $image->getName() ) );

			return new MediaTransformError( 'thumbnail_error',
				$params['width'], $params['height'],
				wfMessage( 'filemissing' )
			);
		}

		# Use a subshell (brackets) to aggregate stderr from both pipeline commands
		# before redirecting it to the overall stdout. This works in both Linux and Windows XP.
		$cmd = '(' . \MediaWiki\Shell\Shell::escape(
			// The renderer executable must come first, ahead of its options.
			$wgDjvuRenderer,
			"-format=ppm",
			"-page={$page}",
			"-size={$params['physicalWidth']}x{$params['physicalHeight']}",
			$srcPath );
		if ( $wgDjvuPostProcessor ) {
			// Appended verbatim: the setting is a shell command, not a single argument.
			$cmd .= " | {$wgDjvuPostProcessor}";
		}
		$cmd .= ' > ' . \MediaWiki\Shell\Shell::escape( $dstPath ) . ') 2>&1';
		wfDebug( __METHOD__ . ": $cmd\n" );
		$retval = '';
		$err = wfShellExec( $cmd, $retval );

		// removeBadFile() checks for zero-sized thumbnails.
		$removed = $this->removeBadFile( $dstPath, $retval );
		if ( $retval != 0 || $removed ) {
			$this->logErrorForExternalProcess( $retval, $err, $cmd );

			return new MediaTransformError( 'thumbnail_error', $width, $height, $err );
		}

		$params = [
			'width' => $width,
			'height' => $height,
			'page' => $page
		];

		return new ThumbnailImage( $image, $dstUrl, $dstPath, $params );
	}

	/**
	 * Cache an instance of DjVuImage in an Image object, return that instance.
	 *
	 * @param File|bool|null $image When falsy, a fresh uncached DjVuImage is returned
	 * @param string $path
	 * @return DjVuImage
	 */
	public function getDjVuImage( $image, $path ) {
		if ( !$image ) {
			$deja = new DjVuImage( $path );
		} elseif ( !isset( $image->dejaImage ) ) {
			$deja = $image->dejaImage = new DjVuImage( $path );
		} else {
			$deja = $image->dejaImage;
		}

		return $deja;
	}

	/**
	 * Get metadata, unserializing it if necessary.
	 *
	 * @param File $file
	 * @return string|bool The XML string; false when the stored metadata
	 *  records an extraction error
	 * @throws MWException When the metadata unserializes to an array with
	 *  neither an 'error' nor an 'xml' key
	 */
	private function getUnserializedMetadata( File $file ) {
		$metadata = $file->getMetadata();
		// Compare the same number of characters as the literal; a 3-character
		// prefix can never equal the 5-character '<?xml'.
		if ( substr( $metadata, 0, 5 ) === '<?xml' ) {
			// Old style. Not serialized but instead just a raw string of XML.
			return $metadata;
		}

		Wikimedia\suppressWarnings();
		$unser = unserialize( $metadata );
		Wikimedia\restoreWarnings();
		if ( is_array( $unser ) ) {
			if ( isset( $unser['error'] ) ) {
				return false;
			} elseif ( isset( $unser['xml'] ) ) {
				return $unser['xml'];
			}

			// Should never ever reach here.
			throw new MWException( "Error unserializing DjVu metadata." );
		}

		// unserialize failed. Guess it wasn't really serialized after all,
		return $metadata;
	}

	/**
	 * Cache a document tree for the DjVu XML metadata.
	 *
	 * Both trees are stored on $image (djvuTextTree / dejaMetaTree) so that
	 * later calls return them without re-parsing.
	 *
	 * @param File $image
	 * @param bool $gettext When true return the text tree, otherwise the meta tree
	 * @return SimpleXMLElement|bool The requested tree, or false when the
	 *  metadata is invalid or the tree could not be extracted
	 */
	public function getMetaTree( $image, $gettext = false ) {
		if ( $gettext && isset( $image->djvuTextTree ) ) {
			return $image->djvuTextTree;
		}
		if ( !$gettext && isset( $image->dejaMetaTree ) ) {
			return $image->dejaMetaTree;
		}

		$metadata = $this->getUnserializedMetadata( $image );
		if ( !$this->isMetadataValid( $image, $metadata ) ) {
			wfDebug( "DjVu XML metadata is invalid or missing, should have been fixed in upgradeRow\n" );

			return false;
		}

		$trees = $this->extractTreesFromMetadata( $metadata );
		$image->djvuTextTree = $trees['TextTree'];
		$image->dejaMetaTree = $trees['MetaTree'];

		return $gettext ? $image->djvuTextTree : $image->dejaMetaTree;
	}

	/**
	 * Extracts metadata and text trees from metadata XML in string form.
	 *
	 * @param string $metadata XML metadata as a string
	 * @return array Array with 'MetaTree' and 'TextTree' keys; each value is a
	 *  SimpleXMLElement, or false when that tree is absent or parsing failed
	 */
	protected function extractTreesFromMetadata( $metadata ) {
		// Set to false rather than null to avoid further attempts
		$metaTree = false;
		$textTree = false;

		Wikimedia\suppressWarnings();
		try {
			$tree = new SimpleXMLElement( $metadata, LIBXML_PARSEHUGE );
			if ( $tree->getName() == 'mw-djvu' ) {
				/** @var SimpleXMLElement $b */
				foreach ( $tree->children() as $b ) {
					if ( $b->getName() == 'DjVuTxt' ) {
						// @todo File::djvuTextTree and File::dejaMetaTree are declared
						// dynamically. Add a public File::$data to facilitate this?
						$textTree = $b;
					} elseif ( $b->getName() == 'DjVuXML' ) {
						$metaTree = $b;
					}
				}
			} else {
				// No wrapper element: the whole document is the meta tree.
				$metaTree = $tree;
			}
		} catch ( Exception $e ) {
			wfDebug( "Bogus multipage XML metadata\n" );
		}
		Wikimedia\restoreWarnings();

		return [ 'MetaTree' => $metaTree, 'TextTree' => $textTree ];
	}

	/**
	 * Get an image size array like that returned by getimagesize(), or false
	 * if it can't be determined.
	 *
	 * @param File|bool|null $image
	 * @param string $path
	 * @return array|bool
	 */
	public function getImageSize( $image, $path ) {
		return $this->getDjVuImage( $image, $path )->getImageSize();
	}

	/**
	 * Get the thumbnail extension and MIME type for a given source MIME type.
	 *
	 * The arguments are not consulted: the result depends only on
	 * $wgDjvuOutputExtension. The MIME lookup is memoised in a static.
	 *
	 * @param string $ext Unused
	 * @param string $mime Unused
	 * @param array|null $params Unused
	 * @return array [ extension, MIME type ]
	 */
	public function getThumbType( $ext, $mime, $params = null ) {
		global $wgDjvuOutputExtension;
		// Deliberately not named $mime, so the parameter is not shadowed.
		static $djvuMime = null;

		if ( $djvuMime === null ) {
			$magic = \MediaWiki\MediaWikiServices::getInstance()->getMimeAnalyzer();
			$djvuMime = $magic->guessTypesForExtension( $wgDjvuOutputExtension );
		}

		return [ $wgDjvuOutputExtension, $djvuMime ];
	}

	/**
	 * Get handler-specific metadata which will be saved in the img_metadata field.
	 *
	 * @param File|bool|null $image
	 * @param string $path
	 * @return string Serialized array holding either an 'xml' or an 'error' key
	 */
	public function getMetadata( $image, $path ) {
		wfDebug( "Getting DjVu metadata for $path\n" );

		$xml = $this->getDjVuImage( $image, $path )->retrieveMetaData();
		if ( $xml === false ) {
			// Special value so that we don't repetitively try and decode a broken file.
			return serialize( [ 'error' => 'Error extracting metadata' ] );
		}

		return serialize( [ 'xml' => $xml ] );
	}

	/**
	 * Get a string describing the type of metadata, for display purposes.
	 *
	 * @param File $image
	 * @return string Always 'djvuxml'
	 */
	public function getMetadataType( $image ) {
		return 'djvuxml';
	}

	/**
	 * Check if the metadata string is valid for this handler.
	 *
	 * @param File $image
	 * @param string|bool $metadata
	 * @return bool False for empty metadata or a serialized empty array
	 */
	public function isMetadataValid( $image, $metadata ) {
		return !empty( $metadata ) && $metadata != serialize( [] );
	}

	/**
	 * Page count for a multi-page document, false if unsupported or unknown.
	 *
	 * @param File $image
	 * @return int|bool
	 */
	public function pageCount( File $image ) {
		$info = $this->getDimensionInfo( $image );

		return $info ? $info['pageCount'] : false;
	}

	/**
	 * Get an associative array of page dimensions ("width" and "height").
	 *
	 * @param File $image
	 * @param int $page Page number, starting at 1
	 * @return array|bool Dimensions for the page, or false when unknown
	 */
	public function getPageDimensions( File $image, $page ) {
		$index = $page - 1; // MW starts pages at 1

		$info = $this->getDimensionInfo( $image );
		if ( $info && isset( $info['dimensionsByPage'][$index] ) ) {
			return $info['dimensionsByPage'][$index];
		}

		return false;
	}

	/**
	 * Fetch dimension information for a file through the main WAN object
	 * cache, keyed on the file's SHA-1.
	 *
	 * @param File $file
	 * @return array|bool Array with 'pageCount' and 'dimensionsByPage' keys,
	 *  or false when no metadata tree is available
	 */
	protected function getDimensionInfo( File $file ) {
		$cache = \MediaWiki\MediaWikiServices::getInstance()->getMainWANObjectCache();

		return $cache->getWithSetCallback(
			$cache->makeKey( 'file-djvu', 'dimensions', $file->getSha1() ),
			$cache::TTL_INDEFINITE,
			function () use ( $file ) {
				$tree = $this->getMetaTree( $file );

				return $this->getDimensionInfoFromMetaTree( $tree );
			},
			[ 'pcTTL' => $cache::TTL_INDEFINITE ]
		);
	}

	/**
	 * Given an XML metadata tree, returns dimension information about the document.
	 *
	 * @param SimpleXMLElement|bool $metatree The file's metadata tree
	 * @return array|bool Array with 'pageCount' (int) and 'dimensionsByPage'
	 *  (0-based list of [ 'width' => int, 'height' => int ] or false), or
	 *  false when $metatree is falsy
	 */
	protected function getDimensionInfoFromMetaTree( $metatree ) {
		if ( !$metatree ) {
			return false;
		}

		$dimsByPage = [];
		$count = count( $metatree->xpath( '//OBJECT' ) );
		for ( $i = 0; $i < $count; $i++ ) {
			$o = $metatree->BODY[0]->OBJECT[$i];
			if ( $o ) {
				$dimsByPage[$i] = [
					'width' => (int)$o['width'],
					'height' => (int)$o['height'],
				];
			} else {
				$dimsByPage[$i] = false;
			}
		}

		return [ 'pageCount' => $count, 'dimensionsByPage' => $dimsByPage ];
	}

	/**
	 * Get the text of one page from the text tree.
	 *
	 * @param File $image
	 * @param int $page Page number, starting at 1
	 * @return SimpleXMLElement|bool The 'value' attribute of the page element,
	 *  or false when the text tree or the page is unavailable
	 */
	public function getPageText( File $image, $page ) {
		$tree = $this->getMetaTree( $image, true );
		if ( !$tree ) {
			return false;
		}

		$o = $tree->BODY[0]->PAGE[$page - 1];
		if ( !$o ) {
			return false;
		}

		return $o['value'];
	}
}
MediaHandler\removeBadFile
removeBadFile( $dstPath, $retval=0)
Check for zero-sized thumbnails.
Definition: MediaHandler.php:675
DjVuHandler\isMetadataValid
isMetadataValid( $image, $metadata)
Check if the metadata string is valid for this handler.
Definition: DjVuHandler.php:385
MediaWiki\Shell\Shell
Executes shell commands.
Definition: Shell.php:44
MediaTransformError
Basic media transform error class.
Definition: MediaTransformError.php:29
ThumbnailImage
Media transform output for images.
Definition: ThumbnailImage.php:29
$file
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Definition: router.php:42
DjVuHandler\getParamMap
getParamMap()
Definition: DjVuHandler.php:76
DjVuHandler\getDimensionInfoFromMetaTree
getDimensionInfoFromMetaTree( $metatree)
Given an XML metadata tree, returns dimension information about the document.
Definition: DjVuHandler.php:424
wfMkdirParents
wfMkdirParents( $dir, $mode=null, $caller=null)
Make directory, and make all parent directories if they don't exist.
Definition: GlobalFunctions.php:2008
captcha-old.count
count
Definition: captcha-old.py:249
DjVuHandler\isMultiPage
isMultiPage( $file)
Definition: DjVuHandler.php:69
DjVuHandler\parseParamString
parseParamString( $str)
Definition: DjVuHandler.php:122
DjVuHandler\extractTreesFromMetadata
extractTreesFromMetadata( $metadata)
Extracts metadata and text trees from metadata XML in string form.
Definition: DjVuHandler.php:325
$wgDjvuPostProcessor
$wgDjvuPostProcessor
Shell command for the DJVU post processor Default: pnmtojpeg, since ddjvu generates ppm output Set th...
Definition: DefaultSettings.php:1627
DjVuHandler\getUnserializedMetadata
getUnserializedMetadata(File $file)
Get metadata, unserializing it if necessary.
Definition: DjVuHandler.php:263
DjVuHandler\pageCount
pageCount(File $image)
Page count for a multi-page document, false if unsupported or unknown.
Definition: DjVuHandler.php:389
DjVuHandler\isEnabled
isEnabled()
Definition: DjVuHandler.php:37
$params
$params
Definition: styleTest.css.php:44
wfHostname
wfHostname()
Fetch server name for use in error reporting etc.
Definition: GlobalFunctions.php:1352
PoolCounterWorkViaCallback
Convenience class for dealing with PoolCounters using callbacks.
Definition: PoolCounterWorkViaCallback.php:28
serialize
serialize()
Definition: ApiMessageTrait.php:134
DjVuHandler\makeParamString
makeParamString( $params)
Definition: DjVuHandler.php:109
$wgDjvuRenderer
$wgDjvuRenderer
Path of the ddjvu DJVU renderer Enable this and $wgDjvuDump to enable djvu rendering example: $wgDjvu...
Definition: DefaultSettings.php:1597
wfDebugLog
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not.
Definition: GlobalFunctions.php:1043
DjVuHandler\EXPENSIVE_SIZE_LIMIT
const EXPENSIVE_SIZE_LIMIT
Definition: DjVuHandler.php:32
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
DjVuHandler\getMetadata
getMetadata( $image, $path)
Get handler-specific metadata which will be saved in the img_metadata field.
Definition: DjVuHandler.php:369
DjVuHandler\getDjVuImage
getDjVuImage( $image, $path)
Cache an instance of DjVuImage in an Image object, return that instance.
Definition: DjVuHandler.php:244
File
Implements some public methods and some protected utility functions which are required by multiple ch...
Definition: File.php:52
MWException
MediaWiki exception.
Definition: MWException.php:26
PoolCounterWork\execute
execute( $skipcache=false)
Get the result of the work (whatever it is), or the result of the error() function.
Definition: PoolCounterWork.php:104
$wgDjvuToXML
$wgDjvuToXML
Path of the djvutoxml executable This works like djvudump except much, much slower as of version 3....
Definition: DefaultSettings.php:1620
DjVuHandler\getPageDimensions
getPageDimensions(File $image, $page)
Get an associative array of page dimensions Currently "width" and "height" are understood,...
Definition: DjVuHandler.php:395
ImageHandler
Media handler abstract base class for images.
Definition: ImageHandler.php:29
DjVuHandler\isExpensiveToThumbnail
isExpensiveToThumbnail( $file)
True if creating thumbnails from the file is large or otherwise resource-intensive.
Definition: DjVuHandler.php:61
use
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to use
Definition: MIT-LICENSE.txt:10
DjVuHandler\validateParam
validateParam( $name, $value)
Definition: DjVuHandler.php:88
$wgDjvuOutputExtension
$wgDjvuOutputExtension
File extension for the DJVU post processor output.
Definition: DefaultSettings.php:1632
$image
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not it can be in the form of< username >< more info > e g for bot passwords intended to be added to log contexts Fields it might only if the login was with a bot password it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable modifiable after all normalizations have been except for the $wgMaxImageArea check $image
Definition: hooks.txt:780
wfDebug
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Definition: GlobalFunctions.php:949
ImageHandler\normaliseParams
normaliseParams( $image, &$params)
Definition: ImageHandler.php:86
$name
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:271
MediaWiki\MediaWikiServices\getInstance
static getInstance()
Returns the global default instance of the top level service locator.
Definition: MediaWikiServices.php:124
DjVuHandler\getImageSize
getImageSize( $image, $path)
Get an image size array like that returned by getimagesize(), or false if it can't be determined.
Definition: DjVuHandler.php:354
$e
div flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException' returning false will NOT prevent logging $e
Definition: hooks.txt:2162
$value
$value
Definition: styleTest.css.php:49
TransformParameterError
Shortcut class for parameter validation errors.
Definition: TransformParameterError.php:29
DjVuHandler\getMetadataType
getMetadataType( $image)
Get a string describing the type of metadata, for display purposes.
Definition: DjVuHandler.php:381
DjVuHandler\getScriptParams
getScriptParams( $params)
Definition: DjVuHandler.php:135
DjVuHandler
Handler for DjVu images.
Definition: DjVuHandler.php:31
unserialize
unserialize( $serialized)
Definition: ApiMessageTrait.php:142
$cache
$cache
Definition: mcc.php:33
DjVuHandler\getThumbType
getThumbType( $ext, $mime, $params=null)
Get the thumbnail extension and MIME type for a given source MIME type.
Definition: DjVuHandler.php:358
MediaHandler\logErrorForExternalProcess
logErrorForExternalProcess( $retval, $err, $cmd)
Log an error that occurred in an external process.
Definition: MediaHandler.php:753
DjVuHandler\doTransform
doTransform( $image, $dstPath, $dstUrl, $params, $flags=0)
Definition: DjVuHandler.php:150
$path
$path
Definition: NoLocalSettings.php:25
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
DjVuHandler\getMetaTree
getMetaTree( $image, $gettext=false)
Cache a document tree for the DjVu XML metadata.
Definition: DjVuHandler.php:294
$ext
if(!is_readable( $file)) $ext
Definition: router.php:48
$wgDjvuDump
$wgDjvuDump
Path of the djvudump executable Enable this and $wgDjvuRenderer to enable djvu rendering example: $wg...
Definition: DefaultSettings.php:1590
MediaWikiServices
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency MediaWikiServices
Definition: injection.txt:23
DjVuHandler\getPageText
getPageText(File $image, $page)
Definition: DjVuHandler.php:451
DjVuHandler\mustRender
mustRender( $file)
Definition: DjVuHandler.php:52
wfMessage
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation use $formDescriptor instead default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html(usually something like "&lt
DjVuHandler\getDimensionInfo
getDimensionInfo(File $file)
Definition: DjVuHandler.php:406
wfShellExec
wfShellExec( $cmd, &$retval=null, $environ=[], $limits=[], $options=[])
Execute a shell command, with time and memory limits mirrored from the PHP configuration if supported...
Definition: GlobalFunctions.php:2168