MediaWiki REL1_28
DjVu.php
Go to the documentation of this file.
1<?php
30 const EXPENSIVE_SIZE_LIMIT = 10485760; // 10MiB
31
/**
 * Report whether DjVu handling is configured.
 *
 * A renderer must be set, together with at least one of the dump or
 * to-XML tools; otherwise a debug line is logged and false is returned.
 *
 * @return bool
 */
function isEnabled() {
	// These settings live in the global scope. Without this declaration they
	// would be fresh, unset locals and the check below could never pass.
	global $wgDjvuRenderer, $wgDjvuDump, $wgDjvuToXML;

	if ( !$wgDjvuRenderer || ( !$wgDjvuDump && !$wgDjvuToXML ) ) {
		wfDebug( "DjVu is disabled, please set \$wgDjvuRenderer and \$wgDjvuDump\n" );

		return false;
	}

	return true;
}
45
/**
 * Whether files of this type have to be rendered before display.
 * This handler answers yes unconditionally.
 *
 * @param mixed $file Ignored
 * @return bool Always true
 */
public function mustRender( $file ) {
	$needsRendering = true;

	return $needsRendering;
}
53
/**
 * Tell whether thumbnailing the given file counts as expensive, i.e. its
 * size is strictly greater than EXPENSIVE_SIZE_LIMIT.
 *
 * @param object $file Anything exposing getSize()
 * @return bool
 */
public function isExpensiveToThumbnail( $file ) {
	$size = $file->getSize();

	// Late static binding so a subclass may override the limit.
	return static::EXPENSIVE_SIZE_LIMIT < $size;
}
62
/**
 * Whether this handler treats files as multi-page documents.
 * This handler answers yes unconditionally.
 *
 * @param mixed $file Ignored
 * @return bool Always true
 */
public function isMultiPage( $file ) {
	$multiPage = true;

	return $multiPage;
}
70
/**
 * Map of 'img_*' keys to the handler parameter names they stand for.
 *
 * @return array 'img_width' => 'width', 'img_page' => 'page'
 */
public function getParamMap() {
	$map = [];
	$map['img_width'] = 'width';
	$map['img_page'] = 'page';

	return $map;
}
80
/**
 * Validate a single handler parameter.
 *
 * Accepts only 'width', 'height' and 'page', and only with a value greater
 * than zero. A 'page' value must additionally be a plain integer string.
 *
 * @param string $name Parameter name
 * @param mixed $value Parameter value
 * @return bool True if the name is known and the value acceptable
 */
public function validateParam( $name, $value ) {
	if ( $name === 'page' && trim( $value ) !== (string)intval( $value ) ) {
		// Extra junk on the end of page, probably actually a caption
		// e.g. [[File:Foo.djvu|thumb|Page 3 of the document shows foo]]
		return false;
	}

	// Strict comparison: a loose in_array() would let non-string names such
	// as true or 0 match the list under PHP's type juggling.
	if ( !in_array( $name, [ 'width', 'height', 'page' ], true ) ) {
		return false;
	}

	return !( $value <= 0 );
}
102
/**
 * Build the "page<N>-<W>px" parameter string.
 *
 * @param array $params Must contain 'width'; 'page' defaults to 1
 * @return string|bool The parameter string, or false when no width is set
 */
public function makeParamString( $params ) {
	if ( !isset( $params['width'] ) ) {
		return false;
	}

	$pageNumber = 1;
	if ( isset( $params['page'] ) ) {
		$pageNumber = $params['page'];
	}

	return 'page' . $pageNumber . '-' . $params['width'] . 'px';
}
115
/**
 * Inverse of makeParamString(): split "page<N>-<W>px" into its parts.
 *
 * @param string $str
 * @return array|bool [ 'width' => string, 'page' => string ], or false
 *  when the string does not have the expected form
 */
public function parseParamString( $str ) {
	$matches = [];
	if ( !preg_match( '/^page(\d+)-(\d+)px$/', $str, $matches ) ) {
		return false;
	}

	$pageNumber = $matches[1];
	$widthPx = $matches[2];

	return [ 'width' => $widthPx, 'page' => $pageNumber ];
}
128
134 return [
135 'width' => $params['width'],
136 'page' => $params['page'],
137 ];
138 }
139
/**
 * Render one page of a DjVu file to a thumbnail through the external
 * renderer, optionally piped through a post-processor command.
 *
 * @param object $image Source file; presumably a File (confirm against callers)
 * @param string $dstPath Filesystem path the thumbnail is written to
 * @param string $dstUrl URL handed to the resulting ThumbnailImage
 * @param array $params Transform parameters, normalised via normaliseParams()
 * @param int $flags Bitfield; with self::TRANSFORM_LATER nothing is rendered
 * @return ThumbnailImage|MediaTransformError|TransformParameterError
 */
function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) {
	// Both settings are globals; without this declaration they would be
	// unset locals, and the command below would have no executable.
	global $wgDjvuRenderer, $wgDjvuPostProcessor;

	if ( !$this->normaliseParams( $image, $params ) ) {
		return new TransformParameterError( $params );
	}
	$width = $params['width'];
	$height = $params['height'];
	$page = $params['page'];

	if ( $flags & self::TRANSFORM_LATER ) {
		$params = [
			'width' => $width,
			'height' => $height,
			'page' => $page
		];

		return new ThumbnailImage( $image, $dstUrl, $dstPath, $params );
	}

	if ( !wfMkdirParents( dirname( $dstPath ), null, __METHOD__ ) ) {
		return new MediaTransformError(
			'thumbnail_error',
			$width,
			$height,
			wfMessage( 'thumbnail_dest_directory' )->text()
		);
	}

	// Get local copy source for shell scripts
	// Thumbnail extraction is very inefficient for large files.
	// Provide a way to pool count limit the number of downloaders.
	if ( $image->getSize() >= 1e7 ) { // 10MB
		$work = new PoolCounterWorkViaCallback( 'GetLocalFileCopy', sha1( $image->getName() ),
			[
				'doWork' => function () use ( $image ) {
					return $image->getLocalRefPath();
				}
			]
		);
		$srcPath = $work->execute();
	} else {
		$srcPath = $image->getLocalRefPath();
	}

	if ( $srcPath === false ) { // Failed to get local copy
		wfDebugLog( 'thumbnail',
			sprintf( 'Thumbnail failed on %s: could not get local copy of "%s"',
				wfHostname(), $image->getName() ) );

		return new MediaTransformError( 'thumbnail_error',
			$width, $height,
			wfMessage( 'filemissing' )->text()
		);
	}

	# Use a subshell (brackets) to aggregate stderr from both pipeline commands
	# before redirecting it to the overall stdout. This works in both Linux and Windows XP.
	$cmd = '(' . wfEscapeShellArg(
		$wgDjvuRenderer,
		"-format=ppm",
		"-page={$page}",
		"-size={$params['physicalWidth']}x{$params['physicalHeight']}",
		$srcPath );
	if ( $wgDjvuPostProcessor ) {
		// The post-processor is a configured shell command and is appended verbatim.
		$cmd .= " | {$wgDjvuPostProcessor}";
	}
	$cmd .= ' > ' . wfEscapeShellArg( $dstPath ) . ') 2>&1';
	wfDebug( __METHOD__ . ": $cmd\n" );
	$retval = '';
	$err = wfShellExec( $cmd, $retval );

	$removed = $this->removeBadFile( $dstPath, $retval );
	if ( $retval != 0 || $removed ) {
		$this->logErrorForExternalProcess( $retval, $err, $cmd );

		return new MediaTransformError( 'thumbnail_error', $width, $height, $err );
	}

	$params = [
		'width' => $width,
		'height' => $height,
		'page' => $page
	];

	return new ThumbnailImage( $image, $dstUrl, $dstPath, $params );
}
234
/**
 * Get a DjVuImage for the given path, memoised on the file object.
 *
 * When $image is truthy the instance is stored in (and later read back
 * from) its dynamic 'dejaImage' property; otherwise a fresh instance is
 * built on every call.
 *
 * @param object|bool|null $image Object to cache on, or a falsy value
 * @param string $path Passed to the DjVuImage constructor
 * @return DjVuImage
 */
function getDjVuImage( $image, $path ) {
	if ( $image && isset( $image->dejaImage ) ) {
		return $image->dejaImage;
	}

	$deja = new DjVuImage( $path );
	if ( $image ) {
		$image->dejaImage = $deja;
	}

	return $deja;
}
253
/**
 * Get the file's metadata as raw XML, unserializing it when needed.
 *
 * @param File $file
 * @return string|bool The XML string; false when the stored metadata
 *  carries an 'error' entry. If the metadata is neither XML nor a
 *  serialized array, it is returned unchanged.
 * @throws MWException If the unserialized array has neither 'error' nor 'xml'
 */
private function getUnserializedMetadata( File $file ) {
	$metadata = $file->getMetadata();
	// Compare all five characters of the prologue; a three-character
	// prefix can never equal '<?xml', which left this branch dead.
	if ( substr( $metadata, 0, 5 ) === '<?xml' ) {
		// Old style. Not serialized but instead just a raw string of XML.
		return $metadata;
	}

	MediaWiki\suppressWarnings();
	$unser = unserialize( $metadata );
	MediaWiki\restoreWarnings();
	if ( is_array( $unser ) ) {
		if ( isset( $unser['error'] ) ) {
			return false;
		} elseif ( isset( $unser['xml'] ) ) {
			return $unser['xml'];
		} else {
			// Should never ever reach here.
			throw new MWException( "Error unserializing DjVu metadata." );
		}
	}

	// unserialize failed. Guess it wasn't really serialized after all,
	return $metadata;
}
285
/**
 * Parse the DjVu XML metadata into a tree and memoise it on the file.
 *
 * Two trees are kept as dynamic properties of $image: 'dejaMetaTree'
 * (structure) and 'djvuTextTree' (text layer). $gettext selects which one
 * is returned.
 *
 * @param File $image
 * @param bool $gettext True for the text tree, false for the meta tree
 * @return SimpleXMLElement|bool The requested tree, or false when the
 *  metadata is invalid, unparsable, or lacks that tree
 */
public function getMetaTree( $image, $gettext = false ) {
	// Serve from the per-file cache when the requested tree is already set.
	if ( $gettext ) {
		if ( isset( $image->djvuTextTree ) ) {
			return $image->djvuTextTree;
		}
	} elseif ( isset( $image->dejaMetaTree ) ) {
		return $image->dejaMetaTree;
	}

	$metadata = $this->getUnserializedMetadata( $image );
	if ( !$this->isMetadataValid( $image, $metadata ) ) {
		wfDebug( "DjVu XML metadata is invalid or missing, should have been fixed in upgradeRow\n" );

		return false;
	}

	MediaWiki\suppressWarnings();
	try {
		// Seed both slots with false (not null) so that a failed parse is
		// remembered and not retried on the next call.
		$image->dejaMetaTree = false;
		$image->djvuTextTree = false;
		$root = new SimpleXMLElement( $metadata, LIBXML_PARSEHUGE );
		if ( $root->getName() != 'mw-djvu' ) {
			// No wrapper element: the document itself is the meta tree.
			$image->dejaMetaTree = $root;
		} else {
			foreach ( $root->children() as $child ) {
				$childName = $child->getName();
				if ( $childName == 'DjVuTxt' ) {
					// @todo File::djvuTextTree and File::dejaMetaTree are declared
					// dynamically. Add a public File::$data to facilitate this?
					$image->djvuTextTree = $child;
				} elseif ( $childName == 'DjVuXML' ) {
					$image->dejaMetaTree = $child;
				}
			}
		}
	} catch ( Exception $e ) {
		wfDebug( "Bogus multipage XML metadata on '{$image->getName()}'\n" );
	}
	MediaWiki\restoreWarnings();

	return $gettext ? $image->djvuTextTree : $image->dejaMetaTree;
}
337
/**
 * Get the image size as reported by the DjVuImage for this file.
 *
 * @param object|bool|null $image Forwarded to getDjVuImage()
 * @param string $path Forwarded to getDjVuImage()
 * @return mixed Whatever DjVuImage::getImageSize() returns
 */
function getImageSize( $image, $path ) {
	$deja = $this->getDjVuImage( $image, $path );

	return $deja->getImageSize();
}
341
/**
 * Get the thumbnail extension and MIME type.
 *
 * None of the arguments are used: the result depends only on the
 * configured output extension.
 *
 * @param string $ext Unused
 * @param string $mime Unused
 * @param array|null $params Unused
 * @return array [ extension, MIME type guessed for that extension ]
 */
function getThumbType( $ext, $mime, $params = null ) {
	// The setting is a global; without this declaration it would be an
	// unset local variable.
	global $wgDjvuOutputExtension;

	// Memoised under its own name so the $mime parameter is not shadowed.
	static $thumbMime;
	if ( !isset( $thumbMime ) ) {
		$magic = MimeMagic::singleton();
		$thumbMime = $magic->guessTypesForExtension( $wgDjvuOutputExtension );
	}

	return [ $wgDjvuOutputExtension, $thumbMime ];
}
352
/**
 * Extract the metadata for a file and wrap it in a serialized array.
 *
 * @param object|bool|null $image Forwarded to getDjVuImage()
 * @param string $path Path of the file to inspect
 * @return string serialize()d array: [ 'xml' => ... ] on success, or
 *  [ 'error' => ... ] when extraction returned false
 */
function getMetadata( $image, $path ) {
	wfDebug( "Getting DjVu metadata for $path\n" );

	$deja = $this->getDjVuImage( $image, $path );
	$xml = $deja->retrieveMetaData();

	// The error marker is a special value so that a broken file is not
	// decoded over and over again.
	$payload = ( $xml === false )
		? [ 'error' => 'Error extracting metadata' ]
		: [ 'xml' => $xml ];

	return serialize( $payload );
}
364
366 return 'djvuxml';
367 }
368
/**
 * Check that a metadata value is usable: it must be non-empty and must
 * not be the serialization of an empty array.
 *
 * @param mixed $image Unused
 * @param mixed $metadata
 * @return bool
 */
function isMetadataValid( $image, $metadata ) {
	if ( empty( $metadata ) ) {
		return false;
	}

	$emptySerialized = serialize( [] );

	return $metadata != $emptySerialized;
}
372
/**
 * Number of pages in the document.
 *
 * @param File $image
 * @return int|bool The page count, or false when no dimension info is
 *  available
 */
function pageCount( File $image ) {
	$info = $this->getDimensionInfo( $image );
	if ( !$info ) {
		return false;
	}

	return $info['pageCount'];
}
378
380 $index = $page - 1; // MW starts pages at 1
381
382 $info = $this->getDimensionInfo( $image );
383 if ( $info && isset( $info['dimensionsByPage'][$index] ) ) {
384 return $info['dimensionsByPage'][$index];
385 }
386
387 return false;
388 }
389
/**
 * Get the page count and per-page dimensions of a file, going through
 * the main WAN cache under a key derived from the file's SHA-1.
 *
 * @param File $file
 * @return array|bool [ 'pageCount' => int, 'dimensionsByPage' => array ]
 *  where each page entry is [ 'width' => int, 'height' => int ] or false;
 *  false when no metadata tree is available
 */
protected function getDimensionInfo( File $file ) {
	$cache = ObjectCache::getMainWANInstance();
	$key = $cache->makeKey( 'file-djvu', 'dimensions', $file->getSha1() );

	// Builds the value on a cache miss.
	$computeDimensions = function () use ( $file ) {
		$tree = $this->getMetaTree( $file );
		if ( !$tree ) {
			return false;
		}

		$pageTotal = count( $tree->xpath( '//OBJECT' ) );
		$dimsByPage = [];
		for ( $idx = 0; $idx < $pageTotal; $idx++ ) {
			$object = $tree->BODY[0]->OBJECT[$idx];
			$dimsByPage[$idx] = $object
				? [
					'width' => (int)$object['width'],
					'height' => (int)$object['height'],
				]
				: false;
		}

		return [ 'pageCount' => $pageTotal, 'dimensionsByPage' => $dimsByPage ];
	};

	return $cache->getWithSetCallback(
		$key,
		$cache::TTL_INDEFINITE,
		$computeDimensions,
		[ 'pcTTL' => $cache::TTL_INDEFINITE ]
	);
}
420
427 $tree = $this->getMetaTree( $image, true );
428 if ( !$tree ) {
429 return false;
430 }
431
432 $o = $tree->BODY[0]->PAGE[$page - 1];
433 if ( $o ) {
434 $txt = $o['value'];
435
436 return $txt;
437 } else {
438 return false;
439 }
440 }
441}
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
serialize()
unserialize( $serialized)
$wgDjvuRenderer
Path of the ddjvu DJVU renderer Enable this and $wgDjvuDump to enable djvu rendering example: $wgDjvu...
$wgDjvuOutputExtension
File extension for the DJVU post processor output.
$wgDjvuToXML
Path of the djvutoxml executable This works like djvudump except much, much slower as of version 3....
$wgDjvuPostProcessor
Shell command for the DJVU post processor Default: pnmtojpeg, since ddjvu generates ppm output Set th...
$wgDjvuDump
Path of the djvudump executable Enable this and $wgDjvuRenderer to enable djvu rendering example: $wg...
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfShellExec( $cmd, &$retval=null, $environ=[], $limits=[], $options=[])
Execute a shell command, with time and memory limits mirrored from the PHP configuration if supported...
wfHostname()
Fetch server name for use in error reporting etc.
wfEscapeShellArg()
Version of escapeshellarg() that works better on Windows.
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not.
wfMkdirParents( $dir, $mode=null, $caller=null)
Make directory, and make all parent directories if they don't exist.
Handler for DjVu images.
Definition DjVu.php:29
getDjVuImage( $image, $path)
Cache an instance of DjVuImage in an Image object, return that instance.
Definition DjVu.php:242
makeParamString( $params)
Definition DjVu.php:107
isEnabled()
Definition DjVu.php:35
isExpensiveToThumbnail( $file)
True if creating thumbnails from the file is large or otherwise resource-intensive.
Definition DjVu.php:59
validateParam( $name, $value)
Definition DjVu.php:86
isMultiPage( $file)
Definition DjVu.php:67
getDimensionInfo(File $file)
Definition DjVu.php:390
const EXPENSIVE_SIZE_LIMIT
Definition DjVu.php:30
getScriptParams( $params)
Definition DjVu.php:133
doTransform( $image, $dstPath, $dstUrl, $params, $flags=0)
Definition DjVu.php:148
pageCount(File $image)
Page count for a multi-page document, false if unsupported or unknown.
Definition DjVu.php:373
getParamMap()
Definition DjVu.php:74
getPageText(File $image, $page)
Definition DjVu.php:426
isMetadataValid( $image, $metadata)
Check if the metadata string is valid for this handler.
Definition DjVu.php:369
getPageDimensions(File $image, $page)
Get an associative array of page dimensions Currently "width" and "height" are understood,...
Definition DjVu.php:379
getMetadataType( $image)
Get a string describing the type of metadata, for display purposes.
Definition DjVu.php:365
getMetaTree( $image, $gettext=false)
Cache a document tree for the DjVu XML metadata.
Definition DjVu.php:292
getThumbType( $ext, $mime, $params=null)
Get the thumbnail extension and MIME type for a given source MIME type.
Definition DjVu.php:342
getUnserializedMetadata(File $file)
Get metadata, unserializing it if necessary.
Definition DjVu.php:261
mustRender( $file)
Definition DjVu.php:50
parseParamString( $str)
Definition DjVu.php:120
getMetadata( $image, $path)
Get handler-specific metadata which will be saved in the img_metadata field.
Definition DjVu.php:353
getImageSize( $image, $path)
Get an image size array like that returned by getimagesize(), or false if it can't be determined.
Definition DjVu.php:338
Support for detecting/validating DjVu image files and getting some basic file metadata (resolution et...
Definition DjVuImage.php:36
Implements some public methods and some protected utility functions which are required by multiple ch...
Definition File.php:50
getSha1()
Get the SHA-1 base 36 hash of the file.
Definition File.php:2116
getMetadata()
Get handler-specific metadata Overridden by LocalFile, UnregisteredLocalFile STUB.
Definition File.php:638
Media handler abstract base class for images.
normaliseParams( $image, &$params)
MediaWiki exception.
logErrorForExternalProcess( $retval, $err, $cmd)
Log an error that occurred in an external process.
removeBadFile( $dstPath, $retval=0)
Check for zero-sized thumbnails.
Basic media transform error class.
static singleton()
Get an instance of this class.
Definition MimeMagic.php:29
Convenience class for dealing with PoolCounters using callbacks.
execute( $skipcache=false)
Get the result of the work (whatever it is), or the result of the error() function.
Media transform output for images.
Shortcut class for parameter validation errors.
when a variable name is used in a function
Definition design.txt:94
when a variable name is used in a it is silently declared as a new local masking the global
Definition design.txt:95
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add text
Definition design.txt:18
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
please add to it if you re going to add events to the MediaWiki code where normally authentication against an external auth plugin would be creating a local account incomplete not yet checked for validity & $retval
Definition hooks.txt:268
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable modifiable after all normalizations have been except for the $wgMaxImageArea check $image
Definition hooks.txt:917
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html(usually something like "&lt;div ...>$1&lt;/div>"). - flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException':Called before an exception(or PHP error) is logged. This is meant for integration with external error aggregation services
it s the revision text itself In either if gzip is the revision text is gzipped $flags
Definition hooks.txt:2710
Allows to change the fields on the form that will be generated $name
Definition hooks.txt:304
namespace are movable Hooks may change this value to override the return value of MWNamespace::isMovable(). 'NewDifferenceEngine' do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values before the output is cached $page
Definition hooks.txt:2534
returning false will NOT prevent logging $e
Definition hooks.txt:2110
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition injection.txt:37
$cache
Definition mcc.php:33
if( $ext=='php'|| $ext=='php5') $mime
Definition router.php:65
$params