MediaWiki REL1_32
PdfHandler.php
Go to the documentation of this file.
1<?php
24class PdfHandler extends ImageHandler {
25 public static $messages = [
26 'main' => 'pdf-file-page-warning',
27 'header' => 'pdf-file-page-warning-header',
28 'info' => 'pdf-file-page-warning-info',
29 'footer' => 'pdf-file-page-warning-footer',
30 ];
31
/**
 * Report whether the handler has been configured.
 *
 * Logs a hint through wfDebug() when any of the required globals is unset.
 *
 * @return bool True when $wgPdfProcessor, $wgPdfPostProcessor and $wgPdfInfo are all set
 */
function isEnabled() {
	global $wgPdfProcessor, $wgPdfPostProcessor, $wgPdfInfo;

	// isset() with several arguments is true only when every one of them is set.
	if ( isset( $wgPdfProcessor, $wgPdfPostProcessor, $wgPdfInfo ) ) {
		return true;
	}

	wfDebug( "PdfHandler is disabled, please set the following\n" );
	wfDebug( "variables in LocalSettings.php:\n" );
	wfDebug( "\$wgPdfProcessor, \$wgPdfPostProcessor, \$wgPdfInfo\n" );

	return false;
}
46
/**
 * Always answers true, whatever file is passed.
 *
 * @param mixed $file Unused
 * @return bool Always true
 */
function mustRender( $file ) {
	return true;
}
54
/**
 * Always answers true, whatever file is passed.
 *
 * @param mixed $file Unused
 * @return bool Always true
 */
function isMultiPage( $file ) {
	return true;
}
62
/**
 * Check a single transform parameter.
 *
 * Only 'width', 'height' and 'page' are accepted, and each must be
 * greater than zero. A 'page' value must additionally be a plain integer
 * string once surrounding whitespace is trimmed.
 *
 * @param string $name Parameter name
 * @param string $value Parameter value
 * @return bool Whether the name/value pair is acceptable
 */
function validateParam( $name, $value ) {
	$isPage = ( $name === 'page' );

	// Extra junk on the end of page, probably actually a caption
	// e.g. [[File:Foo.pdf|thumb|Page 3 of the document shows foo]]
	if ( $isPage && (string)intval( $value ) !== trim( $value ) ) {
		return false;
	}

	if ( !in_array( $name, [ 'width', 'height', 'page' ] ) ) {
		return false;
	}

	return ( $value > 0 );
}
79
85 $page = isset( $params['page'] ) ? $params['page'] : 1;
86 if ( !isset( $params['width'] ) ) {
87 return false;
88 }
89 return "page{$page}-{$params['width']}px";
90 }
91
/**
 * Parse a string of the form "page<N>-<W>px" back into parameters.
 *
 * @param string $str Parameter string to parse
 * @return array|bool [ 'width' => W, 'page' => N ] with both values as
 *  digit strings, or false when the string does not match
 */
function parseParamString( $str ) {
	$matches = [];

	if ( !preg_match( '/^page(\d+)-(\d+)px$/', $str, $matches ) ) {
		return false;
	}

	// Capture group 1 is the page number, group 2 the pixel width.
	return [ 'width' => $matches[2], 'page' => $matches[1] ];
}
105
111 return [
112 'width' => $params['width'],
113 'page' => $params['page'],
114 ];
115 }
116
/**
 * Return the fixed mapping from 'img_width' / 'img_page' to the parameter
 * names 'width' / 'page' used by this handler.
 *
 * @return array
 */
function getParamMap() {
	$map = [
		'img_width' => 'width',
		'img_page' => 'page',
	];

	return $map;
}
126
/**
 * Build a 'thumbnail_error' MediaTransformError whose detail text is the
 * given message rendered in the content language.
 *
 * @param int $width Width passed through to the error object
 * @param int $height Height passed through to the error object
 * @param string $msg Message key handed to wfMessage()
 * @return MediaTransformError
 */
protected function doThumbError( $width, $height, $msg ) {
	$detail = wfMessage( $msg )->inContentLanguage()->text();

	return new MediaTransformError( 'thumbnail_error', $width, $height, $detail );
}
137
/**
 * Render one page of a PDF into a thumbnail file.
 *
 * The configured PDF processor renders the requested page as JPEG to
 * stdout, and that output is piped into the configured post-processor,
 * which resizes it and writes the destination file.
 *
 * @param object $image Source file; getSize(), getName() and getLocalRefPath() are called on it
 * @param string $dstPath Filesystem path the thumbnail is written to
 * @param string $dstUrl URL handed to the resulting ThumbnailImage
 * @param array $params Transform parameters; 'width', 'height' and 'page' are read
 *  after normaliseParams() has run
 * @param int $flags Bitfield; self::TRANSFORM_LATER skips the actual rendering
 * @return MediaTransformError|TransformParameterError|ThumbnailImage
 */
function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) {
	global $wgPdfProcessor, $wgPdfPostProcessor, $wgPdfHandlerDpi, $wgPdfHandlerJpegQuality;

	if ( !$this->normaliseParams( $image, $params ) ) {
		return new TransformParameterError( $params );
	}

	$width = (int)$params['width'];
	$height = (int)$params['height'];
	$page = (int)$params['page'];

	// Both the deferred and the rendered thumbnail carry the same parameters.
	$thumbParams = [
		'width' => $width,
		'height' => $height,
		'page' => $page,
	];

	if ( $page > $this->pageCount( $image ) ) {
		return $this->doThumbError( $width, $height, 'pdf_page_error' );
	}

	if ( $flags & self::TRANSFORM_LATER ) {
		return new ThumbnailImage( $image, $dstUrl, false, $thumbParams );
	}

	if ( !wfMkdirParents( dirname( $dstPath ), null, __METHOD__ ) ) {
		return $this->doThumbError( $width, $height, 'thumbnail_dest_directory' );
	}

	$fetchSource = function () use ( $image ) {
		return $image->getLocalRefPath();
	};

	// Thumbnail extraction is very inefficient for large files.
	// Provide a way to pool count limit the number of downloaders.
	if ( $image->getSize() >= 1e7 ) { // 10MB
		$work = new PoolCounterWorkViaCallback(
			'GetLocalFileCopy',
			sha1( $image->getName() ),
			[ 'doWork' => $fetchSource ]
		);
		$srcPath = $work->execute();
	} else {
		$srcPath = $fetchSource();
	}

	if ( $srcPath === false ) { // could not download original
		return $this->doThumbError( $width, $height, 'filemissing' );
	}

	// First stage: render the single requested page as JPEG to stdout.
	$renderCmd = wfEscapeShellArg(
		$wgPdfProcessor,
		"-sDEVICE=jpeg",
		"-sOutputFile=-",
		"-dFirstPage={$page}",
		"-dLastPage={$page}",
		"-dSAFER",
		"-r{$wgPdfHandlerDpi}",
		"-dBATCH",
		"-dNOPAUSE",
		"-q",
		$srcPath
	);
	// Second stage: read the JPEG from stdin, resize it and write $dstPath.
	$resizeCmd = wfEscapeShellArg(
		$wgPdfPostProcessor,
		"-depth",
		"8",
		"-quality",
		$wgPdfHandlerJpegQuality,
		"-resize",
		$width,
		"-",
		$dstPath
	);
	$cmd = '(' . $renderCmd . " | " . $resizeCmd . ")";

	wfDebug( __METHOD__ . ": $cmd\n" );
	$retval = '';
	$err = wfShellExecWithStderr( $cmd, $retval );

	$removed = $this->removeBadFile( $dstPath, $retval );

	if ( $retval != 0 || $removed ) {
		wfDebugLog( 'thumbnail',
			sprintf( 'thumbnail failed on %s: error %d "%s" from "%s"',
			wfHostname(), $retval, trim( $err ), $cmd ) );
		return new MediaTransformError( 'thumbnail_error', $width, $height, $err );
	}

	return new ThumbnailImage( $image, $dstUrl, $dstPath, $thumbParams );
}
237
/**
 * Get a PdfImage for the given path, memoised on the file object when
 * one is supplied.
 *
 * @param object|null|bool $image File object used as the cache holder; when
 *  falsy a fresh PdfImage is created and nothing is cached
 * @param string $path Path handed to the PdfImage constructor
 * @return PdfImage
 */
function getPdfImage( $image, $path ) {
	if ( !$image ) {
		return new PdfImage( $path );
	}

	if ( !isset( $image->pdfImage ) ) {
		// First request for this file object: create and remember it.
		$image->pdfImage = new PdfImage( $path );
	}

	return $image->pdfImage;
}
255
/**
 * Get the unserialized metadata of a file, memoised on the file object
 * as $image->pdfMetaArray.
 *
 * @param object $image File object; getMetadata() and getName() are called on it
 * @return array|bool The unserialized metadata, or false when the stored
 *  metadata is invalid or could not be unserialized
 */
function getMetaArray( $image ) {
	if ( isset( $image->pdfMetaArray ) ) {
		return $image->pdfMetaArray;
	}

	$metadata = $image->getMetadata();

	if ( !$this->isMetadataValid( $image, $metadata ) ) {
		wfDebug( "Pdf metadata is invalid or missing, should have been fixed in upgradeRow\n" );
		return false;
	}

	// NOTE(review): unserialize() raises a notice on corrupt input - confirm
	// whether warnings are meant to be suppressed around this call.
	$work = new PoolCounterWorkViaCallback(
		'PdfHandler-unserialize-metadata',
		$image->getName(),
		[
			'doWork' => function () use ( $image, $metadata ) {
				$image->pdfMetaArray = unserialize( $metadata );
			},
		]
	);
	$work->execute();

	// If the work callback never ran, the property was never assigned;
	// report failure instead of reading an undefined property.
	return isset( $image->pdfMetaArray ) ? $image->pdfMetaArray : false;
}
291
/**
 * Get the image size as reported by the PdfImage for this file.
 *
 * @param object|null|bool $image File object, passed through to getPdfImage()
 * @param string $path Path passed through to getPdfImage()
 * @return mixed Whatever PdfImage::getImageSize() returns
 */
function getImageSize( $image, $path ) {
	$pdfImage = $this->getPdfImage( $image, $path );

	return $pdfImage->getImageSize();
}
300
/**
 * Get the extension and MIME type used for thumbnails.
 *
 * The result depends only on $wgPdfOutputExtension; the arguments are
 * not consulted. The MIME lookup is cached for the rest of the request.
 *
 * @param string $ext Unused
 * @param string $mime Unused
 * @param array|null $params Unused
 * @return array [ output extension, MIME type guessed for that extension ]
 */
function getThumbType( $ext, $mime, $params = null ) {
	global $wgPdfOutputExtension;
	// Deliberately not named $mime: a static with the parameter's name
	// would silently replace the caller-supplied argument.
	static $outputMime;

	if ( !isset( $outputMime ) ) {
		$magic = MediaWiki\MediaWikiServices::getInstance()->getMimeAnalyzer();
		$outputMime = $magic->guessTypesForExtension( $wgPdfOutputExtension );
	}

	return [ $wgPdfOutputExtension, $outputMime ];
}
317
/**
 * Retrieve the metadata from the PdfImage for this file and serialize it.
 *
 * @param object|null|bool $image File object, passed through to getPdfImage()
 * @param string $path Path passed through to getPdfImage()
 * @return string serialize()d result of PdfImage::retrieveMetaData()
 */
function getMetadata( $image, $path ) {
	$pdfImage = $this->getPdfImage( $image, $path );
	$rawMetadata = $pdfImage->retrieveMetaData();

	return serialize( $rawMetadata );
}
326
/**
 * Classify the stored serialized metadata of a file.
 *
 * @param object $image Unused
 * @param string $metadata Serialized metadata string
 * @return bool|int self::METADATA_BAD when the string is empty or is a
 *  serialized empty array, self::METADATA_COMPATIBLE when it does not
 *  mention 'mergedMetadata', self::METADATA_GOOD otherwise
 */
function isMetadataValid( $image, $metadata ) {
	if ( !$metadata || $metadata === serialize( [] ) ) {
		return self::METADATA_BAD;
	}

	if ( strpos( $metadata, 'mergedMetadata' ) === false ) {
		// This branch used to be empty and fell through to METADATA_GOOD.
		return self::METADATA_COMPATIBLE;
	}

	return self::METADATA_GOOD;
}
340
/**
 * Format the 'mergedMetadata' part of a file's metadata for display.
 *
 * @param object $image File object; getMetadata() is called on it
 * @param mixed $context Passed through to formatMetadataHelper()
 * @return mixed False when there is no metadata or no non-empty
 *  'mergedMetadata' array, otherwise the formatMetadataHelper() result
 */
function formatMetadata( $image, $context = false ) {
	$serialized = $image->getMetadata();
	if ( !$serialized ) {
		return false;
	}

	$decoded = unserialize( $serialized );

	$hasMerged = isset( $decoded['mergedMetadata'] )
		&& is_array( $decoded['mergedMetadata'] )
		&& count( $decoded['mergedMetadata'] ) >= 1;
	if ( !$hasMerged ) {
		return false;
	}

	// Inherited from MediaHandler.
	return $this->formatMetadataHelper( $decoded['mergedMetadata'], $context );
}
366
/**
 * Get the number of pages of a file.
 *
 * @param File $image
 * @return int|bool The 'pageCount' entry of the dimension info, or false
 *  when no dimension info is available
 */
function pageCount( File $image ) {
	$info = $this->getDimensionInfo( $image );
	if ( !$info ) {
		return false;
	}

	return $info['pageCount'];
}
376
/**
 * Get the stored dimensions of one page.
 *
 * @param File $image
 * @param int $page Page number; used directly as the key, since pages
 *  are stored starting at 1
 * @return mixed The 'dimensionsByPage' entry for the page, or false when
 *  there is no dimension info or no entry for that page
 */
function getPageDimensions( File $image, $page ) {
	$info = $this->getDimensionInfo( $image );

	if ( !$info || !isset( $info['dimensionsByPage'][$page] ) ) {
		return false;
	}

	return $info['dimensionsByPage'][$page];
}
392
/**
 * Get the page count and per-page sizes of a file through the main WAN
 * cache, keyed by the file's SHA-1.
 *
 * @param File $file
 * @return array|bool [ 'pageCount' => int, 'dimensionsByPage' => array ]
 *  with pages keyed from 1, or false when the metadata is unavailable or
 *  has no 'Pages' entry
 */
protected function getDimensionInfo( File $file ) {
	$cache = ObjectCache::getMainWANInstance();
	$key = $cache->makeKey( 'file-pdf', 'dimensions', $file->getSha1() );

	$compute = function () use ( $file ) {
		$meta = $this->getMetaArray( $file );
		if ( !$meta || !isset( $meta['Pages'] ) ) {
			return false;
		}
		unset( $meta['text'] ); // lower peak RAM

		$pageTotal = intval( $meta['Pages'] );
		$sizes = [];
		for ( $pageNo = 1; $pageNo <= $pageTotal; $pageNo++ ) {
			$sizes[$pageNo] = PdfImage::getPageSize( $meta, $pageNo );
		}

		return [ 'pageCount' => $pageTotal, 'dimensionsByPage' => $sizes ];
	};

	return $cache->getWithSetCallback(
		$key,
		$cache::TTL_INDEFINITE,
		$compute,
		[ 'pcTTL' => $cache::TTL_INDEFINITE ]
	);
}
416
/**
 * Get the stored text of one page.
 *
 * @param File $image
 * @param int $page Page number; the entry at index $page - 1 of the
 *  'text' list is read
 * @return mixed The stored text entry, or false when the metadata, its
 *  'text' entry or the entry for that page is missing
 */
function getPageText( File $image, $page ) {
	$meta = $this->getMetaArray( $image );
	if ( !$meta || !isset( $meta['text'] ) ) {
		return false;
	}

	$index = $page - 1;

	return isset( $meta['text'][$index] ) ? $meta['text'][$index] : false;
}
429
/**
 * Return the fixed warning configuration: the message keys from
 * self::$messages, a help link and a module name.
 *
 * @param mixed $file Unused
 * @return array Array with the keys 'messages', 'link' and 'module'
 */
function getWarningConfig( $file ) {
	$config = [
		'messages' => self::$messages,
		'link' => '//www.mediawiki.org/wiki/Special:MyLanguage/Help:Security/PDF_files',
		'module' => 'pdfhandler.messages',
	];

	return $config;
}
443
449 $resourceLoader->register( 'pdfhandler.messages', [
450 'messages' => array_values( self::$messages ),
451 ] );
452 }
453}
This list may contain false positives That usually means there is additional text with links below the first Each row contains links to the first and second as well as the first line of the second redirect text
serialize()
unserialize( $serialized)
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfEscapeShellArg(... $args)
Version of escapeshellarg() that works better on Windows.
wfRestoreWarnings()
wfHostname()
Fetch server name for use in error reporting etc.
wfShellExecWithStderr( $cmd, &$retval=null, $environ=[], $limits=[])
Execute a shell command, returning both stdout and stderr.
wfSuppressWarnings( $end=false)
Reference-counted warning suppression.
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not.
wfMkdirParents( $dir, $mode=null, $caller=null)
Make directory, and make all parent directories if they don't exist.
Implements some public methods and some protected utility functions which are required by multiple ch...
Definition File.php:51
Media handler abstract base class for images.
normaliseParams( $image, &$params)
const METADATA_COMPATIBLE
formatMetadataHelper( $metadataArray, $context=false)
sorts the visible/invisible field.
const METADATA_GOOD
removeBadFile( $dstPath, $retval=0)
Check for zero-sized thumbnails.
Basic media transform error class.
Copyright © 2007 Martin Seidel (Xarax) jodeldi@gmx.de
getThumbType( $ext, $mime, $params=null)
getMetaArray( $image)
getPdfImage( $image, $path)
static registerWarningModule(&$resourceLoader)
Register a module with the warning messages in it.
validateParam( $name, $value)
getPageDimensions(File $image, $page)
getDimensionInfo(File $file)
getMetadata( $image, $path)
isMetadataValid( $image, $metadata)
formatMetadata( $image, $context=false)
getImageSize( $image, $path)
isMultiPage( $file)
getWarningConfig( $file)
Adds a warning about PDFs being potentially dangerous to the file page.
static $messages
getScriptParams( $params)
getPageText(File $image, $page)
makeParamString( $params)
parseParamString( $str)
pageCount(File $image)
doThumbError( $width, $height, $msg)
doTransform( $image, $dstPath, $dstUrl, $params, $flags=0)
mustRender( $file)
inspired by djvuimage from Brion Vibber modified and written by xarax
Definition PdfImage.php:32
static getPageSize( $data, $page)
Definition PdfImage.php:74
Convenience class for dealing with PoolCounters using callbacks.
execute( $skipcache=false)
Get the result of the work (whatever it is), or the result of the error() function.
Media transform output for images.
Shortcut class for parameter validation errors.
please add to it if you re going to add events to the MediaWiki code where normally authentication against an external auth plugin would be creating a local account incomplete not yet checked for validity & $retval
Definition hooks.txt:266
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable modifiable after all normalizations have been except for the $wgMaxImageArea check $image
Definition hooks.txt:925
do that in ParserLimitReportFormat instead use this to modify the parameters of the image all existing parser cache entries will be invalid To avoid you ll need to handle that somehow(e.g. with the RejectParserCacheValue hook) because MediaWiki won 't do it for you. & $defaults also a ContextSource after deleting those rows but within the same transaction you ll probably need to make sure the header is varied on and they can depend only on the ResourceLoaderContext $context
Definition hooks.txt:2885
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation use $formDescriptor instead default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html(usually something like "&lt;div ...>$1&lt;/div>"). - flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException':Called before an exception(or PHP error) is logged. This is meant for integration with external error aggregation services
do that in ParserLimitReportFormat instead use this to modify the parameters of the image all existing parser cache entries will be invalid To avoid you ll need to handle that somehow(e.g. with the RejectParserCacheValue hook) because MediaWiki won 't do it for you. & $defaults also a ContextSource after deleting those rows but within the same transaction you ll probably need to make sure the header is varied on and they can depend only on the ResourceLoaderContext such as when responding to a resource loader request or generating HTML output & $resourceLoader
Definition hooks.txt:2892
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback function
Definition injection.txt:30
$cache
Definition mcc.php:33
if( $ext=='php'|| $ext=='php5') $mime
Definition router.php:59
if(!is_readable( $file)) $ext
Definition router.php:55
$params