MediaWiki REL1_33
PdfHandler.php
Go to the documentation of this file.
1<?php
24class PdfHandler extends ImageHandler {
/**
 * Message keys for the PDF file-page warning. The map is handed out by
 * getWarningConfig() and its values are registered as a ResourceLoader
 * module by registerWarningModule().
 *
 * @var string[]
 */
public static $messages = [
	'main' => 'pdf-file-page-warning',
	'header' => 'pdf-file-page-warning-header',
	'info' => 'pdf-file-page-warning-info',
	'footer' => 'pdf-file-page-warning-footer',
];
31
/**
 * Check whether the settings required by this handler are configured.
 *
 * @return bool True when $wgPdfProcessor, $wgPdfPostProcessor and
 *  $wgPdfInfo are all set; false (after logging a hint) otherwise
 */
public function isEnabled() {
	// The settings live in the global scope. Without this declaration the
	// isset() check below would only see undefined locals and the handler
	// would report itself as disabled no matter how the wiki is configured.
	global $wgPdfProcessor, $wgPdfPostProcessor, $wgPdfInfo;

	if ( isset( $wgPdfProcessor, $wgPdfPostProcessor, $wgPdfInfo ) ) {
		return true;
	}

	wfDebug( "PdfHandler is disabled, please set the following\n" );
	wfDebug( "variables in LocalSettings.php:\n" );
	wfDebug( "\$wgPdfProcessor, \$wgPdfPostProcessor, \$wgPdfInfo\n" );
	return false;
}
46
/**
 * Always answers true, regardless of the file passed in.
 *
 * @param File $file Unused
 * @return bool
 */
public function mustRender( $file ) {
	return true;
}
54
/**
 * Always answers true, regardless of the file passed in.
 *
 * @param File $file Unused
 * @return bool
 */
public function isMultiPage( $file ) {
	return true;
}
62
/**
 * Validate a single transform parameter.
 *
 * Only 'width', 'height' and 'page' are accepted, and each must be greater
 * than zero. A 'page' value must additionally be a plain integer string.
 *
 * @param string $name Parameter name
 * @param string|int $value Parameter value
 * @return bool Whether the parameter is acceptable
 */
public function validateParam( $name, $value ) {
	// Strict membership test: a loose in_array() would let non-string
	// names (e.g. boolean true) match one of the known names by coercion.
	if ( !in_array( $name, [ 'width', 'height', 'page' ], true ) ) {
		return false;
	}

	if ( $name === 'page' && trim( $value ) !== (string)intval( $value ) ) {
		// Extra junk on the end of page, probably actually a caption
		// e.g. [[File:Foo.pdf|thumb|Page 3 of the document shows foo]]
		return false;
	}

	return ( $value > 0 );
}
79
/**
 * Build the "page<N>-<W>px" string that identifies a thumbnail.
 *
 * @param array $params Transform parameters; 'width' is required and
 *  'page' defaults to 1 when absent
 * @return string|bool The parameter string, or false if no width is given
 */
public function makeParamString( $params ) {
	if ( isset( $params['width'] ) ) {
		$pageNumber = $params['page'] ?? 1;

		return 'page' . $pageNumber . '-' . $params['width'] . 'px';
	}

	return false;
}
91
/**
 * Inverse of makeParamString(): split "page<N>-<W>px" into its parts.
 *
 * @param string $str Parameter string
 * @return array|bool [ 'width' => ..., 'page' => ... ] with the matched
 *  digit strings, or false when $str does not have the expected form
 */
public function parseParamString( $str ) {
	$matches = [];
	$found = preg_match( '/^page(\d+)-(\d+)px$/', $str, $matches );

	if ( !$found ) {
		return false;
	}

	$pageNumber = $matches[1];
	$pixelWidth = $matches[2];

	return [ 'width' => $pixelWidth, 'page' => $pageNumber ];
}
105
/**
 * Reduce the transform parameters to the two this handler passes on:
 * 'width' and 'page'.
 *
 * @param array $params Must contain 'width' and 'page'
 * @return array
 */
public function getScriptParams( $params ) {
	$scriptParams = [];
	foreach ( [ 'width', 'page' ] as $key ) {
		$scriptParams[$key] = $params[$key];
	}

	return $scriptParams;
}
116
/**
 * Map of 'img_*' names to the parameter names used by this handler.
 *
 * @return string[]
 */
public function getParamMap() {
	$map = [];
	$map['img_width'] = 'width';
	$map['img_page'] = 'page';

	return $map;
}
126
/**
 * Build a 'thumbnail_error' transform error whose detail text is the given
 * message rendered in the content language.
 *
 * @param int $width
 * @param int $height
 * @param string $msg Message key
 * @return MediaTransformError
 */
protected function doThumbError( $width, $height, $msg ) {
	$detail = wfMessage( $msg )->inContentLanguage()->text();

	return new MediaTransformError( 'thumbnail_error', $width, $height, $detail );
}
137
/**
 * Render one page of a PDF as a thumbnail.
 *
 * The PDF processor is asked to emit the requested page as JPEG on stdout;
 * that stream is piped into the post-processor, which resizes it to the
 * requested width and writes the result to $dstPath.
 *
 * @param File $image Source file
 * @param string $dstPath Filesystem path of the thumbnail to create
 * @param string $dstUrl URL of the thumbnail
 * @param array $params Transform parameters (normalised in place)
 * @param int $flags Bitfield; TRANSFORM_LATER skips the actual rendering
 * @return MediaTransformError|ThumbnailImage|TransformParameterError
 */
public function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) {
	// All four settings are read from the global scope when the shell
	// command is assembled below; without this declaration they would be
	// undefined locals and the command would be built from empty values.
	global $wgPdfProcessor, $wgPdfPostProcessor, $wgPdfHandlerDpi, $wgPdfHandlerJpegQuality;

	if ( !$this->normaliseParams( $image, $params ) ) {
		return new TransformParameterError( $params );
	}

	$width = (int)$params['width'];
	$height = (int)$params['height'];
	$page = (int)$params['page'];

	if ( $page > $this->pageCount( $image ) ) {
		return $this->doThumbError( $width, $height, 'pdf_page_error' );
	}

	if ( $flags & self::TRANSFORM_LATER ) {
		// Caller only wants the output object; no file is produced now.
		return new ThumbnailImage( $image, $dstUrl, false, [
			'width' => $width,
			'height' => $height,
			'page' => $page,
		] );
	}

	if ( !wfMkdirParents( dirname( $dstPath ), null, __METHOD__ ) ) {
		return $this->doThumbError( $width, $height, 'thumbnail_dest_directory' );
	}

	// Thumbnail extraction is very inefficient for large files.
	// Provide a way to pool count limit the number of downloaders.
	if ( $image->getSize() >= 1e7 ) { // 10MB
		$work = new PoolCounterWorkViaCallback( 'GetLocalFileCopy', sha1( $image->getName() ),
			[
				'doWork' => function () use ( $image ) {
					return $image->getLocalRefPath();
				}
			]
		);
		$srcPath = $work->execute();
	} else {
		$srcPath = $image->getLocalRefPath();
	}

	if ( $srcPath === false ) { // could not download original
		return $this->doThumbError( $width, $height, 'filemissing' );
	}

	// First stage: write the single requested page as JPEG to stdout.
	$cmd = '(' . wfEscapeShellArg(
		$wgPdfProcessor,
		"-sDEVICE=jpeg",
		"-sOutputFile=-",
		"-dFirstPage={$page}",
		"-dLastPage={$page}",
		"-dSAFER",
		"-r{$wgPdfHandlerDpi}",
		"-dBATCH",
		"-dNOPAUSE",
		"-q",
		$srcPath
	);
	// Second stage: read that JPEG from stdin, resize, write to $dstPath.
	$cmd .= " | " . wfEscapeShellArg(
		$wgPdfPostProcessor,
		"-depth",
		"8",
		"-quality",
		$wgPdfHandlerJpegQuality,
		"-resize",
		$width,
		"-",
		$dstPath
	);
	$cmd .= ")";

	wfDebug( __METHOD__ . ": $cmd\n" );
	$retval = '';
	$err = wfShellExecWithStderr( $cmd, $retval );

	// A zero-sized output counts as a failure even if the exit code was 0.
	$removed = $this->removeBadFile( $dstPath, $retval );

	if ( $retval != 0 || $removed ) {
		wfDebugLog( 'thumbnail',
			sprintf( 'thumbnail failed on %s: error %d "%s" from "%s"',
			wfHostname(), $retval, trim( $err ), $cmd ) );
		return new MediaTransformError( 'thumbnail_error', $width, $height, $err );
	}

	return new ThumbnailImage( $image, $dstUrl, $dstPath, [
		'width' => $width,
		'height' => $height,
		'page' => $page,
	] );
}
237
/**
 * Get a PdfImage for the given path, reusing the instance stored on
 * $image->pdfImage when there is one.
 *
 * @param File|bool|null $image File to cache the PdfImage on; when falsy a
 *  fresh, uncached PdfImage is returned
 * @param string $path Path handed to the PdfImage constructor
 * @return PdfImage
 */
private function getPdfImage( $image, $path ) {
	if ( !$image ) {
		return new PdfImage( $path );
	}

	if ( !isset( $image->pdfImage ) ) {
		$image->pdfImage = new PdfImage( $path );
	}

	return $image->pdfImage;
}
255
/**
 * Get the unserialized metadata of a file, cached on $image->pdfMetaArray.
 *
 * @param File $image
 * @return array|bool The metadata, or false when it is missing, invalid or
 *  could not be unserialized
 */
private function getMetaArray( $image ) {
	if ( isset( $image->pdfMetaArray ) ) {
		return $image->pdfMetaArray;
	}

	$metadata = $image->getMetadata();

	if ( !$this->isMetadataValid( $image, $metadata ) ) {
		wfDebug( "Pdf metadata is invalid or missing, should have been fixed in upgradeRow\n" );
		return false;
	}

	$work = new PoolCounterWorkViaCallback(
		'PdfHandler-unserialize-metadata',
		$image->getName(),
		[
			'doWork' => function () use ( $image, $metadata ) {
				$image->pdfMetaArray = unserialize( $metadata );
			},
		]
	);
	$work->execute();

	// execute() may hand back an error result instead of running doWork,
	// in which case the property was never assigned. Fall back to false
	// rather than reading an undefined property.
	return $image->pdfMetaArray ?? false;
}
290
/**
 * Get the image size as reported by PdfImage::getImageSize().
 *
 * @param File|bool|null $image
 * @param string $path
 * @return mixed Whatever PdfImage::getImageSize() returns
 */
public function getImageSize( $image, $path ) {
	$pdfImage = $this->getPdfImage( $image, $path );

	return $pdfImage->getImageSize();
}
299
/**
 * Get the extension and MIME type used for thumbnails.
 *
 * The answer depends only on $wgPdfOutputExtension; none of the arguments
 * are consulted.
 *
 * @param string $ext Unused
 * @param string $mime Unused
 * @param array|null $params Unused
 * @return array [ extension, MIME type ]
 */
public function getThumbType( $ext, $mime, $params = null ) {
	// The output extension is a global setting and must be imported into
	// this scope before it can be read.
	global $wgPdfOutputExtension;

	// Per-request cache of the looked-up MIME type. Deliberately not named
	// $mime, so it does not shadow the parameter of the same name.
	static $outputMime;

	if ( !isset( $outputMime ) ) {
		$magic = MediaWiki\MediaWikiServices::getInstance()->getMimeAnalyzer();
		$outputMime = $magic->guessTypesForExtension( $wgPdfOutputExtension );
	}

	return [ $wgPdfOutputExtension, $outputMime ];
}
316
/**
 * Extract the metadata of a PDF and return it in serialized form.
 *
 * @param File|bool|null $image
 * @param string $path
 * @return string serialize()d result of PdfImage::retrieveMetaData()
 */
public function getMetadata( $image, $path ) {
	$pdfImage = $this->getPdfImage( $image, $path );
	$rawMetadata = $pdfImage->retrieveMetaData();

	return serialize( $rawMetadata );
}
325
/**
 * Classify a serialized metadata blob.
 *
 * @param File $image Unused
 * @param string|bool $metadata Serialized metadata
 * @return bool|int METADATA_BAD when the blob is empty or an empty array,
 *  METADATA_COMPATIBLE when it lacks the 'mergedMetadata' entry,
 *  METADATA_GOOD otherwise
 */
public function isMetadataValid( $image, $metadata ) {
	if ( !$metadata || $metadata === serialize( [] ) ) {
		return self::METADATA_BAD;
	}

	if ( strpos( $metadata, 'mergedMetadata' ) === false ) {
		// Metadata without the merged block is not reported as fully
		// good; previously this branch was empty and fell through to
		// METADATA_GOOD.
		return self::METADATA_COMPATIBLE;
	}

	return self::METADATA_GOOD;
}
339
/**
 * Format the 'mergedMetadata' part of a file's metadata for display.
 *
 * @param File $image
 * @param IContextSource|bool $context Passed through to the helper
 * @return array|bool Result of formatMetadataHelper(), or false when the
 *  file has no metadata or no non-empty 'mergedMetadata' array
 */
public function formatMetadata( $image, $context = false ) {
	$serialized = $image->getMetadata();
	if ( !$serialized ) {
		return false;
	}

	$meta = unserialize( $serialized );
	$merged = $meta['mergedMetadata'] ?? null;

	if ( is_array( $merged ) && count( $merged ) >= 1 ) {
		// Inherited from MediaHandler.
		return $this->formatMetadataHelper( $merged, $context );
	}

	return false;
}
365
/**
 * Number of pages in the file, taken from the cached dimension info.
 *
 * @param File $image
 * @return int|bool Page count, or false when no dimension info is available
 */
public function pageCount( File $image ) {
	$info = $this->getDimensionInfo( $image );
	if ( $info ) {
		return $info['pageCount'];
	}

	return false;
}
375
/**
 * Dimensions of one page, taken from the cached dimension info.
 *
 * @param File $image
 * @param int $page Page number; MW starts pages at 1, as they are stored here
 * @return array|bool The stored entry for that page, or false if there is none
 */
public function getPageDimensions( File $image, $page ) {
	$info = $this->getDimensionInfo( $image );
	if ( !$info ) {
		return false;
	}

	return $info['dimensionsByPage'][$page] ?? false;
}
391
/**
 * Get the page count and per-page sizes of a file, cached in the main WAN
 * cache under a key derived from the file's SHA-1.
 *
 * @param File $file
 * @return array|bool [ 'pageCount' => int, 'dimensionsByPage' => array ],
 *  or false when the metadata is unavailable or has no 'Pages' entry
 */
protected function getDimensionInfo( File $file ) {
	$wanCache = ObjectCache::getMainWANInstance();
	$cacheKey = $wanCache->makeKey( 'file-pdf', 'dimensions', $file->getSha1() );

	$compute = function () use ( $file ) {
		$meta = $this->getMetaArray( $file );
		if ( $meta && isset( $meta['Pages'] ) ) {
			unset( $meta['text'] ); // lower peak RAM

			$total = intval( $meta['Pages'] );
			$sizes = [];
			$pageNo = 1;
			while ( $pageNo <= $total ) {
				$sizes[$pageNo] = PdfImage::getPageSize( $meta, $pageNo );
				$pageNo++;
			}

			return [ 'pageCount' => $total, 'dimensionsByPage' => $sizes ];
		}

		return false;
	};

	return $wanCache->getWithSetCallback(
		$cacheKey,
		$wanCache::TTL_INDEFINITE,
		$compute,
		[ 'pcTTL' => $wanCache::TTL_INDEFINITE ]
	);
}
415
/**
 * Get the stored text of one page.
 *
 * @param File $image
 * @param int $page Page number starting at 1; the text list is 0-based
 * @return string|bool The stored text, or false when there is none
 */
public function getPageText( File $image, $page ) {
	$meta = $this->getMetaArray( $image );

	if ( $meta && isset( $meta['text'] ) ) {
		$offset = $page - 1;
		if ( isset( $meta['text'][$offset] ) ) {
			return $meta['text'][$offset];
		}
	}

	return false;
}
428
/**
 * Describe the warning attached to PDF files: its message keys, a help
 * link and the name of the module carrying the messages.
 *
 * @param File $file Unused
 * @return array
 */
public function getWarningConfig( $file ) {
	$config = [];
	$config['messages'] = self::$messages;
	$config['link'] = '//www.mediawiki.org/wiki/Special:MyLanguage/Help:Security/PDF_files';
	$config['module'] = 'pdfhandler.messages';

	return $config;
}
442
/**
 * Register the 'pdfhandler.messages' module, which carries the warning
 * message keys from self::$messages.
 *
 * @param ResourceLoader &$resourceLoader Object to register the module on
 */
public static function registerWarningModule( &$resourceLoader ) {
	$messageKeys = array_values( self::$messages );
	$moduleInfo = [ 'messages' => $messageKeys ];

	$resourceLoader->register( 'pdfhandler.messages', $moduleInfo );
}
452}
This list may contain false positives That usually means there is additional text with links below the first Each row contains links to the first and second as well as the first line of the second redirect text
serialize()
unserialize( $serialized)
and that you know you can do these things To protect your we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights These restrictions translate to certain responsibilities for you if you distribute copies of the or if you modify it For if you distribute copies of such a whether gratis or for a you must give the recipients all the rights that you have You must make sure that receive or can get the source code And you must show them these terms so they know their rights We protect your rights with two and(2) offer you this license which gives you legal permission to copy
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfEscapeShellArg(... $args)
Version of escapeshellarg() that works better on Windows.
wfHostname()
Fetch server name for use in error reporting etc.
wfShellExecWithStderr( $cmd, &$retval=null, $environ=[], $limits=[])
Execute a shell command, returning both stdout and stderr.
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not.
wfMkdirParents( $dir, $mode=null, $caller=null)
Make directory, and make all parent directories if they don't exist.
Implements some public methods and some protected utility functions which are required by multiple ch...
Definition File.php:52
Media handler abstract base class for images.
normaliseParams( $image, &$params)
const METADATA_COMPATIBLE
formatMetadataHelper( $metadataArray, $context=false)
sorts the visible/invisible field.
const METADATA_GOOD
removeBadFile( $dstPath, $retval=0)
Check for zero-sized thumbnails.
Basic media transform error class.
Copyright © 2007 Martin Seidel (Xarax) jodeldi@gmx.de
getThumbType( $ext, $mime, $params=null)
getMetaArray( $image)
getPdfImage( $image, $path)
static registerWarningModule(&$resourceLoader)
Register a module with the warning messages in it.
validateParam( $name, $value)
getPageDimensions(File $image, $page)
getDimensionInfo(File $file)
getMetadata( $image, $path)
isMetadataValid( $image, $metadata)
formatMetadata( $image, $context=false)
getImageSize( $image, $path)
isMultiPage( $file)
getWarningConfig( $file)
Adds a warning about PDFs being potentially dangerous to the file page.
static $messages
getScriptParams( $params)
getPageText(File $image, $page)
makeParamString( $params)
parseParamString( $str)
pageCount(File $image)
doThumbError( $width, $height, $msg)
doTransform( $image, $dstPath, $dstUrl, $params, $flags=0)
mustRender( $file)
inspired by djvuimage from Brion Vibber modified and written by xarax
Definition PdfImage.php:32
static getPageSize( $data, $page)
Definition PdfImage.php:74
Convenience class for dealing with PoolCounters using callbacks.
execute( $skipcache=false)
Get the result of the work (whatever it is), or the result of the error() function.
Media transform output for images.
Shortcut class for parameter validation errors.
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not it can be in the form of< username >< more info > e g for bot passwords intended to be added to log contexts Fields it might only if the login was with a bot password it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable modifiable after all normalizations have been except for the $wgMaxImageArea check $image
Definition hooks.txt:886
do that in ParserLimitReportFormat instead use this to modify the parameters of the image all existing parser cache entries will be invalid To avoid you ll need to handle that somehow(e.g. with the RejectParserCacheValue hook) because MediaWiki won 't do it for you. & $defaults also a ContextSource after deleting those rows but within the same transaction you ll probably need to make sure the header is varied on and they can depend only on the ResourceLoaderContext $context
Definition hooks.txt:2848
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation use $formDescriptor instead default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html(usually something like "&lt;div ...>$1&lt;/div>"). - flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException':Called before an exception(or PHP error) is logged. This is meant for integration with external error aggregation services
do that in ParserLimitReportFormat instead use this to modify the parameters of the image all existing parser cache entries will be invalid To avoid you ll need to handle that somehow(e.g. with the RejectParserCacheValue hook) because MediaWiki won 't do it for you. & $defaults also a ContextSource after deleting those rows but within the same transaction you ll probably need to make sure the header is varied on and they can depend only on the ResourceLoaderContext such as when responding to a resource loader request or generating HTML output & $resourceLoader
Definition hooks.txt:2859
$data
Utility to generate mapping file used in mw.Title (phpCharToUpper.json)
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback function
Definition injection.txt:30
$cache
Definition mcc.php:33
if(!is_readable( $file)) $ext
Definition router.php:48
$params