MediaWiki REL1_30
PdfHandler_body.php
Go to the documentation of this file.
1<?php
24class PdfHandler extends ImageHandler {
/**
 * Message keys for the PDF warning, indexed by the role each message plays.
 * Read by getWarningConfig() and registerWarningModule().
 *
 * @var string[]
 */
public static $messages = [
	'main' => 'pdf-file-page-warning',
	'header' => 'pdf-file-page-warning-header',
	'info' => 'pdf-file-page-warning-info',
	'footer' => 'pdf-file-page-warning-footer',
];
31
/**
 * Report whether the handler has the configuration it needs.
 *
 * The handler counts as enabled only when $wgPdfProcessor,
 * $wgPdfPostProcessor and $wgPdfInfo are all set. When any of them is
 * missing, a three-line hint is written to the debug log.
 *
 * @return bool True when all three globals are set, false otherwise
 */
function isEnabled() {
	global $wgPdfProcessor, $wgPdfPostProcessor, $wgPdfInfo;

	// isset() with several arguments is true only if every one of them is set.
	if ( isset( $wgPdfProcessor, $wgPdfPostProcessor, $wgPdfInfo ) ) {
		return true;
	}

	$hintLines = [
		"PdfHandler is disabled, please set the following\n",
		"variables in LocalSettings.php:\n",
		"\$wgPdfProcessor, \$wgPdfPostProcessor, \$wgPdfInfo\n",
	];
	foreach ( $hintLines as $hintLine ) {
		wfDebug( $hintLine );
	}

	return false;
}
46
/**
 * Tell the caller that files of this type always have to be rendered.
 *
 * @param mixed $file Unused
 * @return bool Always true
 */
function mustRender( $file ) {
	$alwaysRender = true;

	return $alwaysRender;
}
54
/**
 * Tell the caller that files of this type are treated as multi-page.
 *
 * @param mixed $file Unused
 * @return bool Always true
 */
function isMultiPage( $file ) {
	$multiPage = true;

	return $multiPage;
}
62
/**
 * Validate a single thumbnail parameter.
 *
 * Only 'width', 'height' and 'page' are accepted, and each must be greater
 * than zero. A 'page' value must additionally be a plain integer once
 * trimmed.
 *
 * @param string $name Parameter name
 * @param mixed $value Parameter value
 * @return bool True if the parameter is known and its value is acceptable
 */
function validateParam( $name, $value ) {
	if ( $name === 'page' && trim( $value ) !== (string)intval( $value ) ) {
		// Extra junk on the end of page, probably actually a caption
		// e.g. [[File:Foo.pdf|thumb|Page 3 of the document shows foo]]
		return false;
	}

	// Strict comparison: with loose matching a non-string name such as
	// boolean true compares equal to 'width' and would slip through.
	if ( in_array( $name, [ 'width', 'height', 'page' ], true ) ) {
		return ( $value > 0 );
	}

	return false;
}
79
/**
 * Build the parameter string for a thumbnail, in the form that
 * parseParamString() reads back: "page<N>-<width>px".
 *
 * The declaration line of this method was missing from the listing, which
 * left the body orphaned at class scope; it is restored here.
 *
 * @param array $params Must contain 'width'; 'page' defaults to 1
 * @return string|bool The parameter string, or false when no width is given
 */
function makeParamString( $params ) {
	if ( !isset( $params['width'] ) ) {
		return false;
	}

	$page = isset( $params['page'] ) ? $params['page'] : 1;

	return "page{$page}-{$params['width']}px";
}
91
/**
 * Parse a parameter string of the form "page<N>-<width>px".
 *
 * @param string $str Parameter string to parse
 * @return array|bool [ 'width' => ..., 'page' => ... ] with both values as
 *  digit strings, or false if the string does not have the expected form
 */
function parseParamString( $str ) {
	$matches = [];

	if ( !preg_match( '/^page(\d+)-(\d+)px$/', $str, $matches ) ) {
		return false;
	}

	// $matches[0] is the whole match; the page comes first, then the width.
	list( , $pageDigits, $widthDigits ) = $matches;

	return [ 'width' => $widthDigits, 'page' => $pageDigits ];
}
105
/**
 * Reduce a parameter array to the two entries returned here: width and page.
 *
 * The declaration line of this method was missing from the listing, which
 * left the body orphaned at class scope; it is restored here.
 *
 * @param array $params Must contain 'width' and 'page'
 * @return array [ 'width' => ..., 'page' => ... ]
 */
function getScriptParams( $params ) {
	return [
		'width' => $params['width'],
		'page' => $params['page'],
	];
}
116
/**
 * Map the 'img_width' and 'img_page' keys to this handler's parameter names.
 *
 * @return string[] Map from 'img_*' key to handler parameter name
 */
function getParamMap() {
	$paramMap = [];
	$paramMap['img_width'] = 'width';
	$paramMap['img_page'] = 'page';

	return $paramMap;
}
126
/**
 * Build a 'thumbnail_error' transform error whose detail text is the given
 * message rendered in the content language.
 *
 * @param int $width Width passed through to the error object
 * @param int $height Height passed through to the error object
 * @param string $msg Message key for the error detail
 * @return MediaTransformError
 */
protected function doThumbError( $width, $height, $msg ) {
	$detail = wfMessage( $msg )->inContentLanguage()->text();

	return new MediaTransformError( 'thumbnail_error', $width, $height, $detail );
}
137
/**
 * Render one page of a PDF into a thumbnail file.
 *
 * The page is rendered to JPEG on stdout by $wgPdfProcessor and piped into
 * $wgPdfPostProcessor, which resizes it to the requested width and writes
 * it to $dstPath.
 *
 * @param File $image Source file
 * @param string $dstPath Filesystem path the thumbnail is written to
 * @param string $dstUrl URL the thumbnail will be served from
 * @param array $params Transform parameters; normalised before use
 * @param int $flags Bitfield; TRANSFORM_LATER skips the actual rendering
 * @return MediaTransformOutput A ThumbnailImage on success (or when rendering
 *  is deferred), otherwise an error object
 */
function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) {
	global $wgPdfProcessor, $wgPdfPostProcessor, $wgPdfHandlerDpi, $wgPdfHandlerJpegQuality;

	if ( !$this->normaliseParams( $image, $params ) ) {
		return new TransformParameterError( $params );
	}

	$width = (int)$params['width'];
	$height = (int)$params['height'];
	$page = (int)$params['page'];

	if ( $page > $this->pageCount( $image ) ) {
		return $this->doThumbError( $width, $height, 'pdf_page_error' );
	}

	if ( $flags & self::TRANSFORM_LATER ) {
		// Caller only wants the output object; nothing is rendered now.
		return new ThumbnailImage( $image, $dstUrl, $width, $height, false, $page );
	}

	if ( !wfMkdirParents( dirname( $dstPath ), null, __METHOD__ ) ) {
		return $this->doThumbError( $width, $height, 'thumbnail_dest_directory' );
	}

	$fetchLocalCopy = function () use ( $image ) {
		return $image->getLocalRefPath();
	};

	// Thumbnail extraction is very inefficient for large files, so files of
	// 10MB and up fetch their local copy through a pool counter, which
	// limits the number of simultaneous downloaders.
	if ( $image->getSize() >= 1e7 ) {
		$work = new PoolCounterWorkViaCallback(
			'GetLocalFileCopy',
			sha1( $image->getName() ),
			[ 'doWork' => $fetchLocalCopy ]
		);
		$srcPath = $work->execute();
	} else {
		$srcPath = $fetchLocalCopy();
	}

	if ( $srcPath === false ) {
		// Could not download the original
		return $this->doThumbError( $width, $height, 'filemissing' );
	}

	// First half of the pipeline: render exactly one page as JPEG to stdout.
	$renderCmd = wfEscapeShellArg(
		$wgPdfProcessor,
		"-sDEVICE=jpeg",
		"-sOutputFile=-",
		"-dFirstPage={$page}",
		"-dLastPage={$page}",
		"-dSAFER",
		"-r{$wgPdfHandlerDpi}",
		"-dBATCH",
		"-dNOPAUSE",
		"-q",
		$srcPath
	);
	// Second half: read the JPEG from stdin, resize it, write the thumbnail.
	$resizeCmd = wfEscapeShellArg(
		$wgPdfPostProcessor,
		"-depth",
		"8",
		"-quality",
		$wgPdfHandlerJpegQuality,
		"-resize",
		$width,
		"-",
		$dstPath
	);
	$cmd = '(' . $renderCmd . " | " . $resizeCmd . ")";

	wfDebug( __METHOD__ . ": $cmd\n" );
	$retval = '';
	$err = wfShellExecWithStderr( $cmd, $retval );

	$removed = $this->removeBadFile( $dstPath, $retval );

	if ( $retval == 0 && !$removed ) {
		return new ThumbnailImage( $image, $dstUrl, $width, $height, $dstPath, $page );
	}

	wfDebugLog(
		'thumbnail',
		sprintf(
			'thumbnail failed on %s: error %d "%s" from "%s"',
			wfHostname(), $retval, trim( $err ), $cmd
		)
	);

	return new MediaTransformError( 'thumbnail_error', $width, $height, $err );
}
229
/**
 * Get a PdfImage for the given path, reusing the one stored on the file
 * object when there is one.
 *
 * @param File|bool $image File object the PdfImage is stored on as
 *  ->pdfImage; when falsy, a fresh PdfImage is built and not stored
 * @param string $path Path handed to the PdfImage constructor
 * @return PdfImage
 */
function getPdfImage( $image, $path ) {
	if ( !$image ) {
		// No object to store the instance on
		return new PdfImage( $path );
	}

	if ( !isset( $image->pdfImage ) ) {
		$image->pdfImage = new PdfImage( $path );
	}

	return $image->pdfImage;
}
246
/**
 * Get the unserialized metadata of a file, storing the result on the file
 * object as ->pdfMetaArray so it is only unserialized once per object.
 *
 * @param File $image
 * @return array|bool The metadata array, or false when the stored metadata
 *  is invalid or could not be unserialized
 */
function getMetaArray( $image ) {
	if ( isset( $image->pdfMetaArray ) ) {
		return $image->pdfMetaArray;
	}

	$metadata = $image->getMetadata();

	if ( !$this->isMetadataValid( $image, $metadata ) ) {
		wfDebug( "Pdf metadata is invalid or missing, should have been fixed in upgradeRow\n" );
		return false;
	}

	$work = new PoolCounterWorkViaCallback(
		'PdfHandler-unserialize-metadata',
		$image->getName(),
		[
			'doWork' => function () use ( $image, $metadata ) {
				// unserialize() emits a notice on malformed input; the
				// false it returns is all the callers need.
				wfSuppressWarnings();
				$image->pdfMetaArray = unserialize( $metadata );
				wfRestoreWarnings();
			},
		]
	);
	$work->execute();

	// NOTE(review): if execute() ever returns without running doWork, the
	// property is still unset; report that as "no metadata" rather than
	// reading an undefined property. Confirm against PoolCounterWork.
	return isset( $image->pdfMetaArray ) ? $image->pdfMetaArray : false;
}
278
/**
 * Get the image size as reported by the file's PdfImage.
 *
 * @param File|bool $image Passed through to getPdfImage()
 * @param string $path Passed through to getPdfImage()
 * @return mixed Whatever PdfImage::getImageSize() returns
 */
function getImageSize( $image, $path ) {
	$pdfImage = $this->getPdfImage( $image, $path );

	return $pdfImage->getImageSize();
}
287
/**
 * Get the file extension and MIME type used for thumbnails.
 *
 * Both depend only on $wgPdfOutputExtension; the arguments are not used.
 * The MIME lookup is done once per request and kept in a static variable.
 *
 * @param string $ext Unused
 * @param string $mime Unused
 * @param array|null $params Unused
 * @return array [ extension, MIME type ]
 */
function getThumbType( $ext, $mime, $params = null ) {
	global $wgPdfOutputExtension;
	// Deliberately not named $mime: a static of that name would shadow the
	// parameter and silently discard the value the caller passed in.
	static $thumbMime;

	if ( !isset( $thumbMime ) ) {
		$magic = MimeMagic::singleton();
		$thumbMime = $magic->guessTypesForExtension( $wgPdfOutputExtension );
	}

	return [ $wgPdfOutputExtension, $thumbMime ];
}
304
/**
 * Extract the metadata of a file and return it in serialized form.
 *
 * @param File|bool $image Passed through to getPdfImage()
 * @param string $path Passed through to getPdfImage()
 * @return string Serialized result of PdfImage::retrieveMetaData()
 */
function getMetadata( $image, $path ) {
	$pdfImage = $this->getPdfImage( $image, $path );
	$rawMetadata = $pdfImage->retrieveMetaData();

	return serialize( $rawMetadata );
}
313
/**
 * Classify a stored metadata string.
 *
 * The body of the middle branch was missing from the listing, leaving an
 * empty elseif that fell through to METADATA_GOOD; it is restored here.
 *
 * @param File $image Unused
 * @param string $metadata Serialized metadata
 * @return bool|int METADATA_BAD when the metadata is empty,
 *  METADATA_COMPATIBLE when it does not mention 'mergedMetadata',
 *  METADATA_GOOD otherwise
 */
function isMetadataValid( $image, $metadata ) {
	if ( !$metadata || $metadata === serialize( [] ) ) {
		return self::METADATA_BAD;
	}

	if ( strpos( $metadata, 'mergedMetadata' ) === false ) {
		return self::METADATA_COMPATIBLE;
	}

	return self::METADATA_GOOD;
}
327
/**
 * Format the merged metadata of a file for display.
 *
 * @param File $image
 * @param IContextSource|bool $context Passed through to the formatter
 * @return array|bool Result of formatMetadataHelper(), or false when the
 *  file has no usable 'mergedMetadata' array
 */
function formatMetadata( $image, $context = false ) {
	$meta = $image->getMetadata();

	if ( !$meta ) {
		return false;
	}

	// unserialize() emits a notice on malformed input; a failed parse is
	// caught by the checks below, so the notice is suppressed.
	wfSuppressWarnings();
	$meta = unserialize( $meta );
	wfRestoreWarnings();

	if ( !isset( $meta['mergedMetadata'] )
		|| !is_array( $meta['mergedMetadata'] )
		|| count( $meta['mergedMetadata'] ) < 1
	) {
		return false;
	}

	// Inherited from MediaHandler.
	return $this->formatMetadataHelper( $meta['mergedMetadata'], $context );
}
353
/**
 * Get the number of pages in a file.
 *
 * @param File $image
 * @return int|bool Page count, or false when no dimension info is available
 */
function pageCount( File $image ) {
	$dimensionInfo = $this->getDimensionInfo( $image );

	if ( !$dimensionInfo ) {
		return false;
	}

	return $dimensionInfo['pageCount'];
}
363
/**
 * Get the dimensions of one page of a file.
 *
 * @param File $image
 * @param int $page Page number; MW starts pages at 1, as they are stored here
 * @return mixed The stored dimensions for that page, or false when the page
 *  is unknown or no dimension info is available
 */
function getPageDimensions( File $image, $page ) {
	$dimensionInfo = $this->getDimensionInfo( $image );

	if ( !$dimensionInfo || !isset( $dimensionInfo['dimensionsByPage'][$page] ) ) {
		return false;
	}

	return $dimensionInfo['dimensionsByPage'][$page];
}
379
/**
 * Get the page count and per-page dimensions of a file, going through the
 * main WAN cache under a key derived from the file's SHA-1.
 *
 * @param File $file
 * @return array|bool [ 'pageCount' => int, 'dimensionsByPage' => array ]
 *  with pages numbered from 1, or false when the metadata is unavailable
 *  or has no 'Pages' entry
 */
protected function getDimensionInfo( File $file ) {
	$cache = ObjectCache::getMainWANInstance();
	$cacheKey = $cache->makeKey( 'file-pdf', 'dimensions', $file->getSha1() );

	$computeInfo = function () use ( $file ) {
		$meta = $this->getMetaArray( $file );
		if ( !$meta || !isset( $meta['Pages'] ) ) {
			return false;
		}
		unset( $meta['text'] ); // lower peak RAM

		$pageCount = intval( $meta['Pages'] );
		$dimensionsByPage = [];
		for ( $pageNo = 1; $pageNo <= $pageCount; $pageNo++ ) {
			$dimensionsByPage[$pageNo] = PdfImage::getPageSize( $meta, $pageNo );
		}

		return [ 'pageCount' => $pageCount, 'dimensionsByPage' => $dimensionsByPage ];
	};

	return $cache->getWithSetCallback(
		$cacheKey,
		$cache::TTL_INDEFINITE,
		$computeInfo,
		[ 'pcTTL' => $cache::TTL_INDEFINITE ]
	);
}
403
/**
 * Get the text stored for one page of a file.
 *
 * @param File $image
 * @param int $page Page number, starting at 1; the text list is 0-based
 * @return string|bool The page's text, or false when none is stored
 */
function getPageText( File $image, $page ) {
	$meta = $this->getMetaArray( $image );
	$textIndex = $page - 1;

	// isset() on the nested offset is also false when 'text' itself is absent.
	if ( !$meta || !isset( $meta['text'][$textIndex] ) ) {
		return false;
	}

	return $meta['text'][$textIndex];
}
416
/**
 * Describe the warning about PDFs being potentially dangerous that is added
 * to the file page.
 *
 * @param File $file Unused
 * @return array With keys 'messages' (the class's message keys), 'link'
 *  (help page URL) and 'module' (name of the module carrying the messages)
 */
function getWarningConfig( $file ) {
	$config = [];
	$config['messages'] = self::$messages;
	$config['link'] = '//www.mediawiki.org/wiki/Special:MyLanguage/Help:Security/PDF_files';
	$config['module'] = 'pdfhandler.messages';

	return $config;
}
430
/**
 * Register a module with the warning messages in it.
 *
 * The declaration line of this method was missing from the listing, which
 * left the body orphaned at class scope; it is restored here.
 *
 * @param ResourceLoader &$resourceLoader Object the module is registered on
 */
static function registerWarningModule( &$resourceLoader ) {
	$resourceLoader->register( 'pdfhandler.messages', [
		'messages' => array_values( self::$messages ),
	] );
}
440}
serialize()
unserialize( $serialized)
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfRestoreWarnings()
wfHostname()
Fetch server name for use in error reporting etc.
wfEscapeShellArg()
Version of escapeshellarg() that works better on Windows.
wfShellExecWithStderr( $cmd, &$retval=null, $environ=[], $limits=[])
Execute a shell command, returning both stdout and stderr.
wfSuppressWarnings( $end=false)
Reference-counted warning suppression.
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not.
wfMkdirParents( $dir, $mode=null, $caller=null)
Make directory, and make all parent directories if they don't exist.
Implements some public methods and some protected utility functions which are required by multiple ch...
Definition File.php:51
Media handler abstract base class for images.
normaliseParams( $image, &$params)
const METADATA_COMPATIBLE
formatMetadataHelper( $metadataArray, $context=false)
sorts the visible/invisible field.
const METADATA_GOOD
removeBadFile( $dstPath, $retval=0)
Check for zero-sized thumbnails.
Basic media transform error class.
static singleton()
Get an instance of this class.
Definition MimeMagic.php:33
Copyright © 2007 Martin Seidel (Xarax) jodeldi@gmx.de
getThumbType( $ext, $mime, $params=null)
getMetaArray( $image)
getPdfImage( $image, $path)
static registerWarningModule(&$resourceLoader)
Register a module with the warning messages in it.
validateParam( $name, $value)
getPageDimensions(File $image, $page)
getDimensionInfo(File $file)
getMetadata( $image, $path)
isMetadataValid( $image, $metadata)
formatMetadata( $image, $context=false)
getImageSize( $image, $path)
isMultiPage( $file)
getWarningConfig( $file)
Adds a warning about PDFs being potentially dangerous to the file page.
getScriptParams( $params)
getPageText(File $image, $page)
makeParamString( $params)
parseParamString( $str)
pageCount(File $image)
doThumbError( $width, $height, $msg)
doTransform( $image, $dstPath, $dstUrl, $params, $flags=0)
mustRender( $file)
Inspired by DjVuImage from Brion Vibber; modified and written by xarax.
static getPageSize( $data, $page)
Convenience class for dealing with PoolCounters using callbacks.
execute( $skipcache=false)
Get the result of the work (whatever it is), or the result of the error() function.
Media transform output for images.
Shortcut class for parameter validation errors.
when a variable name is used in a function
Definition design.txt:94
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add text
Definition design.txt:18
please add to it if you re going to add events to the MediaWiki code where normally authentication against an external auth plugin would be creating a local account incomplete not yet checked for validity & $retval
Definition hooks.txt:266
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable modifiable after all normalizations have been except for the $wgMaxImageArea check $image
Definition hooks.txt:893
do that in ParserLimitReportFormat instead use this to modify the parameters of the image all existing parser cache entries will be invalid To avoid you ll need to handle that somehow(e.g. with the RejectParserCacheValue hook) because MediaWiki won 't do it for you. & $defaults also a ContextSource after deleting those rows but within the same transaction you ll probably need to make sure the header is varied on and they can depend only on the ResourceLoaderContext $context
Definition hooks.txt:2780
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html(usually something like "&lt;div ...>$1&lt;/div>"). - flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException':Called before an exception(or PHP error) is logged. This is meant for integration with external error aggregation services
it s the revision text itself In either if gzip is the revision text is gzipped $flags
Definition hooks.txt:2805
do that in ParserLimitReportFormat instead use this to modify the parameters of the image all existing parser cache entries will be invalid To avoid you ll need to handle that somehow(e.g. with the RejectParserCacheValue hook) because MediaWiki won 't do it for you. & $defaults also a ContextSource after deleting those rows but within the same transaction you ll probably need to make sure the header is varied on and they can depend only on the ResourceLoaderContext such as when responding to a resource loader request or generating HTML output & $resourceLoader
Definition hooks.txt:2787
$cache
Definition mcc.php:33
if( $ext=='php'|| $ext=='php5') $mime
Definition router.php:59
$params