MediaWiki REL1_34
PdfHandler.php
Go to the documentation of this file.
1<?php
2
4
/**
 * Media handler for PDF files.
 *
 * Treats a PDF as a multi-page image: page thumbnails are produced by piping
 * the output of the configured PDF processor ($wgPdfProcessor) into the
 * configured post-processor ($wgPdfPostProcessor), and page count, page
 * dimensions and page text are read from the file's serialized metadata.
 */
class PdfHandler extends ImageHandler {
	/**
	 * Message keys used for the file-page warning, keyed by their role.
	 *
	 * @var string[]
	 */
	public static $messages = [
		'main' => 'pdf-file-page-warning',
		'header' => 'pdf-file-page-warning-header',
		'info' => 'pdf-file-page-warning-info',
		'footer' => 'pdf-file-page-warning-footer',
	];

	/**
	 * Report whether the handler is usable.
	 *
	 * The handler is considered disabled unless $wgPdfProcessor,
	 * $wgPdfPostProcessor and $wgPdfInfo are all set; in that case a hint is
	 * written to the debug log.
	 *
	 * @return bool
	 */
	public function isEnabled() {
		global $wgPdfProcessor, $wgPdfPostProcessor, $wgPdfInfo;

		if ( !isset( $wgPdfProcessor ) || !isset( $wgPdfPostProcessor ) || !isset( $wgPdfInfo ) ) {
			wfDebug( "PdfHandler is disabled, please set the following\n" );
			wfDebug( "variables in LocalSettings.php:\n" );
			wfDebug( "\$wgPdfProcessor, \$wgPdfPostProcessor, \$wgPdfInfo\n" );
			return false;
		}
		return true;
	}

	/**
	 * PDFs always have to be rendered to be displayed.
	 *
	 * @param File $file Unused
	 * @return bool Always true
	 */
	public function mustRender( $file ) {
		return true;
	}

	/**
	 * PDFs are always treated as multi-page files.
	 *
	 * @param File $file Unused
	 * @return bool Always true
	 */
	public function isMultiPage( $file ) {
		return true;
	}

	/**
	 * Validate a single transform parameter.
	 *
	 * @param string $name Parameter name
	 * @param mixed $value Parameter value
	 * @return bool True only for a positive 'width', 'height' or 'page'
	 */
	public function validateParam( $name, $value ) {
		if ( $name === 'page' && trim( $value ) !== (string)intval( $value ) ) {
			// Extra junk on the end of page, probably actually a caption
			// e.g. [[File:Foo.pdf|thumb|Page 3 of the document shows foo]]
			return false;
		}
		// Strict comparison so that only the exact parameter names match.
		if ( in_array( $name, [ 'width', 'height', 'page' ], true ) ) {
			return ( $value > 0 );
		}
		return false;
	}

	/**
	 * Build the parameter string for the given parameters.
	 *
	 * @param array $params Transform parameters; 'page' defaults to 1
	 * @return string|bool "page<N>-<W>px", or false when no width is given
	 */
	public function makeParamString( $params ) {
		$page = $params['page'] ?? 1;
		if ( !isset( $params['width'] ) ) {
			return false;
		}
		return "page{$page}-{$params['width']}px";
	}

	/**
	 * Inverse of makeParamString().
	 *
	 * @param string $str Parameter string of the form "page<N>-<W>px"
	 * @return array|bool [ 'width' => ..., 'page' => ... ] with the matched
	 *  digit strings, or false when $str does not have the expected form
	 */
	public function parseParamString( $str ) {
		$m = [];

		if ( preg_match( '/^page(\d+)-(\d+)px$/', $str, $m ) ) {
			return [ 'width' => $m[2], 'page' => $m[1] ];
		}

		return false;
	}

	/**
	 * Reduce the transform parameters to 'width' and 'page'.
	 *
	 * @param array $params Must contain 'width' and 'page'
	 * @return array
	 */
	public function getScriptParams( $params ) {
		return [
			'width' => $params['width'],
			'page' => $params['page'],
		];
	}

	/**
	 * Map magic word IDs to the parameter names they set.
	 *
	 * @return string[]
	 */
	public function getParamMap() {
		return [
			'img_width' => 'width',
			'img_page' => 'page',
		];
	}

	/**
	 * Build a thumbnail error whose text is the given message rendered in
	 * the content language.
	 *
	 * @param int $width
	 * @param int $height
	 * @param string $msg Message key
	 * @return MediaTransformError
	 */
	protected function doThumbError( $width, $height, $msg ) {
		return new MediaTransformError( 'thumbnail_error',
			$width, $height, wfMessage( $msg )->inContentLanguage()->text() );
	}

	/**
	 * Render one page of the PDF to a thumbnail file.
	 *
	 * @param File $image Source file
	 * @param string $dstPath Filesystem path the thumbnail is written to
	 * @param string $dstUrl URL of the thumbnail
	 * @param array $params Transform parameters; normalised before use
	 * @param int $flags Bitfield; with self::TRANSFORM_LATER set, nothing is
	 *  rendered and only the thumbnail description is returned
	 * @return ThumbnailImage|MediaTransformError|TransformParameterError
	 */
	public function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) {
		global $wgPdfProcessor, $wgPdfPostProcessor, $wgPdfHandlerDpi, $wgPdfHandlerJpegQuality;

		if ( !$this->normaliseParams( $image, $params ) ) {
			return new TransformParameterError( $params );
		}

		$width = (int)$params['width'];
		$height = (int)$params['height'];
		$page = (int)$params['page'];

		if ( $page > $this->pageCount( $image ) ) {
			return $this->doThumbError( $width, $height, 'pdf_page_error' );
		}

		if ( $flags & self::TRANSFORM_LATER ) {
			return new ThumbnailImage( $image, $dstUrl, false, [
				'width' => $width,
				'height' => $height,
				'page' => $page,
			] );
		}

		if ( !wfMkdirParents( dirname( $dstPath ), null, __METHOD__ ) ) {
			return $this->doThumbError( $width, $height, 'thumbnail_dest_directory' );
		}

		// Thumbnail extraction is very inefficient for large files.
		// Provide a way to pool count limit the number of downloaders.
		if ( $image->getSize() >= 1e7 ) { // 10MB
			$work = new PoolCounterWorkViaCallback( 'GetLocalFileCopy', sha1( $image->getName() ),
				[
					'doWork' => function () use ( $image ) {
						return $image->getLocalRefPath();
					}
				]
			);
			$srcPath = $work->execute();
		} else {
			$srcPath = $image->getLocalRefPath();
		}

		if ( $srcPath === false ) { // could not download original
			return $this->doThumbError( $width, $height, 'filemissing' );
		}

		// First stage: render the requested page as JPEG to stdout.
		$cmd = '(' . wfEscapeShellArg(
			$wgPdfProcessor,
			"-sDEVICE=jpeg",
			"-sOutputFile=-",
			"-dFirstPage={$page}",
			"-dLastPage={$page}",
			"-dSAFER",
			"-r{$wgPdfHandlerDpi}",
			"-dBATCH",
			"-dNOPAUSE",
			"-q",
			$srcPath
		);
		// Second stage: read that image from stdin, resize it and write it
		// to the destination path.
		$cmd .= " | " . wfEscapeShellArg(
			$wgPdfPostProcessor,
			"-depth",
			"8",
			"-quality",
			$wgPdfHandlerJpegQuality,
			"-resize",
			$width,
			"-",
			$dstPath
		);
		$cmd .= ")";

		wfDebug( __METHOD__ . ": $cmd\n" );
		$retval = '';
		$err = wfShellExecWithStderr( $cmd, $retval );

		$removed = $this->removeBadFile( $dstPath, $retval );

		if ( $retval != 0 || $removed ) {
			wfDebugLog( 'thumbnail',
				sprintf( 'thumbnail failed on %s: error %d "%s" from "%s"',
				wfHostname(), $retval, trim( $err ), $cmd ) );
			return new MediaTransformError( 'thumbnail_error', $width, $height, $err );
		}

		return new ThumbnailImage( $image, $dstUrl, $dstPath, [
			'width' => $width,
			'height' => $height,
			'page' => $page,
		] );
	}

	/**
	 * Get the PdfImage for a path, cached on the file object when one is given.
	 *
	 * @param File|bool $image File to cache the PdfImage on; when falsy a
	 *  fresh, uncached PdfImage is returned
	 * @param string $path Path passed to the PdfImage constructor
	 * @return PdfImage
	 */
	private function getPdfImage( $image, $path ) {
		if ( !$image ) {
			$pdfimg = new PdfImage( $path );
		} elseif ( !isset( $image->pdfImage ) ) {
			$pdfimg = $image->pdfImage = new PdfImage( $path );
		} else {
			$pdfimg = $image->pdfImage;
		}

		return $pdfimg;
	}

	/**
	 * Get the unserialized metadata of a file, cached on the file object.
	 *
	 * @param File $image
	 * @return array|bool The metadata array, or false when the stored
	 *  metadata is invalid or could not be unserialized
	 */
	private function getMetaArray( $image ) {
		if ( isset( $image->pdfMetaArray ) ) {
			return $image->pdfMetaArray;
		}

		$metadata = $image->getMetadata();

		if ( !$this->isMetadataValid( $image, $metadata ) ) {
			wfDebug( "Pdf metadata is invalid or missing, should have been fixed in upgradeRow\n" );
			return false;
		}

		// The unserialization runs as pool-counter work keyed by file name.
		$work = new PoolCounterWorkViaCallback(
			'PdfHandler-unserialize-metadata',
			$image->getName(),
			[
				'doWork' => function () use ( $image, $metadata ) {
					Wikimedia\suppressWarnings();
					$image->pdfMetaArray = unserialize( $metadata );
					Wikimedia\restoreWarnings();
				},
			]
		);
		$work->execute();

		// The property is only assigned inside the callback; if the work did
		// not run, report "no metadata" instead of reading an unset property.
		return $image->pdfMetaArray ?? false;
	}

	/**
	 * Get the image size as reported by PdfImage::getImageSize().
	 *
	 * @param File|bool $image
	 * @param string $path
	 * @return mixed Whatever PdfImage::getImageSize() returns
	 */
	public function getImageSize( $image, $path ) {
		return $this->getPdfImage( $image, $path )->getImageSize();
	}

	/**
	 * Get the extension and MIME type used for thumbnails.
	 *
	 * The incoming $ext and $mime are ignored: the result is always derived
	 * from $wgPdfOutputExtension. The MIME lookup is cached in a static
	 * local, so it is performed at most once per request.
	 *
	 * @param string $ext Unused
	 * @param string $mime Unused
	 * @param array|null $params Unused
	 * @return array [ thumbnail extension, MIME type(s) guessed for it ]
	 */
	public function getThumbType( $ext, $mime, $params = null ) {
		global $wgPdfOutputExtension;
		// Deliberately not named $mime, so the cache does not shadow the parameter.
		static $thumbMime;

		if ( !isset( $thumbMime ) ) {
			$magic = MediaWiki\MediaWikiServices::getInstance()->getMimeAnalyzer();
			$thumbMime = $magic->guessTypesForExtension( $wgPdfOutputExtension );
		}
		return [ $wgPdfOutputExtension, $thumbMime ];
	}

	/**
	 * Extract the metadata of a file and serialize it for storage.
	 *
	 * @param File|bool $image
	 * @param string $path
	 * @return string Serialized result of PdfImage::retrieveMetaData()
	 */
	public function getMetadata( $image, $path ) {
		return serialize( $this->getPdfImage( $image, $path )->retrieveMetaData() );
	}

	/**
	 * Classify stored metadata.
	 *
	 * @param File $image Unused
	 * @param string $metadata Serialized metadata
	 * @return bool|int self::METADATA_BAD when empty, self::METADATA_COMPATIBLE
	 *  when it lacks the 'mergedMetadata' key, self::METADATA_GOOD otherwise
	 */
	public function isMetadataValid( $image, $metadata ) {
		if ( !$metadata || $metadata === serialize( [] ) ) {
			return self::METADATA_BAD;
		} elseif ( strpos( $metadata, 'mergedMetadata' ) === false ) {
			// Metadata without 'mergedMetadata' is still usable, but not current.
			return self::METADATA_COMPATIBLE;
		}
		return self::METADATA_GOOD;
	}

	/**
	 * Format the merged metadata of a file for display.
	 *
	 * @param File $image
	 * @param IContextSource|bool $context Passed through to formatMetadataHelper()
	 * @return array|bool False when there is no non-empty 'mergedMetadata' array
	 */
	public function formatMetadata( $image, $context = false ) {
		$meta = $image->getMetadata();

		if ( !$meta ) {
			return false;
		}
		Wikimedia\suppressWarnings();
		$meta = unserialize( $meta );
		Wikimedia\restoreWarnings();

		if ( !isset( $meta['mergedMetadata'] )
			|| !is_array( $meta['mergedMetadata'] )
			|| count( $meta['mergedMetadata'] ) < 1
		) {
			return false;
		}

		// Inherited from MediaHandler.
		return $this->formatMetadataHelper( $meta['mergedMetadata'], $context );
	}

	/**
	 * Get the number of pages of a file.
	 *
	 * @param File $image
	 * @return int|bool Page count, or false when no dimension info is available
	 */
	public function pageCount( File $image ) {
		$info = $this->getDimensionInfo( $image );

		return $info ? $info['pageCount'] : false;
	}

	/**
	 * Get the dimensions of one page.
	 *
	 * @param File $image
	 * @param int $page Page number, starting at 1
	 * @return array|bool The stored entry for that page, or false if unknown
	 */
	public function getPageDimensions( File $image, $page ) {
		$index = $page; // MW starts pages at 1, as they are stored here

		$info = $this->getDimensionInfo( $image );
		if ( $info && isset( $info['dimensionsByPage'][$index] ) ) {
			return $info['dimensionsByPage'][$index];
		}

		return false;
	}

	/**
	 * Get page count and per-page dimensions, cached in the main WAN object
	 * cache under a key derived from the file's SHA-1.
	 *
	 * @param File $file
	 * @return array|bool [ 'pageCount' => int, 'dimensionsByPage' => array
	 *  keyed by 1-based page number ], or false when the metadata is missing
	 *  or has no 'Pages' entry
	 */
	protected function getDimensionInfo( File $file ) {
		$cache = MediaWiki\MediaWikiServices::getInstance()->getMainWANObjectCache();
		return $cache->getWithSetCallback(
			$cache->makeKey( 'file-pdf', 'dimensions', $file->getSha1() ),
			$cache::TTL_INDEFINITE,
			function () use ( $file ) {
				$data = $this->getMetaArray( $file );
				if ( !$data || !isset( $data['Pages'] ) ) {
					return false;
				}
				unset( $data['text'] ); // lower peak RAM

				$dimsByPage = [];
				$count = intval( $data['Pages'] );
				for ( $i = 1; $i <= $count; $i++ ) {
					$dimsByPage[$i] = PdfImage::getPageSize( $data, $i );
				}

				return [ 'pageCount' => $count, 'dimensionsByPage' => $dimsByPage ];
			},
			[ 'pcTTL' => $cache::TTL_INDEFINITE ]
		);
	}

	/**
	 * Get the text of one page.
	 *
	 * @param File $image
	 * @param int $page Page number, starting at 1; the 'text' metadata array
	 *  is indexed from 0, hence the offset below
	 * @return string|bool The stored text, or false when none is available
	 */
	public function getPageText( File $image, $page ) {
		$data = $this->getMetaArray( $image );
		if ( !$data || !isset( $data['text'] ) || !isset( $data['text'][$page - 1] ) ) {
			return false;
		}
		return $data['text'][$page - 1];
	}

	/**
	 * Describe the warning about PDFs being potentially dangerous that is
	 * added to the file page.
	 *
	 * @param File $file Unused
	 * @return array With the keys 'messages', 'link' and 'module'
	 */
	public function getWarningConfig( $file ) {
		return [
			'messages' => self::$messages,
			'link' => '//www.mediawiki.org/wiki/Special:MyLanguage/Help:Security/PDF_files',
			'module' => 'pdfhandler.messages',
		];
	}

	/**
	 * Register a module with the warning messages in it.
	 *
	 * @param ResourceLoader &$resourceLoader
	 */
	public static function registerWarningModule( &$resourceLoader ) {
		$resourceLoader->register( 'pdfhandler.messages', [
			'messages' => array_values( self::$messages ),
		] );
	}
}
serialize()
unserialize( $serialized)
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfEscapeShellArg(... $args)
Version of escapeshellarg() that works better on Windows.
wfHostname()
Get host name of the current machine, for use in error reporting.
wfShellExecWithStderr( $cmd, &$retval=null, $environ=[], $limits=[])
Execute a shell command, returning both stdout and stderr.
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not.
wfMkdirParents( $dir, $mode=null, $caller=null)
Make directory, and make all parent directories if they don't exist.
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
Implements some public methods and some protected utility functions which are required by multiple child classes.
Definition File.php:61
Media handler abstract base class for images.
normaliseParams( $image, &$params)
const METADATA_COMPATIBLE
formatMetadataHelper( $metadataArray, $context=false)
sorts the visible/invisible field.
const METADATA_GOOD
removeBadFile( $dstPath, $retval=0)
Check for zero-sized thumbnails.
Basic media transform error class.
MediaWikiServices is the service locator for the application scope of MediaWiki.
Copyright © 2007 Martin Seidel (Xarax) jodeldi@gmx.de
getThumbType( $ext, $mime, $params=null)
getMetaArray( $image)
getPdfImage( $image, $path)
static registerWarningModule(&$resourceLoader)
Register a module with the warning messages in it.
validateParam( $name, $value)
getPageDimensions(File $image, $page)
getDimensionInfo(File $file)
getMetadata( $image, $path)
isMetadataValid( $image, $metadata)
formatMetadata( $image, $context=false)
getImageSize( $image, $path)
isMultiPage( $file)
getWarningConfig( $file)
Adds a warning about PDFs being potentially dangerous to the file page.
static $messages
getScriptParams( $params)
getPageText(File $image, $page)
makeParamString( $params)
parseParamString( $str)
pageCount(File $image)
doThumbError( $width, $height, $msg)
doTransform( $image, $dstPath, $dstUrl, $params, $flags=0)
mustRender( $file)
inspired by djvuimage from Brion Vibber modified and written by xarax
Definition PdfImage.php:32
static getPageSize( $data, $page)
Definition PdfImage.php:74
Convenience class for dealing with PoolCounters using callbacks.
execute( $skipcache=false)
Get the result of the work (whatever it is), or the result of the error() function.
Media transform output for images.
Shortcut class for parameter validation errors.
$resourceLoader
Definition load.php:44
$context
Definition load.php:45
$cache
Definition mcc.php:33
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition router.php:42
if(!is_readable( $file)) $ext
Definition router.php:48