MediaWiki  1.34.0
PdfHandler.php
Go to the documentation of this file.
1 <?php
2 
4 
/**
 * Media handler for PDF files.
 *
 * Renders single PDF pages to thumbnails by piping the output of the
 * configured PDF processor ($wgPdfProcessor) into the configured post
 * processor ($wgPdfPostProcessor), and exposes per-page dimensions, text and
 * metadata extracted through PdfImage.
 */
class PdfHandler extends ImageHandler {
	/**
	 * Message keys of the file-page warning, keyed by the part of the
	 * warning they belong to.
	 *
	 * @var string[]
	 */
	public static $messages = [
		'main' => 'pdf-file-page-warning',
		'header' => 'pdf-file-page-warning-header',
		'info' => 'pdf-file-page-warning-info',
		'footer' => 'pdf-file-page-warning-footer',
	];

	/**
	 * Report whether the handler is configured.
	 *
	 * All of $wgPdfProcessor, $wgPdfPostProcessor and $wgPdfInfo must be set;
	 * otherwise a hint is written to the debug log.
	 *
	 * @return bool
	 */
	public function isEnabled() {
		global $wgPdfProcessor, $wgPdfPostProcessor, $wgPdfInfo;

		if ( !isset( $wgPdfProcessor ) || !isset( $wgPdfPostProcessor ) || !isset( $wgPdfInfo ) ) {
			wfDebug( "PdfHandler is disabled, please set the following\n" );
			wfDebug( "variables in LocalSettings.php:\n" );
			wfDebug( "\$wgPdfProcessor, \$wgPdfPostProcessor, \$wgPdfInfo\n" );
			return false;
		}
		return true;
	}

	/**
	 * @param File $file
	 * @return bool Always true
	 */
	public function mustRender( $file ) {
		return true;
	}

	/**
	 * @param File $file
	 * @return bool Always true
	 */
	public function isMultiPage( $file ) {
		return true;
	}

	/**
	 * Validate a single transform parameter.
	 *
	 * @param string $name Parameter name
	 * @param mixed $value Parameter value
	 * @return bool True for a positive 'width', 'height' or 'page'; false for
	 *  anything else, including a 'page' value that is not a plain integer
	 */
	public function validateParam( $name, $value ) {
		if ( $name === 'page' && trim( $value ) !== (string)intval( $value ) ) {
			// Extra junk on the end of page, probably actually a caption
			// e.g. [[File:Foo.pdf|thumb|Page 3 of the document shows foo]]
			return false;
		}
		// Strict comparison so that a non-string name can never loosely
		// match one of the known parameter names.
		if ( in_array( $name, [ 'width', 'height', 'page' ], true ) ) {
			return ( $value > 0 );
		}
		return false;
	}

	/**
	 * Build the "page<N>-<W>px" parameter string for a thumbnail.
	 *
	 * @param array $params Transform parameters; 'page' defaults to 1
	 * @return string|bool False when no width is given
	 */
	public function makeParamString( $params ) {
		$page = $params['page'] ?? 1;
		if ( !isset( $params['width'] ) ) {
			return false;
		}
		return "page{$page}-{$params['width']}px";
	}

	/**
	 * Inverse of makeParamString().
	 *
	 * @param string $str
	 * @return array|bool Array with 'width' and 'page' (as matched strings),
	 *  or false when $str does not have the expected form
	 */
	public function parseParamString( $str ) {
		$m = [];

		if ( preg_match( '/^page(\d+)-(\d+)px$/', $str, $m ) ) {
			return [ 'width' => $m[2], 'page' => $m[1] ];
		}

		return false;
	}

	/**
	 * Reduce the transform parameters to those passed on to a thumbnail script.
	 *
	 * @param array $params Must contain 'width' and 'page'
	 * @return array
	 */
	public function getScriptParams( $params ) {
		return [
			'width' => $params['width'],
			'page' => $params['page'],
		];
	}

	/**
	 * Map image magic-word names to the handler parameter names.
	 *
	 * @return string[]
	 */
	public function getParamMap() {
		return [
			'img_width' => 'width',
			'img_page' => 'page',
		];
	}

	/**
	 * Build a thumbnail error whose text is the given message rendered in the
	 * content language.
	 *
	 * @param int $width
	 * @param int $height
	 * @param string $msg Message key
	 * @return MediaTransformError
	 */
	protected function doThumbError( $width, $height, $msg ) {
		return new MediaTransformError( 'thumbnail_error',
			$width, $height, wfMessage( $msg )->inContentLanguage()->text() );
	}

	/**
	 * Render one page of the PDF as a thumbnail.
	 *
	 * @param File $image
	 * @param string $dstPath Filesystem path the thumbnail is written to
	 * @param string $dstUrl URL of the thumbnail
	 * @param array $params Transform parameters (normalised in place)
	 * @param int $flags Bitfield; TRANSFORM_LATER skips the actual rendering
	 * @return MediaTransformError|ThumbnailImage|TransformParameterError
	 */
	public function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) {
		global $wgPdfProcessor, $wgPdfPostProcessor, $wgPdfHandlerDpi, $wgPdfHandlerJpegQuality;

		if ( !$this->normaliseParams( $image, $params ) ) {
			return new TransformParameterError( $params );
		}

		$width = (int)$params['width'];
		$height = (int)$params['height'];
		$page = (int)$params['page'];

		if ( $page > $this->pageCount( $image ) ) {
			return $this->doThumbError( $width, $height, 'pdf_page_error' );
		}

		if ( $flags & self::TRANSFORM_LATER ) {
			// Deferred rendering: hand back the target URL without a local path.
			return new ThumbnailImage( $image, $dstUrl, false, [
				'width' => $width,
				'height' => $height,
				'page' => $page,
			] );
		}

		if ( !wfMkdirParents( dirname( $dstPath ), null, __METHOD__ ) ) {
			return $this->doThumbError( $width, $height, 'thumbnail_dest_directory' );
		}

		// Thumbnail extraction is very inefficient for large files.
		// Provide a way to pool count limit the number of downloaders.
		if ( $image->getSize() >= 1e7 ) { // 10MB
			$work = new PoolCounterWorkViaCallback( 'GetLocalFileCopy', sha1( $image->getName() ),
				[
					'doWork' => function () use ( $image ) {
						return $image->getLocalRefPath();
					}
				]
			);
			$srcPath = $work->execute();
		} else {
			$srcPath = $image->getLocalRefPath();
		}

		if ( $srcPath === false ) { // could not download original
			return $this->doThumbError( $width, $height, 'filemissing' );
		}

		// First stage: the PDF processor writes the requested page as JPEG to stdout.
		$cmd = '(' . wfEscapeShellArg(
			$wgPdfProcessor,
			"-sDEVICE=jpeg",
			"-sOutputFile=-",
			"-dFirstPage={$page}",
			"-dLastPage={$page}",
			"-dSAFER",
			"-r{$wgPdfHandlerDpi}",
			"-dBATCH",
			"-dNOPAUSE",
			"-q",
			$srcPath
		);
		// Second stage: the post processor reads stdin, resizes and writes $dstPath.
		$cmd .= " | " . wfEscapeShellArg(
			$wgPdfPostProcessor,
			"-depth",
			"8",
			"-quality",
			$wgPdfHandlerJpegQuality,
			"-resize",
			$width,
			"-",
			$dstPath
		);
		$cmd .= ")";

		wfDebug( __METHOD__ . ": $cmd\n" );
		$retval = '';
		$err = wfShellExecWithStderr( $cmd, $retval );

		// Inherited from MediaHandler: checks for zero-sized thumbnails.
		$removed = $this->removeBadFile( $dstPath, $retval );

		if ( $retval != 0 || $removed ) {
			wfDebugLog( 'thumbnail',
				sprintf( 'thumbnail failed on %s: error %d "%s" from "%s"',
				wfHostname(), $retval, trim( $err ), $cmd ) );
			return new MediaTransformError( 'thumbnail_error', $width, $height, $err );
		}

		return new ThumbnailImage( $image, $dstUrl, $dstPath, [
			'width' => $width,
			'height' => $height,
			'page' => $page,
		] );
	}

	/**
	 * Get the PdfImage for a path, memoised on the file object when one is given.
	 *
	 * @param File|false|null $image File to cache the PdfImage on; any falsy
	 *  value disables caching
	 * @param string $path
	 * @return PdfImage
	 */
	private function getPdfImage( $image, $path ) {
		if ( !$image ) {
			$pdfimg = new PdfImage( $path );
		} elseif ( !isset( $image->pdfImage ) ) {
			$pdfimg = $image->pdfImage = new PdfImage( $path );
		} else {
			$pdfimg = $image->pdfImage;
		}

		return $pdfimg;
	}

	/**
	 * Get the unserialized metadata of a file, memoised on the file object.
	 *
	 * @param File $image
	 * @return array|bool False when the metadata is invalid or missing, or
	 *  when it could not be unserialized
	 */
	private function getMetaArray( $image ) {
		if ( isset( $image->pdfMetaArray ) ) {
			return $image->pdfMetaArray;
		}

		$metadata = $image->getMetadata();

		if ( !$this->isMetadataValid( $image, $metadata ) ) {
			wfDebug( "Pdf metadata is invalid or missing, should have been fixed in upgradeRow\n" );
			return false;
		}

		$work = new PoolCounterWorkViaCallback(
			'PdfHandler-unserialize-metadata',
			$image->getName(),
			[
				'doWork' => function () use ( $image, $metadata ) {
					// NOTE(review): unserialize() on stored metadata; presumably
					// only ever written by getMetadata() below - confirm.
					Wikimedia\suppressWarnings();
					$image->pdfMetaArray = unserialize( $metadata );
					Wikimedia\restoreWarnings();
				},
			]
		);
		$work->execute();

		// The property is only set when the doWork callback actually ran;
		// fall back to false instead of reading an undefined property.
		return $image->pdfMetaArray ?? false;
	}

	/**
	 * @param File|false|null $image
	 * @param string $path
	 * @return mixed Whatever PdfImage::getImageSize() returns
	 */
	public function getImageSize( $image, $path ) {
		return $this->getPdfImage( $image, $path )->getImageSize();
	}

	/**
	 * Get the extension and MIME type of generated thumbnails.
	 *
	 * The result depends only on $wgPdfOutputExtension; the arguments are
	 * not used. The MIME lookup is done once per request.
	 *
	 * @param string $ext Unused
	 * @param string $mime Unused
	 * @param array|null $params Unused
	 * @return array [ extension, MIME type ]
	 */
	public function getThumbType( $ext, $mime, $params = null ) {
		global $wgPdfOutputExtension;
		// Deliberately not named $mime: a static of that name would silently
		// replace the parameter.
		static $thumbMime;

		if ( !isset( $thumbMime ) ) {
			$magic = MediaWiki\MediaWikiServices::getInstance()->getMimeAnalyzer();
			$thumbMime = $magic->guessTypesForExtension( $wgPdfOutputExtension );
		}
		return [ $wgPdfOutputExtension, $thumbMime ];
	}

	/**
	 * Extract the metadata of a PDF and return it serialized.
	 *
	 * @param File|false|null $image
	 * @param string $path
	 * @return string Serialized result of PdfImage::retrieveMetaData()
	 */
	public function getMetadata( $image, $path ) {
		return serialize( $this->getPdfImage( $image, $path )->retrieveMetaData() );
	}

	/**
	 * Classify stored metadata.
	 *
	 * @param File $image
	 * @param string $metadata Serialized metadata
	 * @return bool|int METADATA_BAD when empty, METADATA_COMPATIBLE when the
	 *  'mergedMetadata' entry is absent, METADATA_GOOD otherwise
	 */
	public function isMetadataValid( $image, $metadata ) {
		if ( !$metadata || $metadata === serialize( [] ) ) {
			return self::METADATA_BAD;
		} elseif ( strpos( $metadata, 'mergedMetadata' ) === false ) {
			// Usable, but lacks the 'mergedMetadata' entry that
			// formatMetadata() displays.
			return self::METADATA_COMPATIBLE;
		}
		return self::METADATA_GOOD;
	}

	/**
	 * Format the merged metadata of a file for display.
	 *
	 * @param File $image
	 * @param IContextSource|bool $context
	 * @return array|bool False when there is no usable 'mergedMetadata'
	 */
	public function formatMetadata( $image, $context = false ) {
		$meta = $image->getMetadata();

		if ( !$meta ) {
			return false;
		}
		Wikimedia\suppressWarnings();
		$meta = unserialize( $meta );
		Wikimedia\restoreWarnings();

		if ( !isset( $meta['mergedMetadata'] )
			|| !is_array( $meta['mergedMetadata'] )
			|| count( $meta['mergedMetadata'] ) < 1
		) {
			return false;
		}

		// Inherited from MediaHandler.
		return $this->formatMetadataHelper( $meta['mergedMetadata'], $context );
	}

	/**
	 * @param File $image
	 * @return int|bool Number of pages, or false when unknown
	 */
	public function pageCount( File $image ) {
		$info = $this->getDimensionInfo( $image );

		return $info ? $info['pageCount'] : false;
	}

	/**
	 * @param File $image
	 * @param int $page 1-based page number
	 * @return mixed|bool Stored PdfImage::getPageSize() result for the page,
	 *  or false when the page is unknown
	 */
	public function getPageDimensions( File $image, $page ) {
		$index = $page; // MW starts pages at 1, as they are stored here

		$info = $this->getDimensionInfo( $image );
		if ( $info && isset( $info['dimensionsByPage'][$index] ) ) {
			return $info['dimensionsByPage'][$index];
		}

		return false;
	}

	/**
	 * Get the page count and per-page dimensions of a file, cached in the
	 * main WAN object cache under a key derived from the file's SHA-1.
	 *
	 * @param File $file
	 * @return array|bool Array with 'pageCount' and 'dimensionsByPage'
	 *  (keyed by 1-based page number), or false when the metadata is
	 *  unavailable or has no 'Pages' entry
	 */
	protected function getDimensionInfo( File $file ) {
		// Qualified the same way as in getThumbType() so that it resolves
		// without relying on a file-level import.
		$cache = MediaWiki\MediaWikiServices::getInstance()->getMainWANObjectCache();
		return $cache->getWithSetCallback(
			$cache->makeKey( 'file-pdf', 'dimensions', $file->getSha1() ),
			$cache::TTL_INDEFINITE,
			function () use ( $file ) {
				$data = $this->getMetaArray( $file );
				if ( !$data || !isset( $data['Pages'] ) ) {
					return false;
				}
				unset( $data['text'] ); // lower peak RAM

				$dimsByPage = [];
				$count = intval( $data['Pages'] );
				for ( $i = 1; $i <= $count; $i++ ) {
					$dimsByPage[$i] = PdfImage::getPageSize( $data, $i );
				}

				return [ 'pageCount' => $count, 'dimensionsByPage' => $dimsByPage ];
			},
			[ 'pcTTL' => $cache::TTL_INDEFINITE ]
		);
	}

	/**
	 * Get the extracted text of one page.
	 *
	 * @param File $image
	 * @param int $page 1-based page number; the text array is 0-based
	 * @return string|bool False when no text is stored for the page
	 */
	public function getPageText( File $image, $page ) {
		$data = $this->getMetaArray( $image );
		if ( !$data || !isset( $data['text'] ) || !isset( $data['text'][$page - 1] ) ) {
			return false;
		}
		return $data['text'][$page - 1];
	}

	/**
	 * Adds a warning about PDFs being potentially dangerous to the file page.
	 *
	 * @param File $file
	 * @return array Array with the keys 'messages', 'link' and 'module'
	 */
	public function getWarningConfig( $file ) {
		return [
			'messages' => self::$messages,
			'link' => '//www.mediawiki.org/wiki/Special:MyLanguage/Help:Security/PDF_files',
			'module' => 'pdfhandler.messages',
		];
	}

	/**
	 * Register a module with the warning messages in it.
	 *
	 * @param ResourceLoader &$resourceLoader
	 */
	public static function registerWarningModule( &$resourceLoader ) {
		$resourceLoader->register( 'pdfhandler.messages', [
			'messages' => array_values( self::$messages ),
		] );
	}
}
MediaHandler\removeBadFile
removeBadFile( $dstPath, $retval=0)
Check for zero-sized thumbnails.
Definition: MediaHandler.php:675
PdfHandler
Copyright © 2007 Martin Seidel (Xarax) jodeldi@gmx.de
Definition: PdfHandler.php:27
MediaTransformError
Basic media transform error class.
Definition: MediaTransformError.php:29
ThumbnailImage
Media transform output for images.
Definition: ThumbnailImage.php:29
PdfHandler\mustRender
mustRender( $file)
Definition: PdfHandler.php:54
wfMkdirParents
wfMkdirParents( $dir, $mode=null, $caller=null)
Make directory, and make all parent directories if they don't exist.
Definition: GlobalFunctions.php:1966
MediaWiki\MediaWikiServices
MediaWikiServices is the service locator for the application scope of MediaWiki.
Definition: MediaWikiServices.php:117
$resourceLoader
$resourceLoader
Definition: load.php:44
PdfHandler\parseParamString
parseParamString( $str)
Definition: PdfHandler.php:99
PdfHandler\isMultiPage
isMultiPage( $file)
Definition: PdfHandler.php:62
PdfHandler\isMetadataValid
isMetadataValid( $image, $metadata)
Definition: PdfHandler.php:334
PdfImage\getPageSize
static getPageSize( $data, $page)
Definition: PdfImage.php:74
PdfHandler\makeParamString
makeParamString( $params)
Definition: PdfHandler.php:87
$file
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition: router.php:42
wfHostname
wfHostname()
Fetch server name for use in error reporting etc.
Definition: GlobalFunctions.php:1326
PdfHandler\getDimensionInfo
getDimensionInfo(File $file)
Definition: PdfHandler.php:395
wfMessage
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
Definition: GlobalFunctions.php:1264
PdfHandler\getWarningConfig
getWarningConfig( $file)
Adds a warning about PDFs being potentially dangerous to the file page.
Definition: PdfHandler.php:438
PdfHandler\getPageText
getPageText(File $image, $page)
Definition: PdfHandler.php:424
PoolCounterWorkViaCallback
Convenience class for dealing with PoolCounters using callbacks.
Definition: PoolCounterWorkViaCallback.php:28
serialize
serialize()
Definition: ApiMessageTrait.php:138
wfDebugLog
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not.
Definition: GlobalFunctions.php:1007
PdfImage
Inspired by DjvuImage from Brion Vibber; modified and written by Xarax.
Definition: PdfImage.php:32
MediaWiki\MediaWikiServices\getInstance
static getInstance()
Returns the global default instance of the top level service locator.
Definition: MediaWikiServices.php:138
MediaHandler\METADATA_COMPATIBLE
const METADATA_COMPATIBLE
Definition: MediaHandler.php:34
File
Implements some public methods and some protected utility functions which are required by multiple child classes.
Definition: File.php:61
PoolCounterWork\execute
execute( $skipcache=false)
Get the result of the work (whatever it is), or the result of the error() function.
Definition: PoolCounterWork.php:106
ImageHandler
Media handler abstract base class for images.
Definition: ImageHandler.php:29
PdfHandler\pageCount
pageCount(File $image)
Definition: PdfHandler.php:373
PdfHandler\registerWarningModule
static registerWarningModule(&$resourceLoader)
Register a module with the warning messages in it.
Definition: PdfHandler.php:450
wfDebug
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Definition: GlobalFunctions.php:913
ImageHandler\normaliseParams
normaliseParams( $image, &$params)
Definition: ImageHandler.php:86
PdfHandler\getMetaArray
getMetaArray( $image)
Definition: PdfHandler.php:263
PdfHandler\getImageSize
getImageSize( $image, $path)
Definition: PdfHandler.php:299
PdfHandler\doThumbError
doThumbError( $width, $height, $msg)
Definition: PdfHandler.php:136
PdfHandler\getMetadata
getMetadata( $image, $path)
Definition: PdfHandler.php:325
TransformParameterError
Shortcut class for parameter validation errors.
Definition: TransformParameterError.php:29
PdfHandler\formatMetadata
formatMetadata( $image, $context=false)
Definition: PdfHandler.php:348
PdfHandler\getScriptParams
getScriptParams( $params)
Definition: PdfHandler.php:113
$context
$context
Definition: load.php:45
unserialize
unserialize( $serialized)
Definition: ApiMessageTrait.php:146
PdfHandler\doTransform
doTransform( $image, $dstPath, $dstUrl, $params, $flags=0)
Definition: PdfHandler.php:149
PdfHandler\getParamMap
getParamMap()
Definition: PdfHandler.php:123
$cache
$cache
Definition: mcc.php:33
MediaHandler\formatMetadataHelper
formatMetadataHelper( $metadataArray, $context=false)
sorts the visible/invisible field.
Definition: MediaHandler.php:494
$path
$path
Definition: NoLocalSettings.php:25
MediaHandler\METADATA_BAD
const METADATA_BAD
Definition: MediaHandler.php:33
wfEscapeShellArg
wfEscapeShellArg(... $args)
Version of escapeshellarg() that works better on Windows.
Definition: GlobalFunctions.php:2099
$ext
if(!is_readable( $file)) $ext
Definition: router.php:48
PdfHandler\getPageDimensions
getPageDimensions(File $image, $page)
Definition: PdfHandler.php:384
PdfHandler\isEnabled
isEnabled()
Definition: PdfHandler.php:38
PdfHandler\getThumbType
getThumbType( $ext, $mime, $params=null)
Definition: PdfHandler.php:309
PdfHandler\getPdfImage
getPdfImage( $image, $path)
Definition: PdfHandler.php:247
PdfHandler\$messages
static $messages
Definition: PdfHandler.php:28
MediaHandler\METADATA_GOOD
const METADATA_GOOD
Definition: MediaHandler.php:32
wfShellExecWithStderr
wfShellExecWithStderr( $cmd, &$retval=null, $environ=[], $limits=[])
Execute a shell command, returning both stdout and stderr.
Definition: GlobalFunctions.php:2180
PdfHandler\validateParam
validateParam( $name, $value)
Definition: PdfHandler.php:71