MediaWiki REL1_33
Go to the documentation of this file.
24class PdfHandler extends ImageHandler {
/**
 * Message keys used for the PDF file-page warning, indexed by the part of
 * the warning each one fills.
 *
 * @var string[]
 */
public static $messages = [
	'main'   => 'pdf-file-page-warning',
	'header' => 'pdf-file-page-warning-header',
	'info'   => 'pdf-file-page-warning-info',
	'footer' => 'pdf-file-page-warning-footer',
];
/**
 * Report whether the external PDF tooling has been configured.
 *
 * The handler needs $wgPdfProcessor, $wgPdfPostProcessor and $wgPdfInfo.
 * When any of them is unset, a hint naming the three variables is written
 * to the debug log.
 *
 * @return bool True when all three configuration variables are set
 */
public function isEnabled() {
	// The settings live in the global scope. Without this declaration the
	// isset() below would inspect undefined locals and always fail.
	global $wgPdfProcessor, $wgPdfPostProcessor, $wgPdfInfo;

	if ( isset( $wgPdfProcessor, $wgPdfPostProcessor, $wgPdfInfo ) ) {
		return true;
	}

	wfDebug( "PdfHandler is disabled, please set the following\n" );
	wfDebug( "variables in LocalSettings.php:\n" );
	wfDebug( "\$wgPdfProcessor, \$wgPdfPostProcessor, \$wgPdfInfo\n" );
	return false;
}
/**
 * Whether the file must be rendered before it can be displayed.
 *
 * @param File $file Unused
 * @return bool Always true for this handler
 */
public function mustRender( $file ) {
	$needsRendering = true;

	return $needsRendering;
}
/**
 * Whether the file is treated as a multi-page document.
 *
 * @param File $file Unused
 * @return bool Always true for this handler
 */
public function isMultiPage( $file ) {
	$multiPage = true;

	return $multiPage;
}
/**
 * Validate a single thumbnail parameter.
 *
 * Only 'width', 'height' and 'page' are accepted, and each must compare
 * greater than zero. A 'page' value must additionally be a bare integer
 * (ignoring surrounding whitespace).
 *
 * @param string $name Parameter name
 * @param mixed $value Parameter value
 * @return bool True when the parameter is recognised and its value is acceptable
 */
public function validateParam( $name, $value ) {
	$recognised = in_array( $name, [ 'width', 'height', 'page' ] );
	if ( !$recognised ) {
		return false;
	}

	if ( $name === 'page' ) {
		$trimmed = trim( $value );
		$asInteger = (string)intval( $value );
		if ( $trimmed !== $asInteger ) {
			// Extra junk on the end of page, probably actually a caption
			// e.g. [[File:Foo.pdf|thumb|Page 3 of the document shows foo]]
			return false;
		}
	}

	return ( $value > 0 );
}
/**
 * Build the parameter string of the form "page<N>-<W>px".
 *
 * @param array $params Must contain 'width'; 'page' defaults to 1 when absent
 * @return string|false The parameter string, or false when no width is given
 */
public function makeParamString( $params ) {
	if ( !isset( $params['width'] ) ) {
		return false;
	}

	$pageNumber = $params['page'] ?? 1;
	$widthPx = $params['width'];

	return "page{$pageNumber}-{$widthPx}px";
}
/**
 * Parse a parameter string of the form "page<N>-<W>px".
 *
 * @param string $str Parameter string to parse
 * @return array|false [ 'width' => ..., 'page' => ... ] holding the matched
 *  digit strings, or false when the string does not match
 */
public function parseParamString( $str ) {
	$matches = [];
	$matched = preg_match( '/^page(\d+)-(\d+)px$/', $str, $matches );

	if ( !$matched ) {
		return false;
	}

	$pageNumber = $matches[1];
	$widthPx = $matches[2];

	return [ 'width' => $widthPx, 'page' => $pageNumber ];
}
/**
 * Reduce the parameter array to the 'width' and 'page' entries.
 *
 * @param array $params Must contain 'width' and 'page'
 * @return array [ 'width' => ..., 'page' => ... ]
 */
public function getScriptParams( $params ) {
	$scriptParams = [];
	$scriptParams['width'] = $params['width'];
	$scriptParams['page'] = $params['page'];

	return $scriptParams;
}
/**
 * Map magic-word style parameter names to the handler's parameter names.
 *
 * @return string[] 'img_width' maps to 'width', 'img_page' maps to 'page'
 */
public function getParamMap() {
	$map = [];
	$map['img_width'] = 'width';
	$map['img_page'] = 'page';

	return $map;
}
/**
 * Build a 'thumbnail_error' transform error whose detail text is the given
 * message rendered in the content language.
 *
 * @param int $width Width passed through to the error object
 * @param int $height Height passed through to the error object
 * @param string $msg Message key for the detail text
 * @return MediaTransformError
 */
protected function doThumbError( $width, $height, $msg ) {
	$detail = wfMessage( $msg )->inContentLanguage()->text();

	return new MediaTransformError( 'thumbnail_error', $width, $height, $detail );
}
/**
 * Render one page of a PDF to a thumbnail through an external shell pipeline.
 *
 * The configured PDF processor is asked for a JPEG of the requested page on
 * stdout, which is piped into the configured post-processor to be resized
 * and written to $dstPath.
 *
 * @param File $image Source file; its size, name and local reference path are read
 * @param string $dstPath Filesystem path the thumbnail is written to
 * @param string $dstUrl URL handed to the resulting ThumbnailImage
 * @param array $params Transform parameters; normalised in place, after which
 *  'width', 'height' and 'page' are read
 * @param int $flags Bitfield; with self::TRANSFORM_LATER set, nothing is rendered
 * @return ThumbnailImage|MediaTransformError|TransformParameterError
 */
public function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) {
	// The command line below is built from these settings. They live in the
	// global scope and must be imported, otherwise the pipeline would be
	// assembled from undefined locals.
	global $wgPdfProcessor, $wgPdfPostProcessor, $wgPdfHandlerDpi, $wgPdfHandlerJpegQuality;

	if ( !$this->normaliseParams( $image, $params ) ) {
		return new TransformParameterError( $params );
	}

	$width = (int)$params['width'];
	$height = (int)$params['height'];
	$page = (int)$params['page'];

	if ( $page > $this->pageCount( $image ) ) {
		return $this->doThumbError( $width, $height, 'pdf_page_error' );
	}

	if ( $flags & self::TRANSFORM_LATER ) {
		// Deferred rendering: describe the thumbnail without producing a file.
		return new ThumbnailImage( $image, $dstUrl, false, [
			'width' => $width,
			'height' => $height,
			'page' => $page,
		] );
	}

	if ( !wfMkdirParents( dirname( $dstPath ), null, __METHOD__ ) ) {
		return $this->doThumbError( $width, $height, 'thumbnail_dest_directory' );
	}

	// Thumbnail extraction is very inefficient for large files.
	// Provide a way to pool count limit the number of downloaders.
	if ( $image->getSize() >= 1e7 ) { // 10MB
		$work = new PoolCounterWorkViaCallback( 'GetLocalFileCopy', sha1( $image->getName() ),
			[
				'doWork' => function () use ( $image ) {
					return $image->getLocalRefPath();
				}
			]
		);
		$srcPath = $work->execute();
	} else {
		$srcPath = $image->getLocalRefPath();
	}

	if ( $srcPath === false ) { // could not download original
		return $this->doThumbError( $width, $height, 'filemissing' );
	}

	// First stage: write the requested page as JPEG to stdout.
	$cmd = '(' . wfEscapeShellArg(
		$wgPdfProcessor,
		"-sDEVICE=jpeg",
		"-sOutputFile=-",
		"-dFirstPage={$page}",
		"-dLastPage={$page}",
		"-dSAFER",
		"-r{$wgPdfHandlerDpi}",
		"-dBATCH",
		"-dNOPAUSE",
		"-q",
		$srcPath
	);
	// Second stage: read the image from stdin, resize it and write $dstPath.
	$cmd .= " | " . wfEscapeShellArg(
		$wgPdfPostProcessor,
		"-depth",
		"8",
		"-quality",
		$wgPdfHandlerJpegQuality,
		"-resize",
		$width,
		"-",
		$dstPath
	);
	$cmd .= ")";

	wfDebug( __METHOD__ . ": $cmd\n" );
	$retval = '';
	$err = wfShellExecWithStderr( $cmd, $retval );

	// A bad (e.g. zero-sized) output file counts as a failure even when the
	// pipeline exited with status 0.
	$removed = $this->removeBadFile( $dstPath, $retval );

	if ( $retval != 0 || $removed ) {
		wfDebugLog( 'thumbnail',
			sprintf( 'thumbnail failed on %s: error %d "%s" from "%s"',
			wfHostname(), $retval, trim( $err ), $cmd ) );
		return new MediaTransformError( 'thumbnail_error', $width, $height, $err );
	}

	return new ThumbnailImage( $image, $dstUrl, $dstPath, [
		'width' => $width,
		'height' => $height,
		'page' => $page,
	] );
}
/**
 * Get the PdfImage wrapper for a path, reusing the one stored on the file
 * object when there is one.
 *
 * @param File|false|null $image File object used to hold the PdfImage; when
 *  falsy a fresh, unshared PdfImage is returned
 * @param string $path Path handed to the PdfImage constructor
 * @return PdfImage
 */
private function getPdfImage( $image, $path ) {
	if ( !$image ) {
		return new PdfImage( $path );
	}

	if ( !isset( $image->pdfImage ) ) {
		// First request for this file object: create the wrapper and keep it.
		$image->pdfImage = new PdfImage( $path );
	}

	return $image->pdfImage;
}
/**
 * Get the unserialized metadata array of a file, keeping the result on the
 * file object so later calls skip the unserialization.
 *
 * @param File $image File whose metadata is wanted
 * @return array|false The metadata, or false when it is invalid, missing,
 *  or could not be unserialized
 */
private function getMetaArray( $image ) {
	if ( isset( $image->pdfMetaArray ) ) {
		return $image->pdfMetaArray;
	}

	$metadata = $image->getMetadata();

	if ( !$this->isMetadataValid( $image, $metadata ) ) {
		wfDebug( "Pdf metadata is invalid or missing, should have been fixed in upgradeRow\n" );
		return false;
	}

	$work = new PoolCounterWorkViaCallback(
		'PdfHandler-unserialize-metadata',
		$image->getName(),
		[
			'doWork' => function () use ( $image, $metadata ) {
				$image->pdfMetaArray = unserialize( $metadata );
			},
		]
	);
	$work->execute();

	// execute() may finish without having run doWork, in which case the
	// property was never assigned. Report that as "no metadata" rather than
	// reading an undefined property.
	return $image->pdfMetaArray ?? false;
}
/**
 * Get the image size as reported by the file's PdfImage wrapper.
 *
 * @param File|false|null $image File object, passed on to getPdfImage()
 * @param string $path Path to the PDF
 * @return mixed Whatever PdfImage::getImageSize() returns
 */
public function getImageSize( $image, $path ) {
	$pdf = $this->getPdfImage( $image, $path );

	return $pdf->getImageSize();
}
/**
 * Get the extension and MIME type used for thumbnails.
 *
 * Both come from $wgPdfOutputExtension; the incoming arguments are not
 * consulted. The MIME lookup is done once and reused on later calls.
 *
 * @param string $ext Unused
 * @param string $mime Unused
 * @param array|null $params Unused
 * @return array [ extension, MIME type guessed for that extension ]
 */
public function getThumbType( $ext, $mime, $params = null ) {
	// The output extension is a global setting and must be imported here.
	global $wgPdfOutputExtension;
	// Cache for the lookup below. It has its own name so that it does not
	// re-declare the $mime parameter.
	static $outputMime;

	if ( !isset( $outputMime ) ) {
		$magic = MediaWiki\MediaWikiServices::getInstance()->getMimeAnalyzer();
		$outputMime = $magic->guessTypesForExtension( $wgPdfOutputExtension );
	}
	return [ $wgPdfOutputExtension, $outputMime ];
}
/**
 * Retrieve the metadata of a PDF and return it in serialized form.
 *
 * @param File|false|null $image File object, passed on to getPdfImage()
 * @param string $path Path to the PDF
 * @return string Serialized result of PdfImage::retrieveMetaData()
 */
public function getMetadata( $image, $path ) {
	$pdf = $this->getPdfImage( $image, $path );
	$metaData = $pdf->retrieveMetaData();

	return serialize( $metaData );
}
/**
 * Classify a stored metadata blob.
 *
 * @param File $image Unused
 * @param string|false $metadata Serialized metadata
 * @return bool|int self::METADATA_BAD when the blob is empty or a serialized
 *  empty array, self::METADATA_COMPATIBLE when it does not contain
 *  'mergedMetadata', self::METADATA_GOOD otherwise
 */
public function isMetadataValid( $image, $metadata ) {
	if ( !$metadata || $metadata === serialize( [] ) ) {
		return self::METADATA_BAD;
	}

	if ( strpos( $metadata, 'mergedMetadata' ) === false ) {
		// Metadata without the merged block used to fall through an empty
		// branch and be reported as fully up to date; report it as
		// compatible instead.
		return self::METADATA_COMPATIBLE;
	}

	return self::METADATA_GOOD;
}
/**
 * Format the file's merged metadata for display.
 *
 * @param File $image File whose stored metadata is read
 * @param IContextSource|false $context Passed through to formatMetadataHelper()
 * @return array|false False when there is no metadata or no non-empty
 *  'mergedMetadata' array in it; otherwise the helper's result
 */
public function formatMetadata( $image, $context = false ) {
	$serialized = $image->getMetadata();
	if ( !$serialized ) {
		return false;
	}

	$decoded = unserialize( $serialized );
	$merged = $decoded['mergedMetadata'] ?? null;

	$usable = is_array( $merged ) && count( $merged ) >= 1;
	if ( !$usable ) {
		return false;
	}

	// Inherited from MediaHandler.
	return $this->formatMetadataHelper( $merged, $context );
}
/**
 * Get the number of pages in the document.
 *
 * @param File $image
 * @return int|false Page count, or false when no dimension info is available
 */
public function pageCount( File $image ) {
	$dimensionInfo = $this->getDimensionInfo( $image );
	if ( !$dimensionInfo ) {
		return false;
	}

	return $dimensionInfo['pageCount'];
}
/**
 * Get the dimensions recorded for one page.
 *
 * @param File $image
 * @param int $page Page number; used directly as the lookup key
 * @return mixed|false The stored entry for that page, or false when there is
 *  no dimension info or no entry for the page
 */
public function getPageDimensions( File $image, $page ) {
	$dimensionInfo = $this->getDimensionInfo( $image );
	if ( !$dimensionInfo ) {
		return false;
	}

	// MW starts pages at 1, as they are stored here
	$byPage = $dimensionInfo['dimensionsByPage'] ?? [];

	return $byPage[$page] ?? false;
}
/**
 * Get the page count and per-page sizes of a file through the main WAN cache.
 *
 * The cache key is derived from the file's SHA-1. On a miss, the value is
 * computed from the file's metadata array.
 *
 * @param File $file
 * @return array|false [ 'pageCount' => int, 'dimensionsByPage' => array keyed
 *  by 1-based page number ], or false when the metadata is unavailable or has
 *  no 'Pages' entry
 */
protected function getDimensionInfo( File $file ) {
	$wanCache = ObjectCache::getMainWANInstance();
	$cacheKey = $wanCache->makeKey( 'file-pdf', 'dimensions', $file->getSha1() );

	$compute = function () use ( $file ) {
		$meta = $this->getMetaArray( $file );
		if ( !$meta || !isset( $meta['Pages'] ) ) {
			return false;
		}
		unset( $meta['text'] ); // lower peak RAM

		$total = intval( $meta['Pages'] );
		$sizes = [];
		for ( $pageNo = 1; $pageNo <= $total; $pageNo++ ) {
			$sizes[$pageNo] = PdfImage::getPageSize( $meta, $pageNo );
		}

		return [ 'pageCount' => $total, 'dimensionsByPage' => $sizes ];
	};

	return $wanCache->getWithSetCallback(
		$cacheKey,
		$wanCache::TTL_INDEFINITE,
		$compute,
		[ 'pcTTL' => $wanCache::TTL_INDEFINITE ]
	);
}
/**
 * Get the text stored for one page.
 *
 * @param File $image
 * @param int $page 1-based page number; the text is stored 0-based
 * @return mixed|false The stored text entry, or false when the metadata,
 *  its 'text' entry, or the entry for that page is missing
 */
public function getPageText( File $image, $page ) {
	$meta = $this->getMetaArray( $image );
	if ( !$meta || !isset( $meta['text'] ) ) {
		return false;
	}

	$offset = $page - 1;

	return $meta['text'][$offset] ?? false;
}
/**
 * Get the configuration for the warning shown for PDF files.
 *
 * @param File $file Unused
 * @return array Has the keys 'messages', 'link' and 'module'
 */
public function getWarningConfig( $file ) {
	$config = [];
	$config['messages'] = self::$messages;
	$config['link'] = '//';
	$config['module'] = 'pdfhandler.messages';

	return $config;
}
/**
 * Register the 'pdfhandler.messages' module, which carries the warning
 * message keys.
 *
 * @param ResourceLoader &$resourceLoader Object the module is registered on
 */
public static function registerWarningModule( &$resourceLoader ) {
	$definition = [
		'messages' => array_values( self::$messages ),
	];

	$resourceLoader->register( 'pdfhandler.messages', $definition );
}
This list may contain false positives That usually means there is additional text with links below the first Each row contains links to the first and second as well as the first line of the second redirect text
unserialize( $serialized)
and that you know you can do these things To protect your we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights These restrictions translate to certain responsibilities for you if you distribute copies of the or if you modify it For if you distribute copies of such a whether gratis or for a you must give the recipients all the rights that you have You must make sure that receive or can get the source code And you must show them these terms so they know their rights We protect your rights with two and(2) offer you this license which gives you legal permission to copy
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfEscapeShellArg(... $args)
Version of escapeshellarg() that works better on Windows.
Fetch server name for use in error reporting etc.
wfShellExecWithStderr( $cmd, &$retval=null, $environ=[], $limits=[])
Execute a shell command, returning both stdout and stderr.
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not.
wfMkdirParents( $dir, $mode=null, $caller=null)
Make directory, and make all parent directories if they don't exist.
Implements some public methods and some protected utility functions which are required by multiple child classes.
Definition File.php:52
Media handler abstract base class for images.
normaliseParams( $image, &$params)
formatMetadataHelper( $metadataArray, $context=false)
sorts the visible/invisible field.
removeBadFile( $dstPath, $retval=0)
Check for zero-sized thumbnails.
Basic media transform error class.
Copyright © 2007 Martin Seidel (Xarax)
getThumbType( $ext, $mime, $params=null)
getMetaArray( $image)
getPdfImage( $image, $path)
static registerWarningModule(&$resourceLoader)
Register a module with the warning messages in it.
validateParam( $name, $value)
getPageDimensions(File $image, $page)
getDimensionInfo(File $file)
getMetadata( $image, $path)
isMetadataValid( $image, $metadata)
formatMetadata( $image, $context=false)
getImageSize( $image, $path)
isMultiPage( $file)
getWarningConfig( $file)
Adds a warning about PDFs being potentially dangerous to the file page.
static $messages
getScriptParams( $params)
getPageText(File $image, $page)
makeParamString( $params)
parseParamString( $str)
pageCount(File $image)
doThumbError( $width, $height, $msg)
doTransform( $image, $dstPath, $dstUrl, $params, $flags=0)
mustRender( $file)
Inspired by djvuimage from Brion Vibber; modified and written by xarax.
Definition PdfImage.php:32
static getPageSize( $data, $page)
Definition PdfImage.php:74
Convenience class for dealing with PoolCounters using callbacks.
execute( $skipcache=false)
Get the result of the work (whatever it is), or the result of the error() function.
Media transform output for images.
Shortcut class for parameter validation errors.
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not it can be in the form of< username >< more info > e g for bot passwords intended to be added to log contexts Fields it might only if the login was with a bot password it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable modifiable after all normalizations have been except for the $wgMaxImageArea check $image
Definition hooks.txt:886
do that in ParserLimitReportFormat instead use this to modify the parameters of the image all existing parser cache entries will be invalid To avoid you ll need to handle that somehow(e.g. with the RejectParserCacheValue hook) because MediaWiki won 't do it for you. & $defaults also a ContextSource after deleting those rows but within the same transaction you ll probably need to make sure the header is varied on and they can depend only on the ResourceLoaderContext $context
Definition hooks.txt:2848
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation use $formDescriptor instead default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html(usually something like "&lt;div ...>$1&lt;/div>"). - flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException':Called before an exception(or PHP error) is logged. This is meant for integration with external error aggregation services
do that in ParserLimitReportFormat instead use this to modify the parameters of the image all existing parser cache entries will be invalid To avoid you ll need to handle that somehow(e.g. with the RejectParserCacheValue hook) because MediaWiki won 't do it for you. & $defaults also a ContextSource after deleting those rows but within the same transaction you ll probably need to make sure the header is varied on and they can depend only on the ResourceLoaderContext such as when responding to a resource loader request or generating HTML output & $resourceLoader
Definition hooks.txt:2859
Utility to generate mapping file used in mw.Title (phpCharToUpper.json)
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback function
Definition injection.txt:30
Definition mcc.php:33
if(!is_readable( $file)) $ext
Definition router.php:48