Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 167 |
|
0.00% |
0 / 21 |
CRAP | |
0.00% |
0 / 1 |
| PdfHandler | |
0.00% |
0 / 167 |
|
0.00% |
0 / 21 |
3192 | |
0.00% |
0 / 1 |
| mustRender | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| isMultiPage | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| validateParam | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
| makeParamString | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
| parseParamString | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
| getScriptParams | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
| getParamMap | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
| doThumbError | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
| doTransform | |
0.00% |
0 / 69 |
|
0.00% |
0 / 1 |
90 | |||
| getPdfImage | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
| getSizeAndMetadata | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
| getThumbType | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
| isFileMetadataValid | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
| formatMetadata | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
12 | |||
| formatTag | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
56 | |||
| pageCount | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
| getPageDimensions | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
| getDimensionInfo | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
30 | |||
| getPageText | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
12 | |||
| getWarningConfig | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
| useSplitMetadata | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| 1 | <?php |
| 2 | /** |
| 3 | * Copyright © 2007 Martin Seidel (Xarax) <jodeldi@gmx.de> |
| 4 | * |
| 5 | * This program is free software; you can redistribute it and/or modify |
| 6 | * it under the terms of the GNU General Public License as published by |
| 7 | * the Free Software Foundation; either version 2 of the License, or |
| 8 | * (at your option) any later version. |
| 9 | * |
| 10 | * This program is distributed in the hope that it will be useful, |
| 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 13 | * GNU General Public License for more details. |
| 14 | * |
| 15 | * You should have received a copy of the GNU General Public License along |
| 16 | * with this program; if not, write to the Free Software Foundation, Inc., |
| 17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
| 18 | * http://www.gnu.org/copyleft/gpl.html |
| 19 | */ |
| 20 | |
| 21 | namespace MediaWiki\Extension\PdfHandler; |
| 22 | |
| 23 | use ImageHandler; |
| 24 | use MediaTransformError; |
| 25 | use MediaTransformOutput; |
| 26 | use MediaWiki\Context\IContextSource; |
| 27 | use MediaWiki\FileRepo\File\File; |
| 28 | use MediaWiki\MediaWikiServices; |
| 29 | use MediaWiki\PoolCounter\PoolCounterWorkViaCallback; |
| 30 | use ThumbnailImage; |
| 31 | use TransformParameterError; |
| 32 | |
/**
 * Media handler for PDF files: validates thumbnail parameters, renders a
 * single page to a raster thumbnail through external commands, and exposes
 * the page count, page dimensions, page text and metadata stored for a file.
 *
 * Inspired by djvuhandler from Tim Starling
 * Modified and written by Xarax
 */
class PdfHandler extends ImageHandler {
	/**
	 * Message keys returned by getWarningConfig().
	 *
	 * Keep in sync with pdfhandler.messages in extension.json
	 *
	 * @see getWarningConfig
	 */
	private const MESSAGES = [
		'main' => 'pdf-file-page-warning',
		'header' => 'pdf-file-page-warning-header',
		'info' => 'pdf-file-page-warning-info',
		'footer' => 'pdf-file-page-warning-footer',
	];

	/**
	 * 10MB is considered a large file.
	 *
	 * Files of at least this size fetch their local copy through a pool
	 * counter in doTransform().
	 */
	private const LARGE_FILE = 1e7;

	/**
	 * Key for getHandlerState for value of type PdfImage
	 */
	private const STATE_PDF_IMAGE = 'pdfImage';

	/**
	 * Key for getHandlerState for dimension info
	 */
	private const STATE_DIMENSION_INFO = 'pdfDimensionInfo';

	/**
	 * Whether the file must be rendered before it can be displayed.
	 *
	 * @param File $file Unused.
	 * @return bool Always true for this handler.
	 */
	public function mustRender( $file ) {
		return true;
	}

	/**
	 * Whether the file is treated as a multi-page document.
	 *
	 * @param File $file Unused.
	 * @return bool Always true for this handler.
	 */
	public function isMultiPage( $file ) {
		return true;
	}

	/**
	 * Check a single transform parameter.
	 *
	 * Only 'width', 'height' and 'page' are accepted, and each must be
	 * greater than zero. 'page' must additionally be a plain integer string.
	 *
	 * @param string $name Parameter name.
	 * @param string $value Parameter value.
	 * @return bool True if the parameter is recognised and valid.
	 */
	public function validateParam( $name, $value ) {
		if ( $name === 'page' && trim( $value ) !== (string)intval( $value ) ) {
			// Extra junk on the end of page, probably actually a caption
			// e.g. [[File:Foo.pdf|thumb|Page 3 of the document shows foo]]
			return false;
		}
		if ( in_array( $name, [ 'width', 'height', 'page' ], true ) ) {
			return ( $value > 0 );
		}
		return false;
	}

	/**
	 * Build the parameter string "page<N>-<W>px" for a set of parameters.
	 *
	 * @param array $params Transform parameters; 'page' defaults to 1.
	 * @return bool|string The parameter string, or false when 'width' is not set.
	 */
	public function makeParamString( $params ) {
		if ( !isset( $params['width'] ) ) {
			return false;
		}
		$page = $params['page'] ?? 1;
		return "page{$page}-{$params['width']}px";
	}

	/**
	 * Inverse of makeParamString(): split "page<N>-<W>px" into its parts.
	 *
	 * @param string $str Parameter string.
	 * @return array|bool [ 'width' => string, 'page' => string ] with the matched
	 *  digit strings, or false if $str does not have the expected form.
	 */
	public function parseParamString( $str ) {
		$m = [];

		if ( preg_match( '/^page(\d+)-(\d+)px$/', $str, $m ) ) {
			return [ 'width' => $m[2], 'page' => $m[1] ];
		}

		return false;
	}

	/**
	 * Reduce a parameter array to the 'width' and 'page' entries.
	 *
	 * @param array $params Must contain 'width' and 'page'.
	 * @return array [ 'width' => ..., 'page' => ... ]
	 */
	public function getScriptParams( $params ) {
		return [
			'width' => $params['width'],
			'page' => $params['page'],
		];
	}

	/**
	 * Map of 'img_*' keys to the parameter names used by this handler.
	 *
	 * @return array
	 */
	public function getParamMap() {
		return [
			'img_width' => 'width',
			'img_page' => 'page',
		];
	}

	/**
	 * Build a 'thumbnail_error' transform error whose detail text is the
	 * given message rendered in the content language.
	 *
	 * @param int $width
	 * @param int $height
	 * @param string $msg Message key.
	 * @return MediaTransformError
	 */
	protected function doThumbError( $width, $height, $msg ) {
		return new MediaTransformError( 'thumbnail_error',
			$width, $height, wfMessage( $msg )->inContentLanguage()->text() );
	}

	/**
	 * Render one page of the PDF to a thumbnail at $dstPath.
	 *
	 * The page is rasterised by $wgPdfProcessor and piped into
	 * $wgPdfPostProcessor, which resizes it and writes the destination file.
	 *
	 * @param File $image
	 * @param string $dstPath Filesystem path the thumbnail is written to.
	 * @param string $dstUrl URL passed through to the resulting ThumbnailImage.
	 * @param array $params Transform parameters; normalised via normaliseParams().
	 * @param int $flags Bitfield; only self::TRANSFORM_LATER is inspected here.
	 * @return MediaTransformError|MediaTransformOutput|ThumbnailImage|TransformParameterError
	 */
	public function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) {
		global $wgPdfProcessor, $wgPdfPostProcessor, $wgPdfHandlerDpi, $wgPdfHandlerJpegQuality;

		if ( !$this->normaliseParams( $image, $params ) ) {
			return new TransformParameterError( $params );
		}

		$width = (int)$params['width'];
		$height = (int)$params['height'];
		$page = (int)$params['page'];

		// Note: pageCount() returns false when the count is unknown, in which
		// case any page >= 1 also compares as greater and yields this error.
		if ( $page > $this->pageCount( $image ) ) {
			return $this->doThumbError( $width, $height, 'pdf_page_error' );
		}

		$thumbParams = [
			'width' => $width,
			'height' => $height,
			'page' => $page,
		];

		if ( $flags & self::TRANSFORM_LATER ) {
			// Return the thumbnail description without a local path and
			// without running any external command.
			return new ThumbnailImage( $image, $dstUrl, false, $thumbParams );
		}

		if ( !wfMkdirParents( dirname( $dstPath ), null, __METHOD__ ) ) {
			return $this->doThumbError( $width, $height, 'thumbnail_dest_directory' );
		}

		// Thumbnail extraction is very inefficient for large files.
		// Provide a way to pool count limit the number of downloaders.
		if ( $image->getSize() >= self::LARGE_FILE ) {
			$work = new PoolCounterWorkViaCallback( 'GetLocalFileCopy', sha1( $image->getName() ),
				[
					'doWork' => static function () use ( $image ) {
						return $image->getLocalRefPath();
					}
				]
			);
			$srcPath = $work->execute();
		} else {
			$srcPath = $image->getLocalRefPath();
		}

		if ( $srcPath === false ) {
			// could not download original
			return $this->doThumbError( $width, $height, 'filemissing' );
		}

		// First stage: write a JPEG of the requested page to stdout,
		// with the processor's own stdout redirected to stderr.
		$cmd = '(' . wfEscapeShellArg(
			$wgPdfProcessor,
			"-sDEVICE=jpeg",
			"-sOutputFile=-",
			"-sstdout=%stderr",
			"-dFirstPage={$page}",
			"-dLastPage={$page}",
			"-dSAFER",
			"-r{$wgPdfHandlerDpi}",
			// CropBox defines the region that the PDF viewer application is expected to display or print.
			"-dUseCropBox",
			"-dBATCH",
			"-dNOPAUSE",
			"-q",
			$srcPath
		);
		// Second stage: read that image from stdin ("-"), resize it to the
		// requested width and write it to the destination path.
		$cmd .= " | " . wfEscapeShellArg(
			$wgPdfPostProcessor,
			"-depth",
			"8",
			"-quality",
			$wgPdfHandlerJpegQuality,
			"-resize",
			(string)$width,
			"-",
			$dstPath
		);
		$cmd .= ")";

		wfDebug( __METHOD__ . ": $cmd\n" );
		$retval = '';
		$err = wfShellExecWithStderr( $cmd, $retval );

		// removeBadFile() is not defined in this class; a truthy result is
		// treated as a failed render, same as a non-zero exit status.
		$removed = $this->removeBadFile( $dstPath, $retval );

		if ( $retval != 0 || $removed ) {
			wfDebugLog( 'thumbnail',
				sprintf( 'thumbnail failed on %s: error %d "%s" from "%s"',
				wfHostname(), $retval, trim( $err ), $cmd ) );
			return new MediaTransformError( 'thumbnail_error', $width, $height, $err );
		}

		return new ThumbnailImage( $image, $dstUrl, $dstPath, $thumbParams );
	}

	/**
	 * Get the PdfImage for a path, creating it on first use and keeping it
	 * in the handler state so later calls with the same state reuse it.
	 *
	 * @param \MediaHandlerState $state
	 * @param string $path
	 * @return PdfImage
	 */
	private function getPdfImage( $state, $path ) {
		$pdfImg = $state->getHandlerState( self::STATE_PDF_IMAGE );
		if ( !$pdfImg ) {
			$pdfImg = new PdfImage( $path );
			$state->setHandlerState( self::STATE_PDF_IMAGE, $pdfImg );
		}
		return $pdfImg;
	}

	/**
	 * Retrieve the metadata of the file at $path, together with the size
	 * of its first page when PdfImage::getPageSize() can provide one.
	 *
	 * @param \MediaHandlerState $state
	 * @param string $path
	 * @return array Always contains 'metadata'; additionally contains whatever
	 *  keys PdfImage::getPageSize() returned for page 1, if it returned any.
	 */
	public function getSizeAndMetadata( $state, $path ) {
		$metadata = $this->getPdfImage( $state, $path )->retrieveMetaData();
		$sizes = PdfImage::getPageSize( $metadata, 1 );
		if ( $sizes ) {
			return $sizes + [ 'metadata' => $metadata ];
		}

		return [ 'metadata' => $metadata ];
	}

	/**
	 * Get the extension and MIME type used for thumbnails.
	 *
	 * The result depends only on $wgPdfOutputExtension; none of the
	 * arguments are consulted.
	 *
	 * @param string $ext Unused.
	 * @param string $mime Unused.
	 * @param array|null $params Unused.
	 * @return array [ thumbnail extension, MIME type guessed for that extension ]
	 */
	public function getThumbType( $ext, $mime, $params = null ) {
		global $wgPdfOutputExtension;
		// Cache of guessed MIME types, keyed by output extension. This is a
		// distinct variable on purpose: a static named like a parameter would
		// replace the parameter's value.
		static $thumbMimeByExtension = [];

		if ( !isset( $thumbMimeByExtension[$wgPdfOutputExtension] ) ) {
			$magic = MediaWikiServices::getInstance()->getMimeAnalyzer();
			$thumbMimeByExtension[$wgPdfOutputExtension] =
				$magic->guessTypesForExtension( $wgPdfOutputExtension );
		}
		return [ $wgPdfOutputExtension, $thumbMimeByExtension[$wgPdfOutputExtension] ];
	}

	/**
	 * Classify the stored metadata of a file.
	 *
	 * @param File $file
	 * @return bool|int self::METADATA_BAD when the 'pages' item is missing,
	 *  self::METADATA_COMPATIBLE when only 'mergedMetadata' is missing,
	 *  self::METADATA_GOOD otherwise.
	 */
	public function isFileMetadataValid( $file ) {
		$data = $file->getMetadataItems( [ 'mergedMetadata', 'pages' ] );
		if ( !isset( $data['pages'] ) ) {
			return self::METADATA_BAD;
		}

		if ( !isset( $data['mergedMetadata'] ) ) {
			return self::METADATA_COMPATIBLE;
		}

		return self::METADATA_GOOD;
	}

	/**
	 * Format the file's 'mergedMetadata' item for display.
	 *
	 * @param File $image
	 * @param bool|IContextSource $context Context to use (optional)
	 * @return bool|array False when 'mergedMetadata' is not a non-empty array,
	 *  otherwise the result of formatMetadataHelper().
	 */
	public function formatMetadata( $image, $context = false ) {
		$mergedMetadata = $image->getMetadataItem( 'mergedMetadata' );

		if ( !is_array( $mergedMetadata ) || !count( $mergedMetadata ) ) {
			return false;
		}

		// Inherited from MediaHandler.
		return $this->formatMetadataHelper( $mergedMetadata, $context );
	}

	/**
	 * HTML-escape the values of the PDF-specific metadata tags.
	 *
	 * @inheritDoc
	 * @param string $key Metadata tag name.
	 * @param mixed $vals A single value, or for 'pdf-PageSize' a list of values.
	 * @param bool|IContextSource $context Unused.
	 * @return string|array|false Escaped value(s) for the handled tags, or
	 *  false for any other tag.
	 */
	protected function formatTag( string $key, $vals, $context = false ) {
		switch ( $key ) {
			case 'pdf-Producer':
			case 'pdf-Version':
				return htmlspecialchars( $vals );
			case 'pdf-PageSize':
				// Escape each entry in place; assigning by key avoids leaving
				// a by-reference loop variable behind.
				foreach ( $vals as $index => $val ) {
					$vals[$index] = htmlspecialchars( $val );
				}
				return $vals;
			case 'pdf-Encrypted':
				// @todo: The value isn't i18n-ised; should be done here.
				// For reference, if encrypted this field's value looks like:
				// "yes (print:yes copy:no change:no addNotes:no)"
				return htmlspecialchars( $vals );
			default:
				break;
		}
		// Use default formatting
		return false;
	}

	/**
	 * Number of pages in the file.
	 *
	 * @param File $image
	 * @return bool|int The page count, or false when no dimension info is available.
	 */
	public function pageCount( File $image ) {
		$info = $this->getDimensionInfo( $image );

		return $info ? $info['pageCount'] : false;
	}

	/**
	 * Dimensions of a single page.
	 *
	 * @param File $image
	 * @param int $page Page number, starting at 1.
	 * @return array|bool The entry stored for that page, or false when there is none.
	 */
	public function getPageDimensions( File $image, $page ) {
		$info = $this->getDimensionInfo( $image );
		// MW starts pages at 1, as they are stored here
		if ( $info && isset( $info['dimensionsByPage'][$page] ) ) {
			return $info['dimensionsByPage'][$page];
		}

		return false;
	}

	/**
	 * Get the page count and per-page dimensions of a file.
	 *
	 * Looked up in the file's handler state first, then in the main WAN
	 * object cache under a key derived from the file's SHA-1, and finally
	 * computed from the file's metadata items.
	 *
	 * @param File $file
	 * @return bool|mixed [ 'pageCount' => int, 'dimensionsByPage' => array ]
	 *  with pages keyed from 1, or false when the metadata has no 'Pages' item.
	 */
	protected function getDimensionInfo( File $file ) {
		$info = $file->getHandlerState( self::STATE_DIMENSION_INFO );
		if ( $info ) {
			// Already resolved for this file object; nothing to look up or store.
			return $info;
		}

		$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
		$info = $cache->getWithSetCallback(
			$cache->makeKey( 'file-pdf-dimensions', $file->getSha1() ),
			$cache::TTL_MONTH,
			static function () use ( $file ) {
				$data = $file->getMetadataItems( PdfImage::ITEMS_FOR_PAGE_SIZE );
				if ( !$data || !isset( $data['Pages'] ) ) {
					return false;
				}

				$dimsByPage = [];
				$count = intval( $data['Pages'] );
				for ( $i = 1; $i <= $count; $i++ ) {
					$dimsByPage[$i] = PdfImage::getPageSize( $data, $i );
				}

				return [ 'pageCount' => $count, 'dimensionsByPage' => $dimsByPage ];
			}
		);

		$file->setHandlerState( self::STATE_DIMENSION_INFO, $info );
		return $info;
	}

	/**
	 * Get the stored text of one page.
	 *
	 * @param File $image
	 * @param int $page Page number, starting at 1; the 'text' metadata item is
	 *  indexed from 0, so page N is read from index N - 1.
	 * @return string|false The page text, or false when the 'text' item is not an
	 *  array or has no entry for that page.
	 */
	public function getPageText( File $image, $page ) {
		$pageTexts = $image->getMetadataItem( 'text' );
		if ( !is_array( $pageTexts ) || !isset( $pageTexts[$page - 1] ) ) {
			return false;
		}
		return $pageTexts[$page - 1];
	}

	/**
	 * Adds a warning about PDFs being potentially dangerous to the file
	 * page. Multiple messages with this base will be used.
	 *
	 * @param File $file Unused.
	 * @return array Contains 'messages' (see self::MESSAGES), 'link' and 'module'.
	 */
	public function getWarningConfig( $file ) {
		return [
			'messages' => self::MESSAGES,
			'link' => '//www.mediawiki.org/wiki/Special:MyLanguage/Help:Security/PDF_files',
			'module' => 'pdfhandler.messages',
		];
	}

	/**
	 * @inheritDoc
	 * @return bool Always true for this handler.
	 */
	public function useSplitMetadata() {
		return true;
	}
}