Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 167 |
|
0.00% |
0 / 21 |
CRAP | |
0.00% |
0 / 1 |
PdfHandler | |
0.00% |
0 / 167 |
|
0.00% |
0 / 21 |
3192 | |
0.00% |
0 / 1 |
mustRender | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isMultiPage | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
validateParam | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
makeParamString | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
parseParamString | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
getScriptParams | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
getParamMap | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
doThumbError | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
doTransform | |
0.00% |
0 / 69 |
|
0.00% |
0 / 1 |
90 | |||
getPdfImage | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
getSizeAndMetadata | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
getThumbType | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
isFileMetadataValid | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
formatMetadata | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
12 | |||
formatTag | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
56 | |||
pageCount | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
getPageDimensions | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
getDimensionInfo | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
30 | |||
getPageText | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
12 | |||
getWarningConfig | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
useSplitMetadata | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Extension\PdfHandler; |
4 | |
5 | use File; |
6 | use ImageHandler; |
7 | use MediaTransformError; |
8 | use MediaTransformOutput; |
9 | use MediaWiki\Context\IContextSource; |
10 | use MediaWiki\MediaWikiServices; |
11 | use MediaWiki\PoolCounter\PoolCounterWorkViaCallback; |
12 | use ThumbnailImage; |
13 | use TransformParameterError; |
14 | |
15 | /** |
16 | * Copyright © 2007 Martin Seidel (Xarax) <jodeldi@gmx.de> |
17 | * |
18 | * Inspired by djvuhandler from Tim Starling |
19 | * Modified and written by Xarax |
20 | * |
21 | * This program is free software; you can redistribute it and/or modify |
22 | * it under the terms of the GNU General Public License as published by |
23 | * the Free Software Foundation; either version 2 of the License, or |
24 | * (at your option) any later version. |
25 | * |
26 | * This program is distributed in the hope that it will be useful, |
27 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
28 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
29 | * GNU General Public License for more details. |
30 | * |
31 | * You should have received a copy of the GNU General Public License along |
32 | * with this program; if not, write to the Free Software Foundation, Inc., |
33 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
34 | * http://www.gnu.org/copyleft/gpl.html |
35 | */ |
36 | |
37 | class PdfHandler extends ImageHandler { |
38 | /** |
39 | * Keep in sync with pdfhandler.messages in extension.json |
40 | * |
41 | * @see getWarningConfig |
42 | */ |
43 | private const MESSAGES = [ |
44 | 'main' => 'pdf-file-page-warning', |
45 | 'header' => 'pdf-file-page-warning-header', |
46 | 'info' => 'pdf-file-page-warning-info', |
47 | 'footer' => 'pdf-file-page-warning-footer', |
48 | ]; |
49 | |
50 | /** |
51 | * 10MB is considered a large file |
52 | */ |
53 | private const LARGE_FILE = 1e7; |
54 | |
55 | /** |
56 | * Key for getHandlerState for value of type PdfImage |
57 | */ |
58 | private const STATE_PDF_IMAGE = 'pdfImage'; |
59 | |
60 | /** |
61 | * Key for getHandlerState for dimension info |
62 | */ |
63 | private const STATE_DIMENSION_INFO = 'pdfDimensionInfo'; |
64 | |
/**
 * Report whether a file handled by this class has to be rendered.
 *
 * The answer does not depend on the file: it is unconditionally true.
 *
 * @param File $file Ignored
 * @return bool Always true
 */
public function mustRender( $file ) {
	return true;
}
72 | |
/**
 * Report whether a file handled by this class is treated as multi-page.
 *
 * The answer does not depend on the file: it is unconditionally true.
 *
 * @param File $file Ignored
 * @return bool Always true
 */
public function isMultiPage( $file ) {
	return true;
}
80 | |
/**
 * Check whether a transform parameter name/value pair is acceptable.
 *
 * Only 'width', 'height' and 'page' are recognised, and each must carry a
 * positive numeric value. A 'page' value must additionally be a plain
 * integer once surrounding whitespace is trimmed.
 *
 * @param string $name Parameter name
 * @param string $value Parameter value
 * @return bool True if the pair is valid, false otherwise
 */
public function validateParam( $name, $value ) {
	// Strict comparison so that a non-string name can never match by type juggling.
	if ( !in_array( $name, [ 'width', 'height', 'page' ], true ) ) {
		return false;
	}

	if ( $name === 'page' && trim( $value ) !== (string)intval( $value ) ) {
		// Extra junk on the end of page, probably actually a caption
		// e.g. [[File:Foo.pdf|thumb|Page 3 of the document shows foo]]
		return false;
	}

	// Since PHP 8 a non-numeric string is compared with an integer as a
	// string, which makes e.g. 'abc' > 0 true. Require a numeric value first.
	return is_numeric( $value ) && $value > 0;
}
97 | |
/**
 * Build the parameter string "page<P>-<W>px" for a set of transform parameters.
 *
 * The page defaults to 1 when it is not set in $params.
 *
 * @param array $params Transform parameters; 'width' is required, 'page' is optional
 * @return bool|string The parameter string, or false when no width is set
 */
public function makeParamString( $params ) {
	if ( isset( $params['width'] ) ) {
		$pageNumber = $params['page'] ?? 1;

		return 'page' . $pageNumber . '-' . $params['width'] . 'px';
	}

	return false;
}
109 | |
/**
 * Parse a parameter string of the form "page<P>-<W>px" back into an array.
 *
 * @param string $str Parameter string, in the format written by makeParamString()
 * @return array|bool [ 'width' => string, 'page' => string ] with the matched
 *  digit strings, or false when $str does not have the expected form
 */
public function parseParamString( $str ) {
	$matches = [];
	if ( !preg_match( '/^page(\d+)-(\d+)px$/', $str, $matches ) ) {
		return false;
	}

	// Capture group 1 is the page, group 2 the width.
	$pageDigits = $matches[1];
	$widthDigits = $matches[2];

	return [ 'width' => $widthDigits, 'page' => $pageDigits ];
}
123 | |
/**
 * Reduce a set of transform parameters to the 'width' and 'page' entries.
 *
 * Both keys are read unconditionally, so the caller must supply them.
 *
 * @param array $params Transform parameters containing 'width' and 'page'
 * @return array [ 'width' => ..., 'page' => ... ] copied from $params
 */
public function getScriptParams( $params ) {
	$scriptParams = [];
	foreach ( [ 'width', 'page' ] as $paramName ) {
		$scriptParams[$paramName] = $params[$paramName];
	}

	return $scriptParams;
}
134 | |
/**
 * Map the 'img_width' and 'img_page' keys to the handler parameter
 * names 'width' and 'page'.
 *
 * @return array Map of 'img_*' key => handler parameter name
 */
public function getParamMap() {
	$paramMap = [];
	$paramMap['img_width'] = 'width';
	$paramMap['img_page'] = 'page';

	return $paramMap;
}
144 | |
/**
 * Create a 'thumbnail_error' transform error whose detail text is the
 * given message rendered in the content language.
 *
 * @param int $width Width passed on to the error object
 * @param int $height Height passed on to the error object
 * @param string $msg Message key of the detail text
 * @return MediaTransformError
 */
protected function doThumbError( $width, $height, $msg ) {
	$detailText = wfMessage( $msg )->inContentLanguage()->text();

	return new MediaTransformError( 'thumbnail_error', $width, $height, $detailText );
}
155 | |
/**
 * Produce a thumbnail of a single PDF page.
 *
 * The page is rendered by the command configured in $wgPdfProcessor and
 * piped into the command configured in $wgPdfPostProcessor, which resizes
 * the result to the requested width and writes it to $dstPath.
 *
 * @param File $image Source file
 * @param string $dstPath Filesystem path the thumbnail is written to
 * @param string $dstUrl URL of the thumbnail
 * @param array $params Transform parameters; normalised before use
 * @param int $flags Bitfield; with self::TRANSFORM_LATER set, nothing is rendered
 * @return MediaTransformError|MediaTransformOutput|ThumbnailImage|TransformParameterError
 */
public function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) {
	global $wgPdfProcessor, $wgPdfPostProcessor, $wgPdfHandlerDpi, $wgPdfHandlerJpegQuality;

	if ( !$this->normaliseParams( $image, $params ) ) {
		return new TransformParameterError( $params );
	}

	$width = (int)$params['width'];
	$height = (int)$params['height'];
	$page = (int)$params['page'];

	// Both the deferred and the rendered thumbnail carry the same parameters.
	$thumbParams = [
		'width' => $width,
		'height' => $height,
		'page' => $page,
	];

	$pageCount = $this->pageCount( $image );
	if ( $page > $pageCount ) {
		return $this->doThumbError( $width, $height, 'pdf_page_error' );
	}

	if ( $flags & self::TRANSFORM_LATER ) {
		// Rendering is deferred: hand back a thumbnail without a local path.
		return new ThumbnailImage( $image, $dstUrl, false, $thumbParams );
	}

	$dstDir = dirname( $dstPath );
	if ( !wfMkdirParents( $dstDir, null, __METHOD__ ) ) {
		return $this->doThumbError( $width, $height, 'thumbnail_dest_directory' );
	}

	// Thumbnail extraction is very inefficient for large files, so fetching
	// the local copy of a large file goes through a pool counter to limit
	// the number of concurrent downloaders.
	if ( $image->getSize() < self::LARGE_FILE ) {
		$srcPath = $image->getLocalRefPath();
	} else {
		$fetchLocalCopy = static function () use ( $image ) {
			return $image->getLocalRefPath();
		};
		$poolWork = new PoolCounterWorkViaCallback(
			'GetLocalFileCopy',
			sha1( $image->getName() ),
			[ 'doWork' => $fetchLocalCopy ]
		);
		$srcPath = $poolWork->execute();
	}

	if ( $srcPath === false ) {
		// could not download original
		return $this->doThumbError( $width, $height, 'filemissing' );
	}

	// First stage: render exactly one page as JPEG to stdout.
	$renderArgs = [
		$wgPdfProcessor,
		"-sDEVICE=jpeg",
		"-sOutputFile=-",
		"-sstdout=%stderr",
		"-dFirstPage={$page}",
		"-dLastPage={$page}",
		"-dSAFER",
		"-r{$wgPdfHandlerDpi}",
		// CropBox defines the region that the PDF viewer application is expected to display or print.
		"-dUseCropBox",
		"-dBATCH",
		"-dNOPAUSE",
		"-q",
		$srcPath,
	];
	// Second stage: read the image from stdin, resize it and write the thumbnail.
	$resizeArgs = [
		$wgPdfPostProcessor,
		"-depth",
		"8",
		"-quality",
		$wgPdfHandlerJpegQuality,
		"-resize",
		(string)$width,
		"-",
		$dstPath,
	];
	$cmd = '(' . wfEscapeShellArg( ...$renderArgs )
		. " | " . wfEscapeShellArg( ...$resizeArgs )
		. ")";

	wfDebug( __METHOD__ . ": $cmd\n" );
	$retval = '';
	$err = wfShellExecWithStderr( $cmd, $retval );

	$removed = $this->removeBadFile( $dstPath, $retval );

	if ( $retval == 0 && !$removed ) {
		return new ThumbnailImage( $image, $dstUrl, $dstPath, $thumbParams );
	}

	wfDebugLog(
		'thumbnail',
		sprintf(
			'thumbnail failed on %s: error %d "%s" from "%s"',
			wfHostname(),
			$retval,
			trim( $err ),
			$cmd
		)
	);

	return new MediaTransformError( 'thumbnail_error', $width, $height, $err );
}
259 | |
/**
 * Fetch the PdfImage stored in the handler state, creating and storing
 * one for $path when the state holds none yet.
 *
 * @param \MediaHandlerState $state State object used as per-file storage
 * @param string $path Path handed to the PdfImage constructor
 * @return PdfImage
 */
private function getPdfImage( $state, $path ) {
	$stored = $state->getHandlerState( self::STATE_PDF_IMAGE );
	if ( $stored ) {
		return $stored;
	}

	$created = new PdfImage( $path );
	$state->setHandlerState( self::STATE_PDF_IMAGE, $created );

	return $created;
}
273 | |
/**
 * Retrieve the metadata of a PDF together with the size of its first page.
 *
 * @param \MediaHandlerState $state State object holding the shared PdfImage
 * @param string $path Path of the PDF file
 * @return array|bool The entries returned by PdfImage::getPageSize() for
 *  page 1 (when that call yields a non-empty result) plus a 'metadata' entry
 */
public function getSizeAndMetadata( $state, $path ) {
	$pdfImage = $this->getPdfImage( $state, $path );
	$metadata = $pdfImage->retrieveMetaData();

	$result = [ 'metadata' => $metadata ];
	$firstPageSize = PdfImage::getPageSize( $metadata, 1 );

	// Array union keeps the size entries first and appends 'metadata'.
	return $firstPageSize ? $firstPageSize + $result : $result;
}
288 | |
/**
 * Get the file extension and MIME type used for thumbnails.
 *
 * The result depends only on $wgPdfOutputExtension; the arguments are
 * not consulted.
 *
 * @param string $ext Unused
 * @param string $mime Unused
 * @param array|null $params Unused
 * @return array [ output extension, MIME type guessed for that extension ]
 */
public function getThumbType( $ext, $mime, $params = null ) {
	global $wgPdfOutputExtension;
	// Memoised per output extension. The cache deliberately does not reuse
	// the name of the $mime parameter, and keying it by extension keeps the
	// answer correct if the configured extension changes during a request.
	static $mimeByExtension = [];

	if ( !array_key_exists( $wgPdfOutputExtension, $mimeByExtension ) ) {
		$mimeAnalyzer = MediaWikiServices::getInstance()->getMimeAnalyzer();
		$mimeByExtension[$wgPdfOutputExtension] =
			$mimeAnalyzer->guessTypesForExtension( $wgPdfOutputExtension );
	}

	return [ $wgPdfOutputExtension, $mimeByExtension[$wgPdfOutputExtension] ];
}
305 | |
/**
 * Classify the stored metadata of a file.
 *
 * - no 'pages' item: self::METADATA_BAD
 * - 'pages' but no 'mergedMetadata' item: self::METADATA_COMPATIBLE
 * - both items present: self::METADATA_GOOD
 *
 * @param File $file
 * @return bool|int One of the self::METADATA_* constants
 */
public function isFileMetadataValid( $file ) {
	$items = $file->getMetadataItems( [ 'mergedMetadata', 'pages' ] );

	if ( isset( $items['pages'] ) ) {
		return isset( $items['mergedMetadata'] )
			? self::METADATA_GOOD
			: self::METADATA_COMPATIBLE;
	}

	return self::METADATA_BAD;
}
322 | |
/**
 * Format the 'mergedMetadata' item of a file for display.
 *
 * @param File $image
 * @param bool|IContextSource $context Context to use (optional)
 * @return bool|array Result of formatMetadataHelper(), or false when the
 *  item is missing, not an array, or empty
 */
public function formatMetadata( $image, $context = false ) {
	$merged = $image->getMetadataItem( 'mergedMetadata' );

	if ( is_array( $merged ) && count( $merged ) ) {
		// Inherited from MediaHandler.
		return $this->formatMetadataHelper( $merged, $context );
	}

	return false;
}
338 | |
/**
 * @inheritDoc
 *
 * HTML-escapes the PDF-specific tags: 'pdf-PageSize' element by element,
 * 'pdf-Producer', 'pdf-Version' and 'pdf-Encrypted' as a single value.
 * Every other key yields false.
 */
protected function formatTag( string $key, $vals, $context = false ) {
	if ( $key === 'pdf-PageSize' ) {
		foreach ( $vals as $index => $entry ) {
			$vals[$index] = htmlspecialchars( $entry );
		}
		return $vals;
	}

	// @todo: The 'pdf-Encrypted' value isn't i18n-ised; should be done here.
	// For reference, if encrypted this field's value looks like:
	// "yes (print:yes copy:no change:no addNotes:no)"
	$singleValueTags = [ 'pdf-Producer', 'pdf-Version', 'pdf-Encrypted' ];
	if ( in_array( $key, $singleValueTags, true ) ) {
		return htmlspecialchars( $vals );
	}

	// Use default formatting
	return false;
}
361 | |
/**
 * Get the number of pages of a file.
 *
 * @param File $image
 * @return bool|int The 'pageCount' entry of the dimension info, or false
 *  when no dimension info is available
 */
public function pageCount( File $image ) {
	$dimensionInfo = $this->getDimensionInfo( $image );
	if ( !$dimensionInfo ) {
		return false;
	}

	return $dimensionInfo['pageCount'];
}
371 | |
/**
 * Get the dimensions recorded for one page of a file.
 *
 * @param File $image
 * @param int $page Page number; MW starts pages at 1, as they are stored here
 * @return array|bool The 'dimensionsByPage' entry for $page, or false when
 *  there is no dimension info or no entry for that page
 */
public function getPageDimensions( File $image, $page ) {
	$dimensionInfo = $this->getDimensionInfo( $image );
	if ( !$dimensionInfo ) {
		return false;
	}

	return $dimensionInfo['dimensionsByPage'][$page] ?? false;
}
388 | |
/**
 * Get the page count and per-page dimensions of a file.
 *
 * The handler state of the file is consulted first. If it holds nothing,
 * the value comes from the main WAN object cache, keyed by the file's SHA-1
 * with a TTL of one month, and is computed from the file metadata on a miss.
 * Whatever is obtained is written back to the handler state.
 *
 * @param File $file
 * @return bool|mixed [ 'pageCount' => int, 'dimensionsByPage' => array ]
 *  with 1-based page keys, or false when the metadata has no 'Pages' item
 */
protected function getDimensionInfo( File $file ) {
	$info = $file->getHandlerState( self::STATE_DIMENSION_INFO );

	if ( !$info ) {
		$computeFromMetadata = static function () use ( $file ) {
			$data = $file->getMetadataItems( PdfImage::ITEMS_FOR_PAGE_SIZE );
			if ( $data && isset( $data['Pages'] ) ) {
				$pageCount = intval( $data['Pages'] );
				$dimensionsByPage = [];
				for ( $pageNo = 1; $pageNo <= $pageCount; $pageNo++ ) {
					$dimensionsByPage[$pageNo] = PdfImage::getPageSize( $data, $pageNo );
				}

				return [ 'pageCount' => $pageCount, 'dimensionsByPage' => $dimensionsByPage ];
			}

			return false;
		};

		$wanCache = MediaWikiServices::getInstance()->getMainWANObjectCache();
		$cacheKey = $wanCache->makeKey( 'file-pdf-dimensions', $file->getSha1() );
		$info = $wanCache->getWithSetCallback(
			$cacheKey,
			$wanCache::TTL_MONTH,
			$computeFromMetadata
		);
	}

	$file->setHandlerState( self::STATE_DIMENSION_INFO, $info );

	return $info;
}
419 | |
/**
 * Get the text stored for one page of a file.
 *
 * The 'text' metadata item is indexed from 0, so page N is found at
 * index N - 1.
 *
 * @param File $image
 * @param int $page 1-based page number
 * @return string|false The stored entry for the page, or false when the
 *  'text' item is not an array or has no entry for that page
 */
public function getPageText( File $image, $page ) {
	$textByIndex = $image->getMetadataItem( 'text' );
	$index = $page - 1;

	if ( is_array( $textByIndex ) && isset( $textByIndex[$index] ) ) {
		return $textByIndex[$index];
	}

	return false;
}
432 | |
/**
 * Adds a warning about PDFs being potentially dangerous to the file
 * page. Multiple messages with this base will be used.
 *
 * @param File $file Ignored
 * @return array Configuration with the keys 'messages' (self::MESSAGES),
 *  'link' (help page URL) and 'module' (name of the messages module)
 */
public function getWarningConfig( $file ) {
	$helpLink = '//www.mediawiki.org/wiki/Special:MyLanguage/Help:Security/PDF_files';
	$messagesModule = 'pdfhandler.messages';

	return [
		'messages' => self::MESSAGES,
		'link' => $helpLink,
		'module' => $messagesModule,
	];
}
446 | |
/**
 * Unconditionally answers true.
 *
 * NOTE(review): presumably this opts the handler into split metadata
 * storage in the parent class — confirm against MediaHandler.
 *
 * @return bool Always true
 */
public function useSplitMetadata() {
	return true;
}
450 | } |