Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 198 |
|
0.00% |
0 / 9 |
CRAP | |
0.00% |
0 / 1 |
PagedTiffImage | |
0.00% |
0 / 198 |
|
0.00% |
0 / 9 |
5256 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
isValid | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getImageSize | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
6 | |||
getPageSize | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
resetMetaData | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
retrieveMetaData | |
0.00% |
0 / 59 |
|
0.00% |
0 / 1 |
182 | |||
parseTiffinfoOutput | |
0.00% |
0 / 57 |
|
0.00% |
0 / 1 |
702 | |||
parseExiv2Output | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
parseIdentifyOutput | |
0.00% |
0 / 57 |
|
0.00% |
0 / 1 |
600 |
1 | <?php |
2 | /** |
3 | * Copyright © Wikimedia Deutschland, 2009 |
4 | * Authors Hallo Welt! Medienwerkstatt GmbH |
5 | * Authors Sebastian Ulbricht, Daniel Lynge, Marc Reymann, Markus Glaser |
6 | * |
7 | * This program is free software; you can redistribute it and/or modify |
8 | * it under the terms of the GNU General Public License as published by |
9 | * the Free Software Foundation; either version 2 of the License, or |
10 | * (at your option) any later version. |
11 | * |
12 | * This program is distributed in the hope that it will be useful, |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | * GNU General Public License for more details. |
16 | * |
17 | * You should have received a copy of the GNU General Public License along |
18 | * with this program; if not, write to the Free Software Foundation, Inc., |
19 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
20 | * http://www.gnu.org/copyleft/gpl.html |
21 | */ |
22 | |
23 | namespace MediaWiki\Extension\PagedTiffHandler; |
24 | |
25 | use BitmapMetadataHandler; |
26 | use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface; |
27 | use MediaWiki\Shell\CommandFactory; |
28 | |
29 | /** |
30 | * inspired by djvuimage from Brion Vibber |
31 | * modified and written by xarax |
32 | * adapted to tiff by Hallo Welt! - Medienwerkstatt GmbH |
33 | */ |
34 | |
class PagedTiffImage {
	/**
	 * Cached result of retrieveMetaData(). Stays null until a run gets past the
	 * tiffinfo/identify exit-code checks, and is null again after resetMetaData().
	 * @var array|null
	 */
	private $metadata = null;

	/** @var string Path of the file that is handed to the metadata script as file.tiff */
	private $filename;

	/** @var CommandFactory */
	private $commandFactory;

	/** @var StatsdDataFactoryInterface */
	private $statsdFactory;

	/**
	 * @param CommandFactory $commandFactory Used to build the boxed shell command
	 * @param StatsdDataFactoryInterface $statsdFactory Counter sink for shell invocations
	 * @param string $filename Path of the TIFF file to inspect
	 */
	public function __construct(
		CommandFactory $commandFactory,
		StatsdDataFactoryInterface $statsdFactory,
		$filename
	) {
		$this->commandFactory = $commandFactory;
		$this->statsdFactory = $statsdFactory;
		$this->filename = $filename;
	}

	/**
	 * Called by MimeMagick functions.
	 *
	 * NOTE(review): the value is the number of top-level entries of the metadata
	 * array, so a result that only carries 'errors' is still non-zero - confirm
	 * that callers expect this.
	 *
	 * @return int
	 */
	public function isValid() {
		return count( $this->retrieveMetaData() );
	}

	/**
	 * Returns an array that corresponds to the native PHP function getimagesize().
	 * The dimensions are those of page 1.
	 *
	 * @return array|false [ width, height, 'Tiff', html-attribute-string ], or false
	 *  when no data for page 1 is available
	 */
	public function getImageSize() {
		$data = $this->retrieveMetaData();
		$size = self::getPageSize( $data, 1 );

		if ( $size ) {
			$width = $size['width'];
			$height = $size['height'];
			return [ $width, $height, 'Tiff',
				"width=\"$width\" height=\"$height\"" ];
		}
		return false;
	}

	/**
	 * Returns an array with width and height of the tiff page.
	 *
	 * @param array $data Metadata array as returned by retrieveMetaData()
	 * @param int $page Key into $data['page_data']
	 * @return array|false [ 'width' => int, 'height' => int ], or false when
	 *  $data has no entry for $page
	 */
	public static function getPageSize( $data, $page ) {
		if ( isset( $data['page_data'][$page] ) ) {
			return [
				'width' => intval( $data['page_data'][$page]['width'] ),
				'height' => intval( $data['page_data'][$page]['height'] )
			];
		}
		return false;
	}

	/**
	 * Drops the cached metadata so that the next retrieveMetaData() call runs
	 * the shell command again.
	 */
	public function resetMetaData() {
		$this->metadata = null;
	}

	/**
	 * Reads metadata of the tiff file via shell command and returns an associative array.
	 *
	 * The result is cached in $this->metadata. The early returns for a failed
	 * tiffinfo/identify run (script exit codes 10 and 11) are not cached, so a
	 * later call will run the command again.
	 *
	 * @return array Associative array. Layout:
	 * meta['page_count'] = number of pages
	 * meta['first_page'] = number of first page
	 * meta['last_page'] = number of last page
	 * meta['page_data'] = metadata per page
	 * meta['exif'] = Exif, XMP and IPTC
	 * meta['errors'] = identify-errors
	 * meta['warnings'] = identify-warnings
	 */
	public function retrieveMetaData() {
		global $wgImageMagickIdentifyCommand, $wgExiv2Command, $wgTiffUseExiv;
		global $wgTiffUseTiffinfo, $wgTiffTiffinfoCommand;
		global $wgShowEXIF, $wgShellboxShell;

		if ( $this->metadata !== null ) {
			return $this->metadata;
		}

		$command = $this->commandFactory
			->createBoxed( 'pagedtiffhandler' )
			->disableNetwork()
			->firejailDefaultSeccomp()
			->routeName( 'pagedtiffhandler-metadata' );
		$command
			->params( $wgShellboxShell, 'scripts/retrieveMetaData.sh' )
			->inputFileFromFile(
				'scripts/retrieveMetaData.sh',
				__DIR__ . '/../scripts/retrieveMetaData.sh' )
			->inputFileFromFile( 'file.tiff', $this->filename )
			->environment( [
				'TIFF_USETIFFINFO' => $wgTiffUseTiffinfo ? 'yes' : 'no',
				'TIFF_TIFFINFO' => $wgTiffTiffinfoCommand,
				'TIFF_IDENTIFY' => $wgImageMagickIdentifyCommand,
				'TIFF_USEEXIV' => $wgTiffUseExiv ? 'yes' : 'no',
				'TIFF_EXIV2' => $wgExiv2Command,
			] );
		// Only request the output files that the chosen tools are expected to write.
		if ( $wgTiffUseTiffinfo ) {
			$command->outputFileToString( 'info' );
		} else {
			$command->outputFileToString( 'identified' );
		}
		if ( $wgTiffUseExiv ) {
			$command
				->outputFileToString( 'extended' )
				->outputFileToString( 'exiv_exit_code' );
		}

		$result = $command->execute();
		// Record in statsd
		$this->statsdFactory->increment( 'pagedtiffhandler.shell.retrieve_meta_data' );

		$overallExit = $result->getExitCode();
		if ( $overallExit == 10 ) {
			// tiffinfo failure
			wfDebug( __METHOD__ . ": tiffinfo command failed: {$this->filename}" );
			return [ 'errors' => [ "tiffinfo command failed: {$this->filename}" ] ];
		} elseif ( $overallExit == 11 ) {
			// identify failure
			wfDebug( __METHOD__ . ": identify command failed: {$this->filename}" );
			return [ 'errors' => [ "identify command failed: {$this->filename}" ] ];
		}

		if ( $wgTiffUseTiffinfo ) {
			$this->metadata = $this->parseTiffinfoOutput( $result->getFileContents( 'info' ) );
		} else {
			$this->metadata = $this->parseIdentifyOutput( $result->getFileContents( 'identified' ) );
		}

		$this->metadata['exif'] = [];

		if ( !empty( $this->metadata['errors'] ) ) {
			wfDebug( __METHOD__ . ": found errors, skipping EXIF extraction" );
		} elseif ( $wgTiffUseExiv ) {
			$exivExit = (int)trim( $result->getFileContents( 'exiv_exit_code' ) );
			if ( $exivExit != 0 ) {
				// don't fail - we are missing info, just report
				wfDebug( __METHOD__ . ": exiv command failed: {$this->filename}" );
			}

			// Whatever exiv2 wrote is still parsed, even after a non-zero exit.
			$this->metadata['exif'] = $this->parseExiv2Output( $result->getFileContents( 'extended' ) );
		} elseif ( $wgShowEXIF ) {
			wfDebug( __METHOD__ . ": using internal Exif( {$this->filename} )" );
			$this->metadata['exif'] = BitmapMetadataHandler::Tiff( $this->filename );
		}

		unset( $this->metadata['exif']['Image'] );
		unset( $this->metadata['exif']['filename'] );
		unset( $this->metadata['exif']['Base filename'] );
		unset( $this->metadata['exif']['XMLPacket'] );
		unset( $this->metadata['exif']['ImageResources'] );

		$this->metadata['TIFF_METADATA_VERSION'] = PagedTiffHandler::TIFF_METADATA_VERSION;

		return $this->metadata;
	}

	/**
	 * helper function of retrieveMetaData().
	 * parses shell return from tiffinfo-command into an array.
	 *
	 * Rows matching $wgTiffTiffinfoRejectMessages are recorded as errors; directory
	 * warnings are recorded unless they match $wgTiffTiffinfoBypassMessages.
	 *
	 * @param string $dump
	 * @return array
	 */
	protected function parseTiffinfoOutput( $dump ) {
		global $wgTiffTiffinfoRejectMessages, $wgTiffTiffinfoBypassMessages;

		# HACK: width and length are given on a single line...
		$dump = preg_replace( '/ Image Length:/', "\n Image Length:", $dump );
		$rows = preg_split( '/[\r\n]+\s*/', $dump );

		$state = new PagedTiffInfoParserState();

		// Offsets of IFDs that were referenced through an *IFDOffset tag
		$ignoreIFDs = [];
		// Whether the IFD currently being read is one of those
		$ignore = false;

		foreach ( $rows as $row ) {
			$row = trim( $row );

			# ignore XML rows
			if ( preg_match( '/^<|^$/', $row ) ) {
				continue;
			}

			$error = false;

			# handle fatal errors
			foreach ( $wgTiffTiffinfoRejectMessages as $pattern ) {
				if ( preg_match( $pattern, $row ) ) {
					$state->addError( $row );
					$error = true;
					break;
				}
			}

			if ( $error ) {
				continue;
			}

			$m = [];

			if ( preg_match( '/^TIFF Directory at offset 0x[a-f0-9]+ \((\d+)\)/', $row, $m ) ) {
				# new IFD starting, flush previous page

				if ( $ignore ) {
					$state->resetPage();
				} elseif ( !$state->finishPage() ) {
					// The previous page could not be finished; $ignore is left
					// untouched for the IFD that starts here.
					continue;
				}

				# check if the next IFD is to be ignored
				$offset = (int)$m[1];
				$ignore = !empty( $ignoreIFDs[ $offset ] );
			} elseif ( preg_match( '#^(TIFF.*?Directory): (.*?/.*?): (.*)#i', $row, $m ) ) {
				# handle warnings

				$bypass = false;
				$msg = $m[3];

				foreach ( $wgTiffTiffinfoBypassMessages as $pattern ) {
					if ( preg_match( $pattern, $row ) ) {
						$bypass = true;
						break;
					}
				}

				if ( !$bypass ) {
					$state->addWarning( $msg );
				}
			} elseif ( preg_match( '/^\s*(.*?)\s*:\s*(.*?)\s*$/', $row, $m ) ) {
				# handle key/value pair

				[ , $key, $value ] = $m;

				if ( $key == 'Page Number' && preg_match( '/(\d+)-(\d+)/', $value, $m ) ) {
					// The captured page index gets +1 to form the stored page number
					$state->setPageProperty( 'page', (string)( (int)$m[1] + 1 ) );
				} elseif ( $key == 'Samples/Pixel' ) {
					if ( $value == '4' ) {
						$state->setPageProperty( 'alpha', 'true' );
					}
				} elseif ( $key == 'Extra samples' ) {
					if ( preg_match( '/.*alpha.*/', $value ) ) {
						$state->setPageProperty( 'alpha', 'true' );
					}
				} elseif ( $key == 'Image Width' || $key == 'PixelXDimension' ) {
					$state->setPageProperty( 'width', (string)( (int)$value ) );
				} elseif ( $key == 'Image Length' || $key == 'PixelYDimension' ) {
					$state->setPageProperty( 'height', (string)( (int)$value ) );
				} elseif ( preg_match( '/.*IFDOffset/', $key ) ) {
					# ignore extra IFDs,
					# see <http://www.awaresystems.be/imaging/tiff/tifftags/exififd.html>
					# Note: we assume that we will always see the reference before the actual IFD,
					# so we know which IFDs to ignore
					// Offset is usually in hex
					if ( preg_match( '/^0x[0-9A-Fa-f]+$/', $value ) ) {
						$value = hexdec( substr( $value, 2 ) );
					}
					$offset = (int)$value;
					$ignoreIFDs[$offset] = true;
				}
			}
			// Any other row is a strange line and is skipped.
		}

		$state->finish( !$ignore );

		return $state->getMetadata();
	}

	/**
	 * helper function of retrieveMetaData().
	 * parses shell return from exiv2-command into an array.
	 *
	 * Every line of the form "<word><whitespace><rest>" becomes one entry
	 * word => rest; when a word repeats, the last occurrence wins.
	 *
	 * @param string $dump
	 * @return array
	 */
	protected function parseExiv2Output( $dump ) {
		$result = [];
		preg_match_all( '/^(\w+)\s+(.+)$/m', $dump, $result, PREG_SET_ORDER );

		$data = [];

		foreach ( $result as $row ) {
			$data[$row[1]] = $row[2];
		}

		return $data;
	}

	/**
	 * helper function of retrieveMetaData().
	 * parses shell return from identify-command into an array.
	 *
	 * Each [BEGIN]...[END] section holds the key=value lines of one page. Text
	 * outside those sections is treated as messages: lines matching
	 * $wgTiffIdentifyRejectMessages become errors, lines matching
	 * $wgTiffIdentifyBypassMessages are dropped, everything else is a warning.
	 *
	 * @param string $dump
	 * @return array
	 */
	protected function parseIdentifyOutput( $dump ) {
		global $wgTiffIdentifyRejectMessages, $wgTiffIdentifyBypassMessages;

		$state = new PagedTiffInfoParserState();

		if ( strval( $dump ) == '' ) {
			$state->addError( "no metadata" );
			return $state->getMetadata();
		}

		$infos = null;
		preg_match_all( '/\[BEGIN\](.+?)\[END\]/si', $dump, $infos, PREG_SET_ORDER );
		// ImageMagick < 6.6.8-10 starts page numbering at 1; >= 6.6.8-10 starts at zero.
		// Handle both and map to one-based page numbers (which are assumed in various other parts
		// of the support for displaying multi-page files).
		$pageSeen = false;
		$pageOffset = 0;
		foreach ( $infos as $info ) {
			$state->resetPage();
			$lines = explode( "\n", $info[1] );
			foreach ( $lines as $line ) {
				if ( trim( $line ) == '' ) {
					continue;
				}
				// Split on the first '=' only, so that values containing '=' stay
				// intact; a line without '=' yields an empty value instead of an
				// undefined-offset warning.
				[ $key, $value ] = array_pad( explode( '=', $line, 2 ), 2, '' );
				$key = trim( $key );
				$value = trim( $value );
				if ( $key === 'alpha' && $value === '%A' ) {
					// Literal '%A' as the value: skip it so that 'alpha2' can
					// decide below.
					continue;
				}
				if ( $key === 'alpha2' && !$state->hasPageProperty( 'alpha' ) ) {
					$hasAlpha = in_array(
						$value,
						[ 'DirectClassRGBMatte', 'DirectClassRGBA' ],
						true
					);
					$state->setPageProperty( 'alpha', $hasAlpha ? 'true' : 'false' );
					continue;
				}
				if ( $key === 'page' ) {
					if ( !$pageSeen ) {
						// The first page number seen determines the shift
						// that makes numbering one-based.
						$pageSeen = true;
						$pageOffset = 1 - intval( $value );
					}
					if ( $pageOffset !== 0 ) {
						$value = intval( $value ) + $pageOffset;
					}
				}
				$state->setPageProperty( $key, (string)$value );
			}
			$state->finishPage();
		}

		// Whatever is left outside the page sections are identify's messages.
		$dump = preg_replace( '/\[BEGIN\](.+?)\[END\]/si', '', $dump );
		if ( strlen( $dump ) ) {
			$errors = explode( "\n", $dump );
			foreach ( $errors as $error ) {
				$error = trim( $error );
				if ( $error === '' ) {
					continue;
				}

				$knownError = false;
				foreach ( $wgTiffIdentifyRejectMessages as $msg ) {
					if ( preg_match( $msg, $error ) ) {
						$state->addError( $error );
						$knownError = true;
						break;
					}
				}
				if ( !$knownError ) {
					// ignore messages that match $wgTiffIdentifyBypassMessages
					foreach ( $wgTiffIdentifyBypassMessages as $msg ) {
						if ( preg_match( $msg, $error ) ) {
							$knownError = true;
							break;
						}
					}
				}
				if ( !$knownError ) {
					$state->addWarning( $error );
				}
			}
		}

		$state->finish();

		return $state->getMetadata();
	}
}