Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 200 |
|
0.00% |
0 / 9 |
CRAP | |
0.00% |
0 / 1 |
PagedTiffImage | |
0.00% |
0 / 200 |
|
0.00% |
0 / 9 |
5256 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
isValid | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getImageSize | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
6 | |||
getPageSize | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
resetMetaData | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
retrieveMetaData | |
0.00% |
0 / 61 |
|
0.00% |
0 / 1 |
182 | |||
parseTiffinfoOutput | |
0.00% |
0 / 57 |
|
0.00% |
0 / 1 |
702 | |||
parseExiv2Output | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
parseIdentifyOutput | |
0.00% |
0 / 57 |
|
0.00% |
0 / 1 |
600 |
1 | <?php |
2 | /** |
3 | * Copyright © Wikimedia Deutschland, 2009 |
4 | * Authors Hallo Welt! Medienwerkstatt GmbH |
5 | * Authors Sebastian Ulbricht, Daniel Lynge, Marc Reymann, Markus Glaser |
6 | * |
7 | * This program is free software; you can redistribute it and/or modify |
8 | * it under the terms of the GNU General Public License as published by |
9 | * the Free Software Foundation; either version 2 of the License, or |
10 | * (at your option) any later version. |
11 | * |
12 | * This program is distributed in the hope that it will be useful, |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | * GNU General Public License for more details. |
16 | * |
17 | * You should have received a copy of the GNU General Public License along |
18 | * with this program; if not, write to the Free Software Foundation, Inc., |
19 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
20 | * http://www.gnu.org/copyleft/gpl.html |
21 | */ |
22 | |
23 | namespace MediaWiki\Extension\PagedTiffHandler; |
24 | |
25 | use BitmapMetadataHandler; |
26 | use MediaWiki\Shell\CommandFactory; |
27 | use Wikimedia\Stats\StatsFactory; |
28 | |
/**
 * Reads and caches the metadata of a (possibly multi-page) TIFF file.
 *
 * The metadata is obtained by running a boxed shell script which calls
 * tiffinfo or ImageMagick's identify, and optionally exiv2; the textual
 * output of those tools is then parsed into an associative array.
 *
 * inspired by djvuimage from Brion Vibber
 * modified and written by xarax
 * adapted to tiff by Hallo Welt! - Medienwerkstatt GmbH
 */
class PagedTiffImage {
	/** @var array|null Cached result of retrieveMetaData(), null until first successful run */
	private $metadata = null;

	/** @var string Path of the TIFF file to inspect */
	private $filename;

	/** @var CommandFactory */
	private $commandFactory;

	/** @var StatsFactory */
	private $statsFactory;

	/**
	 * @param CommandFactory $commandFactory Used to build the boxed shell command
	 * @param StatsFactory $statsFactory Used to count shell invocations
	 * @param string $filename Path of the TIFF file to inspect
	 */
	public function __construct( CommandFactory $commandFactory, StatsFactory $statsFactory, $filename ) {
		$this->commandFactory = $commandFactory;
		$this->statsFactory = $statsFactory;
		$this->filename = $filename;
	}

	/**
	 * Called by MimeMagick functions.
	 *
	 * Note that the value is simply the number of top-level metadata entries,
	 * so a result consisting only of an 'errors' entry is still non-zero.
	 *
	 * @return int Number of top-level entries in the retrieved metadata
	 */
	public function isValid() {
		return count( $this->retrieveMetaData() );
	}

	/**
	 * Returns an array that corresponds to the native PHP function getimagesize(),
	 * based on the dimensions of page 1.
	 *
	 * @return array|false [ width, height, 'Tiff', html-attributes ] or false
	 *  if the metadata contains no entry for page 1
	 */
	public function getImageSize() {
		$size = self::getPageSize( $this->retrieveMetaData(), 1 );
		if ( !$size ) {
			return false;
		}

		$width = $size['width'];
		$height = $size['height'];

		return [ $width, $height, 'Tiff', "width=\"$width\" height=\"$height\"" ];
	}

	/**
	 * Returns an array with width and height of the tiff page.
	 *
	 * @param array $data Metadata as returned by retrieveMetaData()
	 * @param int $page Key of the page inside $data['page_data']
	 * @return array|false [ 'width' => int, 'height' => int ], or false if
	 *  there is no entry for the requested page
	 */
	public static function getPageSize( $data, $page ) {
		if ( !isset( $data['page_data'][$page] ) ) {
			return false;
		}

		$pageData = $data['page_data'][$page];

		// A missing dimension is reported as 0 (what intval( null ) yields)
		// instead of raising an undefined-index warning.
		return [
			'width' => intval( $pageData['width'] ?? 0 ),
			'height' => intval( $pageData['height'] ?? 0 )
		];
	}

	/**
	 * Drops the cached metadata so that the next call to retrieveMetaData()
	 * runs the shell command again.
	 */
	public function resetMetaData() {
		$this->metadata = null;
	}

	/**
	 * Reads metadata of the tiff file via shell command and returns an associative array.
	 *
	 * A successful result is cached in this object; the early error results
	 * for a failed tiffinfo/identify run are returned without being cached.
	 *
	 * @return array Associative array. Layout:
	 * meta['page_count'] = number of pages
	 * meta['first_page'] = number of first page
	 * meta['last_page'] = number of last page
	 * meta['page_data'] = metadata per page
	 * meta['exif'] = Exif, XMP and IPTC
	 * meta['errors'] = identify-errors
	 * meta['warnings'] = identify-warnings
	 * meta['TIFF_METADATA_VERSION'] = PagedTiffHandler::TIFF_METADATA_VERSION
	 * If the tiffinfo/identify command itself fails, only meta['errors'] is present.
	 */
	public function retrieveMetaData() {
		global $wgImageMagickIdentifyCommand, $wgExiv2Command, $wgTiffUseExiv;
		global $wgTiffUseTiffinfo, $wgTiffTiffinfoCommand;
		global $wgShowEXIF, $wgShellboxShell;

		if ( $this->metadata !== null ) {
			return $this->metadata;
		}

		// The script writes its findings to one of these files, depending on the tool used.
		$dumpFile = $wgTiffUseTiffinfo ? 'info' : 'identified';

		$command = $this->commandFactory
			->createBoxed( 'pagedtiffhandler' )
			->disableNetwork()
			->firejailDefaultSeccomp()
			->routeName( 'pagedtiffhandler-metadata' );
		$command
			->params( $wgShellboxShell, 'scripts/retrieveMetaData.sh' )
			->inputFileFromFile(
				'scripts/retrieveMetaData.sh',
				__DIR__ . '/../scripts/retrieveMetaData.sh' )
			->inputFileFromFile( 'file.tiff', $this->filename )
			->environment( [
				'TIFF_USETIFFINFO' => $wgTiffUseTiffinfo ? 'yes' : 'no',
				'TIFF_TIFFINFO' => $wgTiffTiffinfoCommand,
				'TIFF_IDENTIFY' => $wgImageMagickIdentifyCommand,
				'TIFF_USEEXIV' => $wgTiffUseExiv ? 'yes' : 'no',
				'TIFF_EXIV2' => $wgExiv2Command,
			] )
			->outputFileToString( $dumpFile );
		if ( $wgTiffUseExiv ) {
			$command
				->outputFileToString( 'extended' )
				->outputFileToString( 'exiv_exit_code' );
		}

		$result = $command->execute();
		// Record in statsd
		$this->statsFactory->getCounter( 'pagedtiffhandler_shell_retrievemetadata_total' )
			->copyToStatsdAt( 'pagedtiffhandler.shell.retrieve_meta_data' )
			->increment();

		$overallExit = $result->getExitCode();
		if ( $overallExit == 10 ) {
			// tiffinfo failure
			wfDebug( __METHOD__ . ": tiffinfo command failed: {$this->filename}" );
			return [ 'errors' => [ "tiffinfo command failed: {$this->filename}" ] ];
		}
		if ( $overallExit == 11 ) {
			// identify failure
			wfDebug( __METHOD__ . ": identify command failed: {$this->filename}" );
			return [ 'errors' => [ "identify command failed: {$this->filename}" ] ];
		}

		$dump = $result->getFileContents( $dumpFile );
		$this->metadata = $wgTiffUseTiffinfo
			? $this->parseTiffinfoOutput( $dump )
			: $this->parseIdentifyOutput( $dump );

		$this->metadata['exif'] = [];

		if ( !empty( $this->metadata['errors'] ) ) {
			wfDebug( __METHOD__ . ": found errors, skipping EXIF extraction" );
		} elseif ( $wgTiffUseExiv ) {
			// Cast first: the file content may be missing (null), which trim() must not receive.
			$exivExit = (int)trim( (string)$result->getFileContents( 'exiv_exit_code' ) );
			if ( $exivExit !== 0 ) {
				// don't fail - we are missing info, just report
				wfDebug( __METHOD__ . ": exiv command failed: {$this->filename}" );
			}

			$this->metadata['exif'] = $this->parseExiv2Output( $result->getFileContents( 'extended' ) );
		} elseif ( $wgShowEXIF ) {
			wfDebug( __METHOD__ . ": using internal Exif( {$this->filename} )" );
			$this->metadata['exif'] = BitmapMetadataHandler::Tiff( $this->filename );
		}

		// These entries are never kept in the stored metadata.
		unset(
			$this->metadata['exif']['Image'],
			$this->metadata['exif']['filename'],
			$this->metadata['exif']['Base filename'],
			$this->metadata['exif']['XMLPacket'],
			$this->metadata['exif']['ImageResources']
		);

		$this->metadata['TIFF_METADATA_VERSION'] = PagedTiffHandler::TIFF_METADATA_VERSION;

		return $this->metadata;
	}

	/**
	 * Tells whether the text matches at least one of the given regular expressions.
	 *
	 * @param iterable $patterns Complete PCRE patterns, including delimiters
	 * @param string $text
	 * @return bool
	 */
	private static function matchesAnyPattern( $patterns, $text ) {
		foreach ( $patterns as $pattern ) {
			if ( preg_match( $pattern, $text ) ) {
				return true;
			}
		}
		return false;
	}

	/**
	 * helper function of retrieveMetaData().
	 * parses shell return from tiffinfo-command into an array.
	 *
	 * Rows matching $wgTiffTiffinfoRejectMessages are recorded as errors;
	 * warning rows are recorded unless they match $wgTiffTiffinfoBypassMessages.
	 *
	 * @param string|null $dump Raw tiffinfo output; null is treated like an empty string
	 * @return array
	 */
	protected function parseTiffinfoOutput( $dump ) {
		global $wgTiffTiffinfoRejectMessages, $wgTiffTiffinfoBypassMessages;

		$dump = (string)$dump;

		# HACK: width and length are given on a single line...
		$dump = preg_replace( '/ Image Length:/', "\n Image Length:", $dump );
		$rows = preg_split( '/[\r\n]+\s*/', $dump );

		$state = new PagedTiffInfoParserState();

		// Offsets of IFDs that were referenced via an *IFDOffset tag and must be skipped
		$ignoreIFDs = [];
		// Whether the IFD currently being read is one of those
		$ignore = false;

		foreach ( $rows as $row ) {
			$row = trim( $row );

			# ignore XML rows
			if ( preg_match( '/^<|^$/', $row ) ) {
				continue;
			}

			# handle fatal errors
			if ( self::matchesAnyPattern( $wgTiffTiffinfoRejectMessages, $row ) ) {
				$state->addError( $row );
				continue;
			}

			$m = [];

			if ( preg_match( '/^TIFF Directory at offset 0x[a-f0-9]+ \((\d+)\)/', $row, $m ) ) {
				# new IFD starting, flush previous page
				if ( $ignore ) {
					$state->resetPage();
				} elseif ( !$state->finishPage() ) {
					// the ignore flag deliberately stays as it was in this case
					continue;
				}

				# check if the next IFD is to be ignored
				$ignore = !empty( $ignoreIFDs[(int)$m[1]] );
			} elseif ( preg_match( '#^(TIFF.*?Directory): (.*?/.*?): (.*)#i', $row, $m ) ) {
				# handle warnings; bypass patterns are tested against the whole row
				if ( !self::matchesAnyPattern( $wgTiffTiffinfoBypassMessages, $row ) ) {
					$state->addWarning( $m[3] );
				}
			} elseif ( preg_match( '/^\s*(.*?)\s*:\s*(.*?)\s*$/', $row, $m ) ) {
				# handle key/value pair
				[ , $key, $value ] = $m;

				if ( $key === 'Page Number' && preg_match( '/(\d+)-(\d+)/', $value, $m ) ) {
					// stored one higher than the number tiffinfo reports
					$state->setPageProperty( 'page', (string)( (int)$m[1] + 1 ) );
				} elseif ( $key === 'Samples/Pixel' ) {
					if ( $value == '4' ) {
						$state->setPageProperty( 'alpha', 'true' );
					}
				} elseif ( $key === 'Extra samples' ) {
					if ( strpos( $value, 'alpha' ) !== false ) {
						$state->setPageProperty( 'alpha', 'true' );
					}
				} elseif ( $key === 'Image Width' || $key === 'PixelXDimension' ) {
					$state->setPageProperty( 'width', (string)( (int)$value ) );
				} elseif ( $key === 'Image Length' || $key === 'PixelYDimension' ) {
					$state->setPageProperty( 'height', (string)( (int)$value ) );
				} elseif ( strpos( $key, 'IFDOffset' ) !== false ) {
					# ignore extra IFDs,
					# see <http://www.awaresystems.be/imaging/tiff/tifftags/exififd.html>
					# Note: we assume that we will always see the reference before the actual IFD,
					# so we know which IFDs to ignore
					// Offset is usually in hex
					if ( preg_match( '/^0x[0-9A-Fa-f]+$/', $value ) ) {
						$value = hexdec( substr( $value, 2 ) );
					}
					$ignoreIFDs[(int)$value] = true;
				}
			}
			// anything else is a strange line and is skipped
		}

		$state->finish( !$ignore );

		return $state->getMetadata();
	}

	/**
	 * helper function of retrieveMetaData().
	 * parses shell return from exiv2-command into an array.
	 *
	 * Every line of the form "<word> <whitespace> <rest>" becomes one
	 * entry; when a key occurs more than once the last value wins.
	 *
	 * @param string|null $dump Raw exiv2 output; null is treated like an empty string
	 * @return array Map of key => value, both strings
	 */
	protected function parseExiv2Output( $dump ) {
		$rows = [];
		preg_match_all( '/^(\w+)\s+(.+)$/m', (string)$dump, $rows, PREG_SET_ORDER );

		$data = [];
		foreach ( $rows as $row ) {
			$data[$row[1]] = $row[2];
		}

		return $data;
	}

	/**
	 * helper function of retrieveMetaData().
	 * parses shell return from identify-command into an array.
	 *
	 * Each [BEGIN]...[END] block holds the key=value lines of one page. Text
	 * outside those blocks is treated as messages: an error if it matches
	 * $wgTiffIdentifyRejectMessages, dropped if it matches
	 * $wgTiffIdentifyBypassMessages, and a warning otherwise.
	 *
	 * @param string|null $dump Raw identify output
	 * @return array
	 */
	protected function parseIdentifyOutput( $dump ) {
		global $wgTiffIdentifyRejectMessages, $wgTiffIdentifyBypassMessages;

		$state = new PagedTiffInfoParserState();

		$dump = strval( $dump );
		if ( $dump === '' ) {
			$state->addError( "no metadata" );
			return $state->getMetadata();
		}

		$blockPattern = '/\[BEGIN\](.+?)\[END\]/si';

		$infos = [];
		preg_match_all( $blockPattern, $dump, $infos, PREG_SET_ORDER );
		// ImageMagick < 6.6.8-10 starts page numbering at 1; >= 6.6.8-10 starts at zero.
		// Handle both and map to one-based page numbers (which are assumed in various other parts
		// of the support for displaying multi-page files).
		$pageSeen = false;
		$pageOffset = 0;
		foreach ( $infos as $info ) {
			$state->resetPage();
			foreach ( explode( "\n", $info[1] ) as $line ) {
				if ( trim( $line ) === '' ) {
					continue;
				}
				// Split on the first "=" only, so a value may itself contain "=";
				// a line without "=" yields an empty value instead of a warning.
				[ $key, $value ] = array_pad( explode( '=', $line, 2 ), 2, '' );
				$key = trim( $key );
				$value = trim( $value );
				if ( $key === 'alpha' && $value === '%A' ) {
					// the literal placeholder was returned instead of a value
					continue;
				}
				if ( $key === 'alpha2' && !$state->hasPageProperty( 'alpha' ) ) {
					// fall back to the second alpha field to derive the alpha flag
					$hasAlpha = in_array( $value, [ 'DirectClassRGBMatte', 'DirectClassRGBA' ], true );
					$state->setPageProperty( 'alpha', $hasAlpha ? 'true' : 'false' );
					continue;
				}
				if ( $key === 'page' ) {
					if ( !$pageSeen ) {
						// the first page number seen decides the offset for all pages
						$pageSeen = true;
						$pageOffset = 1 - intval( $value );
					}
					if ( $pageOffset !== 0 ) {
						$value = intval( $value ) + $pageOffset;
					}
				}
				$state->setPageProperty( $key, (string)$value );
			}
			$state->finishPage();
		}

		// Whatever remains outside the page blocks are messages of the identify command.
		$leftover = (string)preg_replace( $blockPattern, '', $dump );
		foreach ( explode( "\n", $leftover ) as $message ) {
			$message = trim( $message );
			if ( $message === '' ) {
				continue;
			}

			if ( self::matchesAnyPattern( $wgTiffIdentifyRejectMessages, $message ) ) {
				$state->addError( $message );
			} elseif ( !self::matchesAnyPattern( $wgTiffIdentifyBypassMessages, $message ) ) {
				// neither rejected nor explicitly bypassed
				$state->addWarning( $message );
			}
		}

		$state->finish();

		return $state->getMetadata();
	}
}