Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
67.13% |
145 / 216 |
|
0.00% |
0 / 2 |
CRAP | |
0.00% |
0 / 1 |
| PNGMetadataExtractor | |
67.44% |
145 / 215 |
|
0.00% |
0 / 2 |
194.67 | |
0.00% |
0 / 1 |
| getMetadata | |
67.15% |
139 / 207 |
|
0.00% |
0 / 1 |
177.25 | |||
| read | |
75.00% |
6 / 8 |
|
0.00% |
0 / 1 |
4.25 | |||
| 1 | <?php |
| 2 | /** |
| 3 | * PNG frame counter and metadata extractor. |
| 4 | * |
| 5 | * Slightly derived from GIFMetadataExtractor.php |
| 6 | * Deliberately not using MWExceptions to avoid external dependencies, encouraging |
| 7 | * redistribution. |
| 8 | * |
| 9 | * @license GPL-2.0-or-later |
| 10 | * @file |
| 11 | * @ingroup Media |
| 12 | */ |
| 13 | |
| 14 | namespace MediaWiki\Media; |
| 15 | |
| 16 | use InvalidArgumentException; |
| 17 | use Wikimedia\Timestamp\TimestampFormat as TS; |
| 18 | |
| 19 | /** |
| 20 | * PNG frame counter. |
| 21 | * |
| 22 | * @ingroup Media |
| 23 | */ |
class PNGMetadataExtractor {
	public const VERSION = 1;
	private const MAX_CHUNK_SIZE = 3_145_728; // 3 mebibytes

	/** Number of CRC bytes that follow the data of every chunk. */
	private const CRC_SIZE = 4;

	/** Smallest IHDR payload that still contains width, height, bit depth and colour type. */
	private const IHDR_MIN_SIZE = 10;

	/** Size of an acTL payload: two unsigned 32-bit integers (frame count, play count). */
	private const ACTL_SIZE = 8;

	/**
	 * Map of lower-cased PNG text keywords to the metadata field they are stored under.
	 *
	 * Based on the list at http://owl.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html#TextualData
	 * and https://www.w3.org/TR/PNG/#11keywords
	 */
	private const TEXT_CHUNKS = [
		'xml:com.adobe.xmp' => 'xmp',
		# Artist is unofficial. Author is the recommended
		# keyword in the PNG spec. However some people output
		# Artist so support both.
		'artist' => 'Artist',
		'model' => 'Model',
		'make' => 'Make',
		'author' => 'Artist',
		'comment' => 'PNGFileComment',
		'description' => 'ImageDescription',
		'title' => 'ObjectName',
		'copyright' => 'Copyright',
		# Source as in original device used to make image
		# not as in who gave you the image
		'source' => 'Model',
		'software' => 'Software',
		'disclaimer' => 'Disclaimer',
		'warning' => 'ContentWarning',
		'url' => 'Identifier', # Not sure if this is best mapping. Maybe WebStatement.
		'label' => 'Label',
		'creation time' => 'DateTimeDigitized',
		/* Other potentially useful things - Document */
	];

	/**
	 * Walk the chunks of a PNG file and collect dimensions, animation
	 * information, textual metadata and the raw Exif payload.
	 *
	 * Chunks that are oversized, fail their CRC check, or are malformed are
	 * logged through wfDebug() and skipped. Parsing stops at the IEND chunk.
	 * The file handle is closed on every exit path, including exceptions.
	 *
	 * @param string $filename
	 * @return array With keys 'width', 'height', 'frameCount', 'loopCount',
	 *  'duration', 'text', 'bitDepth', 'colorType' and 'exif'.
	 * @throws InvalidArgumentException If no file name is given, the file is
	 *  missing or unreadable, is not a PNG, or ends unexpectedly.
	 */
	public static function getMetadata( $filename ) {
		$frameCount = 0;
		$loopCount = 1;
		$text = [];
		$duration = 0.0;
		$width = 0;
		$height = 0;
		$bitDepth = 0;
		$colorType = 'unknown';
		$exif = null;

		if ( !$filename ) {
			throw new InvalidArgumentException( __METHOD__ . ": No file name specified" );
		}

		if ( !file_exists( $filename ) || is_dir( $filename ) ) {
			throw new InvalidArgumentException( __METHOD__ . ": File $filename does not exist" );
		}

		$fh = fopen( $filename, 'rb' );

		if ( !$fh ) {
			throw new InvalidArgumentException( __METHOD__ . ": Unable to open file $filename" );
		}

		// Everything that can throw while the handle is open lives inside
		// this try block so that the handle is always released.
		try {
			// Check for the PNG header
			$buf = self::read( $fh, 8 );
			if ( $buf !== "\x89PNG\x0d\x0a\x1a\x0a" ) {
				throw new InvalidArgumentException( __METHOD__ . ": Not a valid PNG file; header: $buf" );
			}

			// Read chunks
			while ( !feof( $fh ) ) {
				$buf = self::read( $fh, 4 );
				$chunk_size = unpack( "N", $buf )[1];

				if ( $chunk_size < 0 || $chunk_size > self::MAX_CHUNK_SIZE ) {
					wfDebug( __METHOD__ . ': Chunk size of ' . $chunk_size .
						' too big, skipping. Max size is: ' . self::MAX_CHUNK_SIZE );
					// Skip the 4 byte chunk type, the payload and the trailing CRC.
					if ( fseek( $fh, 4 + $chunk_size + self::CRC_SIZE, SEEK_CUR ) !== 0 ) {
						throw new InvalidArgumentException( __METHOD__ . ': seek error' );
					}
					continue;
				}

				$chunk_type = self::read( $fh, 4 );
				$buf = self::read( $fh, $chunk_size );
				$crc = self::read( $fh, self::CRC_SIZE );
				// The CRC covers the chunk type and the payload, not the length field.
				$computed = crc32( $chunk_type . $buf );
				if ( pack( 'N', $computed ) !== $crc ) {
					wfDebug( __METHOD__ . ': chunk has invalid CRC, skipping' );
					continue;
				}

				if ( $chunk_type === "IHDR" ) {
					// Guard against a truncated header; unpack() and ord() below
					// would otherwise operate on empty strings.
					if ( $chunk_size < self::IHDR_MIN_SIZE ) {
						wfDebug( __METHOD__ . ": IHDR chunk too small" );
						continue;
					}

					$width = unpack( 'N', substr( $buf, 0, 4 ) )[1];
					$height = unpack( 'N', substr( $buf, 4, 4 ) )[1];
					$bitDepth = ord( substr( $buf, 8, 1 ) );
					// Detect the color type in British English as per the spec
					// https://www.w3.org/TR/PNG/#11IHDR
					$colorType = match ( ord( substr( $buf, 9, 1 ) ) ) {
						0 => 'greyscale',
						2 => 'truecolour',
						3 => 'index-coloured',
						4 => 'greyscale-alpha',
						6 => 'truecolour-alpha',
						default => 'unknown'
					};
				} elseif ( $chunk_type === "acTL" ) {
					// Both 32-bit fields are unpacked below, so all 8 bytes must be present.
					if ( $chunk_size < self::ACTL_SIZE ) {
						wfDebug( __METHOD__ . ": acTL chunk too small" );
						continue;
					}

					$actl = unpack( "Nframes/Nplays", $buf );
					$frameCount = $actl['frames'];
					$loopCount = $actl['plays'];
				} elseif ( $chunk_type === "fcTL" ) {
					// The frame delay fraction starts at byte offset 20 of the payload.
					$buf = substr( $buf, 20 );
					if ( strlen( $buf ) < 4 ) {
						wfDebug( __METHOD__ . ": fcTL chunk too small" );
						continue;
					}

					$fctldur = unpack( "ndelay_num/ndelay_den", $buf );
					// A zero denominator is treated as 100 (hundredths of a second).
					if ( $fctldur['delay_den'] == 0 ) {
						$fctldur['delay_den'] = 100;
					}
					if ( $fctldur['delay_num'] ) {
						$duration += $fctldur['delay_num'] / $fctldur['delay_den'];
					}
				} elseif ( $chunk_type === "iTXt" ) {
					// Extracts iTXt chunks, uncompressing if necessary.
					$items = [];
					if ( preg_match(
						'/^([^\x00]{1,79})\x00(\x00|\x01)\x00([^\x00]*)(.)[^\x00]*\x00(.*)$/Ds',
						$buf, $items )
					) {
						/* $items[1] = text chunk name, $items[2] = compressed flag,
						 * $items[3] = lang code (or ""), $items[5] = content.
						 * NOTE(review): $items[4] was historically described as the
						 * compression type, but the pattern places it after the
						 * language tag, so it appears to capture the null byte that
						 * terminates the language tag - confirm against the spec.
						 */

						// Theoretically should be case-sensitive, but in practise...
						$items[1] = strtolower( $items[1] );
						if ( !isset( self::TEXT_CHUNKS[$items[1]] ) ) {
							// Only extract textual chunks on our list.
							continue;
						}

						$items[3] = strtolower( $items[3] );
						if ( $items[3] == '' ) {
							// if no lang specified use x-default like in xmp.
							$items[3] = 'x-default';
						}

						// if compressed
						if ( $items[2] === "\x01" ) {
							if ( function_exists( 'gzuncompress' ) && $items[4] === "\x00" ) {
								// phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
								$items[5] = @gzuncompress( $items[5] );

								if ( $items[5] === false ) {
									// decompression failed
									wfDebug( __METHOD__ . ' Error decompressing iTxt chunk - ' . $items[1] );
									continue;
								}
							} else {
								wfDebug( __METHOD__ . ' Skipping compressed png iTXt chunk due to lack of zlib,'
									. " or potentially invalid compression method" );
								continue;
							}
						}
						$finalKeyword = self::TEXT_CHUNKS[$items[1]];
						$text[$finalKeyword][$items[3]] = $items[5];
						$text[$finalKeyword]['_type'] = 'lang';
					} else {
						// Error reading iTXt chunk
						wfDebug( __METHOD__ . ": Invalid iTXt chunk" );
					}
				} elseif ( $chunk_type === 'tEXt' ) {
					// In case there is no \x00 which will make explode fail.
					if ( !str_contains( $buf, "\x00" ) ) {
						wfDebug( __METHOD__ . ": Invalid tEXt chunk: no null byte" );
						continue;
					}

					[ $keyword, $content ] = explode( "\x00", $buf, 2 );
					if ( $keyword === '' ) {
						wfDebug( __METHOD__ . ": Empty tEXt keyword" );
						continue;
					}

					// Theoretically should be case-sensitive, but in practise...
					$keyword = strtolower( $keyword );
					if ( !isset( self::TEXT_CHUNKS[$keyword] ) ) {
						// Don't recognize chunk, so skip.
						continue;
					}
					// phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
					$content = @iconv( 'ISO-8859-1', 'UTF-8', $content );

					if ( $content === false ) {
						wfDebug( __METHOD__ . ": Read error (error with iconv)" );
						continue;
					}

					$finalKeyword = self::TEXT_CHUNKS[$keyword];
					$text[$finalKeyword]['x-default'] = $content;
					$text[$finalKeyword]['_type'] = 'lang';
				} elseif ( $chunk_type === 'zTXt' ) {
					if ( function_exists( 'gzuncompress' ) ) {
						// In case there is no \x00 which will make explode fail.
						if ( !str_contains( $buf, "\x00" ) ) {
							wfDebug( __METHOD__ . ": No null byte in zTXt chunk" );
							continue;
						}

						[ $keyword, $postKeyword ] = explode( "\x00", $buf, 2 );
						if ( $keyword === '' || $postKeyword === '' ) {
							wfDebug( __METHOD__ . ": Empty zTXt chunk" );
							continue;
						}
						// Theoretically should be case-sensitive, but in practise...
						$keyword = strtolower( $keyword );

						if ( !isset( self::TEXT_CHUNKS[$keyword] ) ) {
							// Don't recognize chunk, so skip.
							continue;
						}
						// First byte after the keyword is the compression method,
						// the remainder is the compressed text.
						$compression = substr( $postKeyword, 0, 1 );
						$content = substr( $postKeyword, 1 );
						if ( $compression !== "\x00" ) {
							wfDebug( __METHOD__ . " Unrecognized compression method in zTXt ($keyword). Skipping." );
							continue;
						}

						// phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
						$content = @gzuncompress( $content );

						if ( $content === false ) {
							// decompression failed
							wfDebug( __METHOD__ . ' Error decompressing zTXt chunk - ' . $keyword );
							continue;
						}

						// phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
						$content = @iconv( 'ISO-8859-1', 'UTF-8', $content );

						if ( $content === false ) {
							wfDebug( __METHOD__ . ": iconv error in zTXt chunk" );
							continue;
						}

						$finalKeyword = self::TEXT_CHUNKS[$keyword];
						$text[$finalKeyword]['x-default'] = $content;
						$text[$finalKeyword]['_type'] = 'lang';
					} else {
						wfDebug( __METHOD__ . " Cannot decompress zTXt chunk due to lack of zlib. Skipping." );
					}
				} elseif ( $chunk_type === 'tIME' ) {
					// last mod timestamp.
					if ( $chunk_size !== 7 ) {
						wfDebug( __METHOD__ . ": tIME wrong size" );
						continue;
					}

					// Note: spec says this should be UTC.
					$t = unpack( "ny/Cm/Cd/Ch/Cmin/Cs", $buf );
					$strTime = sprintf( "%04d%02d%02d%02d%02d%02d",
						$t['y'], $t['m'], $t['d'], $t['h'],
						$t['min'], $t['s'] );

					$exifTime = wfTimestamp( TS::EXIF, $strTime );

					if ( $exifTime ) {
						$text['DateTime'] = $exifTime;
					}
				} elseif ( $chunk_type === 'pHYs' ) {
					// how big pixels are (dots per meter).
					if ( $chunk_size !== 9 ) {
						wfDebug( __METHOD__ . ": pHYs wrong size" );
						continue;
					}

					$dim = unpack( "Nwidth/Nheight/Cunit", $buf );
					if ( $dim['unit'] === 1 ) {
						// Need to check for negative because php
						// doesn't deal with super-large unsigned 32-bit ints well
						if ( $dim['width'] > 0 && $dim['height'] > 0 ) {
							// unit is meters
							// (as opposed to 0 = undefined )
							$text['XResolution'] = $dim['width']
								. '/100';
							$text['YResolution'] = $dim['height']
								. '/100';
							$text['ResolutionUnit'] = 3;
							// 3 = dots per cm (from Exif).
						}
					}
				} elseif ( $chunk_type === "eXIf" ) {
					// There are 4 competing ways to store Exif
					// in a PNG file. This is the official one.
					if (
						$chunk_size < 4 || (
							substr( $buf, 0, 4 ) !== "II\x2A\x00" &&
							substr( $buf, 0, 4 ) !== "MM\x00\x2A"
						)
					) {
						wfDebug( __METHOD__ . ": Invalid eXIf tag" );
						// Do not hand a payload without a TIFF header on to callers.
						continue;
					}
					$exif = $buf;
				} elseif ( $chunk_type === "IEND" ) {
					break;
				}
			}
		} finally {
			fclose( $fh );
		}

		if ( $loopCount > 1 ) {
			$duration *= $loopCount;
		}

		if ( isset( $text['DateTimeDigitized'] ) ) {
			// Convert date format from rfc2822 to exif.
			foreach ( $text['DateTimeDigitized'] as $name => &$value ) {
				if ( $name === '_type' ) {
					continue;
				}

				// @todo FIXME: Currently timezones are ignored.
				// possibly should be wfTimestamp's
				// responsibility. (at least for numeric TZ)
				$formatted = wfTimestamp( TS::EXIF, $value );
				if ( $formatted ) {
					// Only change if we could convert the
					// date.
					// The png standard says it should be
					// in rfc2822 format, but not required.
					// In general for the exif stuff we
					// prettify the date if we can, but we
					// display as-is if we cannot or if
					// it is invalid.
					// So do the same here.

					$value = $formatted;
				}
			}
			// Break the reference so later writes to $value cannot alter $text.
			unset( $value );
		}

		return [
			'width' => $width,
			'height' => $height,
			'frameCount' => $frameCount,
			'loopCount' => $loopCount,
			'duration' => $duration,
			'text' => $text,
			'bitDepth' => $bitDepth,
			'colorType' => $colorType,
			'exif' => $exif,
		];
	}

	/**
	 * Read exactly $size bytes from the file handle.
	 *
	 * A size of zero returns an empty string without touching the handle.
	 *
	 * @param resource $fh The file handle
	 * @param int $size Size in bytes.
	 * @throws InvalidArgumentException If the read fails or fewer than $size
	 *  bytes are available.
	 * @return string The bytes read.
	 */
	private static function read( $fh, $size ) {
		if ( $size === 0 ) {
			return '';
		}

		$result = fread( $fh, $size );
		if ( $result === false ) {
			throw new InvalidArgumentException( __METHOD__ . ': read error' );
		}
		if ( strlen( $result ) < $size ) {
			throw new InvalidArgumentException( __METHOD__ . ': unexpected end of file' );
		}
		return $result;
	}
}
| 412 | |
/**
 * Registers the un-namespaced name 'PNGMetadataExtractor' as an alias of
 * the namespaced class.
 *
 * @deprecated class alias since 1.46
 */
class_alias( PNGMetadataExtractor::class, 'PNGMetadataExtractor' );