Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
67.50% |
81 / 120 |
|
55.56% |
5 / 9 |
CRAP | |
0.00% |
0 / 1 |
| BitmapMetadataHandler | |
68.07% |
81 / 119 |
|
55.56% |
5 / 9 |
127.18 | |
0.00% |
0 / 1 |
| doApp13 | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
2 | |||
| getExif | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
4 | |||
| addMetadata | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
| getMetadataArray | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
10 | |||
| Jpeg | |
95.00% |
19 / 20 |
|
0.00% |
0 / 1 |
11 | |||
| PNG | |
100.00% |
25 / 25 |
|
100.00% |
1 / 1 |
6 | |||
| GIF | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
30 | |||
| Tiff | |
0.00% |
0 / 20 |
|
0.00% |
0 / 1 |
20 | |||
| getTiffByteOrder | |
80.00% |
8 / 10 |
|
0.00% |
0 / 1 |
5.20 | |||
| 1 | <?php |
| 2 | /** |
| 3 | * Extraction of metadata from different bitmap image types. |
| 4 | * |
| 5 | * @license GPL-2.0-or-later |
| 6 | * @file |
| 7 | * @ingroup Media |
| 8 | */ |
| 9 | |
| 10 | namespace MediaWiki\Media; |
| 11 | |
| 12 | use MediaWiki\Logger\LoggerFactory; |
| 13 | use MediaWiki\MainConfigNames; |
| 14 | use MediaWiki\MediaWikiServices; |
| 15 | use Wikimedia\XMPReader\Reader as XMPReader; |
| 16 | |
| 17 | /** |
| 18 | * Class to deal with reconciling and extracting metadata from bitmap images. |
| 19 | * This is meant to comply with http://www.metadataworkinggroup.org/pdf/mwg_guidance.pdf |
| 20 | * |
| 21 | * This sort of acts as an intermediary between MediaHandler::getMetadata |
| 22 | * and the various metadata extractors. |
| 23 | * |
| 24 | * @todo Other image formats. |
| 25 | * @newable |
| 26 | * @note marked as newable in 1.35 for lack of a better alternative, |
| 27 | * but should become a stateless service, or a handler managed |
| 28 | * registry for metadata handlers for different file types. |
| 29 | * @ingroup Media |
| 30 | */ |
class BitmapMetadataHandler {
	/**
	 * Metadata collected so far, keyed by source type (for example 'exif',
	 * 'native' or one of the XMP/IPTC types listed in $metaPriority).
	 *
	 * @var array
	 */
	private $metadata = [];

	/**
	 * Source types grouped by priority number. getMetadataArray() walks the
	 * groups from the highest number to the lowest, and a value that is
	 * already present is never overwritten, so a higher number wins.
	 *
	 * @var array Metadata priority
	 */
	private $metaPriority = [
		20 => [ 'other' ],
		40 => [ 'native' ],
		60 => [ 'iptc-good-hash', 'iptc-no-hash' ],
		70 => [ 'xmp-deprecated' ],
		80 => [ 'xmp-general' ],
		90 => [ 'xmp-exif' ],
		100 => [ 'iptc-bad-hash' ],
		120 => [ 'exif' ],
	];

	/** @var string Type under which parsed IPTC data is stored; set by doApp13() */
	private $iptcType = 'iptc-no-hash';

	/**
	 * This does the photoshop image resource app13 block
	 * of interest, IPTC-IIM metadata is stored here.
	 *
	 * Mostly just calls doPSIR and doIPTC
	 *
	 * @param string $app13 String containing app13 block from jpeg file
	 */
	private function doApp13( $app13 ) {
		try {
			$this->iptcType = JpegMetadataExtractor::doPSIR( $app13 );
		} catch ( InvalidPSIRException $e ) {
			// Error reading the iptc hash information.
			// This probably means the App13 segment is something other than what we expect.
			// However, still try to read it, and treat it as if the hash didn't exist.
			wfDebug( "Error parsing iptc data of file: " . $e->getMessage() );
			$this->iptcType = 'iptc-no-hash';
		}

		$this->addMetadata( IPTC::parse( $app13 ), $this->iptcType );
	}

	/**
	 * Get exif info using exif class.
	 * Basically what used to be in BitmapHandler::getMetadata().
	 * Just calls stuff in the Exif class.
	 *
	 * Nothing is added when the file does not exist, when the ShowEXIF
	 * setting is off, or when the Exif class yields no data.
	 *
	 * Parameters are passed to the Exif class.
	 *
	 * @param string $filename
	 * @param string $byteOrder
	 */
	public function getExif( $filename, $byteOrder ) {
		$showEXIF = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::ShowEXIF );
		if ( !file_exists( $filename ) || !$showEXIF ) {
			return;
		}

		$exif = new Exif( $filename, $byteOrder );
		$data = $exif->getFilteredData();
		if ( $data ) {
			$this->addMetadata( $data, 'exif' );
		}
	}

	/**
	 * Add misc metadata. Warning: atm if the metadata category
	 * doesn't have a priority, it will be silently discarded.
	 *
	 * When data of the same type was added before, both arrays are merged and
	 * the newly supplied values win for keys present in both.
	 *
	 * @param array $metaArray Array of metadata values
	 * @param string $type Type. defaults to other. if two things have the same type they're merged
	 */
	public function addMetadata( $metaArray, $type = 'other' ) {
		if ( isset( $this->metadata[$type] ) ) {
			/* merge with old data */
			$metaArray += $this->metadata[$type];
		}

		$this->metadata[$type] = $metaArray;
	}

	/**
	 * Merge together the various types of metadata
	 * the different types have different priorities,
	 * and are merged in order.
	 *
	 * This function is generally called by the media handlers' getMetadata()
	 *
	 * The object's state is left untouched, so calling this repeatedly
	 * returns the same result every time.
	 *
	 * @return array
	 */
	public function getMetadataArray() {
		// Sort a local copy (highest priority first) so that the merge order
		// follows the MWG recommendation without reordering the property.
		$priorities = $this->metaPriority;
		krsort( $priorities );

		$temp = [];
		foreach ( $priorities as $types ) {
			foreach ( $types as $type ) {
				if ( !isset( $this->metadata[$type] ) ) {
					continue;
				}

				// Work on a copy: items folded into an existing entry below are
				// dropped from the copy only, never from the stored metadata.
				$items = $this->metadata[$type];

				// Do some special casing for multilingual values.
				// Don't discard translations if also as a simple value.
				foreach ( $this->metadata[$type] as $itemName => $item ) {
					if ( is_array( $item ) && ( $item['_type'] ?? null ) === 'lang'
						&& isset( $temp[$itemName] ) && !is_array( $temp[$itemName] )
					) {
						// The higher-priority simple value becomes the default
						// of the multilingual value.
						$item['x-default'] = $temp[$itemName];
						$temp[$itemName] = $item;
						unset( $items[$itemName] );
					}
				}

				// Array union: keys already in $temp (higher priority) are kept.
				$temp += $items;
			}
		}

		return $temp;
	}

	/**
	 * Main entry point for JPEG files.
	 *
	 * @param string $filename Filename (with full path)
	 * @return array Metadata result array.
	 * @throws InvalidJpegException
	 */
	public static function Jpeg( $filename ) {
		$showXMP = XMPReader::isSupported();
		$meta = new self();

		$seg = JpegMetadataExtractor::segmentSplitter( $filename );

		if ( isset( $seg['SOF'] ) ) {
			$meta->addMetadata( [ 'SOF' => $seg['SOF'] ] );
		}
		if ( isset( $seg['COM'][0] ) ) {
			$meta->addMetadata( [ 'JPEGFileComment' => $seg['COM'] ], 'native' );
		}
		foreach ( $seg['PSIR'] ?? [] as $curPSIRValue ) {
			$meta->doApp13( $curPSIRValue );
		}
		if ( isset( $seg['XMP'] ) && $showXMP ) {
			$xmp = new XMPReader( LoggerFactory::getInstance( 'XMP' ), $filename );
			$xmp->parse( $seg['XMP'] );
			// Tolerate a missing 'XMP_ext' entry instead of iterating over null.
			foreach ( $seg['XMP_ext'] ?? [] as $xmpExt ) {
				/* Support for extended xmp in jpeg files
				 * is not well tested and a bit fragile.
				 */
				$xmp->parseExtended( $xmpExt );
			}
			foreach ( $xmp->getResults() as $type => $array ) {
				$meta->addMetadata( $array, $type );
			}
		}

		// Fall back to big endian when the splitter reported no byte order.
		$meta->getExif( $filename, $seg['byteOrder'] ?? 'BE' );

		return $meta->getMetadataArray();
	}

	/**
	 * Entry point for PNG files.
	 *
	 * Handles embedded XMP and Exif data; the remaining text chunks are
	 * stored as 'native' metadata.
	 *
	 * @param string $filename Full path to file
	 * @return array Array for storage in img_metadata.
	 */
	public static function PNG( $filename ) {
		$showXMP = XMPReader::isSupported();

		$meta = new self();
		$array = PNGMetadataExtractor::getMetadata( $filename );
		if ( isset( $array['text']['xmp']['x-default'] )
			&& $array['text']['xmp']['x-default'] !== '' && $showXMP
		) {
			$xmp = new XMPReader( LoggerFactory::getInstance( 'XMP' ), $filename );
			$xmp->parse( $array['text']['xmp']['x-default'] );
			foreach ( $xmp->getResults() as $type => $xmpSection ) {
				$meta->addMetadata( $xmpSection, $type );
			}
		}

		if ( !empty( $array['exif'] ) ) {
			// The Exif section is essentially an embedded tiff file,
			// so just extract it and read it.
			$tmpFile = MediaWikiServices::getInstance()
				->getTempFSFileFactory()
				->newTempFSFile( 'png-exif_', 'tiff' );
			$exifDataFile = $tmpFile->getPath();
			file_put_contents( $exifDataFile, $array['exif'] );
			$byteOrder = self::getTiffByteOrder( $exifDataFile );
			// Without a TIFF byte-order marker (or if the temporary file could
			// not be written) the payload cannot be read as Exif; skip it
			// rather than handing a boolean byte order to the Exif class.
			if ( $byteOrder !== false ) {
				$meta->getExif( $exifDataFile, $byteOrder );
			}
		}
		unset( $array['exif'] );
		unset( $array['text']['xmp'] );
		$meta->addMetadata( $array['text'], 'native' );
		unset( $array['text'] );
		$array['metadata'] = $meta->getMetadataArray();
		$array['metadata']['_MW_PNG_VERSION'] = PNGMetadataExtractor::VERSION;

		return $array;
	}

	/**
	 * Entry point for GIF images.
	 *
	 * They don't really have native metadata, so just merges together
	 * XMP and image comment.
	 *
	 * @param string $filename Full path to file
	 * @return array Metadata array
	 */
	public static function GIF( $filename ) {
		$meta = new self();
		$baseArray = GIFMetadataExtractor::getMetadata( $filename );

		if ( count( $baseArray['comment'] ) > 0 ) {
			$meta->addMetadata( [ 'GIFFileComment' => $baseArray['comment'] ], 'native' );
		}

		if ( $baseArray['xmp'] !== '' && XMPReader::isSupported() ) {
			$xmp = new XMPReader( LoggerFactory::getInstance( 'XMP' ), $filename );
			$xmp->parse( $baseArray['xmp'] );
			foreach ( $xmp->getResults() as $type => $xmpSection ) {
				$meta->addMetadata( $xmpSection, $type );
			}
		}

		unset( $baseArray['comment'] );
		unset( $baseArray['xmp'] );

		$baseArray['metadata'] = $meta->getMetadataArray();
		$baseArray['metadata']['_MW_GIF_VERSION'] = GIFMetadataExtractor::VERSION;

		return $baseArray;
	}

	/**
	 * This doesn't do much yet, but eventually I plan to add
	 * XMP support for Tiff. (PHP's exif support already extracts
	 * but needs some further processing because PHP's exif support
	 * is stupid...)
	 *
	 * @todo Add XMP support, so this function actually makes sense to put here.
	 *
	 * The various exceptions this throws are caught later.
	 * @param string $filename
	 * @throws InvalidTiffException If the file is missing, its byte order
	 *  cannot be determined, or no Exif data could be extracted from it.
	 * @return array The metadata.
	 */
	public static function Tiff( $filename ) {
		if ( !file_exists( $filename ) ) {
			throw new InvalidTiffException(
				"File {filename} doesn't exist",
				[ 'filename' => $filename ]
			);
		}

		$byteOrder = self::getTiffByteOrder( $filename );
		if ( !$byteOrder ) {
			throw new InvalidTiffException(
				'Error determining byte order of {filename}',
				[ 'filename' => $filename ]
			);
		}

		$exif = new Exif( $filename, $byteOrder );
		$data = $exif->getFilteredData();
		if ( !$data ) {
			throw new InvalidTiffException(
				'Could not extract data from tiff file {filename}',
				[ 'filename' => $filename ]
			);
		}

		$data['MEDIAWIKI_EXIF_VERSION'] = Exif::version();

		return $data;
	}

	/**
	 * Read the first 2 bytes of a tiff file to figure out
	 * Little Endian or Big Endian. Needed for exif stuff.
	 *
	 * @param string $filename
	 * @return string|false 'BE' or 'LE' or false
	 */
	public static function getTiffByteOrder( $filename ) {
		$fh = fopen( $filename, 'rb' );
		if ( !$fh ) {
			return false;
		}
		$head = fread( $fh, 2 );
		fclose( $fh );

		if ( $head === 'II' ) {
			// II for Intel.
			return 'LE';
		}
		if ( $head === 'MM' ) {
			// MM for Motorola.
			return 'BE';
		}

		// Short read, read failure, or not a TIFF header.
		return false;
	}
}
| 337 | |
| 338 | /** @deprecated class alias since 1.46; keeps the un-namespaced name 'BitmapMetadataHandler' available for this class */
| 339 | class_alias( BitmapMetadataHandler::class, 'BitmapMetadataHandler' ); |