Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
52.94% |
63 / 119 |
|
22.22% |
2 / 9 |
CRAP | |
0.00% |
0 / 1 |
| BitmapMetadataHandler | |
52.94% |
63 / 119 |
|
22.22% |
2 / 9 |
299.22 | |
0.00% |
0 / 1 |
| doApp13 | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
| getExif | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
20 | |||
| addMetadata | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
| getMetadataArray | |
71.43% |
10 / 14 |
|
0.00% |
0 / 1 |
12.33 | |||
| Jpeg | |
95.00% |
19 / 20 |
|
0.00% |
0 / 1 |
11 | |||
| PNG | |
100.00% |
25 / 25 |
|
100.00% |
1 / 1 |
6 | |||
| GIF | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
30 | |||
| Tiff | |
0.00% |
0 / 20 |
|
0.00% |
0 / 1 |
20 | |||
| getTiffByteOrder | |
60.00% |
6 / 10 |
|
0.00% |
0 / 1 |
6.60 | |||
| 1 | <?php |
| 2 | /** |
| 3 | * Extraction of metadata from different bitmap image types. |
| 4 | * |
| 5 | * @license GPL-2.0-or-later |
| 6 | * @file |
| 7 | * @ingroup Media |
| 8 | */ |
| 9 | |
| 10 | use MediaWiki\Logger\LoggerFactory; |
| 11 | use MediaWiki\MainConfigNames; |
| 12 | use MediaWiki\MediaWikiServices; |
| 13 | use Wikimedia\XMPReader\Reader as XMPReader; |
| 14 | |
| 15 | /** |
| 16 | * Class to deal with reconciling and extracting metadata from bitmap images. |
| 17 | * This is meant to comply with http://www.metadataworkinggroup.org/pdf/mwg_guidance.pdf |
| 18 | * |
| 19 | * This sort of acts as an intermediary between MediaHandler::getMetadata |
| 20 | * and the various metadata extractors. |
| 21 | * |
| 22 | * @todo Other image formats. |
| 23 | * @newable |
| 24 | * @note marked as newable in 1.35 for lack of a better alternative, |
| 25 | * but should become a stateless service, or a handler managed |
| 26 | * registry for metadata handlers for different file types. |
| 27 | * @ingroup Media |
| 28 | */ |
class BitmapMetadataHandler {
	/** @var array[] Collected metadata arrays, keyed by source type (e.g. 'exif', 'native') */
	private $metadata = [];

	/**
	 * @var string[][] Source types grouped by merge priority.
	 *
	 * getMetadataArray() walks this from the highest key to the lowest, and a
	 * value that is already present is never overwritten by a lower priority
	 * source. Types that are not listed here are never merged.
	 */
	private $metaPriority = [
		20 => [ 'other' ],
		40 => [ 'native' ],
		60 => [ 'iptc-good-hash', 'iptc-no-hash' ],
		70 => [ 'xmp-deprecated' ],
		80 => [ 'xmp-general' ],
		90 => [ 'xmp-exif' ],
		100 => [ 'iptc-bad-hash' ],
		120 => [ 'exif' ],
	];

	/** @var string Type under which IPTC data is stored; updated by doApp13() */
	private $iptcType = 'iptc-no-hash';

	/**
	 * This does the photoshop image resource app13 block
	 * of interest, IPTC-IIM metadata is stored here.
	 *
	 * Mostly just calls doPSIR and doIPTC
	 *
	 * @param string $app13 String containing app13 block from jpeg file
	 */
	private function doApp13( $app13 ) {
		try {
			$this->iptcType = JpegMetadataExtractor::doPSIR( $app13 );
		} catch ( InvalidPSIRException $e ) {
			// Error reading the iptc hash information.
			// This probably means the App13 segment is something other than what we expect.
			// However, still try to read it, and treat it as if the hash didn't exist.
			wfDebug( "Error parsing iptc data of file: " . $e->getMessage() );
			$this->iptcType = 'iptc-no-hash';
		}

		$iptc = IPTC::parse( $app13 );
		$this->addMetadata( $iptc, $this->iptcType );
	}

	/**
	 * Get exif info using exif class.
	 * Basically what used to be in BitmapHandler::getMetadata().
	 * Just calls stuff in the Exif class.
	 *
	 * Nothing is read unless the file exists and the ShowEXIF setting is
	 * enabled. Non-empty results are stored under the 'exif' type.
	 *
	 * Parameters are passed to the Exif class.
	 *
	 * @param string $filename
	 * @param string $byteOrder
	 */
	public function getExif( $filename, $byteOrder ) {
		$showEXIF = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::ShowEXIF );
		if ( file_exists( $filename ) && $showEXIF ) {
			$exif = new Exif( $filename, $byteOrder );
			$data = $exif->getFilteredData();
			if ( $data ) {
				$this->addMetadata( $data, 'exif' );
			}
		}
	}

	/**
	 * Add misc metadata. Warning: atm if the metadata category
	 * doesn't have a priority, it will be silently discarded.
	 *
	 * If data of the same type was added before, the two arrays are merged;
	 * on a key collision the newly supplied value wins.
	 *
	 * @param array $metaArray Array of metadata values
	 * @param string $type Type. defaults to other. if two things have the same type they're merged
	 */
	public function addMetadata( $metaArray, $type = 'other' ) {
		if ( isset( $this->metadata[$type] ) ) {
			// Array union keeps the left operand's values, so new data takes
			// precedence and the older data only fills in missing keys.
			$metaArray += $this->metadata[$type];
		}

		$this->metadata[$type] = $metaArray;
	}

	/**
	 * Merge together the various types of metadata
	 * the different types have different priorities,
	 * and are merged in order.
	 *
	 * This function is generally called by the media handlers' getMetadata()
	 *
	 * The collected metadata is left untouched, so calling this repeatedly
	 * returns the same result each time.
	 *
	 * @return array
	 */
	public function getMetadataArray() {
		// Walk the sources from highest to lowest priority, based on the
		// MWG recommendation. Sort a copy so that reading has no side effects.
		$priorities = $this->metaPriority;
		krsort( $priorities );

		$merged = [];
		foreach ( $priorities as $types ) {
			foreach ( $types as $type ) {
				if ( !isset( $this->metadata[$type] ) ) {
					continue;
				}

				// Do some special casing for multilingual values.
				// Don't discard translations if also as a simple value.
				foreach ( $this->metadata[$type] as $itemName => $item ) {
					$isLangItem = is_array( $item )
						&& isset( $item['_type'] )
						&& $item['_type'] === 'lang';
					if ( $isLangItem && isset( $merged[$itemName] ) && !is_array( $merged[$itemName] ) ) {
						// A higher priority source supplied a plain value:
						// keep it as the default of the translated set.
						$default = $merged[$itemName];
						$merged[$itemName] = $item;
						$merged[$itemName]['x-default'] = $default;
					}
				}

				// Array union never replaces existing keys, so values taken
				// from higher priority sources (including the multilingual
				// ones handled above) are preserved.
				$merged += $this->metadata[$type];
			}
		}

		return $merged;
	}

	/**
	 * Main entry point for jpeg's.
	 *
	 * @param string $filename Filename (with full path)
	 * @return array Metadata result array.
	 * @throws InvalidJpegException
	 */
	public static function Jpeg( $filename ) {
		$showXMP = XMPReader::isSupported();
		$meta = new self();

		$seg = JpegMetadataExtractor::segmentSplitter( $filename );

		if ( isset( $seg['SOF'] ) ) {
			$meta->addMetadata( [ 'SOF' => $seg['SOF'] ] );
		}
		if ( isset( $seg['COM'][0] ) ) {
			$meta->addMetadata( [ 'JPEGFileComment' => $seg['COM'] ], 'native' );
		}
		if ( isset( $seg['PSIR'] ) && count( $seg['PSIR'] ) > 0 ) {
			foreach ( $seg['PSIR'] as $curPSIRValue ) {
				$meta->doApp13( $curPSIRValue );
			}
		}
		if ( isset( $seg['XMP'] ) && $showXMP ) {
			$xmp = new XMPReader( LoggerFactory::getInstance( 'XMP' ), $filename );
			$xmp->parse( $seg['XMP'] );
			// Treat a missing extended-XMP list the same as an empty one.
			foreach ( $seg['XMP_ext'] ?? [] as $xmpExt ) {
				/* Support for extended xmp in jpeg files
				 * is not well tested and a bit fragile.
				 */
				$xmp->parseExtended( $xmpExt );
			}
			$res = $xmp->getResults();
			foreach ( $res as $type => $array ) {
				$meta->addMetadata( $array, $type );
			}
		}

		// Fall back to big endian when the splitter reported no byte order.
		$meta->getExif( $filename, $seg['byteOrder'] ?? 'BE' );

		return $meta->getMetadataArray();
	}

	/**
	 * Entry point for png
	 * At some point in the future this might
	 * merge the png various tEXt chunks to that
	 * are interesting, but for now it only does XMP
	 *
	 * The 'exif' and 'text' entries of the extractor output are folded into
	 * a single 'metadata' entry; the remaining entries are returned as-is.
	 *
	 * @param string $filename Full path to file
	 * @return array Array for storage in img_metadata.
	 */
	public static function PNG( $filename ) {
		$showXMP = XMPReader::isSupported();

		$meta = new self();
		$array = PNGMetadataExtractor::getMetadata( $filename );
		if ( isset( $array['text']['xmp']['x-default'] )
			&& $array['text']['xmp']['x-default'] !== '' && $showXMP
		) {
			$xmp = new XMPReader( LoggerFactory::getInstance( 'XMP' ), $filename );
			$xmp->parse( $array['text']['xmp']['x-default'] );
			$xmpRes = $xmp->getResults();
			foreach ( $xmpRes as $type => $xmpSection ) {
				$meta->addMetadata( $xmpSection, $type );
			}
		}

		if ( !empty( $array['exif'] ) ) {
			// The Exif section is essentially an embedded tiff file,
			// so just extract it and read it.
			$tmpFile = MediaWikiServices::getInstance()->
				getTempFSFileFactory()->
				newTempFSFile( 'png-exif_', 'tiff' );
			$exifDataFile = $tmpFile->getPath();
			file_put_contents( $exifDataFile, $array['exif'] );
			$byteOrder = self::getTiffByteOrder( $exifDataFile );
			$meta->getExif( $exifDataFile, $byteOrder );
		}
		unset( $array['exif'] );
		unset( $array['text']['xmp'] );
		$meta->addMetadata( $array['text'] ?? [], 'native' );
		unset( $array['text'] );
		$array['metadata'] = $meta->getMetadataArray();
		$array['metadata']['_MW_PNG_VERSION'] = PNGMetadataExtractor::VERSION;

		return $array;
	}

	/**
	 * function for gif images.
	 *
	 * They don't really have native metadata, so just merges together
	 * XMP and image comment.
	 *
	 * The 'comment' and 'xmp' entries of the extractor output are folded into
	 * a single 'metadata' entry; the remaining entries are returned as-is.
	 *
	 * @param string $filename Full path to file
	 * @return array Metadata array
	 */
	public static function GIF( $filename ) {
		$meta = new self();
		$baseArray = GIFMetadataExtractor::getMetadata( $filename );

		if ( count( $baseArray['comment'] ) > 0 ) {
			$meta->addMetadata( [ 'GIFFileComment' => $baseArray['comment'] ], 'native' );
		}

		if ( $baseArray['xmp'] !== '' && XMPReader::isSupported() ) {
			$xmp = new XMPReader( LoggerFactory::getInstance( 'XMP' ), $filename );
			$xmp->parse( $baseArray['xmp'] );
			$xmpRes = $xmp->getResults();
			foreach ( $xmpRes as $type => $xmpSection ) {
				$meta->addMetadata( $xmpSection, $type );
			}
		}

		unset( $baseArray['comment'] );
		unset( $baseArray['xmp'] );

		$baseArray['metadata'] = $meta->getMetadataArray();
		$baseArray['metadata']['_MW_GIF_VERSION'] = GIFMetadataExtractor::VERSION;

		return $baseArray;
	}

	/**
	 * This doesn't do much yet, but eventually I plan to add
	 * XMP support for Tiff. (PHP's exif support already extracts
	 * but needs some further processing because PHP's exif support
	 * is stupid...)
	 *
	 * @todo Add XMP support, so this function actually makes sense to put here.
	 *
	 * The various exceptions this throws are caught later.
	 * @param string $filename
	 * @throws InvalidTiffException If the file is missing, its byte order
	 *  cannot be determined, or no Exif data could be extracted from it.
	 * @return array The metadata.
	 */
	public static function Tiff( $filename ) {
		if ( !file_exists( $filename ) ) {
			throw new InvalidTiffException(
				"File {filename} doesn't exist",
				[ 'filename' => $filename ]
			);
		}

		$byteOrder = self::getTiffByteOrder( $filename );
		if ( !$byteOrder ) {
			throw new InvalidTiffException(
				'Error determining byte order of {filename}',
				[ 'filename' => $filename ]
			);
		}

		$exif = new Exif( $filename, $byteOrder );
		$data = $exif->getFilteredData();
		if ( !$data ) {
			throw new InvalidTiffException(
				'Could not extract data from tiff file {filename}',
				[ 'filename' => $filename ]
			);
		}

		$data['MEDIAWIKI_EXIF_VERSION'] = Exif::version();

		return $data;
	}

	/**
	 * Read the first 2 bytes of a tiff file to figure out
	 * Little Endian or Big Endian. Needed for exif stuff.
	 *
	 * @param string $filename
	 * @return string|false 'BE' or 'LE' or false
	 */
	public static function getTiffByteOrder( $filename ) {
		$fh = fopen( $filename, 'rb' );
		if ( !$fh ) {
			return false;
		}
		$head = fread( $fh, 2 );
		fclose( $fh );

		switch ( $head ) {
			case 'II':
				return 'LE'; // II for Intel.
			case 'MM':
				return 'BE'; // MM for Motorola.
			default:
				return false; // Unrecognised header, or the read failed.
		}
	}
}