Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
53.47% |
54 / 101 |
|
22.22% |
2 / 9 |
CRAP | |
0.00% |
0 / 1 |
BitmapMetadataHandler | |
53.47% |
54 / 101 |
|
22.22% |
2 / 9 |
280.17 | |
0.00% |
0 / 1 |
doApp13 | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
getExif | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
20 | |||
addMetadata | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
getMetadataArray | |
71.43% |
10 / 14 |
|
0.00% |
0 / 1 |
12.33 | |||
Jpeg | |
95.00% |
19 / 20 |
|
0.00% |
0 / 1 |
11 | |||
PNG | |
100.00% |
16 / 16 |
|
100.00% |
1 / 1 |
5 | |||
GIF | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
30 | |||
Tiff | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
20 | |||
getTiffByteOrder | |
60.00% |
6 / 10 |
|
0.00% |
0 / 1 |
6.60 |
1 | <?php |
2 | /** |
3 | * Extraction of metadata from different bitmap image types. |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License along |
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
18 | * http://www.gnu.org/copyleft/gpl.html |
19 | * |
20 | * @file |
21 | * @ingroup Media |
22 | */ |
23 | |
24 | use MediaWiki\Logger\LoggerFactory; |
25 | use MediaWiki\MainConfigNames; |
26 | use MediaWiki\MediaWikiServices; |
27 | use Wikimedia\XMPReader\Reader as XMPReader; |
28 | |
29 | /** |
30 | * Class to deal with reconciling and extracting metadata from bitmap images. |
31 | * This is meant to comply with http://www.metadataworkinggroup.org/pdf/mwg_guidance.pdf |
32 | * |
33 | * This sort of acts as an intermediary between MediaHandler::getMetadata |
34 | * and the various metadata extractors. |
35 | * |
36 | * @todo Other image formats. |
37 | * @newable |
38 | * @note marked as newable in 1.35 for lack of a better alternative, |
39 | * but should become a stateless service, or a handler managed |
40 | * registry for metadata handlers for different file types. |
41 | * @ingroup Media |
42 | */ |
class BitmapMetadataHandler {
	/** @var array Metadata collected so far, keyed by metadata type (e.g. 'exif', 'native') */
	private $metadata = [];

	/**
	 * @var array Metadata priority. Maps a numeric priority to the list of metadata
	 *  types sharing it. getMetadataArray() merges from the highest number down,
	 *  and a key that was merged earlier is never overwritten by a later type.
	 */
	private $metaPriority = [
		20 => [ 'other' ],
		40 => [ 'native' ],
		60 => [ 'iptc-good-hash', 'iptc-no-hash' ],
		70 => [ 'xmp-deprecated' ],
		80 => [ 'xmp-general' ],
		90 => [ 'xmp-exif' ],
		100 => [ 'iptc-bad-hash' ],
		120 => [ 'exif' ],
	];

	/** @var string Metadata type that parsed IPTC data is filed under; set by doApp13() */
	private $iptcType = 'iptc-no-hash';

	/**
	 * This does the photoshop image resource app13 block
	 * of interest, IPTC-IIM metadata is stored here.
	 *
	 * Mostly just calls doPSIR and doIPTC
	 *
	 * @param string $app13 String containing app13 block from jpeg file
	 */
	private function doApp13( $app13 ) {
		try {
			$this->iptcType = JpegMetadataExtractor::doPSIR( $app13 );
		} catch ( InvalidPSIRException $e ) {
			// Error reading the iptc hash information.
			// This probably means the App13 segment is something other than what we expect.
			// However, still try to read it, and treat it as if the hash didn't exist.
			wfDebug( "Error parsing iptc data of file: " . $e->getMessage() );
			$this->iptcType = 'iptc-no-hash';
		}

		$iptc = IPTC::parse( $app13 );
		$this->addMetadata( $iptc, $this->iptcType );
	}

	/**
	 * Get exif info using exif class.
	 * Basically what used to be in BitmapHandler::getMetadata().
	 * Just calls stuff in the Exif class.
	 *
	 * Nothing is added when the file does not exist, when the ShowEXIF
	 * configuration setting is falsy, or when the Exif class returns no data.
	 *
	 * Parameters are passed to the Exif class.
	 *
	 * @param string $filename
	 * @param string $byteOrder
	 */
	public function getExif( $filename, $byteOrder ) {
		$showEXIF = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::ShowEXIF );
		if ( file_exists( $filename ) && $showEXIF ) {
			$exif = new Exif( $filename, $byteOrder );
			$data = $exif->getFilteredData();
			if ( $data ) {
				$this->addMetadata( $data, 'exif' );
			}
		}
	}

	/**
	 * Add misc metadata. Warning: atm if the metadata category
	 * doesn't have a priority, it will be silently discarded.
	 *
	 * @param array $metaArray Array of metadata values
	 * @param string $type Type. defaults to other. if two things have the same type they're merged
	 *  (on a key collision the newly added value replaces the stored one)
	 */
	public function addMetadata( $metaArray, $type = 'other' ) {
		if ( isset( $this->metadata[$type] ) ) {
			/* merge with old data; array union keeps the keys of the left operand */
			$metaArray += $this->metadata[$type];
		}

		$this->metadata[$type] = $metaArray;
	}

	/**
	 * Merge together the various types of metadata
	 * the different types have different priorities,
	 * and are merged in order.
	 *
	 * This function is generally called by the media handlers' getMetadata()
	 *
	 * The stored metadata is not modified, so calling this repeatedly
	 * returns the same result.
	 *
	 * @return array
	 */
	public function getMetadataArray() {
		// Merge in descending priority order, based on the MWG recommendation.
		// Sort a local copy so that building the result leaves the object untouched.
		$priorities = $this->metaPriority;
		krsort( $priorities );

		$merged = [];
		foreach ( $priorities as $types ) {
			foreach ( $types as $type ) {
				if ( !isset( $this->metadata[$type] ) ) {
					continue;
				}

				// Work on a copy: entries folded into $merged below are removed from
				// the copy only, never from $this->metadata.
				$typeData = $this->metadata[$type];

				// Do some special casing for multilingual values.
				// Don't discard translations if also as a simple value.
				foreach ( $typeData as $itemName => $item ) {
					if ( is_array( $item ) && isset( $item['_type'] ) && $item['_type'] === 'lang' &&
						isset( $merged[$itemName] ) && !is_array( $merged[$itemName] )
					) {
						// The already merged simple value becomes the default
						// entry of the multilingual value.
						$item['x-default'] = $merged[$itemName];
						$merged[$itemName] = $item;
						unset( $typeData[$itemName] );
					}
				}

				// Array union: keys that are already present in $merged win.
				$merged += $typeData;
			}
		}

		return $merged;
	}

	/**
	 * Main entry point for jpeg's.
	 *
	 * @param string $filename Filename (with full path)
	 * @return array Metadata result array.
	 * @throws InvalidJpegException
	 */
	public static function Jpeg( $filename ) {
		$showXMP = XMPReader::isSupported();
		$meta = new self();

		$seg = JpegMetadataExtractor::segmentSplitter( $filename );

		if ( isset( $seg['SOF'] ) ) {
			$meta->addMetadata( [ 'SOF' => $seg['SOF'] ] );
		}
		if ( isset( $seg['COM'][0] ) ) {
			$meta->addMetadata( [ 'JPEGFileComment' => $seg['COM'] ], 'native' );
		}
		foreach ( $seg['PSIR'] ?? [] as $curPSIRValue ) {
			$meta->doApp13( $curPSIRValue );
		}
		if ( isset( $seg['XMP'] ) && $showXMP ) {
			$xmp = new XMPReader( LoggerFactory::getInstance( 'XMP' ), $filename );
			$xmp->parse( $seg['XMP'] );
			// Tolerate a missing 'XMP_ext' key rather than iterating over null.
			foreach ( $seg['XMP_ext'] ?? [] as $xmpExt ) {
				/* Support for extended xmp in jpeg files
				 * is not well tested and a bit fragile.
				 */
				$xmp->parseExtended( $xmpExt );
			}
			$res = $xmp->getResults();
			foreach ( $res as $type => $array ) {
				$meta->addMetadata( $array, $type );
			}
		}

		// Fall back to big endian when the splitter reported no byte order.
		$meta->getExif( $filename, $seg['byteOrder'] ?? 'BE' );

		return $meta->getMetadataArray();
	}

	/**
	 * Entry point for png
	 * At some point in the future this might
	 * merge the png various tEXt chunks to that
	 * are interesting, but for now it only does XMP
	 *
	 * @param string $filename Full path to file
	 * @return array Array for storage in img_metadata.
	 */
	public static function PNG( $filename ) {
		$showXMP = XMPReader::isSupported();

		$meta = new self();
		$array = PNGMetadataExtractor::getMetadata( $filename );
		if ( isset( $array['text']['xmp']['x-default'] )
			&& $array['text']['xmp']['x-default'] !== '' && $showXMP
		) {
			$xmp = new XMPReader( LoggerFactory::getInstance( 'XMP' ), $filename );
			$xmp->parse( $array['text']['xmp']['x-default'] );
			$xmpRes = $xmp->getResults();
			foreach ( $xmpRes as $type => $xmpSection ) {
				$meta->addMetadata( $xmpSection, $type );
			}
		}
		// The raw XMP packet is dropped; the remaining text entries are kept as native metadata.
		unset( $array['text']['xmp'] );
		// @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset xmp is not alone in text, makes text always set
		$meta->addMetadata( $array['text'], 'native' );
		unset( $array['text'] );
		$array['metadata'] = $meta->getMetadataArray();
		$array['metadata']['_MW_PNG_VERSION'] = PNGMetadataExtractor::VERSION;

		return $array;
	}

	/**
	 * function for gif images.
	 *
	 * They don't really have native metadata, so just merges together
	 * XMP and image comment.
	 *
	 * @param string $filename Full path to file
	 * @return array Metadata array
	 */
	public static function GIF( $filename ) {
		$meta = new self();
		$baseArray = GIFMetadataExtractor::getMetadata( $filename );

		if ( count( $baseArray['comment'] ) > 0 ) {
			$meta->addMetadata( [ 'GIFFileComment' => $baseArray['comment'] ], 'native' );
		}

		if ( $baseArray['xmp'] !== '' && XMPReader::isSupported() ) {
			$xmp = new XMPReader( LoggerFactory::getInstance( 'XMP' ), $filename );
			$xmp->parse( $baseArray['xmp'] );
			$xmpRes = $xmp->getResults();
			foreach ( $xmpRes as $type => $xmpSection ) {
				$meta->addMetadata( $xmpSection, $type );
			}
		}

		// The comment and XMP entries are replaced by the merged 'metadata' entry below.
		unset( $baseArray['comment'] );
		unset( $baseArray['xmp'] );

		$baseArray['metadata'] = $meta->getMetadataArray();
		$baseArray['metadata']['_MW_GIF_VERSION'] = GIFMetadataExtractor::VERSION;

		return $baseArray;
	}

	/**
	 * This doesn't do much yet, but eventually I plan to add
	 * XMP support for Tiff. (PHP's exif support already extracts
	 * but needs some further processing because PHP's exif support
	 * is stupid...)
	 *
	 * @todo Add XMP support, so this function actually makes sense to put here.
	 *
	 * The various exceptions this throws are caught later.
	 * @param string $filename
	 * @throws InvalidTiffException If the file does not exist, its byte order cannot
	 *  be determined, or no data could be extracted from it
	 * @return array The metadata.
	 */
	public static function Tiff( $filename ) {
		if ( !file_exists( $filename ) ) {
			throw new InvalidTiffException( "File doesn't exist - $filename" );
		}

		$byteOrder = self::getTiffByteOrder( $filename );
		if ( !$byteOrder ) {
			throw new InvalidTiffException( "Error determining byte order of $filename" );
		}

		$exif = new Exif( $filename, $byteOrder );
		$data = $exif->getFilteredData();
		if ( !$data ) {
			throw new InvalidTiffException( "Could not extract data from tiff file $filename" );
		}

		$data['MEDIAWIKI_EXIF_VERSION'] = Exif::version();

		return $data;
	}

	/**
	 * Read the first 2 bytes of a tiff file to figure out
	 * Little Endian or Big Endian. Needed for exif stuff.
	 *
	 * @param string $filename
	 * @return string|false 'BE' or 'LE' or false
	 */
	public static function getTiffByteOrder( $filename ) {
		$fh = fopen( $filename, 'rb' );
		if ( !$fh ) {
			return false;
		}
		$head = fread( $fh, 2 );
		fclose( $fh );

		switch ( $head ) {
			case 'II':
				return 'LE'; // II for Intel.
			case 'MM':
				return 'BE'; // MM for Motorola.
			default:
				return false; // Something went wrong.
		}
	}
}