Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
70.84% |
260 / 367 |
|
15.00% |
3 / 20 |
CRAP | |
0.00% |
0 / 1 |
Exif | |
70.84% |
260 / 367 |
|
15.00% |
3 / 20 |
390.74 | |
0.00% |
0 / 1 |
__construct | |
96.50% |
138 / 143 |
|
0.00% |
0 / 1 |
5 | |||
makeFilteredData | |
91.67% |
11 / 12 |
|
0.00% |
0 / 1 |
6.02 | |||
collapseData | |
68.89% |
31 / 45 |
|
0.00% |
0 / 1 |
18.09 | |||
charCodeString | |
48.39% |
15 / 31 |
|
0.00% |
0 / 1 |
20.14 | |||
exifPropToOrd | |
50.00% |
1 / 2 |
|
0.00% |
0 / 1 |
2.50 | |||
exifGPStoNumber | |
100.00% |
16 / 16 |
|
100.00% |
1 / 1 |
9 | |||
getData | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getFilteredData | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
version | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isByte | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
30 | |||
isASCII | |
44.44% |
4 / 9 |
|
0.00% |
0 / 1 |
6.74 | |||
isShort | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
30 | |||
isLong | |
60.00% |
3 / 5 |
|
0.00% |
0 / 1 |
6.60 | |||
isRational | |
66.67% |
4 / 6 |
|
0.00% |
0 / 1 |
4.59 | |||
isUndefined | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
isSlong | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
isSrational | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
20 | |||
validate | |
56.60% |
30 / 53 |
|
0.00% |
0 / 1 |
57.04 | |||
debug | |
15.38% |
2 / 13 |
|
0.00% |
0 / 1 |
27.81 | |||
debugFile | |
33.33% |
2 / 6 |
|
0.00% |
0 / 1 |
5.67 |
1 | <?php |
2 | /** |
3 | * Extraction and validation of image metadata. |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License along |
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
18 | * http://www.gnu.org/copyleft/gpl.html |
19 | * |
20 | * @ingroup Media |
21 | * @author Ævar Arnfjörð Bjarmason <avarab@gmail.com> |
22 | * @copyright Copyright © 2005, Ævar Arnfjörð Bjarmason, 2009 Brent Garber |
23 | * @license GPL-2.0-or-later |
24 | * @see http://exif.org/Exif2-2.PDF The Exif 2.2 specification |
25 | * @file |
26 | */ |
27 | |
28 | use MediaWiki\Config\ConfigException; |
29 | use Wikimedia\AtEase\AtEase; |
30 | |
31 | /** |
32 | * Class to extract and validate Exif data from jpeg (and possibly tiff) files. |
33 | * @ingroup Media |
34 | */ |
35 | class Exif { |
36 | /** An 8-bit (1-byte) unsigned integer. */ |
37 | private const BYTE = 1; |
38 | |
39 | /** An 8-bit byte containing one 7-bit ASCII code. |
40 | * The final byte is terminated with NULL. |
41 | */ |
42 | private const ASCII = 2; |
43 | |
44 | /** A 16-bit (2-byte) unsigned integer. */ |
45 | private const SHORT = 3; |
46 | |
47 | /** A 32-bit (4-byte) unsigned integer. */ |
48 | private const LONG = 4; |
49 | |
50 | /** Two LONGs. The first LONG is the numerator and the second LONG expresses |
51 | * the denominator |
52 | */ |
53 | private const RATIONAL = 5; |
54 | |
55 | /** A 16-bit (2-byte) or 32-bit (4-byte) unsigned integer. */ |
56 | private const SHORT_OR_LONG = 6; |
57 | |
58 | /** An 8-bit byte that can take any value depending on the field definition */ |
59 | private const UNDEFINED = 7; |
60 | |
61 | /** A 32-bit (4-byte) signed integer (2's complement notation). */ |
62 | private const SLONG = 9; |
63 | |
64 | /** Two SLONGs. The first SLONG is the numerator and the second SLONG is |
65 | * the denominator. |
66 | */ |
67 | private const SRATIONAL = 10; |
68 | |
69 | /** A fake value for things we don't want or don't support. */ |
70 | private const IGNORE = -1; |
71 | |
72 | /** @var array Exif tags grouped by category, the tagname itself is the key |
73 | * and the type is the value, in the case of more than one possible value |
74 | * type they are separated by commas. |
75 | */ |
76 | private $mExifTags; |
77 | |
78 | /** @var array The raw Exif data returned by exif_read_data() */ |
79 | private $mRawExifData; |
80 | |
81 | /** @var array A filtered version of $mRawExifData that has been pruned |
82 | * of invalid tags and tags that contain content they shouldn't contain |
83 | * according to the Exif specification |
84 | */ |
85 | private $mFilteredExifData; |
86 | |
87 | /** @var string The file being processed */ |
88 | private $file; |
89 | |
90 | /** @var string The basename of the file being processed */ |
91 | private $basename; |
92 | |
93 | /** @var string|false The private log to log to, e.g. 'exif' */ |
94 | private $log = false; |
95 | |
96 | /** @var string The byte order of the file. Needed because php's extension |
97 | * doesn't fully process some obscure props. |
98 | */ |
99 | private $byteOrder; |
100 | |
/**
 * Read the Exif data of a file, then filter and collapse it.
 *
 * @param string $file Filename.
 * @param string $byteOrder Byte ordering of the file, either 'BE' (Big Endian)
 *   or 'LE' (Little Endian). Any other value (including the default '') triggers
 *   a warning and falls back to 'BE'.
 * @throws ConfigException If exif_read_data() is not available.
 * @todo FIXME: The following are broken:
 *   SubjectArea. Need to test the more obscure tags.
 *   DigitalZoomRatio = 0/0 is rejected. Need to determine if that's valid.
 *   Possibly should treat 0/0 = 0. Need to read the Exif spec on that.
 */
public function __construct( $file, $byteOrder = '' ) {
	/**
	 * Page numbers below refer to pages in the Exif 2.2 standard.
	 *
	 * Note, Exif::UNDEFINED is treated as a string, not as an array of bytes,
	 * so don't put a count parameter for any UNDEFINED values.
	 *
	 * @link http://exif.org/Exif2-2.PDF The Exif 2.2 specification
	 */
	$this->mExifTags = [
		// TIFF Rev. 6.0 Attribute Information (p22)
		'IFD0' => [
			// --- Tags relating to image structure ---
			'ImageWidth' => self::SHORT_OR_LONG, // Image width
			'ImageLength' => self::SHORT_OR_LONG, // Image height
			'BitsPerSample' => [ self::SHORT, 3 ], // Number of bits per component

			// "When a primary image is JPEG compressed, this designation is not
			// necessary and is omitted." (p23)
			'Compression' => self::SHORT, // Compression scheme (p23)
			'PhotometricInterpretation' => self::SHORT, // Pixel composition (p23)
			'Orientation' => self::SHORT, // Orientation of image (p24)
			'SamplesPerPixel' => self::SHORT, // Number of components
			'PlanarConfiguration' => self::SHORT, // Image data arrangement (p24)
			'YCbCrSubSampling' => [ self::SHORT, 2 ], // Subsampling ratio of Y to C (p24)
			'YCbCrPositioning' => self::SHORT, // Y and C positioning (p24-25)
			'XResolution' => self::RATIONAL, // Image resolution in width direction
			'YResolution' => self::RATIONAL, // Image resolution in height direction
			'ResolutionUnit' => self::SHORT, // Unit of X and Y resolution (p26)

			// --- Tags relating to recording offset ---
			'StripOffsets' => self::SHORT_OR_LONG, // Image data location
			'RowsPerStrip' => self::SHORT_OR_LONG, // Number of rows per strip
			'StripByteCounts' => self::SHORT_OR_LONG, // Bytes per compressed strip
			'JPEGInterchangeFormat' => self::SHORT_OR_LONG, // Offset to JPEG SOI
			'JPEGInterchangeFormatLength' => self::SHORT_OR_LONG, // Bytes of JPEG data

			// --- Tags relating to image data characteristics ---
			'TransferFunction' => self::IGNORE, // Transfer function
			'WhitePoint' => [ self::RATIONAL, 2 ], // White point chromaticity
			'PrimaryChromaticities' => [ self::RATIONAL, 6 ], // Chromaticities of primaries
			'YCbCrCoefficients' => [ self::RATIONAL, 3 ], // Color space transformation matrix coefficients (p27)
			'ReferenceBlackWhite' => [ self::RATIONAL, 6 ], // Pair of black and white reference values

			// --- Other tags ---
			'DateTime' => self::ASCII, // File change date and time
			'ImageDescription' => self::ASCII, // Image title
			'Make' => self::ASCII, // Image input equipment manufacturer
			'Model' => self::ASCII, // Image input equipment model
			'Software' => self::ASCII, // Software used
			'Artist' => self::ASCII, // Person who created the image
			'Copyright' => self::ASCII, // Copyright holder
		],

		// Exif IFD Attribute Information (p30-31)
		'EXIF' => [
			// @todo NOTE: Nonexistence of this field is taken to mean non-conformance
			// to the Exif 2.1 AND 2.2 standards
			'ExifVersion' => self::UNDEFINED,
			'FlashPixVersion' => self::UNDEFINED, // Supported Flashpix version (p32)

			// --- Tags relating to image data characteristics ---
			'ColorSpace' => self::SHORT, // Color space information (p32)

			// --- Tags relating to image configuration ---
			'ComponentsConfiguration' => self::UNDEFINED, // Meaning of each component (p33)
			'CompressedBitsPerPixel' => self::RATIONAL, // Image compression mode
			'PixelYDimension' => self::SHORT_OR_LONG, // Valid image height
			'PixelXDimension' => self::SHORT_OR_LONG, // Valid image width

			// --- Tags relating to user information ---
			'MakerNote' => self::IGNORE, // Manufacturer notes
			'UserComment' => self::UNDEFINED, // User comments (p34)

			// --- Tags relating to related file information ---
			'RelatedSoundFile' => self::ASCII, // Related audio file

			// --- Tags relating to date and time ---
			'DateTimeOriginal' => self::ASCII, // Date and time of original data generation (p36)
			'DateTimeDigitized' => self::ASCII, // Date and time of digital data generation
			'SubSecTime' => self::ASCII, // DateTime subseconds
			'SubSecTimeOriginal' => self::ASCII, // DateTimeOriginal subseconds
			'SubSecTimeDigitized' => self::ASCII, // DateTimeDigitized subseconds

			// --- Tags relating to picture-taking conditions (p31) ---
			'ExposureTime' => self::RATIONAL, // Exposure time
			'FNumber' => self::RATIONAL, // F Number
			'ExposureProgram' => self::SHORT, // Exposure Program (p38)
			'SpectralSensitivity' => self::ASCII, // Spectral sensitivity
			'ISOSpeedRatings' => self::SHORT, // ISO speed rating

			// Optoelectronic conversion factor. Note: We don't have support for this atm.
			'OECF' => self::IGNORE,

			'ShutterSpeedValue' => self::SRATIONAL, // Shutter speed
			'ApertureValue' => self::RATIONAL, // Aperture
			'BrightnessValue' => self::SRATIONAL, // Brightness
			'ExposureBiasValue' => self::SRATIONAL, // Exposure bias
			'MaxApertureValue' => self::RATIONAL, // Maximum lens aperture
			'SubjectDistance' => self::RATIONAL, // Subject distance
			'MeteringMode' => self::SHORT, // Metering mode (p40)
			'LightSource' => self::SHORT, // Light source (p40-41)
			'Flash' => self::SHORT, // Flash (p41-42)
			'FocalLength' => self::RATIONAL, // Lens focal length
			'SubjectArea' => [ self::SHORT, 4 ], // Subject area
			'FlashEnergy' => self::RATIONAL, // Flash energy
			'SpatialFrequencyResponse' => self::IGNORE, // Spatial frequency response. Not supported atm.
			'FocalPlaneXResolution' => self::RATIONAL, // Focal plane X resolution
			'FocalPlaneYResolution' => self::RATIONAL, // Focal plane Y resolution
			'FocalPlaneResolutionUnit' => self::SHORT, // Focal plane resolution unit (p46)
			'SubjectLocation' => [ self::SHORT, 2 ], // Subject location
			'ExposureIndex' => self::RATIONAL, // Exposure index
			'SensingMethod' => self::SHORT, // Sensing method (p46)
			'FileSource' => self::UNDEFINED, // File source (p47)
			'SceneType' => self::UNDEFINED, // Scene type (p47)
			'CFAPattern' => self::IGNORE, // CFA pattern. Not supported atm.
			'CustomRendered' => self::SHORT, // Custom image processing (p48)
			'ExposureMode' => self::SHORT, // Exposure mode (p48)
			'WhiteBalance' => self::SHORT, // White Balance (p49)
			'DigitalZoomRatio' => self::RATIONAL, // Digital zoom ratio
			'FocalLengthIn35mmFilm' => self::SHORT, // Focal length in 35 mm film
			'SceneCaptureType' => self::SHORT, // Scene capture type (p49)
			'GainControl' => self::SHORT, // Scene control (p49-50)
			'Contrast' => self::SHORT, // Contrast (p50)
			'Saturation' => self::SHORT, // Saturation (p50)
			'Sharpness' => self::SHORT, // Sharpness (p50)

			// Device settings description. This could maybe be supported. Need to find an
			// example file that uses this to see if it has stuff of interest in it.
			'DeviceSettingDescription' => self::IGNORE,

			'SubjectDistanceRange' => self::SHORT, // Subject distance range (p51)
			'ImageUniqueID' => self::ASCII, // Unique image ID
		],

		// GPS Attribute Information (p52)
		'GPS' => [
			// Should be an array of 4 Exif::BYTE's. However, php treats it as an undefined.
			// Note the exif standard calls this GPSVersionID, but php doesn't like the id suffix.
			'GPSVersion' => self::UNDEFINED,
			'GPSLatitudeRef' => self::ASCII, // North or South Latitude (p52-53)
			'GPSLatitude' => [ self::RATIONAL, 3 ], // Latitude
			'GPSLongitudeRef' => self::ASCII, // East or West Longitude (p53)
			'GPSLongitude' => [ self::RATIONAL, 3 ], // Longitude
			// Altitude reference. Note, the exif standard says this should be an EXIF::Byte,
			// but php seems to disagree.
			'GPSAltitudeRef' => self::UNDEFINED,
			'GPSAltitude' => self::RATIONAL, // Altitude
			'GPSTimeStamp' => [ self::RATIONAL, 3 ], // GPS time (atomic clock)
			'GPSSatellites' => self::ASCII, // Satellites used for measurement
			'GPSStatus' => self::ASCII, // Receiver status (p54)
			'GPSMeasureMode' => self::ASCII, // Measurement mode (p54-55)
			'GPSDOP' => self::RATIONAL, // Measurement precision
			'GPSSpeedRef' => self::ASCII, // Speed unit (p55)
			'GPSSpeed' => self::RATIONAL, // Speed of GPS receiver
			'GPSTrackRef' => self::ASCII, // Reference for direction of movement (p55)
			'GPSTrack' => self::RATIONAL, // Direction of movement
			'GPSImgDirectionRef' => self::ASCII, // Reference for direction of image (p56)
			'GPSImgDirection' => self::RATIONAL, // Direction of image
			'GPSMapDatum' => self::ASCII, // Geodetic survey data used
			'GPSDestLatitudeRef' => self::ASCII, // Reference for latitude of destination (p56)
			'GPSDestLatitude' => [ self::RATIONAL, 3 ], // Latitude destination
			'GPSDestLongitudeRef' => self::ASCII, // Reference for longitude of destination (p57)
			'GPSDestLongitude' => [ self::RATIONAL, 3 ], // Longitude of destination
			'GPSDestBearingRef' => self::ASCII, // Reference for bearing of destination (p57)
			'GPSDestBearing' => self::RATIONAL, // Bearing of destination
			'GPSDestDistanceRef' => self::ASCII, // Reference for distance to destination (p57-58)
			'GPSDestDistance' => self::RATIONAL, // Distance to destination
			'GPSProcessingMethod' => self::UNDEFINED, // Name of GPS processing method
			'GPSAreaInformation' => self::UNDEFINED, // Name of GPS area
			'GPSDateStamp' => self::ASCII, // GPS date
			'GPSDifferential' => self::SHORT, // GPS differential correction
		],
	];

	$this->file = $file;
	$this->basename = wfBaseName( $this->file );

	if ( $byteOrder !== 'BE' && $byteOrder !== 'LE' ) {
		// Only give a warning for b/c, since originally we didn't
		// require this. The number of things affected by this is
		// rather small.
		wfWarn( 'Exif class did not have byte order specified. ' .
			'Some properties may be decoded incorrectly.' );
		// BE seems about twice as popular as LE in jpg's.
		$byteOrder = 'BE';
	}
	$this->byteOrder = $byteOrder;

	$this->debugFile( __FUNCTION__, true );

	if ( !function_exists( 'exif_read_data' ) ) {
		throw new ConfigException( "Internal error: exif_read_data not present. " .
			"\$wgShowEXIF may be incorrectly set or not checked by an extension." );
	}

	AtEase::suppressWarnings();
	$rawData = exif_read_data( $this->file, '', true );
	AtEase::restoreWarnings();

	// exif_read_data() returns false on invalid input, such as when somebody
	// uploads a file called something.jpeg containing random gibberish.
	$this->mRawExifData = $rawData ?: [];

	$this->makeFilteredData();
	$this->collapseData();
	$this->debugFile( __FUNCTION__, false );
}
432 | |
/**
 * Build $this->mFilteredExifData from $this->mRawExifData.
 *
 * A tag is kept only if its section and its name are both listed in
 * $this->mExifTags and its value passes validate(). Anything else is
 * reported through debug() and left out.
 */
private function makeFilteredData() {
	$this->mFilteredExifData = [];

	foreach ( $this->mRawExifData as $section => $data ) {
		if ( array_key_exists( $section, $this->mExifTags ) ) {
			$knownTags = $this->mExifTags[$section];

			foreach ( $data as $tag => $value ) {
				if ( !array_key_exists( $tag, $knownTags ) ) {
					$this->debug( $tag, __FUNCTION__, "'$tag' is not a valid tag in '$section'" );
				} elseif ( !$this->validate( $section, $tag, $value ) ) {
					$this->debug( $value, __FUNCTION__, "'$tag' contained invalid data" );
				} else {
					// Keying by tag name alone is fine: tags in the different sections
					// do not conflict, except in the computed and thumbnail sections,
					// which we don't use.
					$this->mFilteredExifData[$tag] = $value;
				}
			}
		} else {
			$this->debug( $section, __FUNCTION__, "'$section' is not a valid Exif section" );
		}
	}
}
461 | |
/**
 * Collapse some fields together.
 * This converts some fields from exif form to a more friendly form,
 * for example GPS latitude to a single number.
 *
 * The rationale behind this is that we're storing data, not presenting it to
 * the user. For example a longitude is a single number describing how far away
 * you are from the prime meridian. While it might be nice to split it up into
 * minutes and seconds for the user, it doesn't really make sense to split a
 * single number into 4 parts for storage (degrees, minutes, seconds, direction
 * vs. a single floating point number).
 *
 * Other things this might do (not really sure if they make sense or not):
 * Dates -> mediawiki date format.
 * Convert values that can be in different units to be in one standardized unit.
 *
 * As an alternative approach, some of this could be done in the validate phase
 * if we make up our own types like Exif::DATE.
 */
private function collapseData() {
	$coordinateTags = [ 'GPSLatitude', 'GPSDestLatitude', 'GPSLongitude', 'GPSDestLongitude' ];
	foreach ( $coordinateTags as $coordinateTag ) {
		$this->exifGPStoNumber( $coordinateTag );
	}

	if ( isset( $this->mFilteredExifData['GPSAltitude'] ) ) {
		// We know altitude data is a <num>/<denom> from the validation
		// functions ran earlier. But multiplying such a string by -1
		// doesn't work well, so convert.
		[ $num, $denom ] = explode( '/', $this->mFilteredExifData['GPSAltitude'], 2 );
		$this->mFilteredExifData['GPSAltitude'] = (int)$num / (int)$denom;

		if ( isset( $this->mFilteredExifData['GPSAltitudeRef'] ) ) {
			switch ( $this->mFilteredExifData['GPSAltitudeRef'] ) {
				case "\0":
					// Above sea level
					break;
				case "\1":
					// Below sea level
					$this->mFilteredExifData['GPSAltitude'] *= -1;
					break;
				default:
					// Invalid
					unset( $this->mFilteredExifData['GPSAltitude'] );
					break;
			}
		}
	}
	unset( $this->mFilteredExifData['GPSAltitudeRef'] );

	$this->exifPropToOrd( 'FileSource' );
	$this->exifPropToOrd( 'SceneType' );

	$this->charCodeString( 'UserComment' );
	$this->charCodeString( 'GPSProcessingMethod' );
	$this->charCodeString( 'GPSAreaInformation' );

	// Turns a binary string into the list of its byte values. The empty
	// string is special-cased because str_split( '' ) yields [ '' ] on
	// older PHP versions, which would produce a spurious 0.
	$byteValues = static function ( $bytes ) {
		return $bytes === '' ? [] : array_map( 'ord', str_split( $bytes ) );
	};

	// ComponentsConfiguration should really be an array instead of a string...
	// This turns a string of binary numbers into an array of numbers.
	if ( isset( $this->mFilteredExifData['ComponentsConfiguration'] ) ) {
		$ccVals = $byteValues( $this->mFilteredExifData['ComponentsConfiguration'] );
		// this is for formatting later.
		$ccVals['_type'] = 'ol';
		$this->mFilteredExifData['ComponentsConfiguration'] = $ccVals;
	}

	// GPSVersion(ID) is treated as the wrong type by php exif support.
	// Go through each byte turning it into a version string.
	// For example: "\x02\x02\x00\x00" -> "2.2.0.0"
	//
	// Also change exif tag name from GPSVersion (what php exif thinks it is)
	// to GPSVersionID (what the exif standard thinks it is).
	if ( isset( $this->mFilteredExifData['GPSVersion'] ) ) {
		$versionParts = $byteValues( $this->mFilteredExifData['GPSVersion'] );

		if ( $this->byteOrder === 'LE' ) {
			// The bytes arrive in reverse order for little-endian files. Reverse
			// the list of byte values, not the characters of the joined string:
			// a character-wise reversal would turn a component such as "10"
			// into "01".
			$versionParts = array_reverse( $versionParts );
		}

		$this->mFilteredExifData['GPSVersionID'] = implode( '.', $versionParts );
		unset( $this->mFilteredExifData['GPSVersion'] );
	}
}
566 | |
/**
 * Decode UserComment-style tags. See pg. 34 of the exif standard.
 *
 * The first 8 bytes of the value name the character set, the rest is the
 * text itself. The text is converted to UTF-8, trimmed and stored back;
 * values that are too short or blank after trimming are removed.
 * This has not been tested on any shift-JIS strings.
 *
 * @param string $prop Prop name
 */
private function charCodeString( $prop ) {
	if ( !isset( $this->mFilteredExifData[$prop] ) ) {
		return;
	}

	$raw = $this->mFilteredExifData[$prop];

	if ( strlen( $raw ) <= 8 ) {
		// invalid. Must be at least 9 bytes long.
		$this->debug( $raw, __FUNCTION__, false );
		unset( $this->mFilteredExifData[$prop] );

		return;
	}

	// Character-set marker => iconv charset name. Anything not listed here
	// is ascii or undefined and maps to the empty string.
	$knownCharsets = [
		"JIS\x00\x00\x00\x00\x00" => "Shift-JIS",
		"UNICODE\x00" => "UTF-16" . $this->byteOrder,
	];
	$charCode = substr( $raw, 0, 8 );
	$charset = $knownCharsets[$charCode] ?? "";
	$val = substr( $raw, 8 );

	if ( $charset ) {
		AtEase::suppressWarnings();
		$val = iconv( $charset, 'UTF-8//IGNORE', $val );
		AtEase::restoreWarnings();
	} else {
		// if valid utf-8, assume that, otherwise assume windows-1252
		$normalized = $val;
		UtfNormal\Validator::quickIsNFCVerify( $normalized );
		if ( $normalized !== $val ) {
			AtEase::suppressWarnings();
			$val = iconv( 'Windows-1252', 'UTF-8//IGNORE', $val );
			AtEase::restoreWarnings();
		}
	}

	// trim and check to make sure not only whitespace.
	$val = trim( $val );
	if ( strlen( $val ) === 0 ) {
		// only whitespace.
		$this->debug( $raw, __FUNCTION__, "$prop: Is only whitespace" );
		unset( $this->mFilteredExifData[$prop] );

		return;
	}

	// all's good.
	$this->mFilteredExifData[$prop] = $val;
}
627 | |
/**
 * Replace a filtered Exif::UNDEFINED value, held as a raw binary string,
 * with the result of ord() on it. This is sometimes needed depending on
 * the type of UNDEFINED field. Does nothing if the property is not set.
 *
 * @param string $prop Name of property
 */
private function exifPropToOrd( $prop ) {
	if ( !isset( $this->mFilteredExifData[$prop] ) ) {
		return;
	}

	$rawValue = $this->mFilteredExifData[$prop];
	$this->mFilteredExifData[$prop] = ord( $rawValue );
}
639 | |
/**
 * Convert gps in exif form to a single floating point number
 * for example 10 degrees 20`40`` S -> -10.34444
 *
 * The coordinate is read from $this->mFilteredExifData[$prop] and its
 * direction from the matching "...Ref" tag. The coordinate is replaced by
 * the computed number, or removed if the data is unusable. The "...Ref"
 * tag is always removed.
 *
 * @param string $prop A GPS coordinate exif tag name (like GPSLongitude)
 */
private function exifGPStoNumber( $prop ) {
	$loc = $this->mFilteredExifData[$prop] ?? null;
	$dir = $this->mFilteredExifData[$prop . 'Ref'] ?? null;
	$res = false;

	// $loc must be the [ degrees, minutes, seconds ] triple. A scalar or a
	// triple containing non-fraction entries (e.g. null) is treated as
	// invalid instead of being fed to explode(), which would leave the
	// denominator undefined and end in a division by zero.
	if ( is_array( $loc ) && in_array( $dir, [ 'N', 'S', 'E', 'W' ], true ) ) {
		$res = 0;
		// Weight of the degrees, minutes and seconds components respectively.
		foreach ( [ 1, 1 / 60, 1 / 3600 ] as $i => $weight ) {
			$m = [];
			$part = $loc[$i] ?? null;
			if ( !is_string( $part )
				|| !preg_match( '/^(\d+)\/(\d+)$/', $part, $m )
				|| (int)$m[2] === 0
			) {
				$res = false;
				break;
			}
			$res += ( (int)$m[1] / (int)$m[2] ) * $weight;
		}

		if ( $res !== false && ( $dir === 'S' || $dir === 'W' ) ) {
			// make negative
			$res *= -1;
		}
	}

	// update the exif records.

	// using !== as $res could potentially be 0
	if ( $res !== false ) {
		$this->mFilteredExifData[$prop] = $res;
	} else {
		// if invalid
		unset( $this->mFilteredExifData[$prop] );
	}
	unset( $this->mFilteredExifData[$prop . 'Ref'] );
}
675 | |
/**
 * Accessor for the unfiltered Exif data ($this->mRawExifData).
 *
 * @return array
 */
public function getData() {
	return $this->mRawExifData;
}
683 | |
/**
 * Accessor for the filtered Exif data ($this->mFilteredExifData).
 *
 * @return array
 */
public function getFilteredData() {
	return $this->mFilteredExifData;
}
691 | |
/**
 * The version of the output format.
 *
 * Before the actual metadata information is saved in the database we
 * strip some of it, since we don't want to save things like thumbnails
 * which usually accompany Exif data. This value gets saved in the
 * database along with the actual Exif data; if the stored version does
 * not equal the value returned here, the Exif data is regenerated.
 *
 * @return int
 */
public static function version() {
	return 2;
}
707 | |
/**
 * Check whether a value is an Exif BYTE: a non-array value that equals its
 * own integer formatting and lies between 0 and 255 inclusive.
 * The outcome is also reported through debug().
 *
 * @param mixed $in The input value to check
 * @return bool
 */
private function isByte( $in ) {
	$isValid = !is_array( $in )
		&& sprintf( '%d', $in ) == $in
		&& $in >= 0
		&& $in <= 255;

	$this->debug( $in, __FUNCTION__, $isValid );

	return $isValid;
}
725 | |
/**
 * Check whether a value is acceptable as an Exif ASCII string: not an
 * array, made up only of newline and printable 7-bit characters, and not
 * blank. Rejections other than arrays are reported through debug().
 *
 * @param mixed $in The input value to check
 * @return bool
 */
private function isASCII( $in ) {
	if ( is_array( $in ) ) {
		return false;
	}

	$problem = null;
	if ( preg_match( "/[^\x0a\x20-\x7e]/", $in ) ) {
		$problem = 'found a character that is not allowed';
	} elseif ( preg_match( '/^\s*$/', $in ) ) {
		$problem = 'input consisted solely of whitespace';
	}

	if ( $problem !== null ) {
		$this->debug( $in, __FUNCTION__, $problem );

		return false;
	}

	return true;
}
749 | |
/**
 * Check whether a value is an Exif SHORT: a non-array value that equals its
 * own integer formatting and fits a 16-bit unsigned integer (0 to 65535).
 * The outcome is also reported through debug().
 *
 * @param mixed $in The input value to check
 * @return bool
 */
private function isShort( $in ) {
	// 65535 (2^16 - 1) is the largest value 16 bits can hold; 65536 must be rejected.
	if ( !is_array( $in ) && sprintf( '%d', $in ) == $in && $in >= 0 && $in <= 65535 ) {
		$this->debug( $in, __FUNCTION__, true );

		return true;
	}

	$this->debug( $in, __FUNCTION__, false );

	return false;
}
765 | |
/**
 * Check whether a value is an Exif LONG: a non-array value that equals its
 * own integer formatting and fits a 32-bit unsigned integer
 * (0 to 4294967295). The outcome is also reported through debug().
 *
 * @param mixed $in The input value to check
 * @return bool
 */
private function isLong( $in ) {
	// 4294967295 (2^32 - 1) is the largest value 32 bits can hold; 2^32 must be rejected.
	if ( !is_array( $in ) && sprintf( '%d', $in ) == $in && $in >= 0 && $in <= 4_294_967_295 ) {
		$this->debug( $in, __FUNCTION__, true );

		return true;
	}

	$this->debug( $in, __FUNCTION__, false );

	return false;
}
781 | |
/**
 * Check whether a value is an Exif RATIONAL: a "<numerator>/<denominator>"
 * string of unsigned digits whose two halves both pass isLong(). The
 * pattern refuses an all-zero denominator to avoid division by zero.
 *
 * @param mixed $in The input value to check
 * @return bool
 */
private function isRational( $in ) {
	$parts = [];
	$isFraction = !is_array( $in )
		&& preg_match( '/^(\d+)\/(\d+[1-9]|[1-9]\d*)$/', $in, $parts );

	if ( !$isFraction ) {
		$this->debug( $in, __FUNCTION__, 'fed a non-fraction value' );

		return false;
	}

	[ , $numerator, $denominator ] = $parts;

	return $this->isLong( $numerator ) && $this->isLong( $denominator );
}
800 | |
/**
 * Accept any value as an Exif UNDEFINED: no check is performed, the value
 * is only reported through debug().
 *
 * @param mixed $in The input value to check
 * @return bool Always true
 */
private function isUndefined( $in ) {
	$accepted = true;
	$this->debug( $in, __FUNCTION__, $accepted );

	return $accepted;
}
810 | |
/**
 * Check whether a value is an Exif SLONG (signed integer): a numeric,
 * non-array value whose absolute value passes isLong().
 * The outcome is also reported through debug().
 *
 * @param mixed $in The input value to check
 * @return bool
 */
private function isSlong( $in ) {
	// Require a numeric value before casting: (float) silently turns
	// non-numeric strings into 0.0 (and arrays into 0.0 or 1.0), which
	// would otherwise pass as a valid number.
	// NOTE(review): the magnitude limit is the one isLong() applies, which is
	// wider than a strict signed 32-bit range; kept as-is for b/c.
	if ( !is_array( $in ) && is_numeric( $in ) && $this->isLong( abs( (float)$in ) ) ) {
		$this->debug( $in, __FUNCTION__, true );

		return true;
	}

	$this->debug( $in, __FUNCTION__, false );

	return false;
}
826 | |
/**
 * Check whether a value is an Exif SRATIONAL: a "<numerator>/<denominator>"
 * string with an optionally negative numerator, whose two halves both pass
 * isSlong(). The pattern refuses an all-zero denominator to avoid division
 * by zero.
 *
 * @param mixed $in The input value to check
 * @return bool
 */
private function isSrational( $in ) {
	$m = [];

	# Avoid division by zero
	if ( !is_array( $in ) &&
		preg_match( '/^(-?\d+)\/(\d+[1-9]|[1-9]\d*)$/', $in, $m )
	) {
		// $m[0] is the whole match; the numerator and denominator are the
		// capture groups $m[1] and $m[2].
		return $this->isSlong( $m[1] ) && $this->isSlong( $m[2] );
	}

	$this->debug( $in, __FUNCTION__, 'fed a non-fraction value' );

	return false;
}
845 | |
/**
 * Validates if a tag has a legal value according to the Exif spec
 *
 * The expected type (and, for multi-valued tags, the expected element count)
 * is read from $this->mExifTags[$section][$tag]. Array values must have
 * exactly the expected number of elements; when there is more than one
 * element, each is validated on its own through a recursive call.
 *
 * @param string $section Section where tag is located.
 * @param string $tag The tag to check.
 * @param mixed $val The value of the tag.
 * @param bool $recursive True if called recursively for array types.
 * @return bool
 */
private function validate( $section, $tag, $val, $recursive = false ): bool {
	// A table entry is either a bare type or a [ type, element count ] pair.
	$spec = $this->mExifTags[$section][$tag];
	$expectedType = $spec;
	$expectedCount = 1;
	if ( is_array( $spec ) ) {
		[ $expectedType, $declaredCount ] = $spec;
		// During recursion we are looking at one element at a time.
		$expectedCount = $recursive ? 1 : $declaredCount;
	}

	$actualCount = is_array( $val ) ? count( $val ) : 1;
	if ( is_array( $val ) && $actualCount !== $expectedCount ) {
		$this->debug(
			$val,
			__FUNCTION__,
			"Expected $expectedCount elements for $tag but got $actualCount"
		);

		return false;
	}

	// Several values: the tag is valid only if every single element is.
	if ( $actualCount > 1 ) {
		foreach ( $val as $element ) {
			if ( !$this->validate( $section, $tag, $element, true ) ) {
				return false;
			}
		}

		return true;
	}

	// NULL values are considered valid. T315202.
	if ( $val === null ) {
		return true;
	}

	// Pick the validator method(s) for the expected type; the value is valid
	// as soon as one of them accepts it.
	// The comparison does not work if the types are not cast to string.
	switch ( (string)$expectedType ) {
		case (string)self::BYTE:
			$checks = [ 'isByte' ];
			break;
		case (string)self::ASCII:
			$checks = [ 'isASCII' ];
			break;
		case (string)self::SHORT:
			$checks = [ 'isShort' ];
			break;
		case (string)self::LONG:
			$checks = [ 'isLong' ];
			break;
		case (string)self::RATIONAL:
			$checks = [ 'isRational' ];
			break;
		case (string)self::SHORT_OR_LONG:
			$checks = [ 'isShort', 'isLong' ];
			break;
		case (string)self::UNDEFINED:
			$checks = [ 'isUndefined' ];
			break;
		case (string)self::SLONG:
			$checks = [ 'isSlong' ];
			break;
		case (string)self::SRATIONAL:
			$checks = [ 'isSrational' ];
			break;
		case (string)self::IGNORE:
			// Ignored tags are logged like any known type but never validate.
			$checks = [];
			break;
		default:
			$this->debug( $val, __FUNCTION__, "The tag '$tag' is unknown" );

			return false;
	}

	$this->debug( $val, __FUNCTION__, "tag is '$tag'" );

	foreach ( $checks as $method ) {
		if ( $this->$method( $val ) ) {
			return true;
		}
	}

	return false;
}
939 | |
/**
 * Convenience function for debugging output
 *
 * Does nothing unless $this->log is set; otherwise one line is written with
 * wfDebugLog() using $this->log as its first argument.
 *
 * @param mixed $in Arrays will be processed with print_r().
 * @param string $fname Function name to log.
 * @param string|bool|null $action Default null. True logs the value as
 *  accepted, false as rejected, null just reports the input; any other value
 *  is used as the message text itself.
 */
private function debug( $in, $fname, $action = null ) {
	if ( !$this->log ) {
		return;
	}

	// Capture the type before arrays are flattened to text.
	$type = get_debug_type( $in );
	$content = is_array( $in ) ? print_r( $in, true ) : $in;
	$prefix = ucfirst( __CLASS__ ) . "::$fname: ";

	if ( $action === true ) {
		$message = "accepted: '$content' (type: $type)";
	} elseif ( $action === false ) {
		$message = "rejected: '$content' (type: $type)";
	} elseif ( $action === null ) {
		$message = "input was: '$content' (type: $type)";
	} else {
		$message = "$action (type: $type; content: '$content')";
	}

	wfDebugLog( $this->log, $prefix . $message );
}
967 | |
/**
 * Convenience function for debugging output
 *
 * Logs that processing of $this->basename begins or ends. Does nothing
 * unless $this->log is set.
 *
 * @param string $fname The name of the function calling this function
 * @param bool $io Specify whether we're beginning or ending
 */
private function debugFile( $fname, $io ) {
	if ( !$this->log ) {
		return;
	}

	// A truthy $io marks the start of processing, anything else the end.
	$phase = $io ? 'begin' : 'end';

	wfDebugLog(
		$this->log,
		ucfirst( __CLASS__ ) . "::$fname: $phase processing: '{$this->basename}'"
	);
}
985 | } |