Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
73.15% |
267 / 365 |
|
25.00% |
5 / 20 |
CRAP | |
0.00% |
0 / 1 |
| Exif | |
73.35% |
267 / 364 |
|
25.00% |
5 / 20 |
313.64 | |
0.00% |
0 / 1 |
| __construct | |
95.10% |
136 / 143 |
|
0.00% |
0 / 1 |
5 | |||
| makeFilteredData | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
6 | |||
| collapseData | |
68.89% |
31 / 45 |
|
0.00% |
0 / 1 |
18.09 | |||
| charCodeString | |
58.33% |
14 / 24 |
|
0.00% |
0 / 1 |
8.60 | |||
| exifPropToOrd | |
50.00% |
1 / 2 |
|
0.00% |
0 / 1 |
2.50 | |||
| exifGPStoNumber | |
100.00% |
20 / 20 |
|
100.00% |
1 / 1 |
10 | |||
| getData | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| getFilteredData | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| version | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| isByte | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
30 | |||
| isASCII | |
44.44% |
4 / 9 |
|
0.00% |
0 / 1 |
6.74 | |||
| isShort | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
30 | |||
| isLong | |
60.00% |
3 / 5 |
|
0.00% |
0 / 1 |
6.60 | |||
| isRational | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
4 | |||
| isUndefined | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| isSlong | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
| isSrational | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
20 | |||
| validate | |
62.26% |
33 / 53 |
|
0.00% |
0 / 1 |
44.70 | |||
| debug | |
15.38% |
2 / 13 |
|
0.00% |
0 / 1 |
27.81 | |||
| debugFile | |
33.33% |
2 / 6 |
|
0.00% |
0 / 1 |
5.67 | |||
| 1 | <?php |
| 2 | /** |
| 3 | * Extraction and validation of image metadata. |
| 4 | * |
| 5 | * @license GPL-2.0-or-later |
| 6 | * @ingroup Media |
| 7 | * @author Ævar Arnfjörð Bjarmason <avarab@gmail.com> |
| 8 | * @copyright Copyright © 2005, Ævar Arnfjörð Bjarmason, 2009 Brent Garber |
| 9 | * @license GPL-2.0-or-later |
| 10 | * @see http://exif.org/Exif2-2.PDF The Exif 2.2 specification |
| 11 | * @file |
| 12 | */ |
| 13 | |
| 14 | namespace MediaWiki\Media; |
| 15 | |
| 16 | use MediaWiki\Config\ConfigException; |
| 17 | |
| 18 | /** |
| 19 | * Class to extract and validate Exif data from jpeg (and possibly tiff) files. |
| 20 | * @ingroup Media |
| 21 | */ |
| 22 | class Exif { |
| 23 | /** An 8-bit (1-byte) unsigned integer. */ |
| 24 | private const BYTE = 1; |
| 25 | |
| 26 | /** An 8-bit byte containing one 7-bit ASCII code. |
| 27 | * The final byte is terminated with NULL. |
| 28 | */ |
| 29 | private const ASCII = 2; |
| 30 | |
| 31 | /** A 16-bit (2-byte) unsigned integer. */ |
| 32 | private const SHORT = 3; |
| 33 | |
| 34 | /** A 32-bit (4-byte) unsigned integer. */ |
| 35 | private const LONG = 4; |
| 36 | |
| 37 | /** Two LONGs. The first LONG is the numerator and the second LONG is |
| 38 | * the denominator. |
| 39 | */ |
| 40 | private const RATIONAL = 5; |
| 41 | |
| 42 | /** A 16-bit (2-byte) or 32-bit (4-byte) unsigned integer. */ |
| 43 | private const SHORT_OR_LONG = 6; |
| 44 | |
| 45 | /** An 8-bit byte that can take any value depending on the field definition. */ |
| 46 | private const UNDEFINED = 7; |
| 47 | |
| 48 | /** A 32-bit (4-byte) signed integer (2's complement notation). */ |
| 49 | private const SLONG = 9; |
| 50 | |
| 51 | /** Two SLONGs. The first SLONG is the numerator and the second SLONG is |
| 52 | * the denominator. |
| 53 | */ |
| 54 | private const SRATIONAL = 10; |
| 55 | |
| 56 | /** A fake value for things we don't want or don't support. */ |
| 57 | private const IGNORE = -1; |
| 58 | |
| 59 | /** @var array Exif tags grouped by section. The tag name is the key and the |
| 60 | * value is either one of the type constants of this class or, for tags that |
| 61 | * hold several values, a [ type, count ] pair. |
| 62 | */ |
| 63 | private $mExifTags; |
| 64 | |
| 65 | /** @var array The raw Exif data returned by exif_read_data() */ |
| 66 | private $mRawExifData; |
| 67 | |
| 68 | /** @var array A filtered version of $mRawExifData that has been pruned |
| 69 | * of invalid tags and tags that contain content they shouldn't contain |
| 70 | * according to the Exif specification |
| 71 | */ |
| 72 | private $mFilteredExifData; |
| 73 | |
| 74 | /** @var string The file being processed */ |
| 75 | private $file; |
| 76 | |
| 77 | /** @var string The basename of the file being processed */ |
| 78 | private $basename; |
| 79 | |
| 80 | /** @var string|false The private log to log to, e.g. 'exif' */ |
| 81 | private $log = false; |
| 82 | |
| 83 | /** @var string The byte order of the file ('BE' or 'LE'). Needed because |
| 84 | * PHP's extension doesn't fully process some obscure props. |
| 85 | */ |
| 86 | private $byteOrder; |
| 87 | |
| 88 | /** |
| 89 | * @param string $file Filename. |
| 90 | * @param string $byteOrder Type of byte ordering either 'BE' (Big Endian) |
| 91 | * or 'LE' (Little Endian). Default ''. |
| 92 | * @todo FIXME: The following are broken: |
| 93 | * SubjectArea. Need to test the more obscure tags. |
| 94 | * DigitalZoomRatio = 0/0 is rejected. Need to determine if that's valid. |
| 95 | * Possibly should treat 0/0 = 0. Need to read exif spec on that. |
| 96 | */ |
| 97 | public function __construct( $file, $byteOrder = '' ) { |
| 98 | if ( !function_exists( 'exif_read_data' ) ) { |
| 99 | throw new ConfigException( |
| 100 | "Internal error: exif_read_data not present. " . |
| 101 | "\$wgShowEXIF may be incorrectly set or not checked by an extension." |
| 102 | ); |
| 103 | } |
| 104 | |
| 105 | /** |
| 106 | * Page numbers here refer to pages in the Exif 2.2 standard |
| 107 | * |
| 108 | * Note, Exif::UNDEFINED is treated as a string, not as an array of bytes |
| 109 | * so don't put a count parameter for any UNDEFINED values. |
| 110 | * |
| 111 | * @link http://exif.org/Exif2-2.PDF The Exif 2.2 specification |
| 112 | */ |
| 113 | $this->mExifTags = [ |
| 114 | # TIFF Rev. 6.0 Attribute Information (p22) |
| 115 | 'IFD0' => [ |
| 116 | # Tags relating to image structure |
| 117 | # Image width |
| 118 | 'ImageWidth' => self::SHORT_OR_LONG, |
| 119 | # Image height |
| 120 | 'ImageLength' => self::SHORT_OR_LONG, |
| 121 | # Number of bits per component |
| 122 | 'BitsPerSample' => [ self::SHORT, 3 ], |
| 123 | |
| 124 | # "When a primary image is JPEG compressed, this designation is not" |
| 125 | # "necessary and is omitted." (p23) |
| 126 | # Compression scheme #p23 |
| 127 | 'Compression' => self::SHORT, |
| 128 | # Pixel composition #p23 |
| 129 | 'PhotometricInterpretation' => self::SHORT, |
| 130 | # Orientation of image #p24 |
| 131 | 'Orientation' => self::SHORT, |
| 132 | # Number of components |
| 133 | 'SamplesPerPixel' => self::SHORT, |
| 134 | # Image data arrangement #p24 |
| 135 | 'PlanarConfiguration' => self::SHORT, |
| 136 | # Subsampling ratio of Y to C #p24 |
| 137 | 'YCbCrSubSampling' => [ self::SHORT, 2 ], |
| 138 | # Y and C positioning #p24-25 |
| 139 | 'YCbCrPositioning' => self::SHORT, |
| 140 | # Image resolution in width direction |
| 141 | 'XResolution' => self::RATIONAL, |
| 142 | # Image resolution in height direction |
| 143 | 'YResolution' => self::RATIONAL, |
| 144 | # Unit of X and Y resolution #(p26) |
| 145 | 'ResolutionUnit' => self::SHORT, |
| 146 | |
| 147 | # Tags relating to recording offset |
| 148 | # Image data location |
| 149 | 'StripOffsets' => self::SHORT_OR_LONG, |
| 150 | # Number of rows per strip |
| 151 | 'RowsPerStrip' => self::SHORT_OR_LONG, |
| 152 | # Bytes per compressed strip |
| 153 | 'StripByteCounts' => self::SHORT_OR_LONG, |
| 154 | # Offset to JPEG SOI |
| 155 | 'JPEGInterchangeFormat' => self::SHORT_OR_LONG, |
| 156 | # Bytes of JPEG data |
| 157 | 'JPEGInterchangeFormatLength' => self::SHORT_OR_LONG, |
| 158 | |
| 159 | # Tags relating to image data characteristics |
| 160 | # Transfer function |
| 161 | 'TransferFunction' => self::IGNORE, |
| 162 | # White point chromaticity |
| 163 | 'WhitePoint' => [ self::RATIONAL, 2 ], |
| 164 | # Chromaticities of primaries |
| 165 | 'PrimaryChromaticities' => [ self::RATIONAL, 6 ], |
| 166 | # Color space transformation matrix coefficients #p27 |
| 167 | 'YCbCrCoefficients' => [ self::RATIONAL, 3 ], |
| 168 | # Pair of black and white reference values |
| 169 | 'ReferenceBlackWhite' => [ self::RATIONAL, 6 ], |
| 170 | |
| 171 | # Other tags |
| 172 | # File change date and time |
| 173 | 'DateTime' => self::ASCII, |
| 174 | # Image title |
| 175 | 'ImageDescription' => self::ASCII, |
| 176 | # Image input equipment manufacturer |
| 177 | 'Make' => self::ASCII, |
| 178 | # Image input equipment model |
| 179 | 'Model' => self::ASCII, |
| 180 | # Software used |
| 181 | 'Software' => self::ASCII, |
| 182 | # Person who created the image |
| 183 | 'Artist' => self::ASCII, |
| 184 | # Copyright holder |
| 185 | 'Copyright' => self::ASCII, |
| 186 | ], |
| 187 | |
| 188 | # Exif IFD Attribute Information (p30-31) |
| 189 | 'EXIF' => [ |
| 190 | # @todo NOTE: Nonexistence of this field is taken to mean non-conformance |
| 191 | # to the Exif 2.1 AND 2.2 standards |
| 192 | 'ExifVersion' => self::UNDEFINED, |
| 193 | # Supported Flashpix version #p32 |
| 194 | 'FlashPixVersion' => self::UNDEFINED, |
| 195 | |
| 196 | # Tags relating to Image Data Characteristics |
| 197 | # Color space information #p32 |
| 198 | 'ColorSpace' => self::SHORT, |
| 199 | |
| 200 | # Tags relating to image configuration |
| 201 | # Meaning of each component #p33 |
| 202 | 'ComponentsConfiguration' => self::UNDEFINED, |
| 203 | # Image compression mode |
| 204 | 'CompressedBitsPerPixel' => self::RATIONAL, |
| 205 | # Valid image height |
| 206 | 'PixelYDimension' => self::SHORT_OR_LONG, |
| 207 | # Valid image width |
| 208 | 'PixelXDimension' => self::SHORT_OR_LONG, |
| 209 | |
| 210 | # Tags relating to related user information |
| 211 | # Manufacturer notes |
| 212 | 'MakerNote' => self::IGNORE, |
| 213 | # User comments #p34 |
| 214 | 'UserComment' => self::UNDEFINED, |
| 215 | |
| 216 | # Tags relating to related file information |
| 217 | # Related audio file |
| 218 | 'RelatedSoundFile' => self::ASCII, |
| 219 | |
| 220 | # Tags relating to date and time |
| 221 | # Date and time of original data generation #p36 |
| 222 | 'DateTimeOriginal' => self::ASCII, |
| 223 | # Date and time of digital data generation |
| 224 | 'DateTimeDigitized' => self::ASCII, |
| 225 | # DateTime subseconds |
| 226 | 'SubSecTime' => self::ASCII, |
| 227 | # DateTimeOriginal subseconds |
| 228 | 'SubSecTimeOriginal' => self::ASCII, |
| 229 | # DateTimeDigitized subseconds |
| 230 | 'SubSecTimeDigitized' => self::ASCII, |
| 231 | |
| 232 | # Tags relating to picture-taking conditions (p31) |
| 233 | # Exposure time |
| 234 | 'ExposureTime' => self::RATIONAL, |
| 235 | # F Number |
| 236 | 'FNumber' => self::RATIONAL, |
| 237 | # Exposure Program #p38 |
| 238 | 'ExposureProgram' => self::SHORT, |
| 239 | # Spectral sensitivity |
| 240 | 'SpectralSensitivity' => self::ASCII, |
| 241 | # ISO speed rating |
| 242 | 'ISOSpeedRatings' => self::SHORT, |
| 243 | |
| 244 | # Optoelectronic conversion factor. Note: We don't have support for this atm. |
| 245 | 'OECF' => self::IGNORE, |
| 246 | |
| 247 | # Shutter speed |
| 248 | 'ShutterSpeedValue' => self::SRATIONAL, |
| 249 | # Aperture |
| 250 | 'ApertureValue' => self::RATIONAL, |
| 251 | # Brightness |
| 252 | 'BrightnessValue' => self::SRATIONAL, |
| 253 | # Exposure bias |
| 254 | 'ExposureBiasValue' => self::SRATIONAL, |
| 255 | # Maximum lens aperture |
| 256 | 'MaxApertureValue' => self::RATIONAL, |
| 257 | # Subject distance |
| 258 | 'SubjectDistance' => self::RATIONAL, |
| 259 | # Metering mode #p40 |
| 260 | 'MeteringMode' => self::SHORT, |
| 261 | # Light source #p40-41 |
| 262 | 'LightSource' => self::SHORT, |
| 263 | # Flash #p41-42 |
| 264 | 'Flash' => self::SHORT, |
| 265 | # Lens focal length |
| 266 | 'FocalLength' => self::RATIONAL, |
| 267 | # Subject area |
| 268 | 'SubjectArea' => [ self::SHORT, 4 ], |
| 269 | # Flash energy |
| 270 | 'FlashEnergy' => self::RATIONAL, |
| 271 | # Spatial frequency response. Not supported atm. |
| 272 | 'SpatialFrequencyResponse' => self::IGNORE, |
| 273 | # Focal plane X resolution |
| 274 | 'FocalPlaneXResolution' => self::RATIONAL, |
| 275 | # Focal plane Y resolution |
| 276 | 'FocalPlaneYResolution' => self::RATIONAL, |
| 277 | # Focal plane resolution unit #p46 |
| 278 | 'FocalPlaneResolutionUnit' => self::SHORT, |
| 279 | # Subject location |
| 280 | 'SubjectLocation' => [ self::SHORT, 2 ], |
| 281 | # Exposure index |
| 282 | 'ExposureIndex' => self::RATIONAL, |
| 283 | # Sensing method #p46 |
| 284 | 'SensingMethod' => self::SHORT, |
| 285 | # File source #p47 |
| 286 | 'FileSource' => self::UNDEFINED, |
| 287 | # Scene type #p47 |
| 288 | 'SceneType' => self::UNDEFINED, |
| 289 | # CFA pattern. not supported atm. |
| 290 | 'CFAPattern' => self::IGNORE, |
| 291 | # Custom image processing #p48 |
| 292 | 'CustomRendered' => self::SHORT, |
| 293 | # Exposure mode #p48 |
| 294 | 'ExposureMode' => self::SHORT, |
| 295 | # White Balance #p49 |
| 296 | 'WhiteBalance' => self::SHORT, |
| 297 | # Digital zoom ratio |
| 298 | 'DigitalZoomRatio' => self::RATIONAL, |
| 299 | # Focal length in 35 mm film |
| 300 | 'FocalLengthIn35mmFilm' => self::SHORT, |
| 301 | # Scene capture type #p49 |
| 302 | 'SceneCaptureType' => self::SHORT, |
| 303 | # Gain control #p49-50 |
| 304 | 'GainControl' => self::SHORT, |
| 305 | # Contrast #p50 |
| 306 | 'Contrast' => self::SHORT, |
| 307 | # Saturation #p50 |
| 308 | 'Saturation' => self::SHORT, |
| 309 | # Sharpness #p50 |
| 310 | 'Sharpness' => self::SHORT, |
| 311 | |
| 312 | # Device settings description. This could maybe be supported. Need to find an |
| 313 | # example file that uses this to see if it has stuff of interest in it. |
| 314 | 'DeviceSettingDescription' => self::IGNORE, |
| 315 | |
| 316 | # Subject distance range #p51 |
| 317 | 'SubjectDistanceRange' => self::SHORT, |
| 318 | |
| 319 | # Unique image ID |
| 320 | 'ImageUniqueID' => self::ASCII, |
| 321 | ], |
| 322 | |
| 323 | # GPS Attribute Information (p52) |
| 324 | 'GPS' => [ |
| 325 | 'GPSVersion' => self::UNDEFINED, |
| 326 | # The entry above should be an array of 4 Exif::BYTE's. However, php treats it as an undefined |
| 327 | # Note exif standard calls this GPSVersionID, but php doesn't like the id suffix |
| 328 | # North or South Latitude #p52-53 |
| 329 | 'GPSLatitudeRef' => self::ASCII, |
| 330 | # Latitude |
| 331 | 'GPSLatitude' => [ self::RATIONAL, 3 ], |
| 332 | # East or West Longitude #p53 |
| 333 | 'GPSLongitudeRef' => self::ASCII, |
| 334 | # Longitude |
| 335 | 'GPSLongitude' => [ self::RATIONAL, 3 ], |
| 336 | 'GPSAltitudeRef' => self::UNDEFINED, |
| 337 | |
| 338 | # Altitude reference (the entry above). Note, the exif standard says this should be an EXIF::Byte, |
| 339 | # but php seems to disagree. |
| 340 | # Altitude |
| 341 | 'GPSAltitude' => self::RATIONAL, |
| 342 | # GPS time (atomic clock) |
| 343 | 'GPSTimeStamp' => [ self::RATIONAL, 3 ], |
| 344 | # Satellites used for measurement |
| 345 | 'GPSSatellites' => self::ASCII, |
| 346 | # Receiver status #p54 |
| 347 | 'GPSStatus' => self::ASCII, |
| 348 | # Measurement mode #p54-55 |
| 349 | 'GPSMeasureMode' => self::ASCII, |
| 350 | # Measurement precision |
| 351 | 'GPSDOP' => self::RATIONAL, |
| 352 | # Speed unit #p55 |
| 353 | 'GPSSpeedRef' => self::ASCII, |
| 354 | # Speed of GPS receiver |
| 355 | 'GPSSpeed' => self::RATIONAL, |
| 356 | # Reference for direction of movement #p55 |
| 357 | 'GPSTrackRef' => self::ASCII, |
| 358 | # Direction of movement |
| 359 | 'GPSTrack' => self::RATIONAL, |
| 360 | # Reference for direction of image #p56 |
| 361 | 'GPSImgDirectionRef' => self::ASCII, |
| 362 | # Direction of image |
| 363 | 'GPSImgDirection' => self::RATIONAL, |
| 364 | # Geodetic survey data used |
| 365 | 'GPSMapDatum' => self::ASCII, |
| 366 | # Reference for latitude of destination #p56 |
| 367 | 'GPSDestLatitudeRef' => self::ASCII, |
| 368 | # Latitude destination |
| 369 | 'GPSDestLatitude' => [ self::RATIONAL, 3 ], |
| 370 | # Reference for longitude of destination #p57 |
| 371 | 'GPSDestLongitudeRef' => self::ASCII, |
| 372 | # Longitude of destination |
| 373 | 'GPSDestLongitude' => [ self::RATIONAL, 3 ], |
| 374 | # Reference for bearing of destination #p57 |
| 375 | 'GPSDestBearingRef' => self::ASCII, |
| 376 | # Bearing of destination |
| 377 | 'GPSDestBearing' => self::RATIONAL, |
| 378 | # Reference for distance to destination #p57-58 |
| 379 | 'GPSDestDistanceRef' => self::ASCII, |
| 380 | # Distance to destination |
| 381 | 'GPSDestDistance' => self::RATIONAL, |
| 382 | # Name of GPS processing method |
| 383 | 'GPSProcessingMethod' => self::UNDEFINED, |
| 384 | # Name of GPS area |
| 385 | 'GPSAreaInformation' => self::UNDEFINED, |
| 386 | # GPS date |
| 387 | 'GPSDateStamp' => self::ASCII, |
| 388 | # GPS differential correction |
| 389 | 'GPSDifferential' => self::SHORT, |
| 390 | ], |
| 391 | ]; |
| 392 | |
| 393 | $this->file = $file; |
| 394 | $this->basename = wfBaseName( $this->file ); |
| 395 | if ( $byteOrder === 'BE' || $byteOrder === 'LE' ) { |
| 396 | $this->byteOrder = $byteOrder; |
| 397 | } else { |
| 398 | // Only give a warning for b/c, since originally we didn't |
| 399 | // require this. The number of things affected by this is |
| 400 | // rather small. |
| 401 | wfWarn( 'Exif class did not have byte order specified. ' . |
| 402 | 'Some properties may be decoded incorrectly.' ); |
| 403 | // BE seems about twice as popular as LE in jpg's. |
| 404 | $this->byteOrder = 'BE'; |
| 405 | } |
| 406 | |
| 407 | $this->debugFile( __FUNCTION__, true ); |
| 408 | |
| 409 | // phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged |
| 410 | $data = @exif_read_data( $this->file, '', true ); |
| 411 | |
| 412 | /** |
| 413 | * exif_read_data() will return false on invalid input, such as |
| 414 | * when somebody uploads a file called something.jpeg |
| 415 | * containing random gibberish. |
| 416 | */ |
| 417 | $this->mRawExifData = $data ?: []; |
| 418 | $this->makeFilteredData(); |
| 419 | $this->collapseData(); |
| 420 | $this->debugFile( __FUNCTION__, false ); |
| 421 | } |
| 422 | |
| 423 | /** |
| 424 | * Build $this->mFilteredExifData: known tags of known sections whose value validates. |
| 425 | */ |
| 426 | private function makeFilteredData() { |
| 427 | $this->mFilteredExifData = []; |
| 428 | |
| 429 | foreach ( $this->mRawExifData as $sectionName => $sectionTags ) { |
| 430 | $knownTags = $this->mExifTags[$sectionName] ?? null; |
| 431 | if ( $knownTags === null ) { |
| 432 | $this->debug( $sectionName, __FUNCTION__, "'$sectionName' is not a valid Exif section" ); |
| 433 | continue; |
| 434 | } |
| 435 | |
| 436 | foreach ( $sectionTags as $tagName => $tagValue ) { |
| 437 | if ( !array_key_exists( $tagName, $knownTags ) ) { |
| 438 | $this->debug( $tagName, __FUNCTION__, "'$tagName' is not a valid tag in '$sectionName'" ); |
| 439 | continue; |
| 440 | } |
| 441 | |
| 442 | if ( !$this->validate( $sectionName, $tagName, $tagValue ) ) { |
| 443 | $this->debug( $tagValue, __FUNCTION__, "'$tagName' contained invalid data" ); |
| 444 | continue; |
| 445 | } |
| 446 | // Tag names only clash in the computed and thumbnail sections, which we don't use, |
| 447 | // so storing everything in one flat array is fine. |
| 448 | $this->mFilteredExifData[$tagName] = $tagValue; |
| 449 | } |
| 450 | } |
| 451 | } |
| 451 | |
| 452 | /** |
| 453 | * Collapse some fields together. |
| 454 | * This converts some fields from exif form to a more friendly form, |
| 455 | * for example GPS latitude to a single number. |
| 456 | * |
| 457 | * The rationale behind this is that we're storing data, not presenting it to the user. |
| 458 | * For example a longitude is a single number describing how far away you are from |
| 459 | * the prime meridian. While it might be nice to split it up into minutes and seconds |
| 460 | * for the user, it doesn't really make sense to split a single number into 4 parts |
| 461 | * for storage (degrees, minutes, seconds, direction vs single floating point number). |
| 462 | * |
| 463 | * Other things this might do (not really sure if they make sense or not): |
| 464 | * Dates -> mediawiki date format. |
| 465 | * Convert values that can be in different units to be in one standardized unit. |
| 466 | * |
| 467 | * As an alternative approach, some of this could be done in the validate phase |
| 468 | * if we make up our own types like Exif::DATE. |
| 469 | */ |
| 470 | private function collapseData() { |
| 471 | $this->exifGPStoNumber( 'GPSLatitude' ); |
| 472 | $this->exifGPStoNumber( 'GPSDestLatitude' ); |
| 473 | $this->exifGPStoNumber( 'GPSLongitude' ); |
| 474 | $this->exifGPStoNumber( 'GPSDestLongitude' ); |
| 475 | |
| 476 | if ( isset( $this->mFilteredExifData['GPSAltitude'] ) ) { |
| 477 | // We know altitude data is a <num>/<denom> from the validation |
| 478 | // functions ran earlier. But multiplying such a string by -1 |
| 479 | // doesn't work well, so convert. |
| 480 | [ $num, $denom ] = explode( '/', $this->mFilteredExifData['GPSAltitude'], 2 ); |
| 481 | $this->mFilteredExifData['GPSAltitude'] = (int)$num / (int)$denom; |
| 482 | |
| 483 | if ( isset( $this->mFilteredExifData['GPSAltitudeRef'] ) ) { |
| 484 | switch ( $this->mFilteredExifData['GPSAltitudeRef'] ) { |
| 485 | case "\0": |
| 486 | // Above sea level |
| 487 | break; |
| 488 | case "\1": |
| 489 | // Below sea level |
| 490 | $this->mFilteredExifData['GPSAltitude'] *= -1; |
| 491 | break; |
| 492 | default: |
| 493 | // Invalid |
| 494 | unset( $this->mFilteredExifData['GPSAltitude'] ); |
| 495 | break; |
| 496 | } |
| 497 | } |
| 498 | } |
| 499 | unset( $this->mFilteredExifData['GPSAltitudeRef'] ); |
| 500 | |
| 501 | $this->exifPropToOrd( 'FileSource' ); |
| 502 | $this->exifPropToOrd( 'SceneType' ); |
| 503 | |
| 504 | $this->charCodeString( 'UserComment' ); |
| 505 | $this->charCodeString( 'GPSProcessingMethod' ); |
| 506 | $this->charCodeString( 'GPSAreaInformation' ); |
| 507 | |
| 508 | // ComponentsConfiguration should really be an array instead of a string... |
| 509 | // This turns a string of binary numbers into an array of numbers. |
| 510 | |
| 511 | if ( isset( $this->mFilteredExifData['ComponentsConfiguration'] ) ) { |
| 512 | $val = $this->mFilteredExifData['ComponentsConfiguration']; |
| 513 | $ccVals = []; |
| 514 | |
| 515 | $strLen = strlen( $val ); |
| 516 | for ( $i = 0; $i < $strLen; $i++ ) { |
| 517 | $ccVals[$i] = ord( substr( $val, $i, 1 ) ); |
| 518 | } |
| 519 | // this is for formatting later. |
| 520 | $ccVals['_type'] = 'ol'; |
| 521 | $this->mFilteredExifData['ComponentsConfiguration'] = $ccVals; |
| 522 | } |
| 523 | |
| 524 | // GPSVersion(ID) is treated as the wrong type by php exif support. |
| 525 | // Go through each byte turning it into a version string. |
| 526 | // For example: "\x02\x02\x00\x00" -> "2.2.0.0" |
| 527 | |
| 528 | // Also change exif tag name from GPSVersion (what php exif thinks it is) |
| 529 | // to GPSVersionID (what the exif standard thinks it is). |
| 530 | |
| 531 | if ( isset( $this->mFilteredExifData['GPSVersion'] ) ) { |
| 532 | $val = $this->mFilteredExifData['GPSVersion']; |
| 533 | $newVal = ''; |
| 534 | |
| 535 | $strLen = strlen( $val ); |
| 536 | for ( $i = 0; $i < $strLen; $i++ ) { |
| 537 | if ( $i !== 0 ) { |
| 538 | $newVal .= '.'; |
| 539 | } |
| 540 | $newVal .= ord( substr( $val, $i, 1 ) ); |
| 541 | } |
| 542 | |
| 543 | if ( $this->byteOrder === 'LE' ) { |
| 544 | // Reverse the components, not the characters: a character-wise |
| 545 | // reversal would turn "0.0.10.2" into "2.01.0.0" rather than |
| 546 | // "2.10.0.0" whenever a component has more than one digit. |
| 547 | $newVal = implode( '.', array_reverse( explode( '.', $newVal ) ) ); |
| 548 | } |
| 549 | $this->mFilteredExifData['GPSVersionID'] = $newVal; |
| 550 | |
| 551 | // Only the name from the Exif standard is kept; the key used by |
| 552 | // php's exif support is dropped. |
| 553 | unset( $this->mFilteredExifData['GPSVersion'] ); |
| 554 | } |
| 555 | } |
| 556 | |
| 557 | /** |
| 558 | * Decode UserComment-style tags. See pg. 34 of exif standard. |
| 559 | * The first 8 bytes name the charset, everything after them is the value. |
| 560 | * This has not been tested on any shift-JIS strings. |
| 561 | * @param string $prop Prop name |
| 562 | */ |
| 563 | private function charCodeString( $prop ) { |
| 564 | $raw = $this->mFilteredExifData[$prop] ?? null; |
| 565 | if ( $raw === null ) { |
| 566 | // Nothing stored under this tag. |
| 567 | return; |
| 568 | } |
| 569 | |
| 570 | if ( strlen( $raw ) <= 8 ) { |
| 571 | // Invalid: the 8-byte charset id must be followed by at least one byte. |
| 572 | $this->debug( $raw, __FUNCTION__, false ); |
| 573 | unset( $this->mFilteredExifData[$prop] ); |
| 574 | |
| 575 | return; |
| 576 | } |
| 577 | |
| 578 | $text = substr( $raw, 8 ); |
| 579 | $encoding = match ( substr( $raw, 0, 8 ) ) { |
| 580 | "JIS\x00\x00\x00\x00\x00" => 'Shift-JIS', |
| 581 | "UNICODE\x00" => 'UTF-16' . $this->byteOrder, |
| 582 | default => null |
| 583 | }; |
| 584 | if ( !$encoding ) { |
| 585 | // No known charset id: if valid utf-8, assume that, otherwise assume windows-1252 |
| 586 | $checked = $text; |
| 587 | \UtfNormal\Validator::quickIsNFCVerify( $checked ); |
| 588 | if ( $checked !== $text ) { |
| 589 | // phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged |
| 590 | $text = @iconv( 'Windows-1252', 'UTF-8//IGNORE', $text ); |
| 591 | } |
| 592 | } else { |
| 593 | // phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged |
| 594 | $text = @iconv( $encoding, 'UTF-8//IGNORE', $text ); |
| 595 | } |
| 596 | |
| 597 | $text = trim( $text ); |
| 598 | if ( $text === '' ) { |
| 599 | // Nothing but whitespace is left, so drop the tag. |
| 600 | $this->debug( $raw, __FUNCTION__, "$prop: Is only whitespace" ); |
| 601 | unset( $this->mFilteredExifData[$prop] ); |
| 602 | |
| 603 | return; |
| 604 | } |
| 605 | |
| 606 | $this->mFilteredExifData[$prop] = $text; |
| 607 | } |
| 608 | |
| 609 | /** |
| 610 | * Replace the raw binary string of an Exif::UNDEFINED tag with its ord() |
| 611 | * value. Only some UNDEFINED fields need this. |
| 612 | * @param string $prop Name of property |
| 613 | */ |
| 614 | private function exifPropToOrd( $prop ) { |
| 615 | if ( !isset( $this->mFilteredExifData[$prop] ) ) { |
| 616 | return; |
| 617 | } |
| 618 | $this->mFilteredExifData[$prop] = ord( $this->mFilteredExifData[$prop] ); |
| 619 | } |
| 620 | |
| 621 | /** |
| 622 | * Turn a GPS coordinate in exif form into one signed floating point number, |
| 623 | * for example 10 degrees 20`40`` S -> -10.34444. The matching ...Ref tag is removed. |
| 624 | * @param string $prop A GPS coordinate exif tag name (like GPSLongitude) |
| 625 | */ |
| 626 | private function exifGPStoNumber( $prop ) { |
| 627 | $refKey = $prop . 'Ref'; |
| 628 | $coord = $this->mFilteredExifData[$prop] ?? null; |
| 629 | $hemisphere = $this->mFilteredExifData[$refKey] ?? null; |
| 630 | |
| 631 | // Evaluates a "<num>/<denom>" string. |
| 632 | $ratio = static function ( $fraction ) { |
| 633 | [ $top, $bottom ] = explode( '/', $fraction, 2 ); |
| 634 | return (int)$top / (int)$bottom; |
| 635 | }; |
| 636 | |
| 637 | $decimal = false; |
| 638 | if ( $coord !== null && in_array( $hemisphere, [ 'N', 'S', 'E', 'W' ] ) ) { |
| 639 | if ( is_string( $coord ) ) { |
| 640 | // A lone fraction is non-standard, but occurs in the wild (T386208) |
| 641 | $decimal = $ratio( $coord ); |
| 642 | } elseif ( is_array( $coord ) && count( $coord ) === 3 ) { |
| 643 | // degrees + minutes / 60 + seconds / 3600 |
| 644 | $decimal = $ratio( $coord[0] ); |
| 645 | $decimal += $ratio( $coord[1] ) * ( 1 / 60 ); |
| 646 | $decimal += $ratio( $coord[2] ) * ( 1 / 3600 ); |
| 647 | } |
| 648 | |
| 649 | if ( $decimal && ( $hemisphere === 'S' || $hemisphere === 'W' ) ) { |
| 650 | $decimal *= -1; |
| 651 | } |
| 652 | } |
| 653 | |
| 654 | // Strict comparison, because a coordinate of 0 is a usable result. |
| 655 | if ( $decimal === false ) { |
| 656 | unset( $this->mFilteredExifData[$prop] ); |
| 657 | } else { |
| 658 | $this->mFilteredExifData[$prop] = $decimal; |
| 659 | } |
| 660 | unset( $this->mFilteredExifData[$refKey] ); |
| 661 | } |
| 662 | |
| 663 | /** |
| 664 | * Get $this->mRawExifData: the exif_read_data() result, or an empty array if it returned nothing. |
| 665 | * @return array |
| 666 | */ |
| 667 | public function getData() { |
| 668 | return $this->mRawExifData; |
| 669 | } |
| 670 | |
| 671 | /** |
| 672 | * Get $this->mFilteredExifData, as built by makeFilteredData() and collapseData() in the constructor. |
| 673 | * @return array |
| 674 | */ |
| 675 | public function getFilteredData() { |
| 676 | return $this->mFilteredExifData; |
| 677 | } |
| 678 | |
| 679 | /** |
| 680 | * The version of the output format. |
| 681 | * |
| 682 | * Before the actual metadata information is saved in the database we |
| 683 | * strip some of it since we don't want to save things like thumbnails |
| 684 | * which usually accompany Exif data. This value gets saved in the |
| 685 | * database along with the actual Exif data, and if the version in the |
| 686 | * database doesn't equal the value returned by this function the Exif |
| 687 | * data is regenerated. |
| 688 | * |
| 689 | * @return int The current output format version |
| 690 | */ |
| 691 | public static function version() { |
| 692 | return 2; |
| 693 | } |
| 694 | |
| 695 | /** |
| 696 | * Check whether a value fits the Exif BYTE type: a non-array integer value from 0 to 255. |
| 697 | * |
| 698 | * @param mixed $in The input value to check |
| 699 | * @return bool |
| 700 | */ |
| 701 | private function isByte( $in ) { |
| 702 | $valid = !is_array( $in ) |
| 703 | && sprintf( '%d', $in ) == $in |
| 704 | && $in >= 0 |
| 705 | && $in <= 255; |
| 706 | |
| 707 | // The outcome is logged whichever way the check went. |
| 708 | $this->debug( $in, __FUNCTION__, $valid ); |
| 709 | |
| 710 | return $valid; |
| 711 | } |
| 712 | |
| 713 | /** |
| 714 | * @param mixed $in The input value to check; only line feed and the bytes 0x20-0x7e are allowed |
| 715 | * @return bool False for arrays, disallowed characters or whitespace-only input |
| 716 | */ |
| 717 | private function isASCII( $in ) { |
| 718 | if ( is_array( $in ) ) { |
| 719 | // Arrays are rejected without a debug entry. |
| 720 | return false; |
| 721 | } |
| 722 | |
| 723 | $reason = null; |
| 724 | if ( preg_match( "/[^\x0a\x20-\x7e]/", $in ) ) { |
| 725 | $reason = 'found a character that is not allowed'; |
| 726 | } elseif ( preg_match( '/^\s*$/', $in ) ) { |
| 727 | $reason = 'input consisted solely of whitespace'; |
| 728 | } |
| 729 | |
| 730 | if ( $reason !== null ) { |
| 731 | $this->debug( $in, __FUNCTION__, $reason ); |
| 732 | } |
| 733 | |
| 734 | return $reason === null; |
| 735 | } |
| 736 | |
| 737 | /** |
| 738 | * @param mixed $in The input value to check; a SHORT is 16-bit unsigned, so 0 to 65535 |
| 739 | * @return bool |
| 740 | */ |
| 741 | private function isShort( $in ) { |
| 742 | if ( !is_array( $in ) && sprintf( '%d', $in ) == $in && $in >= 0 && $in <= 65535 ) { |
| 743 | $this->debug( $in, __FUNCTION__, true ); |
| 744 | |
| 745 | return true; |
| 746 | } |
| 747 | |
| 748 | $this->debug( $in, __FUNCTION__, false ); |
| 749 | |
| 750 | return false; |
| 751 | } |
| 752 | |
| 753 | /** |
| 754 | * @param mixed $in The input value to check; a LONG is 32-bit unsigned, so 0 to 4294967295 |
| 755 | * @return bool |
| 756 | */ |
| 757 | private function isLong( $in ) { |
| 758 | if ( !is_array( $in ) && sprintf( '%d', $in ) == $in && $in >= 0 && $in <= 4_294_967_295 ) { |
| 759 | $this->debug( $in, __FUNCTION__, true ); |
| 760 | |
| 761 | return true; |
| 762 | } |
| 763 | |
| 764 | $this->debug( $in, __FUNCTION__, false ); |
| 765 | |
| 766 | return false; |
| 767 | } |
| 768 | |
| 769 | /** |
| 770 | * @param mixed $in The input value to check; expected form is "<numerator>/<denominator>" |
| 771 | * @return bool |
| 772 | */ |
| 773 | private function isRational( $in ) { |
| 774 | // The denominator part never matches a string made only of zeros, |
| 775 | // which avoids division by zero. |
| 776 | $pattern = '/^(\d+)\/(\d+[1-9]|[1-9]\d*)$/'; |
| 777 | $parts = []; |
| 778 | |
| 779 | if ( is_array( $in ) || !preg_match( $pattern, $in, $parts ) ) { |
| 780 | $this->debug( $in, __FUNCTION__, 'fed a non-fraction value' ); |
| 781 | |
| 782 | return false; |
| 783 | } |
| 784 | |
| 785 | return $this->isLong( $parts[1] ) && $this->isLong( $parts[2] ); |
| 786 | } |
| 787 | |
| 788 | /** |
| 789 | * @param mixed $in The input value; only written to the debug log, never inspected |
| 790 | * @return bool Always true |
| 791 | */ |
| 792 | private function isUndefined( $in ) { |
| 793 | $this->debug( $in, __FUNCTION__, true ); |
| 794 | |
| 795 | return true; |
| 796 | } |
| 797 | |
| 798 | /** |
| 799 | * @param mixed $in The input value to check; an SLONG is 32-bit signed (2's complement) |
| 800 | * @return bool |
| 801 | */ |
| 802 | private function isSlong( $in ) { |
| 803 | // Arrays and non-integer values are rejected, as is anything outside -2^31 .. 2^31 - 1. |
| 804 | if ( !is_array( $in ) && sprintf( '%d', $in ) == $in |
| 805 | && $in >= -2_147_483_648 && $in <= 2_147_483_647 |
| 806 | ) { |
| 807 | $this->debug( $in, __FUNCTION__, true ); |
| 808 | return true; |
| 809 | } |
| 810 | $this->debug( $in, __FUNCTION__, false ); |
| 811 | return false; |
| 812 | } |
| 813 | |
| 814 | /** |
| 815 | * @param mixed $in The input value to check; expected form is "<numerator>/<denominator>" |
| 816 | * @return bool |
| 817 | */ |
| 818 | private function isSrational( $in ) { |
| 819 | $m = []; |
| 820 | // $m[1] is the numerator and $m[2] the denominator; $m[0] would be the whole match. |
| 821 | # Avoid division by zero: the denominator part never matches a string made only of zeros |
| 822 | if ( !is_array( $in ) && |
| 823 | preg_match( '/^(-?\d+)\/(\d+[1-9]|[1-9]\d*)$/', $in, $m ) |
| 824 | ) { |
| 825 | return $this->isSlong( $m[1] ) && $this->isSlong( $m[2] ); |
| 826 | } |
| 827 | |
| 828 | $this->debug( $in, __FUNCTION__, 'fed a non-fraction value' ); |
| 829 | |
| 830 | return false; |
| 831 | } |
| 832 | |
/**
 * Validates if a tag has a legal value according to the Exif spec
 *
 * The expected type (and, for multi-valued tags, the expected element count)
 * is looked up in $this->mExifTags. Array values are validated element by
 * element through a recursive call; scalar values are dispatched to the
 * is*() check matching the expected type.
 *
 * @param string $section Section where tag is located.
 * @param string $tag The tag to check.
 * @param mixed $val The value of the tag.
 * @param bool $recursive True if called recursively for array types.
 * @return bool
 */
private function validate( $section, $tag, $val, $recursive = false ): bool {
	$debug = "tag is '$tag'";
	// A tag missing from the table falls through to the "unknown" branch of
	// the switch below instead of raising an undefined-index warning here.
	$etype = $this->mExifTags[$section][$tag] ?? null;
	$ecount = 1;
	if ( is_array( $etype ) ) {
		[ $etype, $ecount ] = $etype;
		if ( $recursive ) {
			// checking individual elements
			$ecount = 1;
		}
	}

	if ( is_array( $val ) ) {
		$count = count( $val );
		if ( $ecount !== $count ) {
			$this->debug( $val, __FUNCTION__, "Expected $ecount elements for $tag but got $count" );
			return false;
		}

		// If there are multiple values, recursively validate each of them.
		if ( $count > 1 ) {
			foreach ( $val as $v ) {
				if ( !$this->validate( $section, $tag, $v, true ) ) {
					return false;
				}
			}

			return true;
		}
	}

	// NULL values are considered valid. T315202.
	if ( $val === null ) {
		return true;
	}

	// Does not work if not typecast
	switch ( (string)$etype ) {
		case (string)self::BYTE:
			$this->debug( $val, __FUNCTION__, $debug );

			return $this->isByte( $val );
		case (string)self::ASCII:
			$this->debug( $val, __FUNCTION__, $debug );

			return $this->isASCII( $val );
		case (string)self::SHORT:
			$this->debug( $val, __FUNCTION__, $debug );

			return $this->isShort( $val );
		case (string)self::LONG:
			$this->debug( $val, __FUNCTION__, $debug );

			return $this->isLong( $val );
		case (string)self::RATIONAL:
			$this->debug( $val, __FUNCTION__, $debug );

			return $this->isRational( $val );
		case (string)self::SHORT_OR_LONG:
			$this->debug( $val, __FUNCTION__, $debug );

			return $this->isShort( $val ) || $this->isLong( $val );
		case (string)self::UNDEFINED:
			$this->debug( $val, __FUNCTION__, $debug );

			return $this->isUndefined( $val );
		case (string)self::SLONG:
			$this->debug( $val, __FUNCTION__, $debug );

			return $this->isSlong( $val );
		case (string)self::SRATIONAL:
			$this->debug( $val, __FUNCTION__, $debug );

			return $this->isSrational( $val );
		case (string)self::IGNORE:
			// Tags marked IGNORE are logged and then always rejected.
			$this->debug( $val, __FUNCTION__, $debug );

			return false;
		default:
			$this->debug( $val, __FUNCTION__, "The tag '$tag' is unknown" );

			return false;
	}
}
| 926 | |
/**
 * Write a validation message to the debug log group held in $this->log.
 *
 * Does nothing when $this->log is not set.
 *
 * @param mixed $in Value being reported; arrays are rendered with print_r().
 * @param string $fname Function name to log.
 * @param string|bool|null $action true logs "accepted", false logs "rejected",
 *  null (the default) logs the input only, and any other value is logged
 *  as a free-form message.
 */
private function debug( $in, $fname, $action = null ) {
	$channel = $this->log;
	if ( !$channel ) {
		return;
	}

	// Record the type before an array is flattened into a string.
	$typeName = get_debug_type( $in );
	$content = is_array( $in ) ? print_r( $in, true ) : $in;
	$origin = ucfirst( __CLASS__ ) . "::$fname";

	if ( $action === true ) {
		$message = "$origin: accepted: '$content' (type: $typeName)";
	} elseif ( $action === false ) {
		$message = "$origin: rejected: '$content' (type: $typeName)";
	} elseif ( $action === null ) {
		$message = "$origin: input was: '$content' (type: $typeName)";
	} else {
		$message = "$origin: $action (type: $typeName; content: '$content')";
	}

	wfDebugLog( $channel, $message );
}
| 954 | |
/**
 * Log that processing of the current file ($this->basename) begins or ends.
 *
 * Does nothing when $this->log is not set.
 *
 * @param string $fname The name of the function calling this function
 * @param bool $io True to log the beginning of processing, false to log the end
 */
private function debugFile( $fname, $io ) {
	if ( !$this->log ) {
		return;
	}

	$phase = $io ? 'begin' : 'end';
	$origin = ucfirst( __CLASS__ ) . "::$fname";

	wfDebugLog( $this->log, "$origin: $phase processing: '{$this->basename}'" );
}
| 972 | } |
| 973 | |
| 974 | /** @deprecated class alias since 1.46; use the namespaced MediaWiki\Media\Exif class instead */
| 975 | class_alias( Exif::class, 'Exif' );