Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 420 |
|
0.00% |
0 / 14 |
CRAP | |
0.00% |
0 / 1 |
| AddMediaInfo | |
0.00% |
0 / 420 |
|
0.00% |
0 / 14 |
18632 | |
0.00% |
0 / 1 |
| handleSize | |
0.00% |
0 / 29 |
|
0.00% |
0 / 1 |
420 | |||
| parseTimeString | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
56 | |||
| parseFrag | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
56 | |||
| addSources | |
0.00% |
0 / 25 |
|
0.00% |
0 / 1 |
56 | |||
| addTracks | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
20 | |||
| getPath | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
| handleAudio | |
0.00% |
0 / 24 |
|
0.00% |
0 / 1 |
30 | |||
| handleVideo | |
0.00% |
0 / 24 |
|
0.00% |
0 / 1 |
42 | |||
| handleImage | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
42 | |||
| makeErr | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| handleErrors | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
20 | |||
| copyOverAttribute | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
| replaceAnchor | |
0.00% |
0 / 44 |
|
0.00% |
0 / 1 |
210 | |||
| run | |
0.00% |
0 / 187 |
|
0.00% |
0 / 1 |
2652 | |||
| 1 | <?php |
| 2 | declare( strict_types = 1 ); |
| 3 | |
| 4 | namespace Wikimedia\Parsoid\Wt2Html\DOM\Processors; |
| 5 | |
| 6 | use Wikimedia\Assert\Assert; |
| 7 | use Wikimedia\Parsoid\Config\Env; |
| 8 | use Wikimedia\Parsoid\Core\ContentMetadataCollectorStringSets as CMCSS; |
| 9 | use Wikimedia\Parsoid\Core\Sanitizer; |
| 10 | use Wikimedia\Parsoid\DOM\DocumentFragment; |
| 11 | use Wikimedia\Parsoid\DOM\Element; |
| 12 | use Wikimedia\Parsoid\DOM\Node; |
| 13 | use Wikimedia\Parsoid\Html2Wt\WTSUtils; |
| 14 | use Wikimedia\Parsoid\NodeData\DataMw; |
| 15 | use Wikimedia\Parsoid\NodeData\DataMwError; |
| 16 | use Wikimedia\Parsoid\Utils\DOMCompat; |
| 17 | use Wikimedia\Parsoid\Utils\DOMDataUtils; |
| 18 | use Wikimedia\Parsoid\Utils\DOMUtils; |
| 19 | use Wikimedia\Parsoid\Utils\Title; |
| 20 | use Wikimedia\Parsoid\Utils\WTUtils; |
| 21 | use Wikimedia\Parsoid\Wikitext\Consts; |
| 22 | use Wikimedia\Parsoid\Wt2Html\PegTokenizer; |
| 23 | use Wikimedia\Parsoid\Wt2Html\Wt2HtmlDOMProcessor; |
| 24 | |
| 25 | class AddMediaInfo implements Wt2HtmlDOMProcessor { |
/**
 * Extract the dimensions for media.
 *
 * Starts from the intrinsic dimensions of the file, prefers the thumbnail
 * dimensions when a thumbnail url is available, substitutes fixed defaults
 * for audio, and falls back to the intrinsic dimensions again when the
 * user-requested size would upscale a file that doesn't have to be rendered.
 *
 * @param Env $env Only consulted for the site's default width (audio files)
 * @param array $attrs
 * @param array $info File info; must provide numeric 'height' and 'width'
 * @phan-param array{dims:array{height?:?int,width?:?int},format:string} $attrs
 * @return array Has exactly the keys 'height' and 'width'
 */
private static function handleSize( Env $env, array $attrs, array $info ): array {
	$height = $info['height'];
	$width = $info['width'];

	// NAN is never identical to anything, itself included, so `!== NAN`
	// is always true.  is_nan() is the only check that actually rejects it.
	Assert::invariant(
		is_numeric( $height ) && !is_nan( (float)$height ),
		'Expected $height as a valid number'
	);
	Assert::invariant(
		is_numeric( $width ) && !is_nan( (float)$width ),
		'Expected $width as a valid number'
	);

	// Thumbnail dimensions win over the file's own, but only if a
	// thumbnail was actually generated.
	if ( !empty( $info['thumburl'] ) && !empty( $info['thumbheight'] ) ) {
		$height = $info['thumbheight'];
	}

	if ( !empty( $info['thumburl'] ) && !empty( $info['thumbwidth'] ) ) {
		$width = $info['thumbwidth'];
	}

	// Other readers of $info in this class tolerate a missing mediatype,
	// do the same here instead of raising an undefined index warning.
	$mediaType = $info['mediatype'] ?? '';

	// Audio files don't have dimensions, so we fallback to these arbitrary
	// defaults, and the "mw-default-audio-height" class is added.
	if ( $mediaType === 'AUDIO' ) {
		// FIXME: TMH uses 23 but VE wants 32
		$height = /* height || */32; // Arguably, audio should respect a defined height
		$width = max( 35, $width ?: $env->getSiteConfig()->widthOption() );
	}

	// Handle client-side upscaling (including 'border')

	$mustRender = $info['mustRender'] ?? $mediaType !== 'BITMAP';

	// Calculate the scaling ratio from the user-specified width and height.
	// When both are given, the smaller of the two ratios is used.
	$ratio = null;
	if ( !empty( $attrs['dims']['height'] ) && !empty( $info['height'] ) ) {
		$ratio = $attrs['dims']['height'] / $info['height'];
	}
	if ( !empty( $attrs['dims']['width'] ) && !empty( $info['width'] ) ) {
		$r = $attrs['dims']['width'] / $info['width'];
		$ratio = ( $ratio === null || $r < $ratio ) ? $r : $ratio;
	}

	// If the user requested upscaling, then this is denied in the thumbnail
	// and frameless format, except for files with mustRender.
	if (
		$ratio !== null && $ratio > 1 && !$mustRender &&
		( $attrs['format'] === 'Thumb' || $attrs['format'] === 'Frameless' )
	) {
		// Upscaling denied
		$height = $info['height'];
		$width = $info['width'];
	}

	return [ 'height' => $height, 'width' => $width ];
}
| 91 | |
/**
 * This is a port of TMH's parseTimeString()
 *
 * Converts a colon separated time ("ss", "mm:ss" or "hh:mm:ss") to seconds.
 * Negative results are raised to zero; results beyond $length, when one is
 * given, are replaced by $length - 1.
 *
 * @param string $timeString
 * @param int|float|null $length
 * @return int|float|null Null if there are more than three parts or if any
 *   part isn't numeric
 */
private static function parseTimeString(
	string $timeString, $length = null
) {
	$segments = explode( ':', $timeString );
	$numSegments = count( $segments );
	if ( $numSegments > 3 ) {
		return null;
	}

	$seconds = 0;
	foreach ( $segments as $idx => $segment ) {
		if ( !is_numeric( $segment ) ) {
			return null;
		}
		// The rightmost segment is seconds, each one to the left of it
		// is worth sixty times more.
		$seconds += floatval( $segment ) * ( 60 ** ( $numSegments - 1 - $idx ) );
	}

	if ( $seconds < 0 ) {
		return 0;
	}
	if ( $length !== null && $seconds > $length ) {
		return $length - 1;
	}
	return $seconds;
}
| 123 | |
/**
 * Handle media fragments
 * https://www.w3.org/TR/media-frags/
 *
 * Builds a "#t=start,end" suffix from the "starttime" and "endtime"
 * attributes found in data-mw, which are left in place.
 *
 * @param array $info Its 'duration', if any, caps the parsed times
 * @param DataMw $dataMw
 * @return string Empty when neither attribute is present
 */
private static function parseFrag( array $info, DataMw $dataMw ): string {
	$start = WTSUtils::getAttrFromDataMw( $dataMw, 'starttime', true );
	$end = WTSUtils::getAttrFromDataMw( $dataMw, 'endtime', true );
	if ( !$start && !$end ) {
		return '';
	}

	$duration = $info['duration'] ?? null;
	$fragment = '#t=';
	// Each entry pairs an attribute with the separator that precedes its
	// value in the fragment.
	foreach ( [ [ $start, '' ], [ $end, ',' ] ] as [ $attr, $separator ] ) {
		if ( !$attr ) {
			continue;
		}
		$seconds = self::parseTimeString( $attr->value['txt'], $duration );
		if ( $seconds !== null ) {
			$fragment .= $separator . $seconds;
		}
	}
	return $fragment;
}
| 153 | |
/**
 * Append a <source> child to $elt for every derivative of the file.
 *
 * @param Element $elt The media element receiving the sources
 * @param array $info
 * @param DataMw $dataMw Consulted for the media fragment to append to urls
 * @param bool $hasDimension Whether to record width and height on sources
 */
private static function addSources(
	Element $elt, array $info, DataMw $dataMw, bool $hasDimension
): void {
	$doc = $elt->ownerDocument;
	$frag = self::parseFrag( $info, $dataMw );

	// BatchAPI's `getAPIData`
	$candidates = $info['thumbdata']['derivatives'] ?? null;
	if ( !is_array( $candidates ) ) {
		// "videoinfo" prop
		$candidates = $info['derivatives'] ?? null;
	}
	if ( !is_array( $candidates ) ) {
		// No derivatives known, the original file is the only source
		$candidates = [
			[
				'src' => $info['url'],
				'type' => $info['mime'],
				'width' => (string)$info['width'],
				'height' => (string)$info['height'],
			],
		];
	}

	foreach ( $candidates as $derivative ) {
		// Only transcodes carry a key, anything else is the file itself
		$isTranscode = isset( $derivative['transcodekey'] );
		$dimPrefix = $isTranscode ? 'data' : 'data-file';

		$source = $doc->createElement( 'source' );
		$source->setAttribute( 'src', $derivative['src'] . $frag );
		$source->setAttribute( 'type', $derivative['type'] ); // T339375
		if ( $hasDimension ) {
			$source->setAttribute( $dimPrefix . '-width', (string)$derivative['width'] );
			$source->setAttribute( $dimPrefix . '-height', (string)$derivative['height'] );
		}
		if ( $isTranscode ) {
			$source->setAttribute( 'data-transcodekey', $derivative['transcodekey'] );
		}
		$elt->appendChild( $source );
	}
}
| 192 | |
/**
 * Append a <track> child to $elt for every timed text entry of the file.
 *
 * @param Element $elt The media element receiving the tracks
 * @param array $info
 */
private static function addTracks( Element $elt, array $info ): void {
	// BatchAPI's `getAPIData`
	$entries = $info['thumbdata']['timedtext'] ?? null;
	if ( !is_array( $entries ) ) {
		// "videoinfo" prop
		$entries = $info['timedtext'] ?? null;
	}
	if ( !is_array( $entries ) ) {
		$entries = [];
	}

	// Attribute name on the <track> => key in the timed text entry
	$attrSources = [
		'kind' => 'kind',
		'type' => 'type',
		'src' => 'src',
		'srclang' => 'srclang',
		'label' => 'label',
		'data-mwtitle' => 'title',
		'data-dir' => 'dir',
	];

	$doc = $elt->ownerDocument;
	foreach ( $entries as $entry ) {
		$track = $doc->createElement( 'track' );
		foreach ( $attrSources as $attrName => $entryKey ) {
			$track->setAttribute( $attrName, $entry[$entryKey] ?? '' );
		}
		$elt->appendChild( $track );
	}
}
| 216 | |
/**
 * Abstract way to get the path for an image given an info object.
 *
 * The thumbnail url is preferred over the url of the file itself.
 *
 * @param array $info
 * @return string Empty if $info provides neither url
 */
private static function getPath( array $info ) {
	foreach ( [ 'thumburl', 'url' ] as $key ) {
		if ( !empty( $info[$key] ) ) {
			return $info[$key];
		}
	}
	return '';
}
| 232 | |
/**
 * Build the <audio> element for a file.
 *
 * @param Env $env
 * @param Element $span Placeholder whose "resource" and "lang" are copied over
 * @param array $attrs
 * @param array $info
 * @param DataMw $dataMw The "muted" and "loop" attributes are not kept in it
 * @param Element $container Gets the "mw-default-audio-height" class
 * @param string|null $alt Unused, but matches the signature of handlers
 * @return Element
 */
private static function handleAudio(
	Env $env, Element $span, array $attrs, array $info, DataMw $dataMw,
	Element $container, ?string $alt
): Element {
	$audio = $span->ownerDocument->createElement( 'audio' );

	$audio->setAttribute( 'controls', '' );
	$audio->setAttribute( 'preload', 'none' );

	// Boolean attributes that are mirrored from data-mw onto the element
	foreach ( [ 'muted', 'loop' ] as $flag ) {
		if ( WTSUtils::getAttrFromDataMw( $dataMw, $flag, false ) ) {
			$audio->setAttribute( $flag, '' );
		}
	}

	// HACK(T295514): Until T313875 is implemented
	$audio->setAttribute( 'data-mw-tmh', '' );

	[ 'height' => $height, 'width' => $width ] = self::handleSize( $env, $attrs, $info );
	DOMDataUtils::addNormalizedAttribute( $audio, 'height', (string)$height, null, true );
	DOMDataUtils::addNormalizedAttribute( $audio, 'width', (string)$width, null, true );
	$audio->setAttribute( 'style', 'width: ' . $width . 'px;' );

	// Hardcoded until defined heights are respected.
	// See `AddMediaInfo::handleSize`
	DOMCompat::getClassList( $container )->add( 'mw-default-audio-height' );

	self::copyOverAttribute( $audio, $span, 'resource' );
	if ( $span->hasAttribute( 'lang' ) ) {
		self::copyOverAttribute( $audio, $span, 'lang' );
	}

	$duration = $info['duration'] ?? null;
	if ( $duration ) {
		$audio->setAttribute( 'data-durationhint', (string)ceil( (float)$duration ) );
	}

	self::addSources( $audio, $info, $dataMw, false );
	self::addTracks( $audio, $info );

	return $audio;
}
| 289 | |
/**
 * Build the <video> element for a file.
 *
 * @param Env $env
 * @param Element $span Placeholder whose "resource" and "lang" are copied over
 * @param array $attrs
 * @param array $info
 * @param DataMw $dataMw The "muted" and "loop" attributes are not kept in it
 * @param Element $container
 * @param string|null $alt Unused, but matches the signature of handlers
 * @return Element
 */
private static function handleVideo(
	Env $env, Element $span, array $attrs, array $info, DataMw $dataMw,
	Element $container, ?string $alt
): Element {
	$video = $span->ownerDocument->createElement( 'video' );

	// Only a generated thumbnail is used as the poster
	if ( !empty( $info['thumburl'] ) ) {
		$video->setAttribute( 'poster', self::getPath( $info ) );
	}

	$video->setAttribute( 'controls', '' );
	$video->setAttribute( 'preload', 'none' );

	// Boolean attributes that are mirrored from data-mw onto the element
	foreach ( [ 'muted', 'loop' ] as $flag ) {
		if ( WTSUtils::getAttrFromDataMw( $dataMw, $flag, false ) ) {
			$video->setAttribute( $flag, '' );
		}
	}

	// HACK(T295514): Until T313875 is implemented
	$video->setAttribute( 'data-mw-tmh', '' );

	[ 'height' => $height, 'width' => $width ] = self::handleSize( $env, $attrs, $info );
	DOMDataUtils::addNormalizedAttribute( $video, 'height', (string)$height, null, true );
	DOMDataUtils::addNormalizedAttribute( $video, 'width', (string)$width, null, true );

	self::copyOverAttribute( $video, $span, 'resource' );
	if ( $span->hasAttribute( 'lang' ) ) {
		self::copyOverAttribute( $video, $span, 'lang' );
	}

	$duration = $info['duration'] ?? null;
	if ( $duration ) {
		$video->setAttribute( 'data-durationhint', (string)ceil( (float)$duration ) );
	}

	self::addSources( $video, $info, $dataMw, true );
	self::addTracks( $video, $info );

	return $video;
}
| 345 | |
/**
 * Set up the actual image structure, attributes, etc.
 *
 * @param Env $env
 * @param Element $span Placeholder whose "resource" and "lang" are copied over
 * @param array $attrs
 * @param array $info
 * @param DataMw $dataMw
 * @param Element $container
 * @param string|null $alt No alt attribute is set when null
 * @return Element
 */
private static function handleImage(
	Env $env, Element $span, array $attrs, array $info, DataMw $dataMw,
	Element $container, ?string $alt
): Element {
	$img = $span->ownerDocument->createElement( 'img' );

	if ( $alt !== null ) {
		$img->setAttribute( 'alt', $alt );
	}

	self::copyOverAttribute( $img, $span, 'resource' );

	$img->setAttribute( 'src', self::getPath( $info ) );
	$img->setAttribute( 'decoding', 'async' );

	if ( $span->hasAttribute( 'lang' ) ) {
		self::copyOverAttribute( $img, $span, 'lang' );
	}

	// Add (read-only) information about original file size (T64881)
	$img->setAttribute( 'data-file-width', (string)$info['width'] );
	$img->setAttribute( 'data-file-height', (string)$info['height'] );
	$img->setAttribute( 'data-file-type', strtolower( $info['mediatype'] ?? '' ) );

	[ 'height' => $height, 'width' => $width ] = self::handleSize( $env, $attrs, $info );
	DOMDataUtils::addNormalizedAttribute( $img, 'height', (string)$height, null, true );
	DOMDataUtils::addNormalizedAttribute( $img, 'width', (string)$width, null, true );

	// Handle "responsive" images, i.e. srcset
	$responsiveUrls = $info['responsiveUrls'] ?? null;
	if ( $responsiveUrls ) {
		$srcset = [];
		foreach ( $responsiveUrls as $density => $url ) {
			$srcset[] = "{$url} {$density}x";
		}
		if ( $srcset ) {
			$img->setAttribute( 'srcset', implode( ', ', $srcset ) );
		}
	}

	return $img;
}
| 400 | |
/**
 * Create the error object that gets recorded in data-mw.
 *
 * @param string $key
 * @param string $message
 * @param ?array $params Treated as an empty list when null
 * @return DataMwError
 */
private static function makeErr(
	string $key, string $message, ?array $params = null
): DataMwError {
	if ( $params === null ) {
		$params = [];
	}
	return new DataMwError( $key, $params, $message );
}
| 406 | |
/**
 * Flag the container as an error and record what went wrong in data-mw.
 *
 * @param Element $container Gets "mw:Error" added to its typeof
 * @param Element $span Its children are replaced by $alt, if not null
 * @param list<DataMwError> $errs Appended to errors already in $dataMw
 * @param DataMw $dataMw
 * @param ?string $alt
 */
private static function handleErrors(
	Element $container, Element $span, array $errs, DataMw $dataMw,
	?string $alt
): void {
	$alreadyFlagged = DOMUtils::hasTypeOf( $container, 'mw:Error' );
	if ( !$alreadyFlagged ) {
		DOMUtils::addTypeOf( $container, 'mw:Error', true );
	}

	$dataMw->errors = is_array( $dataMw->errors ?? null )
		? array_merge( $dataMw->errors, $errs )
		: $errs;

	if ( $alt === null ) {
		return;
	}
	$altNode = $span->ownerDocument->createTextNode( $alt );
	DOMCompat::replaceChildren( $span, $altNode );
}
| 429 | |
/**
 * Copy an attribute from the placeholder span to the element replacing it,
 * passing along the value found in the span's shadow info for it.
 *
 * @param Element $elt Destination
 * @param Element $span Origin
 * @param string $attribute Name of the attribute to copy
 */
private static function copyOverAttribute(
	Element $elt, Element $span, string $attribute
): void {
	$current = DOMCompat::getAttribute( $span, $attribute );
	$shadowInfo = WTSUtils::getAttributeShadowInfo( $span, $attribute );
	DOMDataUtils::addNormalizedAttribute( $elt, $attribute, $current, $shadowInfo['value'] );
}
| 440 | |
/**
 * Swap the placeholder anchor for the element that is going to wrap the
 * media: an <a> for images, unless an empty "link" option was given, and a
 * <span> otherwise.
 *
 * @param Env $env
 * @param PegTokenizer $urlParser Decides whether "link" is an external url
 * @param Element $container
 * @param Element $oldAnchor Replaced in its parent by the returned element
 * @param array $attrs Its 'title' is the target of the description link
 * @param DataMw $dataMw The "link" attribute is dropped from it once used
 * @param bool $isImage
 * @param ?string $captionText Becomes the title attribute, unless empty
 * @param int $page Added to the description link's query when positive
 * @param string $lang Added to the description link's query when not empty
 * @return Element
 */
private static function replaceAnchor(
	Env $env, PegTokenizer $urlParser, Element $container,
	Element $oldAnchor, array $attrs, DataMw $dataMw, bool $isImage,
	?string $captionText, int $page, string $lang
): Element {
	$doc = $oldAnchor->ownerDocument;
	$linkAttr = WTSUtils::getAttrFromDataMw( $dataMw, 'link', true );

	if ( !$isImage ) {
		$anchor = $doc->createElement( 'span' );
	} else {
		$anchor = $doc->createElement( 'a' );

		// Points the anchor at the file description page
		$linkToDescription = static function ( Title $title ) use ( $env, $anchor, $page, $lang ) {
			$query = array_filter( [
				'page' => $page > 0 ? $page : null,
				'lang' => $lang,
			] );
			$target = $env->makeLink( $title );
			if ( $query ) {
				$target .= '?' . http_build_query( $query );
			}
			$anchor->setAttribute( 'href', $target );
			$anchor->setAttribute( 'class', 'mw-file-description' );
		};

		if ( $linkAttr === null ) {
			$linkToDescription( $attrs['title'] );
		} else {
			$consumeLink = true;
			$linkText = $linkAttr->value['txt'];
			if ( $linkText === '' ) {
				// No href if link= was specified
				$anchor = $doc->createElement( 'span' );
			} elseif ( $urlParser->tokenizeURL( $linkText ) !== false ) {
				// An external link!
				$externalHref = Sanitizer::cleanUrl( $env->getSiteConfig(), $linkText, 'external' );
				$anchor->setAttribute( 'href', $externalHref );
				// Similar to AddLinkAttributes
				foreach ( $env->getExternalLinkAttribs( $externalHref ) as $name => $value ) {
					if ( $name !== 'rel' ) {
						$anchor->setAttribute( $name, $value );
						continue;
					}
					foreach ( $value as $rel ) {
						DOMUtils::addRel( $anchor, $rel );
					}
				}
			} else {
				$linkTitle = $env->makeTitleFromText( $linkText, null, true );
				if ( $linkTitle === null ) {
					// Treat same as if link weren't present
					$linkToDescription( $attrs['title'] );
					// but preserve for roundtripping
					$consumeLink = false;
				} else {
					$anchor->setAttribute( 'href', $env->makeLink( $linkTitle ) );
					$anchor->setAttribute( 'title', $linkTitle->getPrefixedText() );
				}
			}
			if ( $consumeLink ) {
				WTSUtils::getAttrFromDataMw( $dataMw, 'link', /* keep */false );
			}
		}
	}

	if ( $captionText ) {
		$anchor->setAttribute( 'title', $captionText );
	}

	$oldAnchor->parentNode->replaceChild( $anchor, $oldAnchor );
	return $anchor;
}
| 516 | |
/**
 * Replace the placeholder structure of every media container under $root
 * with the final media element.
 *
 * Works in two passes.  The first collects the containers whose structure
 * is as expected, along with the files they refer to.  File info is then
 * requested for all of those files at once, and the second pass uses it to
 * build the media elements, or to flag the containers as errors.
 *
 * @inheritDoc
 */
public function run(
	Env $env, Node $root, array $options = [], bool $atTopLevel = false
): void {
	'@phan-var Element|DocumentFragment $root';  // @var Element|DocumentFragment $root
	$urlParser = new PegTokenizer( $env );

	$validContainers = [];
	$files = [];

	$containers = DOMCompat::querySelectorAll( $root, '[typeof*="mw:File"]' );

	foreach ( $containers as $container ) {
		// DOMFragmentWrappers assume the element name of their outermost
		// content so, depending how the above query is written, we're
		// protecting against getting a figure of the wrong type.  However,
		// since we're currently using typeof, it shouldn't be a problem.
		// Also note that info for the media nested in the fragment has
		// already been added in their respective pipeline.
		Assert::invariant(
			!WTUtils::isDOMFragmentWrapper( $container ),
			'Media info for fragment was already added'
		);

		// We expect this structure to be predictable based on how it's
		// emitted in the TT/WikiLinkHandler but treebuilding may have
		// messed that up for us.
		$anchor = $container;
		$anchorNodeName = '';
		$reopenedAFE = [];
		do {
			// An active formatting element may have been reopened inside
			// the wrapper if a content model violation was encountered
			// during treebuilding.  Try to be a little lenient about that
			// instead of bailing out
			$anchor = $anchor->firstChild;
			if ( $anchor === null ) {
				// Ran out of children before finding an anchor.  Leave
				// that to be reported as unexpected structure below.
				$anchorNodeName = '';
				break;
			}
			$anchorNodeName = DOMCompat::nodeName( $anchor );
			if ( $anchorNodeName !== 'a' ) {
				$reopenedAFE[] = $anchor;
			}
		} while (
			$anchorNodeName !== 'a' &&
			isset( Consts::$HTML['FormattingTags'][$anchorNodeName] )
		);
		if ( $anchorNodeName !== 'a' ) {
			$env->log( 'error', 'Unexpected structure when adding media info.' );
			continue;
		}
		$span = $anchor->firstChild;
		if ( !( $span instanceof Element && DOMCompat::nodeName( $span ) === 'span' ) ) {
			$env->log( 'error', 'Unexpected structure when adding media info.' );
			continue;
		}
		$caption = $anchor->nextSibling;
		$isInlineMedia = WTUtils::isInlineMedia( $container );
		// The anchor may not have a sibling at all, so check that there's
		// an element before asking for its name
		if (
			!$isInlineMedia &&
			!( $caption instanceof Element && DOMCompat::nodeName( $caption ) === 'figcaption' )
		) {
			$env->log( 'error', 'Unexpected structure when adding media info.' );
			continue;
		}

		// For T314059.  Migrate any active formatting tags we found open
		// inside the container to the figcaption to conform to the spec.
		// This should simplify selectors for clients and styling.
		// TODO: Consider exposing these as lints
		if ( $reopenedAFE ) {
			$firstAFE = $reopenedAFE[0];
			$lastAFE = $reopenedAFE[count( $reopenedAFE ) - 1];
			DOMUtils::migrateChildren( $lastAFE, $container );
			if ( $isInlineMedia ) {
				// Remove the formatting elements, they are of no use
				// We could migrate them into the caption in data-mw,
				// but that doesn't seem worthwhile
				$firstAFE->parentNode->removeChild( $firstAFE );
			} else {
				// Move the formatting elements into the figcaption
				DOMUtils::migrateChildren( $caption, $lastAFE );
				$caption->appendChild( $firstAFE );
				// Unconditionally clear tsr out of an abundance of caution
				// These tags should already be annotated as autoinserted anyways
				foreach ( $reopenedAFE as $afe ) {
					DOMDataUtils::getDataParsoid( $afe )->tsr = null;
				}
			}
		}

		$dataMw = DOMDataUtils::getDataMw( $container );

		$dims = [
			'width' => (int)DOMCompat::getAttribute( $span, 'data-width' ) ?: null,
			'height' => (int)DOMCompat::getAttribute( $span, 'data-height' ) ?: null,
		];

		$page = WTSUtils::getAttrFromDataMw( $dataMw, 'page', true );
		if ( $page ) {
			$dims['page'] = $page->value['txt'];
		}

		$lang = DOMCompat::getAttribute( $span, 'lang' );
		if ( $lang !== null ) {
			$dims['lang'] = $lang;
		}

		// "starttime" should be used if "thumbtime" isn't present,
		// but only for rendering.
		$thumbtime = WTSUtils::getAttrFromDataMw( $dataMw, 'thumbtime', true );
		$starttime = WTSUtils::getAttrFromDataMw( $dataMw, 'starttime', true );
		if ( $thumbtime || $starttime ) {
			$seek = $thumbtime && $thumbtime->value !== null
				? $thumbtime->value['txt']
				: ( $starttime && $starttime->value !== null ? $starttime->value['txt'] : '' );
			$seek = self::parseTimeString( $seek );
			if ( $seek !== null ) {
				$dims['seek'] = $seek;
			}
		}

		$attrs = [
			'dims' => $dims,
			'format' => WTUtils::getMediaFormat( $container ),
			'title' => $env->makeTitleFromText( $span->textContent ),
		];

		// Requests are keyed by a hash of the file name and dimensions, so
		// that identical requests are only made once
		$file = [ $attrs['title']->getDBKey(), $dims ];
		$infoKey = md5( json_encode( $file ) );
		$files[$infoKey] = $file;
		$errs = [];

		$manualKey = null;
		$manualthumb = WTSUtils::getAttrFromDataMw( $dataMw, 'manualthumb', true );
		if ( $manualthumb !== null ) {
			$val = $manualthumb->value['txt'];
			$title = $env->makeTitleFromText( $val, $attrs['title']->getNamespace(), true );
			if ( $title === null ) {
				$errs[] = self::makeErr(
					'apierror-invalidtitle',
					'Invalid thumbnail title.',
					[ 'name' => $val ]
				);
			} else {
				$file = [ $title->getDBKey(), $dims ];
				$manualKey = md5( json_encode( $file ) );
				$files[$manualKey] = $file;
			}
		}

		$validContainers[] = [
			'container' => $container,
			'attrs' => $attrs,
			// Pass the anchor because we did some work to find it above
			'anchor' => $anchor,
			'infoKey' => $infoKey,
			'manualKey' => $manualKey,
			'errs' => $errs,
		];
	}

	if ( !$validContainers ) {
		return;
	}

	$start = hrtime( true );

	$infos = $env->getDataAccess()->getFileInfo(
		$env->getPageConfig(),
		array_values( $files )
	);

	if ( $env->profiling() ) {
		$profile = $env->getCurrentProfile();
		$profile->bumpMWTime( "Media", hrtime( true ) - $start, "api" );
		$profile->bumpCount( "Media" );
	}

	// Swap the requests for their results, under the same keys
	$files = array_combine(
		array_keys( $files ),
		$infos
	);

	$hasThumb = false;
	$needsTMHModules = false;

	foreach ( $validContainers as $c ) {
		$container = $c['container'];
		$anchor = $c['anchor'];
		$span = $anchor->firstChild;
		$attrs = $c['attrs'];
		$dataMw = DOMDataUtils::getDataMw( $container );
		$errs = $c['errs'];

		$hasThumb = $hasThumb || DOMUtils::hasTypeOf( $container, 'mw:File/Thumb' );

		$info = $files[$c['infoKey']];
		if ( !$info ) {
			$env->getDataAccess()->addTrackingCategory(
				$env->getPageConfig(),
				$env->getMetadata(),
				'broken-file-category'
			);
			$errs[] = self::makeErr( 'apierror-filedoesnotexist', 'This image does not exist.' );
		} elseif ( isset( $info['thumberror'] ) ) {
			$errs[] = self::makeErr( 'apierror-unknownerror', $info['thumberror'] );
		}

		// FIXME: Should we fallback to $info if there are errors with $manualinfo?
		// What does the legacy parser do?
		if ( $c['manualKey'] !== null ) {
			$manualinfo = $files[$c['manualKey']];
			if ( !$manualinfo ) {
				$errs[] = self::makeErr( 'apierror-filedoesnotexist', 'This image does not exist.' );
			} elseif ( isset( $manualinfo['thumberror'] ) ) {
				$errs[] = self::makeErr( 'apierror-unknownerror', $manualinfo['thumberror'] );
			} else {
				$info = $manualinfo;
			}
		}

		if ( $info['badFile'] ?? false ) {
			$errs[] = self::makeErr( 'apierror-badfile', 'This image is on the bad file list.' );
		}

		if ( WTUtils::hasVisibleCaption( $container ) ) {
			$captionText = null;
		} else {
			if ( WTUtils::isInlineMedia( $container ) ) {
				$caption = $dataMw->caption ?? $container->ownerDocument->createDocumentFragment();
			} else {
				$caption = DOMCompat::querySelector( $container, 'figcaption' );
				// If the caption had tokens, it was placed in a DOMFragment
				// and we haven't unpacked yet
				if (
					$caption->firstChild &&
					DOMUtils::hasTypeOf( $caption->firstChild, 'mw:DOMFragment' )
				) {
					$id = DOMDataUtils::getDataParsoid( $caption->firstChild )->html;
					$caption = $env->getDOMFragment( $id );
				}
			}
			$captionText = trim( WTUtils::textContentFromCaption( $caption ) );

			// The sanitizer isn't going to do anything with a string value
			// for alt/title and since we're going to use dom element setters,
			// quote escaping should be fine.  Note that if sanitization does
			// happen here, it should also be done to $altFromCaption so that
			// string comparison matches, where necessary.
			//
			// $sanitizedArgs = Sanitizer::sanitizeTagAttrs( $env->getSiteConfig(), 'img', null, [
			// new KV( 'alt', $captionText )  // Could be a 'title' too
			// ] );
			// $captionText = $sanitizedArgs['alt']->key;
		}

		// Info relates to the thumb, not necessarily the file.
		// The distinction matters for manualthumb, in which case only
		// the "resource" copied over from the span relates to the file.

		switch ( $info['mediatype'] ?? '' ) {
			case 'AUDIO':
				$handler = 'handleAudio';
				$isImage = false;
				break;
			case 'VIDEO':
				$handler = 'handleVideo';
				$isImage = false;
				break;
			default:
				$handler = 'handleImage';
				$isImage = true;
				break;
		}

		// The alt option stays in data-mw unless it ends up on an image
		$alt = null;
		$keepAltInDataMw = !$isImage || $errs;
		$attr = WTSUtils::getAttrFromDataMw( $dataMw, 'alt', $keepAltInDataMw );
		if ( $attr !== null ) {
			$alt = $attr->value['txt'];
		} elseif ( $captionText ) {
			$alt = $captionText;
		}

		// Add mw:Error to the RDFa type.
		if ( $errs ) {
			self::handleErrors( $container, $span, $errs, $dataMw, $alt );
			continue;
		}

		$needsTMHModules = $needsTMHModules || !$isImage;

		$env->getMetadata()->addImage(
			$attrs['title'],
			$info['timestamp'] ?? null,
			$info['sha1'] ?? null,
		);

		$elt = self::$handler( $env, $span, $attrs, $info, $dataMw, $container, $alt );
		DOMCompat::getClassList( $elt )->add( 'mw-file-element' );

		$anchor = self::replaceAnchor(
			$env, $urlParser, $container, $anchor, $attrs, $dataMw, $isImage, $captionText,
			(int)( $attrs['dims']['page'] ?? 0 ),
			$attrs['dims']['lang'] ?? ''
		);
		$anchor->appendChild( $elt );

		if ( isset( $dataMw->attribs ) && count( $dataMw->attribs ) === 0 ) {
			unset( $dataMw->attribs );
		}
	}

	if ( $hasThumb ) {
		$env->getMetadata()->appendOutputStrings( CMCSS::MODULE, [ 'mediawiki.page.media' ] );
	}

	if ( $needsTMHModules ) {
		$env->getMetadata()->appendOutputStrings( CMCSS::MODULE_STYLE, [ 'ext.tmh.player.styles' ] );
		$env->getMetadata()->appendOutputStrings( CMCSS::MODULE, [ 'ext.tmh.player' ] );
	}
}
| 837 | } |