Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 420 |
|
0.00% |
0 / 14 |
CRAP | |
0.00% |
0 / 1 |
AddMediaInfo | |
0.00% |
0 / 420 |
|
0.00% |
0 / 14 |
18360 | |
0.00% |
0 / 1 |
handleSize | |
0.00% |
0 / 29 |
|
0.00% |
0 / 1 |
420 | |||
parseTimeString | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
56 | |||
parseFrag | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
56 | |||
addSources | |
0.00% |
0 / 25 |
|
0.00% |
0 / 1 |
56 | |||
addTracks | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
20 | |||
getPath | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
handleAudio | |
0.00% |
0 / 24 |
|
0.00% |
0 / 1 |
30 | |||
handleVideo | |
0.00% |
0 / 24 |
|
0.00% |
0 / 1 |
42 | |||
handleImage | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
42 | |||
makeErr | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
handleErrors | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
20 | |||
copyOverAttribute | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
replaceAnchor | |
0.00% |
0 / 44 |
|
0.00% |
0 / 1 |
210 | |||
run | |
0.00% |
0 / 184 |
|
0.00% |
0 / 1 |
2450 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Wt2Html\DOM\Processors; |
5 | |
6 | use stdClass; |
7 | use Wikimedia\Assert\Assert; |
8 | use Wikimedia\Parsoid\Config\Env; |
9 | use Wikimedia\Parsoid\Core\Sanitizer; |
10 | use Wikimedia\Parsoid\DOM\DocumentFragment; |
11 | use Wikimedia\Parsoid\DOM\Element; |
12 | use Wikimedia\Parsoid\DOM\Node; |
13 | use Wikimedia\Parsoid\Html2Wt\WTSUtils; |
14 | use Wikimedia\Parsoid\NodeData\DataMw; |
15 | use Wikimedia\Parsoid\Utils\ContentUtils; |
16 | use Wikimedia\Parsoid\Utils\DOMCompat; |
17 | use Wikimedia\Parsoid\Utils\DOMDataUtils; |
18 | use Wikimedia\Parsoid\Utils\DOMUtils; |
19 | use Wikimedia\Parsoid\Utils\Title; |
20 | use Wikimedia\Parsoid\Utils\WTUtils; |
21 | use Wikimedia\Parsoid\Wikitext\Consts; |
22 | use Wikimedia\Parsoid\Wt2Html\PegTokenizer; |
23 | use Wikimedia\Parsoid\Wt2Html\Wt2HtmlDOMProcessor; |
24 | |
25 | class AddMediaInfo implements Wt2HtmlDOMProcessor { |
/**
 * Extract the dimensions for media.
 *
 * Starts from the file's own height/width in $info, prefers the thumbnail
 * dimensions when a thumbnail url is present, substitutes fixed defaults
 * for audio, and finally reverts to the file's own dimensions when the
 * user-requested size would upscale a file for which that is denied.
 *
 * @param Env $env
 * @param array $attrs
 * @param array $info
 * @phan-param array{dims:array{height?:?int,width?:?int},format:string} $attrs
 * @return array Array with 'height' and 'width' keys
 */
private static function handleSize( Env $env, array $attrs, array $info ): array {
	$height = $info['height'];
	$width = $info['width'];
	// Missing mediatype is treated like any other non-audio, non-bitmap type
	$mediaType = $info['mediatype'] ?? '';

	// NAN never compares identical to anything (itself included), so a
	// `!== NAN` test can never fail; is_nan() is the check that can.
	Assert::invariant(
		is_numeric( $height ) && !is_nan( (float)$height ),
		'Expected $height as a valid number'
	);
	Assert::invariant(
		is_numeric( $width ) && !is_nan( (float)$width ),
		'Expected $width as a valid number'
	);

	// Thumbnail dimensions win over the original file's, but only when
	// there's actually a thumbnail to go with them.
	if ( !empty( $info['thumburl'] ) && !empty( $info['thumbheight'] ) ) {
		$height = $info['thumbheight'];
	}

	if ( !empty( $info['thumburl'] ) && !empty( $info['thumbwidth'] ) ) {
		$width = $info['thumbwidth'];
	}

	// Audio files don't have dimensions, so we fallback to these arbitrary
	// defaults, and the "mw-default-audio-height" class is added.
	if ( $mediaType === 'AUDIO' ) {
		// FIXME: TMH uses 23 but VE wants 32
		$height = /* height || */32; // Arguably, audio should respect a defined height
		$width = max( 35, $width ?: $env->getSiteConfig()->widthOption() );
	}

	// Handle client-side upscaling (including 'border')

	// An explicit 'mustRender' in $info wins; otherwise everything that
	// isn't a bitmap is considered to require rendering.
	$mustRender = $info['mustRender'] ?? ( $mediaType !== 'BITMAP' );

	// Calculate the scaling ratio from the user-specified width and height.
	// When both are given, the smaller of the two ratios is used.
	$ratio = null;
	if ( !empty( $attrs['dims']['height'] ) && !empty( $info['height'] ) ) {
		$ratio = $attrs['dims']['height'] / $info['height'];
	}
	if ( !empty( $attrs['dims']['width'] ) && !empty( $info['width'] ) ) {
		$r = $attrs['dims']['width'] / $info['width'];
		$ratio = ( $ratio === null || $r < $ratio ) ? $r : $ratio;
	}

	// If the user requested upscaling, then this is denied in the thumbnail
	// and frameless format, except for files with mustRender.
	if (
		$ratio !== null && $ratio > 1 && !$mustRender &&
		( $attrs['format'] === 'Thumb' || $attrs['format'] === 'Frameless' )
	) {
		// Upscaling denied
		$height = $info['height'];
		$width = $info['width'];
	}

	return [ 'height' => $height, 'width' => $width ];
}
91 | |
/**
 * This is a port of TMH's parseTimeString()
 *
 * Converts a colon separated time ("ss", "mm:ss" or "hh:mm:ss", where each
 * part is any numeric string) to a number of seconds.
 *
 * @param string $timeString
 * @param int|float|null $length When given, times within the last second
 *   of (or beyond) this length are pulled back to one second before the
 *   end, but never below zero.
 * @return int|float|null The time in seconds, or null if $timeString has
 *   more than three parts or a non-numeric part.
 */
private static function parseTimeString(
	string $timeString, $length = null
) {
	$parts = explode( ':', $timeString );
	$countParts = count( $parts );
	// Anything beyond hours:minutes:seconds is malformed
	if ( $countParts > 3 ) {
		return null;
	}
	$time = 0;
	foreach ( $parts as $i => $part ) {
		if ( !is_numeric( $part ) ) {
			return null;
		}
		// The last part is seconds; each part to its left is worth 60x more
		$time += floatval( $part ) * pow( 60, $countParts - 1 - $i );
	}
	if ( $time < 0 ) {
		$time = 0;
	} elseif ( $length !== null && $time > $length - 1 ) {
		// Compare against the same bound we clamp to, so that a time
		// exactly at the end is treated like one past it. Guard against
		// media shorter than a second producing a negative time.
		$time = max( 0, $length - 1 );
	}
	return $time;
}
123 | |
/**
 * Build the temporal media fragment for a source url.
 * https://www.w3.org/TR/media-frags/
 *
 * Reads the "starttime" and "endtime" options from data-mw (leaving them
 * in place) and returns "#t=" followed by whichever of the two parsed
 * successfully, the end time being preceded by a comma.
 *
 * @param array $info
 * @param DataMw $dataMw
 * @return string Empty when neither option is present
 */
private static function parseFrag( array $info, DataMw $dataMw ): string {
	$start = WTSUtils::getAttrFromDataMw( $dataMw, 'starttime', true );
	$end = WTSUtils::getAttrFromDataMw( $dataMw, 'endtime', true );
	if ( !$start && !$end ) {
		return '';
	}

	$fragment = '#t=';
	if ( $start ) {
		$from = self::parseTimeString( $start->value['txt'], $info['duration'] ?? null );
		if ( $from !== null ) {
			$fragment .= $from;
		}
	}
	if ( $end ) {
		$to = self::parseTimeString( $end->value['txt'], $info['duration'] ?? null );
		if ( $to !== null ) {
			$fragment .= ',' . $to;
		}
	}
	return $fragment;
}
153 | |
/**
 * Append one <source> child to $elt per derivative of the file.
 *
 * Derivatives are taken from $info['thumbdata']['derivatives'], else from
 * $info['derivatives'], else a single entry is synthesized from the
 * file's own url, mime type and dimensions.
 *
 * @param Element $elt
 * @param array $info
 * @param DataMw $dataMw
 * @param bool $hasDimension Whether to emit width / height data attributes
 */
private static function addSources(
	Element $elt, array $info, DataMw $dataMw, bool $hasDimension
): void {
	$doc = $elt->ownerDocument;
	$frag = self::parseFrag( $info, $dataMw );

	// BatchAPI's `getAPIData`
	$derivatives = $info['thumbdata']['derivatives'] ?? null;
	if ( !is_array( $derivatives ) ) {
		// "videoinfo" prop
		$derivatives = $info['derivatives'] ?? null;
	}
	if ( !is_array( $derivatives ) ) {
		$original = [
			'src' => $info['url'],
			'type' => $info['mime'],
			'width' => (string)$info['width'],
			'height' => (string)$info['height'],
		];
		$derivatives = [ $original ];
	}

	foreach ( $derivatives as $derivative ) {
		$source = $doc->createElement( 'source' );
		$source->setAttribute( 'src', $derivative['src'] . $frag );
		$source->setAttribute( 'type', $derivative['type'] ); // T339375
		// Entries without a transcode key get "-file" in their data attributes
		$isTranscode = isset( $derivative['transcodekey'] );
		$prefix = $isTranscode ? 'data' : 'data-file';
		if ( $hasDimension ) {
			$source->setAttribute( $prefix . '-width', (string)$derivative['width'] );
			$source->setAttribute( $prefix . '-height', (string)$derivative['height'] );
		}
		if ( $isTranscode ) {
			$source->setAttribute( 'data-transcodekey', $derivative['transcodekey'] );
		}
		$elt->appendChild( $source );
	}
}
192 | |
/**
 * Append one <track> child to $elt per timed text entry of the file.
 *
 * Entries are taken from $info['thumbdata']['timedtext'], else from
 * $info['timedtext']; missing fields become empty attributes.
 *
 * @param Element $elt
 * @param array $info
 */
private static function addTracks( Element $elt, array $info ): void {
	$doc = $elt->ownerDocument;

	// BatchAPI's `getAPIData`
	$timedtext = $info['thumbdata']['timedtext'] ?? null;
	if ( !is_array( $timedtext ) ) {
		// "videoinfo" prop
		$timedtext = $info['timedtext'] ?? null;
	}
	if ( !is_array( $timedtext ) ) {
		return;
	}

	// Attribute name => key in the timed text entry, in output order
	$attrToKey = [
		'kind' => 'kind',
		'type' => 'type',
		'src' => 'src',
		'srclang' => 'srclang',
		'label' => 'label',
		'data-mwtitle' => 'title',
		'data-dir' => 'dir',
	];

	foreach ( $timedtext as $entry ) {
		$track = $doc->createElement( 'track' );
		foreach ( $attrToKey as $attrName => $key ) {
			$track->setAttribute( $attrName, $entry[$key] ?? '' );
		}
		$elt->appendChild( $track );
	}
}
216 | |
/**
 * Pick the url to use for a file given its info object: the thumbnail
 * url when there is a non-empty one, else the file's own url, else the
 * empty string.
 *
 * @param array $info
 * @return string
 */
private static function getPath( array $info ) {
	foreach ( [ 'thumburl', 'url' ] as $key ) {
		if ( !empty( $info[$key] ) ) {
			return $info[$key];
		}
	}
	return '';
}
232 | |
/**
 * Build the <audio> element, with its sources and tracks, that stands in
 * for the placeholder span. Also tags the container with the
 * "mw-default-audio-height" class.
 *
 * @param Env $env
 * @param Element $span
 * @param array $attrs
 * @param array $info
 * @param DataMw $dataMw
 * @param Element $container
 * @param string|null $alt Unused, but matches the signature of handlers
 * @return Element
 */
private static function handleAudio(
	Env $env, Element $span, array $attrs, array $info, DataMw $dataMw,
	Element $container, ?string $alt
): Element {
	$audio = $span->ownerDocument->createElement( 'audio' );

	$audio->setAttribute( 'controls', '' );
	$audio->setAttribute( 'preload', 'none' );

	// Boolean options move from data-mw onto the element
	foreach ( [ 'muted', 'loop' ] as $flag ) {
		if ( WTSUtils::getAttrFromDataMw( $dataMw, $flag, false ) ) {
			$audio->setAttribute( $flag, '' );
		}
	}

	// HACK(T295514): Until T313875 is implemented
	$audio->setAttribute( 'data-mw-tmh', '' );

	$size = self::handleSize( $env, $attrs, $info );
	foreach ( [ 'height', 'width' ] as $dimension ) {
		DOMDataUtils::addNormalizedAttribute(
			$audio, $dimension, (string)$size[$dimension], null, true
		);
	}
	$audio->setAttribute( 'style', "width: {$size['width']}px;" );

	// Hardcoded until defined heights are respected.
	// See `AddMediaInfo::handleSize`
	DOMCompat::getClassList( $container )->add( 'mw-default-audio-height' );

	self::copyOverAttribute( $audio, $span, 'resource' );

	if ( $span->hasAttribute( 'lang' ) ) {
		self::copyOverAttribute( $audio, $span, 'lang' );
	}

	$duration = $info['duration'] ?? null;
	if ( $duration ) {
		$audio->setAttribute( 'data-durationhint', (string)ceil( (float)$duration ) );
	}

	self::addSources( $audio, $info, $dataMw, false );
	self::addTracks( $audio, $info );

	return $audio;
}
289 | |
/**
 * Build the <video> element, with its sources and tracks, that stands in
 * for the placeholder span. The thumbnail, if any, becomes the poster.
 *
 * @param Env $env
 * @param Element $span
 * @param array $attrs
 * @param array $info
 * @param DataMw $dataMw
 * @param Element $container
 * @param string|null $alt Unused, but matches the signature of handlers
 * @return Element
 */
private static function handleVideo(
	Env $env, Element $span, array $attrs, array $info, DataMw $dataMw,
	Element $container, ?string $alt
): Element {
	$video = $span->ownerDocument->createElement( 'video' );

	if ( !empty( $info['thumburl'] ) ) {
		$video->setAttribute( 'poster', self::getPath( $info ) );
	}

	$video->setAttribute( 'controls', '' );
	$video->setAttribute( 'preload', 'none' );

	// Boolean options move from data-mw onto the element
	foreach ( [ 'muted', 'loop' ] as $flag ) {
		if ( WTSUtils::getAttrFromDataMw( $dataMw, $flag, false ) ) {
			$video->setAttribute( $flag, '' );
		}
	}

	// HACK(T295514): Until T313875 is implemented
	$video->setAttribute( 'data-mw-tmh', '' );

	$size = self::handleSize( $env, $attrs, $info );
	foreach ( [ 'height', 'width' ] as $dimension ) {
		DOMDataUtils::addNormalizedAttribute(
			$video, $dimension, (string)$size[$dimension], null, true
		);
	}

	self::copyOverAttribute( $video, $span, 'resource' );

	if ( $span->hasAttribute( 'lang' ) ) {
		self::copyOverAttribute( $video, $span, 'lang' );
	}

	$duration = $info['duration'] ?? null;
	if ( $duration ) {
		$video->setAttribute( 'data-durationhint', (string)ceil( (float)$duration ) );
	}

	self::addSources( $video, $info, $dataMw, true );
	self::addTracks( $video, $info );

	return $video;
}
345 | |
/**
 * Build the <img> element that stands in for the placeholder span,
 * including alt text, original file metadata, dimensions and srcset.
 *
 * @param Env $env
 * @param Element $span
 * @param array $attrs
 * @param array $info
 * @param DataMw $dataMw
 * @param Element $container
 * @param string|null $alt
 * @return Element
 */
private static function handleImage(
	Env $env, Element $span, array $attrs, array $info, DataMw $dataMw,
	Element $container, ?string $alt
): Element {
	$img = $span->ownerDocument->createElement( 'img' );

	if ( $alt !== null ) {
		$img->setAttribute( 'alt', $alt );
	}

	self::copyOverAttribute( $img, $span, 'resource' );

	$img->setAttribute( 'src', self::getPath( $info ) );
	$img->setAttribute( 'decoding', 'async' );

	if ( $span->hasAttribute( 'lang' ) ) {
		self::copyOverAttribute( $img, $span, 'lang' );
	}

	// Add (read-only) information about original file size (T64881)
	$img->setAttribute( 'data-file-width', (string)$info['width'] );
	$img->setAttribute( 'data-file-height', (string)$info['height'] );
	$img->setAttribute( 'data-file-type', strtolower( $info['mediatype'] ?? '' ) );

	$size = self::handleSize( $env, $attrs, $info );
	foreach ( [ 'height', 'width' ] as $dimension ) {
		DOMDataUtils::addNormalizedAttribute(
			$img, $dimension, (string)$size[$dimension], null, true
		);
	}

	// Handle "responsive" images, i.e. srcset
	if ( empty( $info['responsiveUrls'] ) ) {
		return $img;
	}
	$srcset = [];
	foreach ( $info['responsiveUrls'] as $density => $url ) {
		// One candidate per pixel density, e.g. "url 2x"
		$srcset[] = $url . ' ' . $density . 'x';
	}
	if ( $srcset ) {
		$img->setAttribute( 'srcset', implode( ', ', $srcset ) );
	}

	return $img;
}
400 | |
/**
 * Create an error object suitable for the "errors" list in data-mw.
 *
 * @param string $key
 * @param string $message
 * @param ?array $params Additional error info for clients that could fix
 *   the error; the property is omitted altogether when null.
 * @return stdClass
 */
private static function makeErr(
	string $key, string $message, ?array $params = null
): stdClass {
	// T367141: this should be a real class type
	$err = new stdClass;
	$err->key = $key;
	$err->message = $message;
	if ( $params !== null ) {
		$err->params = $params;
	}
	return $err;
}
412 | |
/**
 * Flag the container as erroneous: ensure it has the mw:Error type,
 * append $errs to any errors already recorded in data-mw and, when alt
 * text is available, make it the sole content of the span.
 *
 * @param Element $container
 * @param Element $span
 * @param list<stdClass> $errs
 * @param DataMw $dataMw
 * @param ?string $alt
 */
private static function handleErrors(
	Element $container, Element $span, array $errs, DataMw $dataMw,
	?string $alt
): void {
	if ( !DOMUtils::hasTypeOf( $container, 'mw:Error' ) ) {
		DOMUtils::addTypeOf( $container, 'mw:Error', true );
	}

	// Previously recorded errors come first
	$previous = $dataMw->errors ?? null;
	$dataMw->errors = is_array( $previous ) ? array_merge( $previous, $errs ) : $errs;

	if ( $alt === null ) {
		return;
	}
	$altNode = $span->ownerDocument->createTextNode( $alt );
	DOMCompat::replaceChildren( $span, $altNode );
}
435 | |
/**
 * Copy an attribute from the placeholder span onto the media element,
 * passing along the value reported by the span's shadow info.
 *
 * @param Element $elt
 * @param Element $span
 * @param string $attribute
 */
private static function copyOverAttribute(
	Element $elt, Element $span, string $attribute
): void {
	$current = DOMCompat::getAttribute( $span, $attribute );
	$shadowInfo = WTSUtils::getAttributeShadowInfo( $span, $attribute );
	DOMDataUtils::addNormalizedAttribute( $elt, $attribute, $current, $shadowInfo['value'] );
}
446 | |
/**
 * Replace the placeholder anchor with the element that will wrap the
 * media: an <a> (or, for an empty link option, a <span>) for images, and
 * always a <span> for anything else.
 *
 * @param Env $env
 * @param PegTokenizer $urlParser
 * @param Element $container
 * @param Element $oldAnchor
 * @param array $attrs
 * @param DataMw $dataMw
 * @param bool $isImage
 * @param ?string $captionText Used as the title attribute when non-empty
 * @param int $page
 * @param string $lang
 * @return Element The new wrapper, already in place of $oldAnchor
 */
private static function replaceAnchor(
	Env $env, PegTokenizer $urlParser, Element $container,
	Element $oldAnchor, array $attrs, DataMw $dataMw, bool $isImage,
	?string $captionText, int $page, string $lang
): Element {
	$doc = $oldAnchor->ownerDocument;
	$linkAttr = WTSUtils::getAttrFromDataMw( $dataMw, 'link', true );

	if ( !$isImage ) {
		$anchor = $doc->createElement( 'span' );
	} else {
		$anchor = $doc->createElement( 'a' );

		// Points the anchor at the file description page
		$addDescriptionLink = static function ( Title $title ) use ( $env, $anchor, $page, $lang ) {
			$query = [];
			if ( $page > 0 ) {
				$query['page'] = $page;
			}
			if ( $lang ) {
				$query['lang'] = $lang;
			}
			$href = $env->makeLink( $title );
			if ( $query ) {
				$href .= '?' . http_build_query( $query );
			}
			$anchor->setAttribute( 'href', $href );
			$anchor->setAttribute( 'class', 'mw-file-description' );
		};

		if ( $linkAttr === null ) {
			$addDescriptionLink( $attrs['title'] );
		} else {
			$target = $linkAttr->value['txt'];
			$discard = true;
			if ( $target === '' ) {
				// No href if link= was specified
				$anchor = $doc->createElement( 'span' );
			} elseif ( $urlParser->tokenizeURL( $target ) !== false ) {
				// An external link!
				$href = Sanitizer::cleanUrl( $env->getSiteConfig(), $target, 'external' );
				$anchor->setAttribute( 'href', $href );
				// Similar to AddLinkAttributes
				foreach ( $env->getExternalLinkAttribs( $href ) as $name => $attrValue ) {
					if ( $name !== 'rel' ) {
						$anchor->setAttribute( $name, $attrValue );
						continue;
					}
					foreach ( $attrValue as $rel ) {
						DOMUtils::addRel( $anchor, $rel );
					}
				}
			} else {
				$linkTitle = $env->makeTitleFromText( $target, null, true );
				if ( $linkTitle === null ) {
					// Treat same as if link weren't present
					$addDescriptionLink( $attrs['title'] );
					// but preserve for roundtripping
					$discard = false;
				} else {
					$anchor->setAttribute( 'href', $env->makeLink( $linkTitle ) );
					$anchor->setAttribute( 'title', $linkTitle->getPrefixedText() );
				}
			}
			if ( $discard ) {
				WTSUtils::getAttrFromDataMw( $dataMw, 'link', /* keep */false );
			}
		}
	}

	if ( $captionText ) {
		$anchor->setAttribute( 'title', $captionText );
	}

	$oldAnchor->parentNode->replaceChild( $anchor, $oldAnchor );
	return $anchor;
}
522 | |
/**
 * Replace media placeholders under $root with real media elements.
 *
 * Works in two passes over every element whose typeof contains "mw:File":
 * the first validates the expected container > anchor > span structure
 * and collects the files to look up; the second, after a single batched
 * file info request, either builds the audio / video / img element or
 * records errors on the container.
 *
 * @inheritDoc
 */
public function run(
	Env $env, Node $root, array $options = [], bool $atTopLevel = false
): void {
	'@phan-var Element|DocumentFragment $root';  // @var Element|DocumentFragment $root
	$urlParser = new PegTokenizer( $env );

	$validContainers = [];
	$files = [];

	$containers = DOMCompat::querySelectorAll( $root, '[typeof*="mw:File"]' );

	foreach ( $containers as $container ) {
		// DOMFragmentWrappers assume the element name of their outermost
		// content so, depending how the above query is written, we're
		// protecting against getting a figure of the wrong type.  However,
		// since we're currently using typeof, it shouldn't be a problem.
		// Also note that info for the media nested in the fragment has
		// already been added in their respective pipeline.
		Assert::invariant(
			!WTUtils::isDOMFragmentWrapper( $container ),
			'Media info for fragment was already added'
		);

		// We expect this structure to be predictable based on how it's
		// emitted in the TT/WikiLinkHandler but treebuilding may have
		// messed that up for us.
		$anchor = $container;
		$anchorNodeName = null;
		$reopenedAFE = [];
		do {
			// An active formatting element may have been reopened inside
			// the wrapper if a content model violation was encountered
			// during treebuilding.  Try to be a little lenient about that
			// instead of bailing out
			$anchor = $anchor->firstChild;
			if ( $anchor === null ) {
				// Ran out of children without finding the anchor; this is
				// reported as an unexpected structure below.
				$anchorNodeName = null;
				break;
			}
			$anchorNodeName = DOMCompat::nodeName( $anchor );
			if ( $anchorNodeName !== 'a' ) {
				$reopenedAFE[] = $anchor;
			}
		} while (
			$anchorNodeName !== 'a' &&
			isset( Consts::$HTML['FormattingTags'][$anchorNodeName] )
		);
		if ( $anchorNodeName !== 'a' ) {
			$env->log( 'error', 'Unexpected structure when adding media info.' );
			continue;
		}
		$span = $anchor->firstChild;
		if ( !( $span instanceof Element && DOMCompat::nodeName( $span ) === 'span' ) ) {
			$env->log( 'error', 'Unexpected structure when adding media info.' );
			continue;
		}
		$caption = $anchor->nextSibling;
		$isInlineMedia = WTUtils::isInlineMedia( $container );
		if (
			!$isInlineMedia &&
			// The anchor may have no sibling at all
			( $caption === null || DOMCompat::nodeName( $caption ) !== 'figcaption' )
		) {
			$env->log( 'error', 'Unexpected structure when adding media info.' );
			continue;
		}

		// For T314059.  Migrate any active formatting tags we found open
		// inside the container to the figcaption to conform to the spec.
		// This should simplify selectors for clients and styling.
		// TODO: Consider exposing these as lints
		if ( $reopenedAFE ) {
			$firstAFE = $reopenedAFE[0];
			$lastAFE = $reopenedAFE[count( $reopenedAFE ) - 1];
			DOMUtils::migrateChildren( $lastAFE, $container );
			if ( $isInlineMedia ) {
				// Remove the formatting elements, they are of no use
				// We could migrate them into the caption in data-mw,
				// but that doesn't seem worthwhile
				$firstAFE->parentNode->removeChild( $firstAFE );
			} else {
				// Move the formatting elements into the figcaption
				DOMUtils::migrateChildren( $caption, $lastAFE );
				$caption->appendChild( $firstAFE );
				// Unconditionally clear tsr out of an abundance of caution
				// These tags should already be annotated as autoinserted anyways
				foreach ( $reopenedAFE as $afe ) {
					DOMDataUtils::getDataParsoid( $afe )->tsr = null;
				}
			}
		}

		$dataMw = DOMDataUtils::getDataMw( $container );

		// Missing or zero dimensions are normalized to null
		$dims = [
			'width' => (int)DOMCompat::getAttribute( $span, 'data-width' ) ?: null,
			'height' => (int)DOMCompat::getAttribute( $span, 'data-height' ) ?: null,
		];

		$page = WTSUtils::getAttrFromDataMw( $dataMw, 'page', true );
		if ( $page ) {
			$dims['page'] = $page->value['txt'];
		}

		$lang = DOMCompat::getAttribute( $span, 'lang' );
		if ( $lang !== null ) {
			$dims['lang'] = $lang;
		}

		// "starttime" should be used if "thumbtime" isn't present,
		// but only for rendering.
		$thumbtime = WTSUtils::getAttrFromDataMw( $dataMw, 'thumbtime', true );
		$starttime = WTSUtils::getAttrFromDataMw( $dataMw, 'starttime', true );
		if ( $thumbtime || $starttime ) {
			$seek = isset( $thumbtime->value )
				? $thumbtime->value['txt']
				: ( isset( $starttime->value ) ? $starttime->value['txt'] : '' );
			$seek = self::parseTimeString( $seek );
			if ( $seek !== null ) {
				$dims['seek'] = $seek;
			}
		}

		$attrs = [
			'dims' => $dims,
			'format' => WTUtils::getMediaFormat( $container ),
			'title' => $env->makeTitleFromText( $span->textContent ),
		];

		// Requests are keyed by a hash of (file key, dims) so that
		// identical requests are only made once.
		$file = [ $attrs['title']->getKey(), $dims ];
		$infoKey = md5( json_encode( $file ) );
		$files[$infoKey] = $file;
		$errs = [];

		$manualKey = null;
		$manualthumb = WTSUtils::getAttrFromDataMw( $dataMw, 'manualthumb', true );
		if ( $manualthumb !== null ) {
			$val = $manualthumb->value['txt'];
			$title = $env->makeTitleFromText( $val, $attrs['title']->getNamespace(), true );
			if ( $title === null ) {
				$errs[] = self::makeErr(
					'apierror-invalidtitle',
					'Invalid thumbnail title.',
					[ 'name' => $val ]
				);
			} else {
				$file = [ $title->getKey(), $dims ];
				$manualKey = md5( json_encode( $file ) );
				$files[$manualKey] = $file;
			}
		}

		$validContainers[] = [
			'container' => $container,
			'attrs' => $attrs,
			// Pass the anchor because we did some work to find it above
			'anchor' => $anchor,
			'infoKey' => $infoKey,
			'manualKey' => $manualKey,
			'errs' => $errs,
		];
	}

	if ( !$validContainers ) {
		return;
	}

	$start = microtime( true );

	$infos = $env->getDataAccess()->getFileInfo(
		$env->getPageConfig(),
		array_values( $files )
	);

	if ( $env->profiling() ) {
		$profile = $env->getCurrentProfile();
		$profile->bumpMWTime( "Media", 1000 * ( microtime( true ) - $start ), "api" );
		$profile->bumpCount( "Media" );
	}

	// Re-key the results by the request hashes computed above
	$files = array_combine(
		array_keys( $files ),
		$infos
	);

	$hasThumb = false;
	$needsTMHModules = false;

	foreach ( $validContainers as $c ) {
		$container = $c['container'];
		$anchor = $c['anchor'];
		$span = $anchor->firstChild;
		$attrs = $c['attrs'];
		$dataMw = DOMDataUtils::getDataMw( $container );
		$errs = $c['errs'];

		$hasThumb = $hasThumb || DOMUtils::hasTypeOf( $container, 'mw:File/Thumb' );

		$info = $files[$c['infoKey']];
		if ( !$info ) {
			$errs[] = self::makeErr( 'apierror-filedoesnotexist', 'This image does not exist.' );
		} elseif ( isset( $info['thumberror'] ) ) {
			$errs[] = self::makeErr( 'apierror-unknownerror', $info['thumberror'] );
		}

		// FIXME: Should we fallback to $info if there are errors with $manualinfo?
		// What does the legacy parser do?
		if ( $c['manualKey'] !== null ) {
			$manualinfo = $files[$c['manualKey']];
			if ( !$manualinfo ) {
				$errs[] = self::makeErr( 'apierror-filedoesnotexist', 'This image does not exist.' );
			} elseif ( isset( $manualinfo['thumberror'] ) ) {
				$errs[] = self::makeErr( 'apierror-unknownerror', $manualinfo['thumberror'] );
			} else {
				$info = $manualinfo;
			}
		}

		if ( $info['badFile'] ?? false ) {
			$errs[] = self::makeErr( 'apierror-badfile', 'This image is on the bad file list.' );
		}

		if ( WTUtils::hasVisibleCaption( $container ) ) {
			$captionText = null;
		} else {
			if ( WTUtils::isInlineMedia( $container ) ) {
				$caption = ContentUtils::createAndLoadDocumentFragment(
					$container->ownerDocument, $dataMw->caption ?? ''
				);
			} else {
				$caption = DOMCompat::querySelector( $container, 'figcaption' );
				// If the caption had tokens, it was placed in a DOMFragment
				// and we haven't unpacked yet
				if (
					$caption->firstChild &&
					DOMUtils::hasTypeOf( $caption->firstChild, 'mw:DOMFragment' )
				) {
					$id = DOMDataUtils::getDataParsoid( $caption->firstChild )->html;
					$caption = $env->getDOMFragment( $id );
				}
			}
			$captionText = trim( WTUtils::textContentFromCaption( $caption ) );

			// The sanitizer isn't going to do anything with a string value
			// for alt/title and since we're going to use dom element setters,
			// quote escaping should be fine.  Note that if sanitization does
			// happen here, it should also be done to $altFromCaption so that
			// string comparison matches, where necessary.
			//
			// $sanitizedArgs = Sanitizer::sanitizeTagAttrs( $env->getSiteConfig(), 'img', null, [
			// new KV( 'alt', $captionText )  // Could be a 'title' too
			// ] );
			// $captionText = $sanitizedArgs['alt']->key;
		}

		// Info relates to the thumb, not necessarily the file.
		// The distinction matters for manualthumb, in which case only
		// the "resource" copied over from the span relates to the file.

		switch ( $info['mediatype'] ?? '' ) {
			case 'AUDIO':
				$handler = 'handleAudio';
				$isImage = false;
				break;
			case 'VIDEO':
				$handler = 'handleVideo';
				$isImage = false;
				break;
			default:
				$handler = 'handleImage';
				$isImage = true;
				break;
		}

		// The alt option only leaves data-mw when it ends up on an <img>
		$alt = null;
		$keepAltInDataMw = !$isImage || $errs;
		$attr = WTSUtils::getAttrFromDataMw( $dataMw, 'alt', $keepAltInDataMw );
		if ( $attr !== null ) {
			$alt = $attr->value['txt'];
		} elseif ( $captionText ) {
			$alt = $captionText;
		}

		// Add mw:Error to the RDFa type.
		if ( $errs ) {
			self::handleErrors( $container, $span, $errs, $dataMw, $alt );
			continue;
		}

		$needsTMHModules = $needsTMHModules || !$isImage;

		$env->getMetadata()->addImage(
			$attrs['title'],
			$info['timestamp'] ?? null,
			$info['sha1'] ?? null,
		);

		$elt = self::$handler( $env, $span, $attrs, $info, $dataMw, $container, $alt );
		DOMCompat::getClassList( $elt )->add( 'mw-file-element' );

		$anchor = self::replaceAnchor(
			$env, $urlParser, $container, $anchor, $attrs, $dataMw, $isImage, $captionText,
			(int)( $attrs['dims']['page'] ?? 0 ),
			$attrs['dims']['lang'] ?? ''
		);
		$anchor->appendChild( $elt );

		// Don't leave an empty attribs list behind
		if ( isset( $dataMw->attribs ) && count( $dataMw->attribs ) === 0 ) {
			unset( $dataMw->attribs );
		}
	}

	if ( $hasThumb ) {
		$env->getMetadata()->addModules( [ 'mediawiki.page.media' ] );
	}

	if ( $needsTMHModules ) {
		$env->getMetadata()->addModuleStyles( [ 'ext.tmh.player.styles' ] );
		$env->getMetadata()->addModules( [ 'ext.tmh.player' ] );
	}
}
840 | } |