Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 422 |
|
0.00% |
0 / 14 |
CRAP | |
0.00% |
0 / 1 |
AddMediaInfo | |
0.00% |
0 / 422 |
|
0.00% |
0 / 14 |
18090 | |
0.00% |
0 / 1 |
handleSize | |
0.00% |
0 / 29 |
|
0.00% |
0 / 1 |
420 | |||
parseTimeString | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
56 | |||
parseFrag | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
56 | |||
addSources | |
0.00% |
0 / 25 |
|
0.00% |
0 / 1 |
56 | |||
addTracks | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
20 | |||
getPath | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
handleAudio | |
0.00% |
0 / 24 |
|
0.00% |
0 / 1 |
30 | |||
handleVideo | |
0.00% |
0 / 24 |
|
0.00% |
0 / 1 |
42 | |||
handleImage | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
42 | |||
makeErr | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
handleErrors | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
20 | |||
copyOverAttribute | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
replaceAnchor | |
0.00% |
0 / 44 |
|
0.00% |
0 / 1 |
210 | |||
run | |
0.00% |
0 / 189 |
|
0.00% |
0 / 1 |
2450 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Wt2Html\DOM\Processors; |
5 | |
6 | use Wikimedia\Assert\Assert; |
7 | use Wikimedia\Parsoid\Config\Env; |
8 | use Wikimedia\Parsoid\Core\ContentMetadataCollectorStringSets as CMCSS; |
9 | use Wikimedia\Parsoid\Core\Sanitizer; |
10 | use Wikimedia\Parsoid\DOM\DocumentFragment; |
11 | use Wikimedia\Parsoid\DOM\Element; |
12 | use Wikimedia\Parsoid\DOM\Node; |
13 | use Wikimedia\Parsoid\Html2Wt\WTSUtils; |
14 | use Wikimedia\Parsoid\NodeData\DataMw; |
15 | use Wikimedia\Parsoid\NodeData\DataMwError; |
16 | use Wikimedia\Parsoid\Utils\ContentUtils; |
17 | use Wikimedia\Parsoid\Utils\DOMCompat; |
18 | use Wikimedia\Parsoid\Utils\DOMDataUtils; |
19 | use Wikimedia\Parsoid\Utils\DOMUtils; |
20 | use Wikimedia\Parsoid\Utils\Title; |
21 | use Wikimedia\Parsoid\Utils\WTUtils; |
22 | use Wikimedia\Parsoid\Wikitext\Consts; |
23 | use Wikimedia\Parsoid\Wt2Html\PegTokenizer; |
24 | use Wikimedia\Parsoid\Wt2Html\Wt2HtmlDOMProcessor; |
25 | |
26 | class AddMediaInfo implements Wt2HtmlDOMProcessor { |
/**
 * Work out the height and width to render a media element with.
 *
 * Starts from the file's own dimensions, prefers the thumbnail's
 * dimensions when a thumbnail url is present, substitutes fixed defaults
 * for audio, and falls back to the file's dimensions again when the
 * user-requested size would upscale a file in the "Thumb" or "Frameless"
 * format that doesn't have to be rendered.
 *
 * @param Env $env
 * @param array $attrs
 * @param array $info
 * @phan-param array{dims:array{height?:?int,width?:?int},format:string} $attrs
 * @return array An array with 'height' and 'width' keys
 */
private static function handleSize( Env $env, array $attrs, array $info ): array {
	$height = $info['height'];
	$width = $info['width'];

	// NAN never compares equal to anything, itself included, so a
	// `!== NAN` comparison can't detect it; is_nan() is required.
	Assert::invariant(
		is_numeric( $height ) && !is_nan( (float)$height ),
		'Expected $height as a valid number'
	);
	Assert::invariant(
		is_numeric( $width ) && !is_nan( (float)$width ),
		'Expected $width as a valid number'
	);

	if ( !empty( $info['thumburl'] ) && !empty( $info['thumbheight'] ) ) {
		$height = $info['thumbheight'];
	}

	if ( !empty( $info['thumburl'] ) && !empty( $info['thumbwidth'] ) ) {
		$width = $info['thumbwidth'];
	}

	// Audio files don't have dimensions, so we fallback to these arbitrary
	// defaults, and the "mw-default-audio-height" class is added.
	if ( $info['mediatype'] === 'AUDIO' ) {
		// FIXME: TMH uses 23 but VE wants 32
		$height = /* height || */32; // Arguably, audio should respect a defined height
		$width = max( 35, $width ?: $env->getSiteConfig()->widthOption() );
	}

	// Handle client-side upscaling (including 'border')

	// Unless told otherwise, everything but bitmaps must be rendered
	$mustRender = $info['mustRender'] ?? ( $info['mediatype'] !== 'BITMAP' );

	// Calculate the scaling ratio from the user-specified width and height,
	// keeping the smaller of the two ratios when both are available
	$ratio = null;
	if ( !empty( $attrs['dims']['height'] ) && !empty( $info['height'] ) ) {
		$ratio = $attrs['dims']['height'] / $info['height'];
	}
	if ( !empty( $attrs['dims']['width'] ) && !empty( $info['width'] ) ) {
		$r = $attrs['dims']['width'] / $info['width'];
		$ratio = ( $ratio === null || $r < $ratio ) ? $r : $ratio;
	}

	// If the user requested upscaling, then this is denied in the thumbnail
	// and frameless format, except for files with mustRender.
	if (
		$ratio !== null && $ratio > 1 && !$mustRender &&
		( $attrs['format'] === 'Thumb' || $attrs['format'] === 'Frameless' )
	) {
		// Upscaling denied
		$height = $info['height'];
		$width = $info['width'];
	}

	return [ 'height' => $height, 'width' => $width ];
}
92 | |
/**
 * Convert a colon-separated time string (up to three numeric segments,
 * the last one being seconds) into a number of seconds.
 *
 * This is a port of TMH's parseTimeString()
 *
 * @param string $timeString
 * @param int|float|null $length When given, a time greater than it is
 *   replaced by $length - 1
 * @return int|float|null Null if there are more than three segments or
 *   any segment isn't numeric; 0 for negative times
 */
private static function parseTimeString(
	string $timeString, $length = null
) {
	$segments = explode( ':', $timeString );
	$numSegments = count( $segments );
	if ( $numSegments > 3 ) {
		return null;
	}

	$seconds = 0;
	foreach ( $segments as $idx => $segment ) {
		if ( !is_numeric( $segment ) ) {
			return null;
		}
		// Each segment to the left is worth 60 times more
		$seconds += floatval( $segment ) * pow( 60, $numSegments - 1 - $idx );
	}

	if ( $seconds < 0 ) {
		return 0;
	}
	if ( $length !== null && $seconds > $length ) {
		return $length - 1;
	}
	return $seconds;
}
124 | |
/**
 * Build the temporal media fragment ("#t=start,end") from the
 * "starttime" and "endtime" attributes kept in data-mw.
 * https://www.w3.org/TR/media-frags/
 *
 * Either bound may be omitted.  If neither attribute is present, or
 * neither of them parses to a time, an empty string is returned so that
 * a dangling "#t=" never gets appended to a source url.
 *
 * @param array $info File info; 'duration', when set, is passed to
 *   parseTimeString() as the length
 * @param DataMw $dataMw
 * @return string
 */
private static function parseFrag( array $info, DataMw $dataMw ): string {
	$duration = $info['duration'] ?? null;
	$starttime = WTSUtils::getAttrFromDataMw( $dataMw, 'starttime', true );
	$endtime = WTSUtils::getAttrFromDataMw( $dataMw, 'endtime', true );

	$range = '';
	if ( $starttime ) {
		$time = self::parseTimeString( $starttime->value['txt'], $duration );
		if ( $time !== null ) {
			$range .= $time;
		}
	}
	if ( $endtime ) {
		$time = self::parseTimeString( $endtime->value['txt'], $duration );
		if ( $time !== null ) {
			$range .= ',' . $time;
		}
	}

	// Only emit the "#t=" prefix when there's something to follow it
	return $range === '' ? '' : '#t=' . $range;
}
154 | |
/**
 * Append a <source> child to $elt for each derivative of the file.
 *
 * Derivatives are read from $info['thumbdata']['derivatives'], else from
 * $info['derivatives'], else a single source describing the original
 * file is synthesized from $info.
 *
 * @param Element $elt
 * @param array $info
 * @param DataMw $dataMw
 * @param bool $hasDimension Whether to add the width / height data attributes
 */
private static function addSources(
	Element $elt, array $info, DataMw $dataMw, bool $hasDimension
): void {
	$ownerDoc = $elt->ownerDocument;
	$timeFragment = self::parseFrag( $info, $dataMw );

	$thumbDerivatives = $info['thumbdata']['derivatives'] ?? null;
	$infoDerivatives = $info['derivatives'] ?? null;
	if ( is_array( $thumbDerivatives ) ) {
		// BatchAPI's `getAPIData`
		$sources = $thumbDerivatives;
	} elseif ( is_array( $infoDerivatives ) ) {
		// "videoinfo" prop
		$sources = $infoDerivatives;
	} else {
		$original = [
			'src' => $info['url'],
			'type' => $info['mime'],
			'width' => (string)$info['width'],
			'height' => (string)$info['height'],
		];
		$sources = [ $original ];
	}

	foreach ( $sources as $derivative ) {
		// Sources without a transcode key get "data-file-*" attributes,
		// transcodes get plain "data-*" ones
		$isTranscode = isset( $derivative['transcodekey'] );
		$dataPrefix = $isTranscode ? 'data' : 'data-file';

		$sourceElt = $ownerDoc->createElement( 'source' );
		$sourceElt->setAttribute( 'src', $derivative['src'] . $timeFragment );
		$sourceElt->setAttribute( 'type', $derivative['type'] ); // T339375
		if ( $hasDimension ) {
			$sourceElt->setAttribute( $dataPrefix . '-width', (string)$derivative['width'] );
			$sourceElt->setAttribute( $dataPrefix . '-height', (string)$derivative['height'] );
		}
		if ( $isTranscode ) {
			$sourceElt->setAttribute( 'data-transcodekey', $derivative['transcodekey'] );
		}
		$elt->appendChild( $sourceElt );
	}
}
193 | |
/**
 * Append a <track> child to $elt for each timed text entry of the file.
 *
 * Entries are read from $info['thumbdata']['timedtext'], else from
 * $info['timedtext'].  Missing entry fields become empty attributes.
 *
 * @param Element $elt
 * @param array $info
 */
private static function addTracks( Element $elt, array $info ): void {
	$ownerDoc = $elt->ownerDocument;

	$thumbTimedText = $info['thumbdata']['timedtext'] ?? null;
	$infoTimedText = $info['timedtext'] ?? null;
	if ( is_array( $thumbTimedText ) ) {
		// BatchAPI's `getAPIData`
		$entries = $thumbTimedText;
	} elseif ( is_array( $infoTimedText ) ) {
		// "videoinfo" prop
		$entries = $infoTimedText;
	} else {
		$entries = [];
	}

	// Attribute name on the <track> => key in the timed text entry
	$attributeKeys = [
		'kind' => 'kind',
		'type' => 'type',
		'src' => 'src',
		'srclang' => 'srclang',
		'label' => 'label',
		'data-mwtitle' => 'title',
		'data-dir' => 'dir',
	];

	foreach ( $entries as $entry ) {
		$trackElt = $ownerDoc->createElement( 'track' );
		foreach ( $attributeKeys as $attribute => $key ) {
			$trackElt->setAttribute( $attribute, $entry[$key] ?? '' );
		}
		$elt->appendChild( $trackElt );
	}
}
217 | |
/**
 * Pick the url to use for a file from its info: the thumbnail url when
 * there is one, else the file's own url, else the empty string.
 *
 * @param array $info
 * @return string
 */
private static function getPath( array $info ) {
	foreach ( [ 'thumburl', 'url' ] as $key ) {
		if ( !empty( $info[$key] ) ) {
			return $info[$key];
		}
	}
	return '';
}
233 | |
/**
 * Build the <audio> element for a file, with its sources and tracks.
 *
 * The "muted" and "loop" attributes are taken out of data-mw and turned
 * into attributes of the element.
 *
 * @param Env $env
 * @param Element $span
 * @param array $attrs
 * @param array $info
 * @param DataMw $dataMw
 * @param Element $container
 * @param string|null $alt Unused, but matches the signature of handlers
 * @return Element
 */
private static function handleAudio(
	Env $env, Element $span, array $attrs, array $info, DataMw $dataMw,
	Element $container, ?string $alt
): Element {
	$audio = $span->ownerDocument->createElement( 'audio' );

	$audio->setAttribute( 'controls', '' );
	$audio->setAttribute( 'preload', 'none' );

	foreach ( [ 'muted', 'loop' ] as $flag ) {
		if ( WTSUtils::getAttrFromDataMw( $dataMw, $flag, false ) ) {
			$audio->setAttribute( $flag, '' );
		}
	}

	// HACK(T295514): Until T313875 is implemented
	$audio->setAttribute( 'data-mw-tmh', '' );

	[ 'height' => $height, 'width' => $width ] = self::handleSize( $env, $attrs, $info );
	DOMDataUtils::addNormalizedAttribute( $audio, 'height', (string)$height, null, true );
	DOMDataUtils::addNormalizedAttribute( $audio, 'width', (string)$width, null, true );
	$audio->setAttribute( 'style', 'width: ' . $width . 'px;' );

	// Hardcoded until defined heights are respected.
	// See `AddMediaInfo::handleSize`
	DOMCompat::getClassList( $container )->add( 'mw-default-audio-height' );

	self::copyOverAttribute( $audio, $span, 'resource' );
	if ( $span->hasAttribute( 'lang' ) ) {
		self::copyOverAttribute( $audio, $span, 'lang' );
	}

	$duration = $info['duration'] ?? null;
	if ( $duration ) {
		// Rounded up to whole seconds
		$audio->setAttribute( 'data-durationhint', (string)ceil( (float)$duration ) );
	}

	self::addSources( $audio, $info, $dataMw, false );
	self::addTracks( $audio, $info );

	return $audio;
}
290 | |
/**
 * Build the <video> element for a file, with its sources and tracks.
 *
 * The thumbnail, when there is one, is used as the poster.  The "muted"
 * and "loop" attributes are taken out of data-mw and turned into
 * attributes of the element.
 *
 * @param Env $env
 * @param Element $span
 * @param array $attrs
 * @param array $info
 * @param DataMw $dataMw
 * @param Element $container
 * @param string|null $alt Unused, but matches the signature of handlers
 * @return Element
 */
private static function handleVideo(
	Env $env, Element $span, array $attrs, array $info, DataMw $dataMw,
	Element $container, ?string $alt
): Element {
	$video = $span->ownerDocument->createElement( 'video' );

	if ( !empty( $info['thumburl'] ) ) {
		$video->setAttribute( 'poster', self::getPath( $info ) );
	}

	$video->setAttribute( 'controls', '' );
	$video->setAttribute( 'preload', 'none' );

	foreach ( [ 'muted', 'loop' ] as $flag ) {
		if ( WTSUtils::getAttrFromDataMw( $dataMw, $flag, false ) ) {
			$video->setAttribute( $flag, '' );
		}
	}

	// HACK(T295514): Until T313875 is implemented
	$video->setAttribute( 'data-mw-tmh', '' );

	[ 'height' => $height, 'width' => $width ] = self::handleSize( $env, $attrs, $info );
	DOMDataUtils::addNormalizedAttribute( $video, 'height', (string)$height, null, true );
	DOMDataUtils::addNormalizedAttribute( $video, 'width', (string)$width, null, true );

	self::copyOverAttribute( $video, $span, 'resource' );
	if ( $span->hasAttribute( 'lang' ) ) {
		self::copyOverAttribute( $video, $span, 'lang' );
	}

	$duration = $info['duration'] ?? null;
	if ( $duration ) {
		// Rounded up to whole seconds
		$video->setAttribute( 'data-durationhint', (string)ceil( (float)$duration ) );
	}

	self::addSources( $video, $info, $dataMw, true );
	self::addTracks( $video, $info );

	return $video;
}
346 | |
/**
 * Build the <img> element for a file: alt text, source, original file
 * metadata, rendered size and, when available, the srcset.
 *
 * @param Env $env
 * @param Element $span
 * @param array $attrs
 * @param array $info
 * @param DataMw $dataMw
 * @param Element $container
 * @param string|null $alt No alt attribute is set when null
 * @return Element
 */
private static function handleImage(
	Env $env, Element $span, array $attrs, array $info, DataMw $dataMw,
	Element $container, ?string $alt
): Element {
	$img = $span->ownerDocument->createElement( 'img' );

	if ( $alt !== null ) {
		$img->setAttribute( 'alt', $alt );
	}

	self::copyOverAttribute( $img, $span, 'resource' );

	$img->setAttribute( 'src', self::getPath( $info ) );
	$img->setAttribute( 'decoding', 'async' );

	if ( $span->hasAttribute( 'lang' ) ) {
		self::copyOverAttribute( $img, $span, 'lang' );
	}

	// Add (read-only) information about original file size (T64881)
	$fileData = [
		'data-file-width' => (string)$info['width'],
		'data-file-height' => (string)$info['height'],
		'data-file-type' => strtolower( $info['mediatype'] ?? '' ),
	];
	foreach ( $fileData as $attribute => $value ) {
		$img->setAttribute( $attribute, $value );
	}

	[ 'height' => $height, 'width' => $width ] = self::handleSize( $env, $attrs, $info );
	DOMDataUtils::addNormalizedAttribute( $img, 'height', (string)$height, null, true );
	DOMDataUtils::addNormalizedAttribute( $img, 'width', (string)$width, null, true );

	// Handle "responsive" images, i.e. srcset
	if ( !empty( $info['responsiveUrls'] ) ) {
		$srcset = [];
		foreach ( $info['responsiveUrls'] as $pixelDensity => $responsiveUrl ) {
			$srcset[] = $responsiveUrl . ' ' . $pixelDensity . 'x';
		}
		if ( $srcset ) {
			$img->setAttribute( 'srcset', implode( ', ', $srcset ) );
		}
	}

	return $img;
}
401 | |
/**
 * Shorthand for constructing a DataMwError.
 *
 * @param string $key
 * @param string $message
 * @param ?array $params Defaults to no params
 * @return DataMwError
 */
private static function makeErr(
	string $key, string $message, ?array $params = null
): DataMwError {
	$errParams = $params ?? [];
	return new DataMwError( $key, $errParams, $message );
}
407 | |
/**
 * Mark the container as an error, record the errors in data-mw after
 * any that are already there and, when there's alt text, make it the
 * content of the span.
 *
 * @param Element $container
 * @param Element $span
 * @param list<DataMwError> $errs
 * @param DataMw $dataMw
 * @param ?string $alt
 */
private static function handleErrors(
	Element $container, Element $span, array $errs, DataMw $dataMw,
	?string $alt
): void {
	$alreadyMarked = DOMUtils::hasTypeOf( $container, 'mw:Error' );
	if ( !$alreadyMarked ) {
		DOMUtils::addTypeOf( $container, 'mw:Error', true );
	}

	$dataMw->errors = is_array( $dataMw->errors ?? null )
		? array_merge( $dataMw->errors, $errs )
		: $errs;

	if ( $alt === null ) {
		return;
	}
	$altText = $span->ownerDocument->createTextNode( $alt );
	DOMCompat::replaceChildren( $span, $altText );
}
430 | |
/**
 * Copy an attribute from the span to the new element, passing along the
 * value from the span's shadow info for that attribute.
 *
 * @param Element $elt
 * @param Element $span
 * @param string $attribute
 */
private static function copyOverAttribute(
	Element $elt, Element $span, string $attribute
): void {
	$current = DOMCompat::getAttribute( $span, $attribute );
	$shadowInfo = WTSUtils::getAttributeShadowInfo( $span, $attribute );
	DOMDataUtils::addNormalizedAttribute( $elt, $attribute, $current, $shadowInfo['value'] );
}
441 | |
/**
 * Replace the placeholder anchor with the element that will wrap the
 * media element.
 *
 * Non-images get a <span>.  Images get an <a>, whose target depends on
 * the "link" attribute in data-mw:
 * - absent: the file description page
 * - empty: no link at all, so a <span> is used instead
 * - an external url: that url, with the external link attributes
 * - a valid title: that page
 * - anything else: the file description page, and "link" is kept in
 *   data-mw for roundtripping
 *
 * @param Env $env
 * @param PegTokenizer $urlParser
 * @param Element $container
 * @param Element $oldAnchor
 * @param array $attrs
 * @param DataMw $dataMw
 * @param bool $isImage
 * @param ?string $captionText Used as the title attribute when non-empty
 * @param int $page
 * @param string $lang
 * @return Element The new anchor, already inserted in place of the old one
 */
private static function replaceAnchor(
	Env $env, PegTokenizer $urlParser, Element $container,
	Element $oldAnchor, array $attrs, DataMw $dataMw, bool $isImage,
	?string $captionText, int $page, string $lang
): Element {
	$doc = $oldAnchor->ownerDocument;
	$linkAttr = WTSUtils::getAttrFromDataMw( $dataMw, 'link', true );

	if ( !$isImage ) {
		$anchor = $doc->createElement( 'span' );
	} else {
		$anchor = $doc->createElement( 'a' );
		$addDescriptionLink = static function ( Title $title ) use ( $env, $anchor, $page, $lang ) {
			$query = [];
			if ( $page > 0 ) {
				$query['page'] = $page;
			}
			if ( $lang ) {
				$query['lang'] = $lang;
			}
			$href = $env->makeLink( $title );
			if ( $query ) {
				$href .= '?' . http_build_query( $query );
			}
			$anchor->setAttribute( 'href', $href );
			$anchor->setAttribute( 'class', 'mw-file-description' );
		};

		if ( $linkAttr === null ) {
			$addDescriptionLink( $attrs['title'] );
		} else {
			$target = $linkAttr->value['txt'];
			$dropLinkAttr = true;
			if ( $target === '' ) {
				// No href if link= was specified
				$anchor = $doc->createElement( 'span' );
			} elseif ( $urlParser->tokenizeURL( $target ) !== false ) {
				// An external link!
				$href = Sanitizer::cleanUrl( $env->getSiteConfig(), $target, 'external' );
				$anchor->setAttribute( 'href', $href );
				// Similar to AddLinkAttributes
				foreach ( $env->getExternalLinkAttribs( $href ) as $name => $value ) {
					if ( $name !== 'rel' ) {
						$anchor->setAttribute( $name, $value );
						continue;
					}
					foreach ( $value as $rel ) {
						DOMUtils::addRel( $anchor, $rel );
					}
				}
			} else {
				$linkTitle = $env->makeTitleFromText( $target, null, true );
				if ( $linkTitle === null ) {
					// Treat same as if link weren't present
					$addDescriptionLink( $attrs['title'] );
					// but preserve for roundtripping
					$dropLinkAttr = false;
				} else {
					$anchor->setAttribute( 'href', $env->makeLink( $linkTitle ) );
					$anchor->setAttribute( 'title', $linkTitle->getPrefixedText() );
				}
			}
			if ( $dropLinkAttr ) {
				WTSUtils::getAttrFromDataMw( $dataMw, 'link', /* keep */false );
			}
		}
	}

	if ( $captionText ) {
		$anchor->setAttribute( 'title', $captionText );
	}

	$oldAnchor->parentNode->replaceChild( $anchor, $oldAnchor );
	return $anchor;
}
517 | |
/**
 * Fill in the media found under $root.
 *
 * Works in two passes: the first validates the structure of each
 * "mw:File" container and collects the files to look up, so that the
 * info for all of them can be requested in one call; the second builds
 * the media element for each container from that info, or flags the
 * container as an error.  Containers with an unexpected structure are
 * logged and skipped.
 *
 * @inheritDoc
 */
public function run(
	Env $env, Node $root, array $options = [], bool $atTopLevel = false
): void {
	'@phan-var Element|DocumentFragment $root'; // @var Element|DocumentFragment $root
	$urlParser = new PegTokenizer( $env );

	$validContainers = [];
	$files = [];

	$containers = DOMCompat::querySelectorAll( $root, '[typeof*="mw:File"]' );

	foreach ( $containers as $container ) {
		// DOMFragmentWrappers assume the element name of their outermost
		// content so, depending how the above query is written, we're
		// protecting against getting a figure of the wrong type.  However,
		// since we're currently using typeof, it shouldn't be a problem.
		// Also note that info for the media nested in the fragment has
		// already been added in their respective pipeline.
		Assert::invariant(
			!WTUtils::isDOMFragmentWrapper( $container ),
			'Media info for fragment was already added'
		);

		// We expect this structure to be predictable based on how it's
		// emitted in the TT/WikiLinkHandler but treebuilding may have
		// messed that up for us.
		$anchor = $container;
		$reopenedAFE = [];
		do {
			// An active formatting element may have been reopened inside
			// the wrapper if a content model violation was encountered
			// during treebuilding.  Try to be a little lenient about that
			// instead of bailing out
			$anchor = $anchor->firstChild;
			if ( $anchor === null ) {
				// Ran out of children without finding the anchor.
				// Report it as an unexpected structure below rather
				// than asking for the name of a missing node.
				$anchorNodeName = '';
				break;
			}
			$anchorNodeName = DOMCompat::nodeName( $anchor );
			if ( $anchorNodeName !== 'a' ) {
				$reopenedAFE[] = $anchor;
			}
		} while (
			$anchorNodeName !== 'a' &&
			isset( Consts::$HTML['FormattingTags'][$anchorNodeName] )
		);
		if ( $anchorNodeName !== 'a' ) {
			$env->log( 'error', 'Unexpected structure when adding media info.' );
			continue;
		}
		$span = $anchor->firstChild;
		if ( !( $span instanceof Element && DOMCompat::nodeName( $span ) === 'span' ) ) {
			$env->log( 'error', 'Unexpected structure when adding media info.' );
			continue;
		}
		$caption = $anchor->nextSibling;
		$isInlineMedia = WTUtils::isInlineMedia( $container );
		// The anchor may not have a next sibling at all, hence the null check
		if (
			!$isInlineMedia &&
			!( $caption !== null && DOMCompat::nodeName( $caption ) === 'figcaption' )
		) {
			$env->log( 'error', 'Unexpected structure when adding media info.' );
			continue;
		}

		// For T314059.  Migrate any active formatting tags we found open
		// inside the container to the figcaption to conform to the spec.
		// This should simplify selectors for clients and styling.
		// TODO: Consider exposing these as lints
		if ( $reopenedAFE ) {
			$firstAFE = $reopenedAFE[0];
			$lastAFE = $reopenedAFE[count( $reopenedAFE ) - 1];
			DOMUtils::migrateChildren( $lastAFE, $container );
			if ( $isInlineMedia ) {
				// Remove the formatting elements, they are of no use
				// We could migrate them into the caption in data-mw,
				// but that doesn't seem worthwhile
				$firstAFE->parentNode->removeChild( $firstAFE );
			} else {
				// Move the formatting elements into the figcaption
				DOMUtils::migrateChildren( $caption, $lastAFE );
				$caption->appendChild( $firstAFE );
				// Unconditionally clear tsr out of an abundance of caution
				// These tags should already be annotated as autoinserted anyways
				foreach ( $reopenedAFE as $afe ) {
					DOMDataUtils::getDataParsoid( $afe )->tsr = null;
				}
			}
		}

		$dataMw = DOMDataUtils::getDataMw( $container );

		// A missing or zero dimension is recorded as null
		$dims = [
			'width' => (int)DOMCompat::getAttribute( $span, 'data-width' ) ?: null,
			'height' => (int)DOMCompat::getAttribute( $span, 'data-height' ) ?: null,
		];

		$page = WTSUtils::getAttrFromDataMw( $dataMw, 'page', true );
		if ( $page ) {
			$dims['page'] = $page->value['txt'];
		}

		$lang = DOMCompat::getAttribute( $span, 'lang' );
		if ( $lang !== null ) {
			$dims['lang'] = $lang;
		}

		// "starttime" should be used if "thumbtime" isn't present,
		// but only for rendering.
		$thumbtime = WTSUtils::getAttrFromDataMw( $dataMw, 'thumbtime', true );
		$starttime = WTSUtils::getAttrFromDataMw( $dataMw, 'starttime', true );
		if ( $thumbtime || $starttime ) {
			$seek = isset( $thumbtime->value )
				? $thumbtime->value['txt']
				: ( isset( $starttime->value ) ? $starttime->value['txt'] : '' );
			$seek = self::parseTimeString( $seek );
			if ( $seek !== null ) {
				$dims['seek'] = $seek;
			}
		}

		$attrs = [
			'dims' => $dims,
			'format' => WTUtils::getMediaFormat( $container ),
			'title' => $env->makeTitleFromText( $span->textContent ),
		];

		// Identical requests (same file, same dims) share a key, so
		// they're only looked up once
		$file = [ $attrs['title']->getDBKey(), $dims ];
		$infoKey = md5( json_encode( $file ) );
		$files[$infoKey] = $file;
		$errs = [];

		$manualKey = null;
		$manualthumb = WTSUtils::getAttrFromDataMw( $dataMw, 'manualthumb', true );
		if ( $manualthumb !== null ) {
			$val = $manualthumb->value['txt'];
			$title = $env->makeTitleFromText( $val, $attrs['title']->getNamespace(), true );
			if ( $title === null ) {
				$errs[] = self::makeErr(
					'apierror-invalidtitle',
					'Invalid thumbnail title.',
					[ 'name' => $val ]
				);
			} else {
				$file = [ $title->getDBkey(), $dims ];
				$manualKey = md5( json_encode( $file ) );
				$files[$manualKey] = $file;
			}
		}

		$validContainers[] = [
			'container' => $container,
			'attrs' => $attrs,
			// Pass the anchor because we did some work to find it above
			'anchor' => $anchor,
			'infoKey' => $infoKey,
			'manualKey' => $manualKey,
			'errs' => $errs,
		];
	}

	if ( !$validContainers ) {
		return;
	}

	$start = microtime( true );

	$infos = $env->getDataAccess()->getFileInfo(
		$env->getPageConfig(),
		array_values( $files )
	);

	if ( $env->profiling() ) {
		$profile = $env->getCurrentProfile();
		$profile->bumpMWTime( "Media", 1000 * ( microtime( true ) - $start ), "api" );
		$profile->bumpCount( "Media" );
	}

	// Map each key to the info returned for the file at the same position
	$files = array_combine(
		array_keys( $files ),
		$infos
	);

	$hasThumb = false;
	$needsTMHModules = false;

	foreach ( $validContainers as $c ) {
		$container = $c['container'];
		$anchor = $c['anchor'];
		$span = $anchor->firstChild;
		$attrs = $c['attrs'];
		$dataMw = DOMDataUtils::getDataMw( $container );
		$errs = $c['errs'];

		$hasThumb = $hasThumb || DOMUtils::hasTypeOf( $container, 'mw:File/Thumb' );

		$info = $files[$c['infoKey']];
		if ( !$info ) {
			$env->getDataAccess()->addTrackingCategory(
				$env->getPageConfig(),
				$env->getMetadata(),
				'broken-file-category'
			);
			$errs[] = self::makeErr( 'apierror-filedoesnotexist', 'This image does not exist.' );
		} elseif ( isset( $info['thumberror'] ) ) {
			$errs[] = self::makeErr( 'apierror-unknownerror', $info['thumberror'] );
		}

		// FIXME: Should we fallback to $info if there are errors with $manualinfo?
		// What does the legacy parser do?
		if ( $c['manualKey'] !== null ) {
			$manualinfo = $files[$c['manualKey']];
			if ( !$manualinfo ) {
				$errs[] = self::makeErr( 'apierror-filedoesnotexist', 'This image does not exist.' );
			} elseif ( isset( $manualinfo['thumberror'] ) ) {
				$errs[] = self::makeErr( 'apierror-unknownerror', $manualinfo['thumberror'] );
			} else {
				$info = $manualinfo;
			}
		}

		if ( $info['badFile'] ?? false ) {
			$errs[] = self::makeErr( 'apierror-badfile', 'This image is on the bad file list.' );
		}

		if ( WTUtils::hasVisibleCaption( $container ) ) {
			$captionText = null;
		} else {
			if ( WTUtils::isInlineMedia( $container ) ) {
				$caption = ContentUtils::createAndLoadDocumentFragment(
					$container->ownerDocument, $dataMw->caption ?? ''
				);
			} else {
				$caption = DOMCompat::querySelector( $container, 'figcaption' );
				// If the caption had tokens, it was placed in a DOMFragment
				// and we haven't unpacked yet
				if (
					$caption->firstChild &&
					DOMUtils::hasTypeOf( $caption->firstChild, 'mw:DOMFragment' )
				) {
					$id = DOMDataUtils::getDataParsoid( $caption->firstChild )->html;
					$caption = $env->getDOMFragment( $id );
				}
			}
			$captionText = trim( WTUtils::textContentFromCaption( $caption ) );

			// The sanitizer isn't going to do anything with a string value
			// for alt/title and since we're going to use dom element setters,
			// quote escaping should be fine.  Note that if sanitization does
			// happen here, it should also be done to $altFromCaption so that
			// string comparison matches, where necessary.
			//
			// $sanitizedArgs = Sanitizer::sanitizeTagAttrs( $env->getSiteConfig(), 'img', null, [
			// new KV( 'alt', $captionText ) // Could be a 'title' too
			// ] );
			// $captionText = $sanitizedArgs['alt']->key;
		}

		// Info relates to the thumb, not necessarily the file.
		// The distinction matters for manualthumb, in which case only
		// the "resource" copied over from the span relates to the file.

		switch ( $info['mediatype'] ?? '' ) {
			case 'AUDIO':
				$handler = 'handleAudio';
				$isImage = false;
				break;
			case 'VIDEO':
				$handler = 'handleVideo';
				$isImage = false;
				break;
			default:
				$handler = 'handleImage';
				$isImage = true;
				break;
		}

		// "alt" only stays in data-mw when it isn't going to end up as an
		// attribute of an <img>
		$alt = null;
		$keepAltInDataMw = !$isImage || $errs;
		$attr = WTSUtils::getAttrFromDataMw( $dataMw, 'alt', $keepAltInDataMw );
		if ( $attr !== null ) {
			$alt = $attr->value['txt'];
		} elseif ( $captionText ) {
			$alt = $captionText;
		}

		// Add mw:Error to the RDFa type.
		if ( $errs ) {
			self::handleErrors( $container, $span, $errs, $dataMw, $alt );
			continue;
		}

		$needsTMHModules = $needsTMHModules || !$isImage;

		$env->getMetadata()->addImage(
			$attrs['title'],
			$info['timestamp'] ?? null,
			$info['sha1'] ?? null,
		);

		$elt = self::$handler( $env, $span, $attrs, $info, $dataMw, $container, $alt );
		DOMCompat::getClassList( $elt )->add( 'mw-file-element' );

		$anchor = self::replaceAnchor(
			$env, $urlParser, $container, $anchor, $attrs, $dataMw, $isImage, $captionText,
			(int)( $attrs['dims']['page'] ?? 0 ),
			$attrs['dims']['lang'] ?? ''
		);
		$anchor->appendChild( $elt );

		if ( isset( $dataMw->attribs ) && count( $dataMw->attribs ) === 0 ) {
			unset( $dataMw->attribs );
		}
	}

	if ( $hasThumb ) {
		$env->getMetadata()->appendOutputStrings( CMCSS::MODULE, [ 'mediawiki.page.media' ] );
	}

	if ( $needsTMHModules ) {
		$env->getMetadata()->appendOutputStrings( CMCSS::MODULE_STYLE, [ 'ext.tmh.player.styles' ] );
		$env->getMetadata()->appendOutputStrings( CMCSS::MODULE, [ 'ext.tmh.player' ] );
	}
}
840 | } |