Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 417 |
|
0.00% |
0 / 14 |
CRAP | |
0.00% |
0 / 1 |
AddMediaInfo | |
0.00% |
0 / 417 |
|
0.00% |
0 / 14 |
18360 | |
0.00% |
0 / 1 |
handleSize | |
0.00% |
0 / 29 |
|
0.00% |
0 / 1 |
420 | |||
parseTimeString | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
56 | |||
parseFrag | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
56 | |||
addSources | |
0.00% |
0 / 25 |
|
0.00% |
0 / 1 |
56 | |||
addTracks | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
20 | |||
getPath | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
handleAudio | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
30 | |||
handleVideo | |
0.00% |
0 / 23 |
|
0.00% |
0 / 1 |
42 | |||
handleImage | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
42 | |||
makeErr | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
handleErrors | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
20 | |||
copyOverAttribute | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
replaceAnchor | |
0.00% |
0 / 44 |
|
0.00% |
0 / 1 |
210 | |||
run | |
0.00% |
0 / 184 |
|
0.00% |
0 / 1 |
2450 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Wt2Html\PP\Processors; |
5 | |
6 | use Wikimedia\Assert\Assert; |
7 | use Wikimedia\Parsoid\Config\Env; |
8 | use Wikimedia\Parsoid\Core\Sanitizer; |
9 | use Wikimedia\Parsoid\DOM\DocumentFragment; |
10 | use Wikimedia\Parsoid\DOM\Element; |
11 | use Wikimedia\Parsoid\DOM\Node; |
12 | use Wikimedia\Parsoid\Html2Wt\WTSUtils; |
13 | use Wikimedia\Parsoid\NodeData\DataMw; |
14 | use Wikimedia\Parsoid\Utils\ContentUtils; |
15 | use Wikimedia\Parsoid\Utils\DOMCompat; |
16 | use Wikimedia\Parsoid\Utils\DOMDataUtils; |
17 | use Wikimedia\Parsoid\Utils\DOMUtils; |
18 | use Wikimedia\Parsoid\Utils\Title; |
19 | use Wikimedia\Parsoid\Utils\WTUtils; |
20 | use Wikimedia\Parsoid\Wikitext\Consts; |
21 | use Wikimedia\Parsoid\Wt2Html\PegTokenizer; |
22 | use Wikimedia\Parsoid\Wt2Html\Wt2HtmlDOMProcessor; |
23 | |
24 | class AddMediaInfo implements Wt2HtmlDOMProcessor { |
/**
 * Extract the dimensions for media.
 *
 * Starts from the file's intrinsic dimensions, prefers the thumbnail's
 * dimensions when a thumbnail URL is present, applies the audio defaults,
 * and finally falls back to the intrinsic dimensions again when the
 * requested size would upscale a file in a format that forbids it.
 *
 * @param Env $env
 * @param array $attrs
 * @param array $info
 * @phan-param array{dims:array{height?:?int,width?:?int},format:string} $attrs
 * @return array Map with the 'height' and 'width' to render at
 */
private static function handleSize( Env $env, array $attrs, array $info ): array {
	$height = $info['height'] ?? null;
	$width = $info['width'] ?? null;

	// NAN never compares identical to anything (not even to itself), so
	// `!== NAN` is always true; is_nan() is the only reliable check.
	Assert::invariant(
		is_numeric( $height ) && !is_nan( (float)$height ),
		'Expected $height as a valid number'
	);
	Assert::invariant(
		is_numeric( $width ) && !is_nan( (float)$width ),
		'Expected $width as a valid number'
	);

	// Prefer the thumbnail's dimensions, when there is a thumbnail
	if ( !empty( $info['thumburl'] ) ) {
		if ( !empty( $info['thumbheight'] ) ) {
			$height = $info['thumbheight'];
		}
		if ( !empty( $info['thumbwidth'] ) ) {
			$width = $info['thumbwidth'];
		}
	}

	// The media type is optional elsewhere in this class, so don't assume
	// the key is present here either.
	$mediaType = $info['mediatype'] ?? null;

	// Audio files don't have dimensions, so we fallback to these arbitrary
	// defaults, and the "mw-default-audio-height" class is added.
	if ( $mediaType === 'AUDIO' ) {
		$height = /* height || */32; // Arguably, audio should respect a defined height
		$width = $width ?: $env->getSiteConfig()->widthOption();
	}

	// Handle client-side upscaling (including 'border')

	$mustRender = $info['mustRender'] ?? $mediaType !== 'BITMAP';

	// Calculate the scaling ratio from the user-specified width and height,
	// keeping the smaller of the two when both are given.
	$ratio = null;
	if ( !empty( $attrs['dims']['height'] ) && !empty( $info['height'] ) ) {
		$ratio = $attrs['dims']['height'] / $info['height'];
	}
	if ( !empty( $attrs['dims']['width'] ) && !empty( $info['width'] ) ) {
		$r = $attrs['dims']['width'] / $info['width'];
		$ratio = ( $ratio === null || $r < $ratio ) ? $r : $ratio;
	}

	// If the user requested upscaling, then this is denied in the thumbnail
	// and frameless format, except for files with mustRender.
	if (
		$ratio !== null && $ratio > 1 && !$mustRender &&
		( $attrs['format'] === 'Thumb' || $attrs['format'] === 'Frameless' )
	) {
		// Upscaling denied
		$height = $info['height'];
		$width = $info['width'];
	}

	return [ 'height' => $height, 'width' => $width ];
}
89 | |
/**
 * This is a port of TMH's parseTimeString()
 *
 * Converts a colon separated time ("ss", "mm:ss" or "hh:mm:ss") into a
 * number of seconds. Negative results are replaced by 0 and, when a
 * length is given, results beyond it are replaced by the length minus one.
 *
 * @param string $timeString
 * @param int|float|null $length
 * @return int|float|null Null if there are more than three parts or a
 *   part isn't numeric
 */
private static function parseTimeString(
	string $timeString, $length = null
) {
	$segments = explode( ':', $timeString );
	$exponent = count( $segments ) - 1;
	if ( $exponent > 2 ) {
		return null;
	}

	$seconds = 0;
	foreach ( $segments as $segment ) {
		if ( !is_numeric( $segment ) ) {
			return null;
		}
		// Leftmost segment carries the highest power of 60
		$seconds += floatval( $segment ) * pow( 60, $exponent );
		$exponent--;
	}

	if ( $seconds < 0 ) {
		return 0;
	}
	if ( $length !== null && $seconds > $length ) {
		return $length - 1;
	}
	return $seconds;
}
121 | |
/**
 * Handle media fragments
 * https://www.w3.org/TR/media-frags/
 *
 * Builds a "#t=start,end" fragment from the "starttime" and "endtime"
 * options found in data-mw. Times that fail to parse are left out.
 *
 * @param array $info
 * @param DataMw $dataMw
 * @return string The fragment, or the empty string if neither option is set
 */
private static function parseFrag( array $info, DataMw $dataMw ): string {
	$start = WTSUtils::getAttrFromDataMw( $dataMw, 'starttime', true );
	$end = WTSUtils::getAttrFromDataMw( $dataMw, 'endtime', true );
	if ( !$start && !$end ) {
		return '';
	}

	$fragment = '#t=';
	if ( $start ) {
		$seconds = self::parseTimeString( $start[1]->txt, $info['duration'] ?? null );
		if ( $seconds !== null ) {
			$fragment .= $seconds;
		}
	}
	if ( $end ) {
		$seconds = self::parseTimeString( $end[1]->txt, $info['duration'] ?? null );
		if ( $seconds !== null ) {
			$fragment .= ',' . $seconds;
		}
	}
	return $fragment;
}
151 | |
/**
 * Append a <source> element to $elt for every derivative of the file,
 * falling back to a single source describing the original file when the
 * info doesn't list any derivatives.
 *
 * @param Element $elt
 * @param array $info
 * @param DataMw $dataMw
 * @param bool $hasDimension Whether to emit the width / height data attributes
 */
private static function addSources(
	Element $elt, array $info, DataMw $dataMw, bool $hasDimension
): void {
	$doc = $elt->ownerDocument;
	$frag = self::parseFrag( $info, $dataMw );

	// BatchAPI's `getAPIData`
	$derivatives = $info['thumbdata']['derivatives'] ?? null;
	if ( !is_array( $derivatives ) ) {
		// "videoinfo" prop
		$derivatives = $info['derivatives'] ?? null;
	}
	if ( !is_array( $derivatives ) ) {
		$derivatives = [
			[
				'src' => $info['url'],
				'type' => $info['mime'],
				'width' => (string)$info['width'],
				'height' => (string)$info['height'],
			],
		];
	}

	foreach ( $derivatives as $derivative ) {
		$isTranscode = isset( $derivative['transcodekey'] );
		// Dimensions of the original file are exposed as data-file-*
		$dataPrefix = $isTranscode ? 'data' : 'data-file';

		$source = $doc->createElement( 'source' );
		$source->setAttribute( 'src', $derivative['src'] . $frag );
		$source->setAttribute( 'type', $derivative['type'] ); // T339375
		if ( $hasDimension ) {
			$source->setAttribute( $dataPrefix . '-width', (string)$derivative['width'] );
			$source->setAttribute( $dataPrefix . '-height', (string)$derivative['height'] );
		}
		if ( $isTranscode ) {
			$source->setAttribute( 'data-transcodekey', $derivative['transcodekey'] );
		}
		$elt->appendChild( $source );
	}
}
190 | |
/**
 * Append a <track> element to $elt for every timed text entry in the info.
 *
 * @param Element $elt
 * @param array $info
 */
private static function addTracks( Element $elt, array $info ): void {
	$doc = $elt->ownerDocument;

	// BatchAPI's `getAPIData`
	$timedtext = $info['thumbdata']['timedtext'] ?? null;
	if ( !is_array( $timedtext ) ) {
		// "videoinfo" prop
		$timedtext = $info['timedtext'] ?? null;
	}
	if ( !is_array( $timedtext ) ) {
		$timedtext = [];
	}

	// Attribute name on the <track> => key in the timed text entry
	$attributeMap = [
		'kind' => 'kind',
		'type' => 'type',
		'src' => 'src',
		'srclang' => 'srclang',
		'label' => 'label',
		'data-mwtitle' => 'title',
		'data-dir' => 'dir',
	];

	foreach ( $timedtext as $entry ) {
		$track = $doc->createElement( 'track' );
		foreach ( $attributeMap as $attrName => $entryKey ) {
			$track->setAttribute( $attrName, $entry[$entryKey] ?? '' );
		}
		$elt->appendChild( $track );
	}
}
214 | |
/**
 * Abstract way to get the path for an image given an info object.
 *
 * The thumbnail URL wins over the file URL; the empty string is returned
 * when neither is set.
 *
 * @param array $info
 * @return string
 */
private static function getPath( array $info ) {
	foreach ( [ 'thumburl', 'url' ] as $key ) {
		if ( !empty( $info[$key] ) ) {
			return $info[$key];
		}
	}
	return '';
}
230 | |
/**
 * Set up the <audio> element, along with its sources and tracks.
 *
 * @param Env $env
 * @param Element $span
 * @param array $attrs
 * @param array $info
 * @param DataMw $dataMw
 * @param Element $container
 * @param string|null $alt Unused, but matches the signature of handlers
 * @return Element
 */
private static function handleAudio(
	Env $env, Element $span, array $attrs, array $info, DataMw $dataMw,
	Element $container, ?string $alt
): Element {
	$audio = $span->ownerDocument->createElement( 'audio' );

	$audio->setAttribute( 'controls', '' );
	$audio->setAttribute( 'preload', 'none' );

	// Boolean options are taken out of data-mw and become attributes
	foreach ( [ 'muted', 'loop' ] as $flag ) {
		if ( WTSUtils::getAttrFromDataMw( $dataMw, $flag, false ) ) {
			$audio->setAttribute( $flag, '' );
		}
	}

	[ 'height' => $height, 'width' => $width ] = self::handleSize( $env, $attrs, $info );
	DOMDataUtils::addNormalizedAttribute( $audio, 'height', (string)$height, null, true );
	DOMDataUtils::addNormalizedAttribute( $audio, 'width', (string)$width, null, true );

	// Hardcoded until defined heights are respected.
	// See `AddMediaInfo.handleSize`
	DOMCompat::getClassList( $container )->add( 'mw-default-audio-height' );

	self::copyOverAttribute( $audio, $span, 'resource' );

	if ( $span->hasAttribute( 'lang' ) ) {
		self::copyOverAttribute( $audio, $span, 'lang' );
	}

	$duration = $info['duration'] ?? null;
	if ( $duration ) {
		$audio->setAttribute( 'data-durationhint', (string)ceil( (float)$duration ) );
	}

	self::addSources( $audio, $info, $dataMw, false );
	self::addTracks( $audio, $info );

	return $audio;
}
283 | |
/**
 * Set up the <video> element, along with its sources and tracks.
 *
 * @param Env $env
 * @param Element $span
 * @param array $attrs
 * @param array $info
 * @param DataMw $dataMw
 * @param Element $container
 * @param string|null $alt Unused, but matches the signature of handlers
 * @return Element
 */
private static function handleVideo(
	Env $env, Element $span, array $attrs, array $info, DataMw $dataMw,
	Element $container, ?string $alt
): Element {
	$video = $span->ownerDocument->createElement( 'video' );

	// Only a thumbnail is used as the poster, never the file itself
	if ( !empty( $info['thumburl'] ) ) {
		$video->setAttribute( 'poster', self::getPath( $info ) );
	}

	$video->setAttribute( 'controls', '' );
	$video->setAttribute( 'preload', 'none' );

	// Boolean options are taken out of data-mw and become attributes
	foreach ( [ 'muted', 'loop' ] as $flag ) {
		if ( WTSUtils::getAttrFromDataMw( $dataMw, $flag, false ) ) {
			$video->setAttribute( $flag, '' );
		}
	}

	[ 'height' => $height, 'width' => $width ] = self::handleSize( $env, $attrs, $info );
	DOMDataUtils::addNormalizedAttribute( $video, 'height', (string)$height, null, true );
	DOMDataUtils::addNormalizedAttribute( $video, 'width', (string)$width, null, true );

	self::copyOverAttribute( $video, $span, 'resource' );

	if ( $span->hasAttribute( 'lang' ) ) {
		self::copyOverAttribute( $video, $span, 'lang' );
	}

	$duration = $info['duration'] ?? null;
	if ( $duration ) {
		$video->setAttribute( 'data-durationhint', (string)ceil( (float)$duration ) );
	}

	self::addSources( $video, $info, $dataMw, true );
	self::addTracks( $video, $info );

	return $video;
}
336 | |
/**
 * Set up the actual image structure, attributes, etc.
 *
 * @param Env $env
 * @param Element $span
 * @param array $attrs
 * @param array $info
 * @param DataMw $dataMw
 * @param Element $container
 * @param string|null $alt Alternative text; no alt attribute is set when null
 * @return Element
 */
private static function handleImage(
	Env $env, Element $span, array $attrs, array $info, DataMw $dataMw,
	Element $container, ?string $alt
): Element {
	$img = $span->ownerDocument->createElement( 'img' );

	if ( $alt !== null ) {
		$img->setAttribute( 'alt', $alt );
	}

	self::copyOverAttribute( $img, $span, 'resource' );

	$img->setAttribute( 'src', self::getPath( $info ) );
	$img->setAttribute( 'decoding', 'async' );

	if ( $span->hasAttribute( 'lang' ) ) {
		self::copyOverAttribute( $img, $span, 'lang' );
	}

	// Add (read-only) information about original file size (T64881)
	$fileData = [
		'data-file-width' => (string)$info['width'],
		'data-file-height' => (string)$info['height'],
		'data-file-type' => strtolower( $info['mediatype'] ?? '' ),
	];
	foreach ( $fileData as $name => $value ) {
		$img->setAttribute( $name, $value );
	}

	[ 'height' => $height, 'width' => $width ] = self::handleSize( $env, $attrs, $info );
	DOMDataUtils::addNormalizedAttribute( $img, 'height', (string)$height, null, true );
	DOMDataUtils::addNormalizedAttribute( $img, 'width', (string)$width, null, true );

	// Handle "responsive" images, i.e. srcset
	if ( !empty( $info['responsiveUrls'] ) ) {
		$srcset = [];
		foreach ( $info['responsiveUrls'] as $density => $url ) {
			$srcset[] = $url . ' ' . $density . 'x';
		}
		if ( $srcset ) {
			$img->setAttribute( 'srcset', implode( ', ', $srcset ) );
		}
	}

	return $img;
}
391 | |
/**
 * Build an error entry in the shape stored in data-mw's errors.
 *
 * @param string $key
 * @param string $message
 * @param array|null $params Additional error info for clients that could
 *   fix the error; left out of the entry when null
 * @return array
 */
private static function makeErr(
	string $key, string $message, ?array $params = null
): array {
	$err = [ 'key' => $key, 'message' => $message ];
	if ( $params === null ) {
		return $err;
	}
	$err['params'] = $params;
	return $err;
}
402 | |
/**
 * Flag the container as mw:Error, record the errors in data-mw (after any
 * errors already there) and, when there's alternative text, show it as
 * the content of the span.
 *
 * @param Element $container
 * @param Element $span
 * @param array $errs
 * @param DataMw $dataMw
 * @param string|null $alt
 */
private static function handleErrors(
	Element $container, Element $span, array $errs, DataMw $dataMw,
	?string $alt
): void {
	if ( !DOMUtils::hasTypeOf( $container, 'mw:Error' ) ) {
		DOMUtils::addTypeOf( $container, 'mw:Error', true );
	}

	$dataMw->errors = is_array( $dataMw->errors ?? null )
		? array_merge( $dataMw->errors, $errs )
		: $errs;

	if ( $alt === null ) {
		return;
	}
	$altNode = $span->ownerDocument->createTextNode( $alt );
	DOMCompat::replaceChildren( $span, $altNode );
}
418 | |
/**
 * Copy an attribute from the span onto the new element, passing along
 * the 'value' from the span's shadow info for that attribute.
 *
 * @param Element $elt
 * @param Element $span
 * @param string $attribute
 */
private static function copyOverAttribute(
	Element $elt, Element $span, string $attribute
): void {
	$current = DOMCompat::getAttribute( $span, $attribute );
	$shadow = WTSUtils::getAttributeShadowInfo( $span, $attribute );
	DOMDataUtils::addNormalizedAttribute( $elt, $attribute, $current, $shadow['value'] );
}
429 | |
/**
 * Replace the placeholder anchor with the final wrapper of the media
 * element: an <a> (or a <span>, when link= is empty) for images and a
 * <span> for everything else.
 *
 * @param Env $env
 * @param PegTokenizer $urlParser
 * @param Element $container
 * @param Element $oldAnchor
 * @param array $attrs
 * @param DataMw $dataMw
 * @param bool $isImage
 * @param ?string $captionText Used as the title attribute, when non-empty
 * @param int $page
 * @param string $lang
 * @return Element The element that took the place of $oldAnchor
 */
private static function replaceAnchor(
	Env $env, PegTokenizer $urlParser, Element $container,
	Element $oldAnchor, array $attrs, DataMw $dataMw, bool $isImage,
	?string $captionText, int $page, string $lang
): Element {
	$doc = $oldAnchor->ownerDocument;
	$linkAttr = WTSUtils::getAttrFromDataMw( $dataMw, 'link', true );

	if ( !$isImage ) {
		$anchor = $doc->createElement( 'span' );
	} else {
		$anchor = $doc->createElement( 'a' );

		// Points the anchor at the file description page
		$linkToDescription = static function ( Title $title ) use ( $env, $anchor, $page, $lang ) {
			$href = $env->makeLink( $title );
			$query = [];
			if ( $page > 0 ) {
				$query['page'] = $page;
			}
			if ( $lang ) {
				$query['lang'] = $lang;
			}
			if ( $query ) {
				$href .= '?' . http_build_query( $query );
			}
			$anchor->setAttribute( 'href', $href );
			$anchor->setAttribute( 'class', 'mw-file-description' );
		};

		if ( $linkAttr === null ) {
			$linkToDescription( $attrs['title'] );
		} else {
			$discard = true;
			$target = $linkAttr[1]->txt;
			if ( $target === '' ) {
				// No href if link= was specified
				$anchor = $doc->createElement( 'span' );
			} elseif ( $urlParser->tokenizeURL( $target ) !== false ) {
				// An external link!
				$href = Sanitizer::cleanUrl( $env->getSiteConfig(), $target, 'external' );
				$anchor->setAttribute( 'href', $href );
				// Similar to AddLinkAttributes
				foreach ( $env->getExternalLinkAttribs( $href ) as $name => $value ) {
					if ( $name !== 'rel' ) {
						$anchor->setAttribute( $name, $value );
						continue;
					}
					foreach ( $value as $rel ) {
						DOMUtils::addRel( $anchor, $rel );
					}
				}
			} else {
				$linkTitle = $env->makeTitleFromText( $target, null, true );
				if ( $linkTitle === null ) {
					// Treat same as if link weren't present
					$linkToDescription( $attrs['title'] );
					// but preserve for roundtripping
					$discard = false;
				} else {
					$anchor->setAttribute( 'href', $env->makeLink( $linkTitle ) );
					$anchor->setAttribute( 'title', $linkTitle->getPrefixedText() );
				}
			}
			if ( $discard ) {
				WTSUtils::getAttrFromDataMw( $dataMw, 'link', /* keep */false );
			}
		}
	}

	if ( $captionText ) {
		$anchor->setAttribute( 'title', $captionText );
	}

	$oldAnchor->parentNode->replaceChild( $anchor, $oldAnchor );
	return $anchor;
}
505 | |
/**
 * Collect every media container (an element whose typeof contains
 * "mw:File") under $root, fetch the file info for all of them in a single
 * request, and then replace each placeholder with the final
 * <img> / <audio> / <video> markup, or flag the container with mw:Error.
 *
 * @inheritDoc
 */
public function run(
	Env $env, Node $root, array $options = [], bool $atTopLevel = false
): void {
	'@phan-var Element|DocumentFragment $root';  // @var Element|DocumentFragment $root
	$urlParser = new PegTokenizer( $env );

	$validContainers = [];
	$files = [];

	$containers = DOMCompat::querySelectorAll( $root, '[typeof*="mw:File"]' );

	foreach ( $containers as $container ) {
		// DOMFragmentWrappers assume the element name of their outermost
		// content so, depending how the above query is written, we're
		// protecting against getting a figure of the wrong type.  However,
		// since we're currently using typeof, it shouldn't be a problem.
		// Also note that info for the media nested in the fragment has
		// already been added in their respective pipeline.
		Assert::invariant(
			!WTUtils::isDOMFragmentWrapper( $container ),
			'Media info for fragment was already added'
		);

		// We expect this structure to be predictable based on how it's
		// emitted in the TT/WikiLinkHandler but treebuilding may have
		// messed that up for us.
		$anchor = $container;
		$reopenedAFE = [];
		do {
			// An active formatting element may have been reopened inside
			// the wrapper if a content model violation was encountered
			// during treebuilding.  Try to be a little lenient about that
			// instead of bailing out
			$anchor = $anchor->firstChild;
			if ( $anchor === null ) {
				// Ran out of children without finding the anchor.  There's
				// no node to take the name of, so fall through to the
				// "unexpected structure" handling below.
				$anchorNodeName = null;
				break;
			}
			$anchorNodeName = DOMCompat::nodeName( $anchor );
			if ( $anchorNodeName !== 'a' ) {
				$reopenedAFE[] = $anchor;
			}
		} while (
			$anchorNodeName !== 'a' &&
			isset( Consts::$HTML['FormattingTags'][$anchorNodeName] )
		);
		if ( $anchorNodeName !== 'a' ) {
			$env->log( 'error', 'Unexpected structure when adding media info.' );
			continue;
		}
		$span = $anchor->firstChild;
		if ( !( $span instanceof Element && DOMCompat::nodeName( $span ) === 'span' ) ) {
			$env->log( 'error', 'Unexpected structure when adding media info.' );
			continue;
		}
		$caption = $anchor->nextSibling;
		$isInlineMedia = WTUtils::isInlineMedia( $container );
		if (
			!$isInlineMedia &&
			// A missing caption is just as unexpected as a wrong element
			( $caption === null || DOMCompat::nodeName( $caption ) !== 'figcaption' )
		) {
			$env->log( 'error', 'Unexpected structure when adding media info.' );
			continue;
		}

		// For T314059.  Migrate any active formatting tags we found open
		// inside the container to the figcaption to conform to the spec.
		// This should simplify selectors for clients and styling.
		// TODO: Consider exposing these as lints
		if ( $reopenedAFE ) {
			$firstAFE = $reopenedAFE[0];
			$lastAFE = $reopenedAFE[count( $reopenedAFE ) - 1];
			DOMUtils::migrateChildren( $lastAFE, $container );
			if ( $isInlineMedia ) {
				// Remove the formatting elements, they are of no use
				// We could migrate them into the caption in data-mw,
				// but that doesn't seem worthwhile
				$firstAFE->parentNode->removeChild( $firstAFE );
			} else {
				// Move the formatting elements into the figcaption
				DOMUtils::migrateChildren( $caption, $lastAFE );
				$caption->appendChild( $firstAFE );
				// Unconditionally clear tsr out of an abundance of caution
				// These tags should already be annotated as autoinserted anyways
				foreach ( $reopenedAFE as $afe ) {
					DOMDataUtils::getDataParsoid( $afe )->tsr = null;
				}
			}
		}

		$dataMw = DOMDataUtils::getDataMw( $container );

		$dims = [
			'width' => (int)DOMCompat::getAttribute( $span, 'data-width' ) ?: null,
			'height' => (int)DOMCompat::getAttribute( $span, 'data-height' ) ?: null,
		];

		$page = WTSUtils::getAttrFromDataMw( $dataMw, 'page', true );
		if ( $page ) {
			$dims['page'] = $page[1]->txt;
		}

		$lang = DOMCompat::getAttribute( $span, 'lang' );
		if ( $lang !== null ) {
			$dims['lang'] = $lang;
		}

		// "starttime" should be used if "thumbtime" isn't present,
		// but only for rendering.
		$thumbtime = WTSUtils::getAttrFromDataMw( $dataMw, 'thumbtime', true );
		$starttime = WTSUtils::getAttrFromDataMw( $dataMw, 'starttime', true );
		if ( $thumbtime || $starttime ) {
			$seek = isset( $thumbtime[1] )
				? $thumbtime[1]->txt
				: ( isset( $starttime[1] ) ? $starttime[1]->txt : '' );
			$seek = self::parseTimeString( $seek );
			if ( $seek !== null ) {
				$dims['seek'] = $seek;
			}
		}

		$attrs = [
			'dims' => $dims,
			'format' => WTUtils::getMediaFormat( $container ),
			'title' => $env->makeTitleFromText( $span->textContent ),
		];

		// Requests are keyed by a hash of the title and dimensions so that
		// identical requests are only made once
		$file = [ $attrs['title']->getKey(), $dims ];
		$infoKey = md5( json_encode( $file ) );
		$files[$infoKey] = $file;
		$errs = [];

		$manualKey = null;
		$manualthumb = WTSUtils::getAttrFromDataMw( $dataMw, 'manualthumb', true );
		if ( $manualthumb !== null ) {
			$val = $manualthumb[1]->txt;
			$title = $env->makeTitleFromText( $val, $attrs['title']->getNamespace(), true );
			if ( $title === null ) {
				$errs[] = self::makeErr(
					'apierror-invalidtitle',
					'Invalid thumbnail title.',
					[ 'name' => $val ]
				);
			} else {
				$file = [ $title->getKey(), $dims ];
				$manualKey = md5( json_encode( $file ) );
				$files[$manualKey] = $file;
			}
		}

		$validContainers[] = [
			'container' => $container,
			'attrs' => $attrs,
			// Pass the anchor because we did some work to find it above
			'anchor' => $anchor,
			'infoKey' => $infoKey,
			'manualKey' => $manualKey,
			'errs' => $errs,
		];
	}

	if ( !$validContainers ) {
		return;
	}

	$start = microtime( true );

	$infos = $env->getDataAccess()->getFileInfo(
		$env->getPageConfig(),
		array_values( $files )
	);

	if ( $env->profiling() ) {
		$profile = $env->getCurrentProfile();
		$profile->bumpMWTime( "Media", 1000 * ( microtime( true ) - $start ), "api" );
		$profile->bumpCount( "Media" );
	}

	// Map the returned infos back onto the request keys
	$files = array_combine(
		array_keys( $files ),
		$infos
	);

	$hasThumb = false;
	$needsTMHModules = false;

	foreach ( $validContainers as $c ) {
		$container = $c['container'];
		$anchor = $c['anchor'];
		$span = $anchor->firstChild;
		$attrs = $c['attrs'];
		$dataMw = DOMDataUtils::getDataMw( $container );
		$errs = $c['errs'];

		$hasThumb = $hasThumb || DOMUtils::hasTypeOf( $container, 'mw:File/Thumb' );

		$info = $files[$c['infoKey']];
		if ( !$info ) {
			$errs[] = self::makeErr( 'apierror-filedoesnotexist', 'This image does not exist.' );
		} elseif ( isset( $info['thumberror'] ) ) {
			$errs[] = self::makeErr( 'apierror-unknownerror', $info['thumberror'] );
		}

		// FIXME: Should we fallback to $info if there are errors with $manualinfo?
		// What does the legacy parser do?
		if ( $c['manualKey'] !== null ) {
			$manualinfo = $files[$c['manualKey']];
			if ( !$manualinfo ) {
				$errs[] = self::makeErr( 'apierror-filedoesnotexist', 'This image does not exist.' );
			} elseif ( isset( $manualinfo['thumberror'] ) ) {
				$errs[] = self::makeErr( 'apierror-unknownerror', $manualinfo['thumberror'] );
			} else {
				$info = $manualinfo;
			}
		}

		if ( $info['badFile'] ?? false ) {
			$errs[] = self::makeErr( 'apierror-badfile', 'This image is on the bad file list.' );
		}

		if ( WTUtils::hasVisibleCaption( $container ) ) {
			$captionText = null;
		} else {
			if ( WTUtils::isInlineMedia( $container ) ) {
				$caption = ContentUtils::createAndLoadDocumentFragment(
					$container->ownerDocument, $dataMw->caption ?? ''
				);
			} else {
				$caption = DOMCompat::querySelector( $container, 'figcaption' );
				// If the caption had tokens, it was placed in a DOMFragment
				// and we haven't unpacked yet
				if (
					$caption->firstChild &&
					DOMUtils::hasTypeOf( $caption->firstChild, 'mw:DOMFragment' )
				) {
					$id = DOMDataUtils::getDataParsoid( $caption->firstChild )->html;
					$caption = $env->getDOMFragment( $id );
				}
			}
			$captionText = trim( WTUtils::textContentFromCaption( $caption ) );

			// The sanitizer isn't going to do anything with a string value
			// for alt/title and since we're going to use dom element setters,
			// quote escaping should be fine.  Note that if sanitization does
			// happen here, it should also be done to $altFromCaption so that
			// string comparison matches, where necessary.
			//
			// $sanitizedArgs = Sanitizer::sanitizeTagAttrs( $env->getSiteConfig(), 'img', null, [
			// new KV( 'alt', $captionText )  // Could be a 'title' too
			// ] );
			// $captionText = $sanitizedArgs['alt'][0];
		}

		// Info relates to the thumb, not necessarily the file.
		// The distinction matters for manualthumb, in which case only
		// the "resource" copied over from the span relates to the file.

		switch ( $info['mediatype'] ?? '' ) {
			case 'AUDIO':
				$handler = 'handleAudio';
				$isImage = false;
				break;
			case 'VIDEO':
				$handler = 'handleVideo';
				$isImage = false;
				break;
			default:
				$handler = 'handleImage';
				$isImage = true;
				break;
		}

		$alt = null;
		// The alt option only leaves data-mw when it ends up on an <img>
		$keepAltInDataMw = !$isImage || $errs;
		$attr = WTSUtils::getAttrFromDataMw( $dataMw, 'alt', $keepAltInDataMw );
		if ( $attr !== null ) {
			$alt = $attr[1]->txt;
		} elseif ( $captionText ) {
			$alt = $captionText;
		}

		// Add mw:Error to the RDFa type.
		if ( $errs ) {
			self::handleErrors( $container, $span, $errs, $dataMw, $alt );
			continue;
		}

		$needsTMHModules = $needsTMHModules || !$isImage;

		$env->getMetadata()->addImage(
			$attrs['title'],
			$info['timestamp'] ?? null,
			$info['sha1'] ?? null,
		);

		$elt = self::$handler( $env, $span, $attrs, $info, $dataMw, $container, $alt );
		DOMCompat::getClassList( $elt )->add( 'mw-file-element' );

		$anchor = self::replaceAnchor(
			$env, $urlParser, $container, $anchor, $attrs, $dataMw, $isImage, $captionText,
			(int)( $attrs['dims']['page'] ?? 0 ),
			$attrs['dims']['lang'] ?? ''
		);
		$anchor->appendChild( $elt );

		// Don't leave an empty attribs list behind in data-mw
		if ( isset( $dataMw->attribs ) && count( $dataMw->attribs ) === 0 ) {
			unset( $dataMw->attribs );
		}
	}

	if ( $hasThumb ) {
		$env->getMetadata()->addModules( [ 'mediawiki.page.media' ] );
	}

	if ( $needsTMHModules ) {
		$env->getMetadata()->addModuleStyles( [ 'ext.tmh.player.styles' ] );
		$env->getMetadata()->addModules( [ 'ext.tmh.player' ] );
	}
}
823 | } |