Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
4.44% |
34 / 766 |
|
0.00% |
0 / 25 |
CRAP | |
0.00% |
0 / 1 |
WikiLinkHandler | |
4.44% |
34 / 766 |
|
0.00% |
0 / 25 |
48017.58 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
hrefParts | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
getWikiLinkTargetInfo | |
55.74% |
34 / 61 |
|
0.00% |
0 / 1 |
38.20 | |||
onRedirect | |
0.00% |
0 / 34 |
|
0.00% |
0 / 1 |
30 | |||
bailTokens | |
0.00% |
0 / 26 |
|
0.00% |
0 / 1 |
6 | |||
onWikiLink | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
56 | |||
wikiLinkHandler | |
0.00% |
0 / 26 |
|
0.00% |
0 / 1 |
156 | |||
buildLinkAttrs | |
0.00% |
0 / 24 |
|
0.00% |
0 / 1 |
156 | |||
addLinkAttributesAndGetContent | |
0.00% |
0 / 71 |
|
0.00% |
0 / 1 |
930 | |||
renderWikiLink | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
6 | |||
renderCategory | |
0.00% |
0 / 35 |
|
0.00% |
0 / 1 |
42 | |||
renderLanguageLink | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
30 | |||
renderInterwikiLink | |
0.00% |
0 / 25 |
|
0.00% |
0 / 1 |
30 | |||
getWrapperInfo | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
72 | |||
getOptionInfo | |
0.00% |
0 / 25 |
|
0.00% |
0 / 1 |
42 | |||
isWikitextOpt | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
12 | |||
stringifyOptionTokens | |
0.00% |
0 / 63 |
|
0.00% |
0 / 1 |
650 | |||
getFormat | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
getUsed | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
6 | |||
hasTransclusion | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
renderFile | |
0.00% |
0 / 243 |
|
0.00% |
0 / 1 |
4290 | |||
specialFilePath | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
linkToMedia | |
0.00% |
0 / 24 |
|
0.00% |
0 / 1 |
30 | |||
renderMedia | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
12 | |||
onTag | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
20 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | /** |
5 | * Simple link handler. |
6 | * |
7 | * TODO: keep round-trip information in meta tag or the like |
8 | */ |
9 | |
10 | namespace Wikimedia\Parsoid\Wt2Html\TT; |
11 | |
12 | use stdClass; |
13 | use Wikimedia\Assert\Assert; |
14 | use Wikimedia\Parsoid\Config\Env; |
15 | use Wikimedia\Parsoid\Core\DomSourceRange; |
16 | use Wikimedia\Parsoid\Core\InternalException; |
17 | use Wikimedia\Parsoid\Core\Sanitizer; |
18 | use Wikimedia\Parsoid\Language\Language; |
19 | use Wikimedia\Parsoid\NodeData\DataMw; |
20 | use Wikimedia\Parsoid\NodeData\DataMwAttrib; |
21 | use Wikimedia\Parsoid\NodeData\DataMwError; |
22 | use Wikimedia\Parsoid\NodeData\DataParsoid; |
23 | use Wikimedia\Parsoid\NodeData\TempData; |
24 | use Wikimedia\Parsoid\Tokens\EndTagTk; |
25 | use Wikimedia\Parsoid\Tokens\EOFTk; |
26 | use Wikimedia\Parsoid\Tokens\KV; |
27 | use Wikimedia\Parsoid\Tokens\SelfclosingTagTk; |
28 | use Wikimedia\Parsoid\Tokens\SourceRange; |
29 | use Wikimedia\Parsoid\Tokens\TagTk; |
30 | use Wikimedia\Parsoid\Tokens\Token; |
31 | use Wikimedia\Parsoid\Utils\ContentUtils; |
32 | use Wikimedia\Parsoid\Utils\DOMCompat; |
33 | use Wikimedia\Parsoid\Utils\DOMUtils; |
34 | use Wikimedia\Parsoid\Utils\PHPUtils; |
35 | use Wikimedia\Parsoid\Utils\PipelineUtils; |
36 | use Wikimedia\Parsoid\Utils\Title; |
37 | use Wikimedia\Parsoid\Utils\TitleException; |
38 | use Wikimedia\Parsoid\Utils\TokenUtils; |
39 | use Wikimedia\Parsoid\Utils\Utils; |
40 | use Wikimedia\Parsoid\Wikitext\Consts; |
41 | use Wikimedia\Parsoid\Wt2Html\PegTokenizer; |
42 | use Wikimedia\Parsoid\Wt2Html\TokenHandlerPipeline; |
43 | |
44 | class WikiLinkHandler extends TokenHandler { |
45 | /** |
46 | * @var PegTokenizer |
47 | */ |
48 | private $urlParser; |
49 | |
/** @inheritDoc */
public function __construct( TokenHandlerPipeline $manager, array $options ) {
	parent::__construct( $manager, $options );

	// Tokenizer kept around for parsing image options. It is the regular
	// wikitext tokenizer; callers are expected to invoke it with the url
	// rule only. An already-set parser is left alone.
	$this->urlParser = $this->urlParser ?: new PegTokenizer( $this->env );
}
61 | |
/**
 * Split a link target at its first colon.
 *
 * @param string $str
 * @return ?array [ 'prefix' => text before the first colon,
 *   'title' => everything after it ], or null when the pattern does not
 *   match (e.g. no colon, or nothing in front of the first colon).
 */
private static function hrefParts( string $str ): ?array {
	$parts = [];
	if ( !preg_match( '/^([^:]+):(.*)$/D', $str, $parts ) ) {
		return null;
	}

	return [
		'prefix' => $parts[1],
		'title' => $parts[2],
	];
}
69 | |
/**
 * Normalize and analyze a wikilink target.
 *
 * The returned object contains
 * - href: The expanded target string (colon escape and, for interwiki and
 *     language links, the prefix removed)
 * - hrefSrc: The original target wikitext
 * - title: A title object *or*
 * - language: An interwikiInfo array *or*
 * - interwiki: An interwikiInfo array.
 * - localprefix: Set if the link had a localinterwiki prefix (or prefixes)
 * - fromColonEscapedText: Target was colon-escaped ([[:en:foo]])
 * - prefix: The original namespace or language/interwiki prefix without a
 *     colon escape. Only set when the target contains a prefix.
 *
 * @param Token $token
 * @param string $href
 * @param string $hrefSrc
 * @return stdClass The target info.
 * @throws InternalException When the target starts with more than one colon.
 */
private function getWikiLinkTargetInfo( Token $token, string $href, string $hrefSrc ): stdClass {
	$env = $this->env;
	$siteConfig = $env->getSiteConfig();

	// A leading colon (optionally preceded by whitespace) is a colon
	// escape; strip it and remember that it was there.
	$trimmedHref = ltrim( $href );
	$colonEscaped = str_starts_with( $trimmedHref, ':' );
	$info = (object)[
		'href' => $colonEscaped ? substr( $trimmedHref, 1 ) : $href,
		'hrefSrc' => $hrefSrc,
		// Initialize these properties to avoid isset checks
		'interwiki' => null,
		'language' => null,
		'localprefix' => null,
		'fromColonEscapedText' => $colonEscaped ? true : null
	];

	// A second colon directly after the escape is not valid.
	if ( str_starts_with( $info->href, ':' ) ) {
		if ( $env->linting( 'multi-colon-escape' ) ) {
			$dsr = DomSourceRange::fromTsr( $token->dataParsoid->tsr );
			$templateInfo = null;
			if ( $this->options['inTemplate'] ) {
				// Match Linter.findEnclosingTemplateName(), by first
				// converting the title to an href using env.makeLink
				$templateInfo = [
					'name' => PHPUtils::stripPrefix(
						$env->makeLink( $this->manager->getFrame()->getTitle() ),
						'./'
					)
				];
				// TODO(arlolra): Pass tsr info to the frame
				$dsr = new DomSourceRange( 0, 0, null, null );
			}
			$env->recordLint( 'multi-colon-escape', [
				'dsr' => $dsr,
				'params' => [ 'href' => ':' . $info->href ],
				'templateInfo' => $templateInfo
			] );
		}
		// This will get caught by the caller, and mark the target as invalid
		throw new InternalException( 'Multiple colons prefixing href.' );
	}

	$title = $env->resolveTitle( Utils::decodeURIComponent( $info->href ) );
	$hrefBits = self::hrefParts( $info->href );
	if ( !$hrefBits ) {
		// No prefix at all: a plain local title.
		$info->title = $env->makeTitleFromURLDecodedStr( $title );
		return $info;
	}

	$nsPrefix = $hrefBits['prefix'];
	$unprefixed = $hrefBits['title'];
	$info->prefix = $nsPrefix;
	$nnn = Utils::normalizeNamespaceName( trim( $nsPrefix ) );
	// check for interwiki / language links
	$interwikiInfo = $siteConfig->interwikiMapNoNamespaces()[$nnn] ?? null;
	$ns = $siteConfig->namespaceId( $nnn );

	if ( $ns !== null ) {
		// The prefix is a namespace known to this wiki.
		$info->title = $env->makeTitleFromURLDecodedStr( $title );
		return $info;
	}

	if ( isset( $interwikiInfo['localinterwiki'] ) ) {
		if ( $unprefixed === '' ) {
			// Empty title => main page (T66167)
			$info->title = Title::newFromLinkTarget(
				$siteConfig->mainPageLinkTarget(), $siteConfig
			);
			return $info;
		}
		// Analyze what follows the local prefix on its own. If it still
		// contains a colon it is passed colon-escaped.
		$innerHref = str_contains( $unprefixed, ':' ) ? ':' . $unprefixed : $unprefixed;
		// Recurse!
		$info = $this->getWikiLinkTargetInfo( $token, $innerHref, $info->hrefSrc );
		$innerPrefix = $info->localprefix;
		$info->localprefix = $innerPrefix ? $nsPrefix . ':' . $innerPrefix : $nsPrefix;
		return $info;
	}

	if ( empty( $interwikiInfo['url'] ) ) {
		// Unknown prefix: treat the whole thing as a local title.
		$info->title = $env->makeTitleFromURLDecodedStr( $title );
		return $info;
	}

	$info->href = $unprefixed;
	// Ensure a valid title and store it for later use.
	// (don't store as $info->title because that signals a wikilink)
	$interwikiInfo['title'] = $env->makeTitleFromURLDecodedStr( $title );

	// Interwiki or language link? It is only a language link if there is
	// language info and the target did not start with an explicit ':'
	// (like [[:en:Foo]]).
	$isLanguageLink = !$info->fromColonEscapedText &&
		( isset( $interwikiInfo['language'] ) || isset( $interwikiInfo['extralanglink'] ) );
	if ( $isLanguageLink ) {
		$info->language = $interwikiInfo;
		return $info;
	}

	// An interwiki link.
	$info->interwiki = $interwikiInfo;
	// Remove the colon escape after an interwiki prefix
	$afterPrefix = ltrim( $info->href );
	if ( str_starts_with( $afterPrefix, ':' ) ) {
		$info->href = substr( $afterPrefix, 1 );
	}

	return $info;
}
187 | |
/**
 * Handle mw:redirect tokens.
 *
 * The embedded wikilink token is run through onWikiLink(); if that yields
 * an <a> or <link> token, its href is copied onto a
 * <link rel="mw:PageProp/redirect"> token, which is returned alone.
 * Otherwise the redirect source is re-emitted as a list item ("#...")
 * followed by whatever onWikiLink() produced.
 *
 * @param Token $token
 * @return TokenHandlerResult
 * @throws InternalException
 */
private function onRedirect( Token $token ): TokenHandlerResult {
	$rlink = new SelfclosingTagTk(
		'link',
		Utils::clone( $token->attribs ),
		clone $token->dataParsoid,
		$token->dataMw ? clone $token->dataMw : null
	);
	$wikiLinkTk = $rlink->dataParsoid->linkTk;
	$rlink->setAttribute( 'rel', 'mw:PageProp/redirect' );

	// The redirect link itself carries neither the nested wikilink token
	// nor the cloned href attribute.
	unset( $rlink->dataParsoid->linkTk );
	$rlink->removeAttribute( 'href' );

	// Hand the (possibly template-expanded) attributes back to the nested
	// wikilink token and flag it so that image and category links are
	// handled as plain links.
	$wikiLinkTk->attribs = Utils::clone( $token->attribs );
	$wikiLinkTk->setAttribute( 'redirect', 'true' );

	// Render the wikilink (including interwiki links, etc).
	$r = $this->onWikiLink( $wikiLinkTk );
	$firstToken = $r->tokens[0] ?? null;

	if (
		$firstToken instanceof Token &&
		in_array( $firstToken->getName(), [ 'a', 'link' ], true )
	) {
		// Transfer the rendered href to the redirect link.
		$da = $firstToken->dataParsoid;
		$rlink->addNormalizedAttribute( 'href', $da->a['href'], $da->sa['href'] );
		return new TokenHandlerResult( [ $rlink ] );
	}

	// Bail! Emit tokens as if they were parsed as a list item:
	//  #REDIRECT....
	$src = $rlink->dataParsoid->src;
	$tsr = $rlink->dataParsoid->tsr;
	preg_match( '/^([^#]*)(#)/', $src, $srcMatch );
	$beforeHash = $srcMatch[1];
	$ntokens = [];
	if ( strlen( $beforeHash ) ) {
		$ntokens[] = $beforeHash;
	}

	// Source range covering just the '#' character.
	$hashPos = $tsr->start + strlen( $beforeHash );
	$hashRange = new SourceRange( $hashPos, $hashPos + 1 );
	$dp = new DataParsoid;
	$dp->tsr = $hashRange;
	$ntokens[] = new TagTk(
		'listItem',
		[ new KV( 'bullets', [ '#' ], $hashRange->expandTsrV() ) ],
		$dp
	);

	// Everything after the '#', then the tokens from the failed link.
	$ntokens[] = substr( $src, strlen( $srcMatch[0] ) );
	PHPUtils::pushArray( $ntokens, $r->tokens );

	return new TokenHandlerResult( $ntokens );
}
252 | |
/**
 * Give up on handling $token as a link and re-tokenize its source.
 *
 * The source text covered by the token's tsr is taken from the frame,
 * its first character is dropped, and the remainder is processed in a
 * 'wikitext-to-expanded-tokens' pipeline. The result is a literal '['
 * followed by those tokens (with the EOF token stripped).
 * NOTE(review): this presumes the dropped first character is the opening
 * '[' of the link -- confirm against callers.
 *
 * @param TokenHandlerPipeline $manager
 * @param Token $token
 * @return array Tokens (strings and Token objects) replacing the link. If
 *   the link source cannot be determined, the raw source string alone.
 */
public static function bailTokens( TokenHandlerPipeline $manager, Token $token ): array {
	$frame = $manager->getFrame();
	$tsr = $token->dataParsoid->tsr;
	$frameSrc = $frame->getSrcText();
	$linkSrc = $tsr->substr( $frameSrc );
	$src = substr( $linkSrc, 1 );
	// substr() reports an out-of-range offset with false before PHP 8.0
	// but with '' from PHP 8.0 on, so the empty-source case has to be
	// detected explicitly to behave the same on both.
	if ( $linkSrc === '' || $src === false ) {
		$manager->getEnv()->log(
			'error', 'Unable to determine link source.',
			"frame: $frameSrc", 'tsr: ', $tsr,
			"link: $linkSrc"
		);
		return [ $linkSrc ]; // Forget about trying to tokenize this
	}
	// Offsets of the re-tokenized text skip the dropped first character.
	$startOffset = $tsr->start + 1;
	$toks = PipelineUtils::processContentInPipeline(
		$manager->getEnv(), $frame, $src, [
			// FIXME: Set toplevel when bailing
			// 'toplevel' => $atTopLevel ?? false,
			'sol' => false,
			'pipelineType' => 'wikitext-to-expanded-tokens',
			'srcOffsets' => new SourceRange( $startOffset, $startOffset + strlen( $src ) ),
			'pipelineOpts' => [
				'expandTemplates' => $manager->getOptions()['expandTemplates'],
				'inTemplate' => $manager->getOptions()['inTemplate'],
			],
		]
	);
	TokenUtils::stripEOFTkfromTokens( $toks );
	return array_merge( [ '[' ], $toks );
}
284 | |
/**
 * Handle a mw:WikiLink token.
 *
 * Targets that cannot be rendered as a link (protocol-prefixed targets,
 * nowiki/extension/sealed-fragment content in the target, a pipe in the
 * expanded target, or an invalid title) are re-emitted via bailTokens().
 * Everything else is dispatched through wikiLinkHandler().
 *
 * @param Token $token
 * @return TokenHandlerResult
 * @throws InternalException
 */
private function onWikiLink( Token $token ): TokenHandlerResult {
	$bail = function () use ( $token ): TokenHandlerResult {
		return new TokenHandlerResult( self::bailTokens( $this->manager, $token ) );
	};

	$hrefKV = $token->getAttributeKV( 'href' );
	$hrefTokenStr = TokenUtils::tokensToString( $hrefKV->v );

	// Don't allow internal links to pages containing PROTO:
	// See Parser::handleInternalLinks2()
	if ( $this->env->getSiteConfig()->hasValidProtocol( $hrefTokenStr ) ) {
		return $bail();
	}

	// Xmlish tags in title position are invalid.  Not according to the
	// preprocessor ABNF but at later stages in the legacy parser,
	// namely handleInternalLinks.
	if ( is_array( $hrefKV->v ) ) {
		// Use the expanded attr instead of trying to unpackDOMFragments
		// since the fragment will have been released when expanding to DOM
		$expandedDom = DOMUtils::parseHTML( $token->fetchExpandedAttrValue( 'href' ) ?? '' );
		$typedNodes = DOMCompat::querySelectorAll( $expandedDom, '[typeof]' );
		foreach ( $typedNodes as $node ) {
			$match = DOMUtils::matchTypeOf( $node, '#^mw:(Nowiki|Extension|DOMFragment/sealed)#' );
			if ( $match !== null ) {
				return $bail();
			}
		}
	}

	// A pipe in the expanded href means it was templated and also returned
	// other parameters separated by a pipe.  We don't have any sensible way
	// to handle such a construct currently, so prevent people from editing
	// it.  See T226523
	// TODO: add useful debugging info for editors ('if you would like to
	// make this content editable, then fix template X..')
	// TODO: also check other parameters for pipes!
	// NOTE: We'd need to clear firstPipeSrc if this case gets supported
	if ( str_contains( $hrefTokenStr, '|' ) ) {
		return $bail();
	}

	try {
		$target = $this->getWikiLinkTargetInfo( $token, $hrefTokenStr, $hrefKV->vsrc );
	} catch ( TitleException | InternalException $e ) {
		// Invalid title
		return $bail();
	}

	// Ok, it looks like we have a sensible href. Figure out which handler to use.
	return $this->wikiLinkHandler(
		$token,
		$target,
		(bool)$token->getAttributeV( 'redirect' )
	);
}
343 | |
/**
 * Pick the renderer for a WikiLink token based on the analyzed target.
 * Override this method to add new handlers or swap out existing handlers
 * based on the target structure.
 *
 * @param Token $token
 * @param stdClass $target
 * @param bool $isRedirect
 * @return TokenHandlerResult
 * @throws InternalException If the target is neither a title nor an
 *   interwiki or language link.
 */
private function wikiLinkHandler(
	Token $token, stdClass $target, bool $isRedirect
): TokenHandlerResult {
	$title = $target->title ?? null;

	if ( !$title ) {
		// language and interwiki links
		if ( $target->interwiki ) {
			return $this->renderInterwikiLink( $token, $target );
		}
		if ( !$target->language ) {
			// Neither a title, nor a language or interwiki. Should not happen.
			throw new InternalException( 'Unknown link type' );
		}

		// On talk pages, or with interwiki magic turned off, a language
		// link is rendered as an ordinary interwiki link.
		$contextNs = $this->env->getContextTitle()->getNamespace();
		$asInterwiki = $this->env->getSiteConfig()->namespaceIsTalk( $contextNs ) ||
			!$this->env->getSiteConfig()->interwikiMagic();
		if ( !$asInterwiki ) {
			return $this->renderLanguageLink( $token, $target );
		}
		$target->interwiki = $target->language;
		return $this->renderInterwikiLink( $token, $target );
	}

	// Redirect targets are always rendered as plain wiki links.
	if ( $isRedirect ) {
		return $this->renderWikiLink( $token, $target );
	}

	$siteConfig = $this->env->getSiteConfig();
	$nsId = $title->getNamespace();
	if ( $nsId === $siteConfig->canonicalNamespaceId( 'media' ) ) {
		// Render as a media link.
		return $this->renderMedia( $token, $target );
	}

	// Colon-escaped targets and purely fragment links on pages in the
	// file / category namespaces stay plain links.
	$plainLinkOnly = $target->fromColonEscapedText ||
		str_starts_with( $target->href, '#' );
	if ( !$plainLinkOnly ) {
		if ( $nsId === $siteConfig->canonicalNamespaceId( 'file' ) ) {
			// Render as a file.
			return $this->renderFile( $token, $target );
		}
		if ( $nsId === $siteConfig->canonicalNamespaceId( 'category' ) ) {
			// Render as a category membership.
			return $this->renderCategory( $token, $target );
		}
	}

	// Render as plain wiki links.
	return $this->renderWikiLink( $token, $target );
}
407 | |
/**
 * This (overloaded) function does three different things:
 * - Collects the link text attributes (those whose key is
 *   "mw:maybeContent"), but only when asked to via $getLinkText
 * - Combines the given rdfa type with the value of any existing typeof
 *   attribute (given type first, space separated)
 * - Collates typeof, about, and $linkAttrs (in that order) into a new
 *   attribute array
 *
 * @param array $attrs
 * @param bool $getLinkText
 * @param ?string $rdfaType
 * @param ?array $linkAttrs
 * @return array With keys 'attribs' (the new attribute array),
 *   'contentKVs' (the collected link text attributes) and 'hasRdfaType'
 *   (whether the resulting rdfa type is non-null).
 */
public static function buildLinkAttrs(
	array $attrs, bool $getLinkText, ?string $rdfaType,
	?array $linkAttrs
): array {
	$contentKVs = [];
	$about = null;

	// Single pass over the attributes picking out about, typeof, and the
	// link text. about && typeof are usually at the end of the array if at
	// all present.
	foreach ( $attrs as $kv ) {
		$key = $kv->k;
		$value = $kv->v;

		if ( $getLinkText && $key === 'mw:maybeContent' ) {
			$contentKVs[] = $kv;
			continue;
		}
		if ( !is_string( $key ) || !$key ) {
			continue;
		}

		switch ( trim( $key ) ) {
			case 'typeof':
				$rdfaType = $rdfaType ? $rdfaType . ' ' . $value : $value;
				break;
			case 'about':
				$about = $value;
				break;
		}
	}

	$collated = [];
	if ( $rdfaType ) {
		$collated[] = new KV( 'typeof', $rdfaType );
	}
	if ( $about ) {
		$collated[] = new KV( 'about', $about );
	}
	if ( $linkAttrs ) {
		PHPUtils::pushArray( $collated, $linkAttrs );
	}

	return [
		'attribs' => $collated,
		'contentKVs' => $contentKVs,
		'hasRdfaType' => $rdfaType !== null
	];
}
467 | |
/**
 * Generic wiki link attribute setup on a passed-in new token based on the
 * wikilink token and target. As a side effect, this method also extracts the
 * link content tokens and returns them.
 *
 * On $newTk this sets:
 * - attribs: the collated attributes from buildLinkAttrs(), with a
 *   rel="mw:WikiLink" attribute
 * - dataParsoid: a clone of the wikilink token's, minus `src`, with `stx`
 *   set to 'piped' (link text was given) or 'simple' (it was not)
 * - dataMw: a clone of the wikilink token's, or null
 *
 * The returned content is:
 * - piped link, $buildDOMFragment false: the link text tokens, with <a>
 *   and </a> tokens removed and a '|' string between the individual
 *   content attributes
 * - piped link, $buildDOMFragment true: a single token obtained from
 *   PipelineUtils::getDOMFragmentToken() for those tokens
 * - simple link: a single string derived from the target href
 *
 * @param Token $newTk Token to set up; modified in place.
 * @param Token $token The wikilink token.
 * @param stdClass $target Target info as built by getWikiLinkTargetInfo().
 * @param bool $buildDOMFragment
 * @return array
 * @throws InternalException If the link text contains media, another
 *   wikilink, or a category link.
 */
private function addLinkAttributesAndGetContent(
	Token $newTk, Token $token, stdClass $target, bool $buildDOMFragment = false
): array {
	$attribs = $token->attribs;
	$dataParsoid = $token->dataParsoid;
	$dataMw = $token->dataMw;
	$newAttrData = self::buildLinkAttrs( $attribs, true, null, [ new KV( 'rel', 'mw:WikiLink' ) ] );
	// One KV per "mw:maybeContent" attribute on the wikilink token
	$content = $newAttrData['contentKVs'];
	$env = $this->env;

	// Set attribs and dataParsoid
	$newTk->attribs = $newAttrData['attribs'];
	$newTk->dataParsoid = clone $dataParsoid;
	$newTk->dataMw = $dataMw !== null ? clone $dataMw : null;
	unset( $newTk->dataParsoid->src ); // clear src string since we can serialize this

	// Note: Link tails are handled on the DOM in handleLinkNeighbours, so no
	// need to handle them here.
	$l = count( $content );
	if ( $l > 0 ) {
		$newTk->dataParsoid->stx = 'piped';
		$out = [];
		// re-join content bits
		foreach ( $content as $i => $kv ) {
			$toks = $kv->v;
			// since this is already a link, strip autolinks from content
			// FIXME: Maybe add a stop in the grammar so that autolinks
			// aren't tokenized in link content to begin with?
			if ( !is_array( $toks ) ) {
				$toks = [ $toks ];
			}

			// Drop empty strings and reindex, so that the $j + 1
			// lookahead below sees the next real token
			$toks = array_values( array_filter( $toks, static function ( $t ) {
				return $t !== '';
			} ) );
			$n = count( $toks );
			foreach ( $toks as $j => $t ) {
				// Bail on media-syntax in wikilink-syntax scenarios,
				// since the legacy parser explodes on [[, last one wins.
				// Note that without this, anchors tags in media output
				// will be stripped and we won't have the right structure
				// when we get to the dom pass to add media info.
				if (
					$t instanceof TagTk &&
					( $t->getName() === 'figure' || $t->getName() === 'span' ) &&
					TokenUtils::matchTypeOf( $t, '#^mw:File($|/)#D' ) !== null
				) {
					throw new InternalException( 'Media-in-link' );
				}

				if ( $t instanceof TagTk && $t->getName() === 'a' ) {
					// Bail on wikilink-syntax in wikilink-syntax scenarios,
					// since the legacy parser explodes on [[, last one wins
					if (
						preg_match(
							'#^mw:WikiLink(/Interwiki)?$#D',
							$t->getAttributeV( 'rel' ) ?? ''
						) &&
						// ISBN links don't use wikilink-syntax but still
						// get the same "rel", so should be ignored
						( $t->dataParsoid->stx ?? '' ) !== 'magiclink'
					) {
						throw new InternalException( 'Link-in-link' );
					}
					if ( $j + 1 < $n && $toks[$j + 1] instanceof EndTagTk &&
						$toks[$j + 1]->getName() === 'a'
					) {
						// autonumbered links in the stream get rendered
						// as an <a> tag with no content -- but these ought
						// to be treated as plaintext since we don't allow
						// nested links.
						$out[] = '[' . $t->getAttributeV( 'href' ) . ']';
					}
					// suppress <a>
					continue;
				}

				// Categories also use wikilink syntax so we bail to match
				// legacy output.  However, this isn't an a-in-a scenario
				// so maybe should be permitted in the future.
				if (
					$t instanceof SelfclosingTagTk && $t->getName() === 'link' &&
					preg_match(
						'#^mw:PageProp/Category$#D',
						$t->getAttributeV( 'rel' ) ?? ''
					)
				) {
					throw new InternalException( 'Category-in-link' );
				}

				if ( $t instanceof EndTagTk && $t->getName() === 'a' ) {
					continue; // suppress </a>
				}

				$out[] = $t;
			}
			// Put back the pipe that separated this content attribute
			// from the next one
			if ( $i < $l - 1 ) {
				$out[] = '|';
			}
		}

		if ( $buildDOMFragment ) {
			// content = [part 0, .. part l-1]
			// offsets = [start(part-0), end(part l-1)]
			$offsets = isset( $dataParsoid->tsr ) ?
				new SourceRange( $content[0]->srcOffsets->value->start,
					$content[$l - 1]->srcOffsets->value->end ) : null;
			$content = [ PipelineUtils::getDOMFragmentToken( $out, $offsets,
				[ 'inlineContext' => true, 'token' => $token ] ) ];
		} else {
			$content = $out;
		}
	} else {
		// No link text was given: derive it from the target
		$newTk->dataParsoid->stx = 'simple';
		$morecontent = Utils::decodeURIComponent( $target->href );

		// Try to match labeling in core
		if ( $env->getSiteConfig()->namespaceHasSubpages(
			$env->getContextTitle()->getNamespace()
		) ) {
			// subpage links with a trailing slash get the trailing slashes stripped.
			// See https://gerrit.wikimedia.org/r/173431
			if ( preg_match( '#^((\.\./)+|/)(?!\.\./)(.*?[^/])/+$#D', $morecontent, $match ) ) {
				$morecontent = $match[3];
			} elseif ( str_starts_with( $morecontent, '../' ) ) {
				// Subpages on interwiki / language links aren't valid,
				// so $target->title should always be present here
				$morecontent = $target->title->getPrefixedText();
			}
		}

		// for interwiki links, include the interwiki prefix in the link text
		if ( $target->interwiki ) {
			$morecontent = $target->prefix . ':' . $morecontent;
		}

		// for local links, include the local prefix in the link text
		if ( $target->localprefix ) {
			$morecontent = $target->localprefix . ':' . $morecontent;
		}

		$content = [ $morecontent ];
	}
	return $content;
}
625 | |
/**
 * Render a plain wiki link as <a>, link content, </a>.
 *
 * If the link content cannot be used inside a link, the token is
 * re-emitted via bailTokens() instead.
 *
 * @param Token $token
 * @param stdClass $target
 * @return TokenHandlerResult
 */
private function renderWikiLink( Token $token, stdClass $target ): TokenHandlerResult {
	$anchor = new TagTk( 'a' );
	try {
		$tokens = $this->addLinkAttributesAndGetContent( $anchor, $token, $target, true );
	} catch ( InternalException $e ) {
		return new TokenHandlerResult( self::bailTokens( $this->manager, $token ) );
	}

	$title = $target->title;
	$anchor->addNormalizedAttribute( 'href', $this->env->makeLink( $title ), $target->hrefSrc );
	$anchor->setAttribute( 'title', $title->getPrefixedText() );

	// Wrap the content in the opening and closing anchor tokens.
	array_unshift( $tokens, $anchor );
	$tokens[] = new EndTagTk( 'a' );

	return new TokenHandlerResult( $tokens );
}
648 | |
/**
 * Render a category 'link'. Categories are really page properties, and are
 * normally rendered in a box at the bottom of an article.
 *
 * Emits a single <link rel="mw:PageProp/Category"> token and records the
 * category (with its sort key) in the page metadata.
 *
 * @param Token $token
 * @param stdClass $target
 * @return TokenHandlerResult
 */
private function renderCategory( Token $token, stdClass $target ): TokenHandlerResult {
	$link = new SelfclosingTagTk( 'link' );
	try {
		$content = $this->addLinkAttributesAndGetContent( $link, $token, $target );
	} catch ( InternalException $e ) {
		return new TokenHandlerResult( self::bailTokens( $this->manager, $token ) );
	}
	$env = $this->env;

	// Change the rel to be mw:PageProp/Category
	$link->getAttributeKV( 'rel' )->v = 'mw:PageProp/Category';

	$link->addNormalizedAttribute( 'href', $env->makeLink( $target->title ), $target->hrefSrc );

	// The sort key is the link content with newlines removed, unless that
	// is empty or just repeats the target; '' means default sorting.
	$sortKey = '';
	$contentText = str_replace( "\n", '', TokenUtils::tokensToString( $content ) );
	if ( $contentText !== '' && $contentText !== $target->href ) {
		$sortKey = $contentText;
		// Change the href to include the sort key (but don't update the rt info)
		$escapedKey = str_replace( '#', '%23', Sanitizer::sanitizeTitleURI( $sortKey, false ) );
		$link->getAttributeKV( 'href' )->v .= '#' . $escapedKey;
	}

	if ( count( $content ) !== 1 ) {
		// Deal with sort keys that come from generated content (transclusions, etc.)
		$sortKeyHtml = PipelineUtils::expandAttrValueToDOM(
			$env,
			$this->manager->getFrame(),
			[
				'html' => $content,
				'srcOffsets' => $token->getAttributeKV( 'mw:maybeContent' )->valueOffset()
			],
			$this->options['expandTemplates'],
			$this->options['inTemplate']
		);
		$sortKeyAttr = new DataMwAttrib( [ 'txt' => 'mw:sortKey' ], $sortKeyHtml );
		if ( $link->dataMw ) {
			$link->dataMw->attribs[] = $sortKeyAttr;
		} else {
			$link->dataMw = new DataMw( [ 'attribs' => [ $sortKeyAttr ] ] );
		}

		// Mark token as having expanded attrs
		$link->addAttribute( 'about', $env->newAboutId() );
		$link->addSpaceSeparatedAttribute( 'typeof', 'mw:ExpandedAttrs' );
	}

	$env->getMetadata()->addCategory( $target->title, $sortKey );
	return new TokenHandlerResult( [ $link ] );
}
710 | |
/**
 * Render a language link. Those normally appear in the list of alternate
 * languages for an article in the sidebar, so are really a page property.
 *
 * Emits a single <link rel="mw:PageProp/Language"> token with an absolute
 * href and records the language link in the page metadata.
 *
 * @param Token $token
 * @param stdClass $target
 * @return TokenHandlerResult
 */
private function renderLanguageLink( Token $token, stdClass $target ): TokenHandlerResult {
	// The prefix is listed in the interwiki map

	// TODO: If $target->language['deprecated'] is set and
	// $target->language['extralanglink'] is *not* set, then we
	// should use the normalized language name/prefix (from
	// 'deprecated') when calling
	// ContentMetadataCollector::addLanguageLink() here (which
	// we should eventually be doing)

	// TODO: might also want to add the language *code* here,
	// which would be the language['bcp47'] property (added in
	// change I82465261bc66f0b0cd30d361c299f08066494762) for an
	// extralanglink, or the interwiki prefix otherwise; the
	// latter is mediawiki-internal and maybe not BCP-47 compliant.
	// This is for clients of the MediaWiki DOM spec HTML: the
	// WMF domain prefix, the MediaWiki internal language code,
	// and the actual *language* (ie bcp-47 code) can all differ
	// from each other, due to various historical infelicities.
	// Perhaps a `lang` attribute on the `link` would be appropriate.

	$link = new SelfclosingTagTk( 'link', [], $token->dataParsoid );
	try {
		// Only the attribute setup is needed; the content is discarded.
		$this->addLinkAttributesAndGetContent( $link, $token, $target );
	} catch ( InternalException $e ) {
		return new TokenHandlerResult( self::bailTokens( $this->manager, $token ) );
	}

	$language = $target->language;

	// add title attribute giving the presentation name of the
	// "extra language link"
	// T329303: the 'linktext' comes from the system message
	// `interlanguage-link-$prefix` and should be set in integrated mode
	// using the localization features; the integrated-mode SiteConfig
	// currently never sets the `linktext` property in
	// SiteConfig::interwikiMap().
	// I52d50e2f75942a849908c6be7fc5169f00a5983a has some partial work
	// on this.
	$hasLinkText = isset( $language['extralanglink'] ) && !empty( $language['linktext'] );
	if ( $hasLinkText ) {
		// XXX in standalone mode, this is user-interface-language text,
		// not "content language" text.
		$link->addNormalizedAttribute( 'title', $language['linktext'], null );
	}

	// We set an absolute link to the article in the other wiki/language
	$pageName = Sanitizer::sanitizeTitleURI( Utils::decodeURIComponent( $target->href ), false );
	$absHref = str_replace( '$1', $pageName, $language['url'] );
	if ( isset( $language['protorel'] ) ) {
		// Make the link protocol-relative by dropping a leading scheme
		$absHref = preg_replace( '/^https?:/', '', $absHref, 1 );
	}
	$link->addNormalizedAttribute( 'href', $absHref, $target->hrefSrc );

	// Change the rel to be mw:PageProp/Language
	$link->getAttributeKV( 'rel' )->v = 'mw:PageProp/Language';

	// Add language link(s) to metadata
	$this->env->getMetadata()->addLanguageLink( $language['title'] );

	return new TokenHandlerResult( [ $link ] );
}
780 | |
/**
 * Render an interwiki link as an `a` element with rel
 * `mw:WikiLink/Interwiki`.
 *
 * @param Token $token The wikilink token being rendered
 * @param stdClass $target Link target info; the `interwiki`, `href` and
 *   `hrefSrc` properties are read here
 * @return TokenHandlerResult Start tag, link content and end tag, or the
 *   bail-out tokens when building the link attributes throws an
 *   InternalException
 */
private function renderInterwikiLink( Token $token, stdClass $target ): TokenHandlerResult {
	// The prefix is listed in the interwiki map

	$anchor = new TagTk( 'a', [], $token->dataParsoid );
	try {
		$linkContent = $this->addLinkAttributesAndGetContent( $anchor, $token, $target, true );
	} catch ( InternalException $e ) {
		return new TokenHandlerResult( self::bailTokens( $this->manager, $token ) );
	}

	$iwInfo = $target->interwiki;
	$hrefTrimmed = trim( $target->href );

	// The href is an absolute link to the article in the other wiki/language.
	// The second sanitizer argument is true only for non-local interwikis.
	$encodedTitle = Sanitizer::sanitizeTitleURI(
		Utils::decodeURIComponent( $hrefTrimmed ),
		empty( $iwInfo['local'] )
	);
	$url = str_replace( '$1', $encodedTitle, $iwInfo['url'] );
	if ( isset( $iwInfo['protorel'] ) ) {
		// Protocol-relative target: drop a leading "http:" / "https:"
		$url = preg_replace( '/^https?:/', '', $url, 1 );
	}
	$anchor->addNormalizedAttribute( 'href', $url, $target->hrefSrc );

	$relKV = $anchor->getAttributeKV( 'rel' );
	$relKV->v = 'mw:WikiLink/Interwiki';

	// Add title unless it's just a fragment (and trim off fragment)
	// (The normalization here is similar to what Title#getPrefixedDBKey() does.)
	$isFragmentOnly = $target->href !== '' && $target->href[0] === '#';
	if ( !$isFragmentOnly ) {
		$withoutFragment = preg_replace( '/#.*/s', '', $hrefTrimmed, 1 );
		$pageName = Utils::decodeURIComponent( str_replace( '_', ' ', $withoutFragment ) );
		$anchor->setAttribute( 'title', $iwInfo['prefix'] . ':' . $pageName );
	}

	$tokens = [ $anchor ];
	PHPUtils::pushArray( $tokens, $linkContent );
	$tokens[] = new EndTagTk( 'a' );

	return new TokenHandlerResult( $tokens );
}
828 | |
// Horizontal alignment option values recognised for the media wrapper.
// Each entry notes the wrapper the legacy PHP parser emits for it.
private const HORIZONTAL_ALIGNS = [
	// PHP parser wraps in <div class="floatleft">
	'left',
	// PHP parser wraps in <div class="floatright">
	'right',
	// PHP parser wraps in <div class="center"><div class="floatnone">
	'center',
	// PHP parser wraps in <div class="floatnone">
	'none',
];
// Vertical alignment values, spelled as they appear in the generated
// `mw-valign-*` class names (hyphenated).
private const VERTICAL_ALIGNS = [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
	'bottom', 'text-bottom' ];
841 | |
/**
 * Get the class list and inline/block decision for an image's wrapper element.
 *
 * @param array $opts The option hash from renderFile.
 * @return array with
 *   - bool isInline: Whether the image is inline after handling options.
 *   - string[] classes: The list of classes for the wrapper.
 */
private static function getWrapperInfo( array $opts ) {
	$format = self::getFormat( $opts );
	$isInline = !in_array( $format, [ 'thumbnail', 'manualthumb', 'framed' ], true );
	$classes = [];

	if (
		!isset( $opts['size']['src'] ) &&
		// Framed and manualthumb images aren't scaled
		!in_array( $format, [ 'manualthumb', 'framed' ], true )
	) {
		$classes[] = 'mw-default-size';
	}

	// Border isn't applicable to 'thumbnail', 'manualthumb', or 'framed' formats
	// Using $isInline as a shorthand for that here (see above),
	// but this isn't about being *inline* per se
	if ( $isInline && isset( $opts['border'] ) ) {
		$classes[] = 'mw-image-border';
	}

	// Any recognised horizontal alignment turns the media into a block
	$halign = $opts['halign']['v'] ?? null;
	if ( in_array( $halign, self::HORIZONTAL_ALIGNS, true ) ) {
		$isInline = false;
		$classes[] = "mw-halign-$halign";
	}

	// Vertical alignment only applies to inline media
	if ( $isInline ) {
		$valignOpt = $opts['valign']['v'] ?? null;
		if ( is_string( $valignOpt ) ) {
			// Normalize *before* the membership test: VERTICAL_ALIGNS uses the
			// hyphenated spelling ("text-top"), so an underscore-spelled value
			// ("text_top") would otherwise never match and its class would be
			// silently dropped.
			$valign = str_replace( '_', '-', $valignOpt );
			if ( in_array( $valign, self::VERTICAL_ALIGNS, true ) ) {
				$classes[] = "mw-valign-$valign";
			}
		}
	}

	return [ 'classes' => $classes, 'isInline' => $isInline ];
}
884 | |
/**
 * Determine the name of an option.
 *
 * A literal ("simple") option match is tried first; failing that, a
 * parameterized prefix match (ie, img_width => `$1px`) is attempted.
 *
 * @param string $optStr The option text as written
 * @param Env $env
 * @return array|null Null when $optStr is not a recognized option, else
 *   ck Canonical key for the image option.
 *   v Value of the option.
 *   ak Aliased key for the image option - includes `"$1"` for placeholder.
 *   s Whether it's a simple option or one with a value.
 */
private static function getOptionInfo( string $optStr, Env $env ): ?array {
	$trimmed = trim( $optStr );
	$siteConfig = $env->getSiteConfig();
	$matchPrefixOption = $siteConfig->getMediaPrefixParameterizedAliasMatcher();

	// $trimmed contains the localized name of this option. The
	// canonical option names (from mediawiki upstream) are in
	// English and contain an '(img|timedmedia)_' prefix. We drop the
	// prefix before stuffing them in data-parsoid in order to
	// save space.
	$canonicalName = $siteConfig->getMagicWordForMediaOption( $trimmed ) ?? '';

	// The 'SimpleOptions' entry is the key we'd put in opts; it names the
	// 'group' for the option, and doesn't have an img_ prefix.
	$simpleGroup = Consts::$Media['SimpleOptions'][$canonicalName] ?? null;
	if ( $simpleGroup ) {
		return [
			'ck' => $simpleGroup,
			'v' => preg_replace( '/^(img|timedmedia)_/', '', $canonicalName, 1 ),
			'ak' => $optStr,
			's' => true
		];
	}

	// If there isn't a literal match for the option, look for a
	// prefix match (ie, img_width => `$1px`)

	// *Note* that the legacy parser doesn't have a "principled"
	// precedence here (T372935), it just so happens that members
	// of Consts::PrefixOptions like
	// img_width/img_page/img_lang/timedmedia_* are added last (as
	// handler parameters), and other prefixed options like
	// img_link/img_alt/img_class *happen* to be last in the
	// $internalParamMap. But the possibility for conflicts
	// between prefixed parameters and literal options still
	// exists in the legacy parser.
	$match = $matchPrefixOption( $trimmed );
	if ( !$match ) {
		return null;
	}

	// $match['a'] *used to have* the localized name for the prefix option
	// (see SiteConfig::getMediaPrefixParameterizedAliasMatcher, this was
	// dropped in the port from JS.)
	// with $1 as a placeholder for the value, which is in $match['v'].
	// $prefixedName is the canonical English option name
	// (from mediawiki upstream) with a prefix.
	$prefixedName = mb_strtolower( trim( $match['k'] ) );

	// The 'PrefixOptions' entry is the parsoid 'group' for the option; here
	// it only serves to confirm that this is a known prefix option.
	if ( empty( Consts::$Media['PrefixOptions'][$prefixedName] ) ) {
		return null;
	}

	// Note that we deliberately do entity decoding
	// *after* splitting so that HTML-encoded pipes don't
	// separate options. This matches PHP, whether or
	// not it's a good idea.
	return [
		'ck' => preg_replace( '/^(img|timedmedia)_/', '', $prefixedName, 1 ),
		'v' => Utils::decodeWtEntities( $match['v'] ),
		'ak' => $optStr,
		's' => false
	];
}
960 | |
/**
 * Whether the option accumulated so far may contain arbitrary wikitext.
 *
 * @param Env $env
 * @param ?array &$optInfo Cached option info; when null it is looked up from
 *   $prefix . $resultStr and the result is stored back for the caller
 * @param string $prefix Text preceding $resultStr in the option
 * @param string $resultStr Text stringified so far
 * @return bool True for the 'link' and 'alt' options only
 */
private static function isWikitextOpt(
	Env $env, ?array &$optInfo, string $prefix, string $resultStr
): bool {
	// link and alt options are allowed to contain arbitrary
	// wikitext (even though only strings are supported in reality)
	// FIXME(SSS): Is this actually true of all options rather than
	// just link and alt?
	$optInfo ??= self::getOptionInfo( $prefix . $resultStr, $env );
	if ( $optInfo === null ) {
		return false;
	}
	$canonicalKey = $optInfo['ck'];
	return $canonicalKey === 'link' || $canonicalKey === 'alt';
}
973 | |
/**
 * Make option token streams into a stringy thing that we can recognize.
 *
 * Strings are concatenated, nested arrays are handled recursively, and
 * tags are only tolerated where the option seen so far is one that may
 * carry wikitext (see isWikitextOpt). Anything else is taken as a sign
 * that the stream is a caption rather than an option.
 *
 * @param array $tstream Tokens, strings and/or nested arrays of those
 * @param string $prefix Anything that came before this part of the recursive call stack.
 * @param Env $env
 * @return string|null The stringified option, or null if the stream can't
 *   be an option (most likely it's a caption)
 */
private static function stringifyOptionTokens( array $tstream, string $prefix, Env $env ) {
	// Seems like this should be a more general "stripTags"-like function?
	$skipToEndOf = null;
	$optInfo = null;
	$resultStr = '';

	for ( $i = 0, $numTokens = count( $tstream ); $i < $numTokens; $i++ ) {
		$currentToken = $tstream[$i];

		// Swallow everything up to and including the matching end tag
		if ( $skipToEndOf ) {
			if ( $currentToken instanceof EndTagTk && $currentToken->getName() === $skipToEndOf ) {
				$skipToEndOf = null;
			}
			continue;
		}

		if ( is_string( $currentToken ) ) {
			$resultStr .= $currentToken;
		} elseif ( is_array( $currentToken ) ) {
			$nextResult = self::stringifyOptionTokens( $currentToken, $prefix . $resultStr, $env );

			if ( $nextResult === null ) {
				return null;
			}

			$resultStr .= $nextResult;
		} elseif ( !( $currentToken instanceof EndTagTk ) ) {
			// This is actually a token
			if ( TokenUtils::hasDOMFragmentType( $currentToken ) ) {
				if ( self::isWikitextOpt( $env, $optInfo, $prefix, $resultStr ) ) {
					$str = TokenUtils::tokensToString( [ $currentToken ], false, [
							// These tokens haven't been expanded to DOM yet
							// so unpacking them here is justifiable
							// FIXME: It's a little convoluted to figure out
							// that this is actually the case in the
							// AttributeExpander, but it seems like only
							// target/href ever gets expanded to DOM and
							// the rest of the wikilink_content/options
							// become mw:maybeContent that gets expanded
							// below where $hasExpandableOpt is set.
							'unpackDOMFragments' => true,
							// FIXME: Sneaking in `env` to avoid changing the signature
							'env' => $env
						]
					);
					// Entity encode pipes since we wouldn't have split on
					// them from fragments and we're about to attempt to
					// when this function returns.
					// This is similar to getting the shadow "href" below.
					// The replacement must be the entity, not a literal
					// pipe, or this is a no-op and the caller splits the
					// option at this point.
					// NOTE(review): only the first pipe is encoded (limit 1);
					// confirm whether all pipes should be.
					$resultStr .= preg_replace( '/\|/', '&vert;', $str, 1 );
					$optInfo = null; // might change the nature of opt
					continue;
				} else {
					// if this is a nowiki, we must be in a caption
					return null;
				}
			}
			if ( $currentToken->getName() === 'mw-quote' ) {
				if ( self::isWikitextOpt( $env, $optInfo, $prefix, $resultStr ) ) {
					// just recurse inside
					$optInfo = null; // might change the nature of opt
					continue;
				}
				return null;
			}
			// Similar to TokenUtils.tokensToString()'s includeEntities
			if ( TokenUtils::isEntitySpanToken( $currentToken ) ) {
				$resultStr .= $currentToken->dataParsoid->src;
				$skipToEndOf = 'span';
				continue;
			}
			if ( $currentToken->getName() === 'a' ) {
				if ( $optInfo === null ) {
					$optInfo = self::getOptionInfo( $prefix . $resultStr, $env );
					if ( $optInfo === null ) {
						// An <a> tag before a valid option?
						// This is most likely a caption.
						return null;
					}
				}

				if ( self::isWikitextOpt( $env, $optInfo, $prefix, $resultStr ) ) {
					$tokenType = $currentToken->getAttributeV( 'rel' );
					// Using the shadow since entities (think pipes) would
					// have already been decoded.
					$tkHref = $currentToken->getAttributeShadowInfo( 'href' )['value'];
					$isLink = $optInfo && $optInfo['ck'] === 'link';
					// Reset the optInfo since we're changing the nature of it
					$optInfo = null;
					// Figure out the proper string to put here and break.
					if (
						$tokenType === 'mw:ExtLink' &&
						( $currentToken->dataParsoid->stx ?? '' ) === 'url'
					) {
						// Add the URL
						$resultStr .= $tkHref;
						// Tell our loop to skip to the end of this tag
						$skipToEndOf = 'a';
					} elseif ( $tokenType === 'mw:WikiLink/Interwiki' ) {
						if ( $isLink ) {
							$resultStr .= $currentToken->getAttributeV( 'href' );
							// Jump over the two tokens following the start tag
							// (presumably the link content and the closing
							// </a> -- TODO confirm)
							$i += 2;
							continue;
						}
						// Nothing to do -- the link content will be
						// captured by walking the rest of the tokens.
					} elseif ( $tokenType === 'mw:WikiLink' || $tokenType === 'mw:MediaLink' ) {

						// Nothing to do -- the link content will be
						// captured by walking the rest of the tokens.
					} else {
						// There shouldn't be any other kind of link...
						// This is likely a caption.
						return null;
					}
				} else {
					// Why would there be an a tag without a link?
					return null;
				}
			}
		}
	}

	return $resultStr;
}
1110 | |
/**
 * Get the format for media.
 *
 * A truthy 'manualthumb' option takes precedence over any 'format' option.
 *
 * @param array $opts The option hash; only the 'manualthumb' and
 *   'format' keys are consulted, and either may be absent or null
 * @return string|null 'manualthumb', the value of the format option, or
 *   null when no format was given
 */
private static function getFormat( array $opts ): ?string {
	// !empty() rather than a bare read so that a hash without a
	// 'manualthumb' key doesn't raise an "undefined array key" warning;
	// this mirrors the tolerant `??` lookup used for 'format' below.
	if ( !empty( $opts['manualthumb'] ) ) {
		return 'manualthumb';
	}
	return $opts['format']['v'] ?? null;
}
1123 | |
/** @var array Lazily built cache for getUsed(); empty until first use */
private array $used = [];

/**
 * This is the set of file options that apply to the container, rather
 * than the media element itself (or, apply generically to a span).
 * Other options depend on the fetched media type and won't necessarily be
 * applied.
 *
 * The set is built on first call and memoized in $this->used.
 *
 * NOTE(review): the set is built from VERTICAL_ALIGNS, which spells
 * text-top / text-bottom with hyphens; confirm that this matches the
 * option keys the set is looked up with.
 *
 * @return array Set keyed by option name, as produced by PHPUtils::makeSet()
 */
private function getUsed(): array {
	if ( !$this->used ) {
		$containerOpts = [
			'lang', 'width', 'class', 'upright',
			'border', 'frameless', 'framed', 'thumbnail',
		];
		$this->used = PHPUtils::makeSet(
			array_merge( $containerOpts, self::HORIZONTAL_ALIGNS, self::VERTICAL_ALIGNS )
		);
	}
	return $this->used;
}
1150 | |
/**
 * Whether the given top-level tokens include a transclusion marker, ie
 * a self-closing tag token with typeof mw:Transclusion.
 *
 * @param array $toks Tokens to inspect; nested arrays are not descended into
 * @return bool
 */
private function hasTransclusion( array $toks ): bool {
	foreach ( $toks as $tok ) {
		if ( !( $tok instanceof SelfclosingTagTk ) ) {
			continue;
		}
		if ( TokenUtils::hasTypeOf( $tok, 'mw:Transclusion' ) ) {
			return true;
		}
	}
	return false;
}
1162 | |
/**
 * Render a file. This can be an image, a sound, a PDF etc.
 *
 * The file's options are parsed into an option hash and recorded in
 * data-parsoid (optList) for round-tripping. The emitted tokens form a
 * `span` or `figure` container around a placeholder anchor + span
 * (class "mw-file-element mw-broken-media") naming the file. The caption
 * is either serialized into data-mw (inline media) or emitted as a
 * `figcaption` (block media).
 *
 * @param Token $token The wikilink token being rendered
 * @param stdClass $target Link target info; only `title` is read here
 * @return TokenHandlerResult
 */
private function renderFile( Token $token, stdClass $target ): TokenHandlerResult {
	$manager = $this->manager;
	$env = $this->env;

	// FIXME: Re-enable use of media cache and figure out how that fits
	// into this new processing model. See T98995

	$dataParsoid = clone $token->dataParsoid;
	$dataParsoid->optList = [];

	// Account for the possibility of an expanded target
	$dataMw = $token->dataMw ?? new DataMw();

	$opts = [
		'title' => [
			'v' => $env->makeLink( $target->title ),
			'src' => $token->getAttributeKV( 'href' )->vsrc
		],
		'size' => [
			'v' => [
				'height' => null,
				'width' => null
			]
		],
		// Initialize these properties to avoid isset checks
		'caption' => null,
		'format' => null,
		'manualthumb' => null,
		'class' => null
	];

	$hasExpandableOpt = false;

	$optKVs = self::buildLinkAttrs( $token->attribs, true, null, null )['contentKVs'];
	while ( count( $optKVs ) > 0 ) {
		$oContent = array_shift( $optKVs );
		Assert::invariant( $oContent instanceof KV, 'bad type' );

		$origOptSrc = $oContent->v;
		if ( is_array( $origOptSrc ) && count( $origOptSrc ) === 1 ) {
			$origOptSrc = $origOptSrc[0];
		}

		$oText = TokenUtils::tokensToString( $origOptSrc, true, [ 'includeEntities' => true ] );

		if ( !is_string( $oText ) ) {
			// Might be that this is a valid option whose value is just
			// complicated. Try to figure it out, step through all tokens.
			$maybeOText = self::stringifyOptionTokens( $oText, '', $env );
			if ( $maybeOText !== null ) {
				$oText = $maybeOText;
			}
		}

		$optInfo = null;
		if ( is_string( $oText ) ) {
			if ( str_contains( $oText, '|' ) ) {
				// Split the pipe-separated string into pieces
				// and convert each one into a KV obj and add them
				// to the beginning of the array. Note that this is
				// a hack to support templates that provide multiple
				// image options as a pipe-separated string. We aren't
				// really providing editing support for this yet, or
				// ever, maybe.
				//
				// TODO(arlolra): Tables in captions suppress breaking on
				// "linkdesc" pipes so `stringifyOptionTokens` should account
				// for pipes in table cell content. For the moment, breaking
				// here is acceptable since it matches the php implementation
				// bug for bug.
				$pieces = array_map( static function ( $s ) {
					return new KV( 'mw:maybeContent', $s );
				}, explode( '|', $oText ) );
				$optKVs = array_merge( $pieces, $optKVs );

				// Record the fact that we won't provide editing support for this.
				$dataParsoid->uneditable = true;
				continue;
			} else {
				// We're being overly accepting of media options at this point,
				// since we don't know the type yet. After the info request,
				// we'll filter out those that aren't appropriate.
				$optInfo = self::getOptionInfo( $oText, $env );
			}
		}

		// Records the current option as the caption, demoting any caption
		// recorded earlier to a 'bogus' entry in optList.
		$recordCaption = static function () use ( $oContent, $oText, $dataParsoid, &$opts ) {
			$optsCaption = [
				'v' => $oContent->v,
				'src' => $oContent->vsrc ?? $oText,
				'srcOffsets' => $oContent->valueOffset(),
				// remember the position
				'pos' => count( $dataParsoid->optList )
			];
			// if there was a 'caption' previously, round-trip it as a
			// "bogus option".
			if ( !empty( $opts['caption'] ) ) {
				// Wrap the caption opt in an array since the option itself is an array!
				// Without the wrapping, the splicing will flatten the value.
				array_splice( $dataParsoid->optList, $opts['caption']['pos'], 0, [ [
						'ck' => 'bogus',
						'ak' => $opts['caption']['src']
					] ]
				);
				$optsCaption['pos']++;
			}
			$opts['caption'] = $optsCaption;
		};

		// For the values of the caption and options, see
		// getOptionInfo's documentation above.
		//
		// If there are multiple captions, this code always
		// picks the last entry. This is the spec; see
		// "Image with multiple captions" parserTest.
		if ( !is_string( $oText ) || $optInfo === null ||
			// Deprecated options
			in_array( $optInfo['ck'], [ 'disablecontrols' ], true )
		) {
			// No valid option found!?
			// Record for RT-ing
			$recordCaption();
			continue;
		}

		// First option wins, the rest are 'bogus'
		// FIXME: For now, see T305628
		if ( isset( $opts[$optInfo['ck']] ) || (
			// All the formats are simple options with the key "format"
			// except for "manualthumb", so check if the format has been set
			in_array( $optInfo['ck'], [ 'format', 'manualthumb' ], true ) && (
				self::getFormat( $opts ) ||
				( $this->options['extTagOpts']['suppressMediaFormats'] ?? false )
			)
		) ) {
			$dataParsoid->optList[] = [
				'ck' => 'bogus',
				'ak' => $optInfo['ak']
			];
			continue;
		}

		$opt = [
			'ck' => $optInfo['v'],
			'ak' => $oContent->vsrc ?? $optInfo['ak']
		];

		if ( $optInfo['s'] === true ) {
			// Default: Simple image option
			$opts[$optInfo['ck']] = [ 'v' => $optInfo['v'] ];
		} else {
			// Map short canonical name to the localized version used.
			$opt['ck'] = $optInfo['ck'];

			// The MediaWiki magic word for image dimensions is called 'width'
			// for historical reasons
			// Unlike other options, use last-specified width.
			if ( $optInfo['ck'] === 'width' ) {
				// We support a trailing 'px' here for historical reasons
				// (T15500, T53628, T207032)
				$maybeDim = Utils::parseMediaDimensions(
					$env->getSiteConfig(), $optInfo['v'], false, true
				);
				if ( $maybeDim !== null ) {
					if ( $maybeDim['bogusPx'] ) {
						// Lint away redundant unit (T207032)
						$dataParsoid->setTempFlag( TempData::BOGUS_PX );
					}
					$opts['size']['v'] = [
						'width' => Utils::validateMediaParam( $maybeDim['x'] ) ? $maybeDim['x'] : null,
						'height' => array_key_exists( 'y', $maybeDim ) &&
							Utils::validateMediaParam( $maybeDim['y'] ) ? $maybeDim['y'] : null
					];
					// Only round-trip a valid size
					$opts['size']['src'] = $oContent->vsrc ?? $optInfo['ak'];
					// check for duplicated options
					foreach ( $dataParsoid->optList as &$value ) {
						if ( $value['ck'] === 'width' ) {
							$value['ck'] = 'bogus'; // mark the previous definition as bogus, last one wins
							break;
						}
					}
					// Drop the reference so that no alias into optList
					// outlives the loop
					unset( $value );
				} else {
					$recordCaption();
					continue;
				}
			// Lang is a global attribute and can be applied to all media elements
			// for editing and roundtripping. However, not all file handlers will
			// make use of it. This param validation is from the SVG handler but
			// seems generally applicable.
			} elseif ( $optInfo['ck'] === 'lang' && !Language::isValidInternalCode( $optInfo['v'] ) ) {
				$opt['ck'] = 'bogus';
			} elseif (
				$optInfo['ck'] === 'upright' &&
				( !is_numeric( $optInfo['v'] ) || $optInfo['v'] <= 0 )
			) {
				$opt['ck'] = 'bogus';
			} else {
				$opts[$optInfo['ck']] = [
					'v' => $optInfo['v'],
					'src' => $oContent->vsrc ?? $optInfo['ak'],
					'srcOffsets' => $oContent->valueOffset(),
				];
			}
		}

		// Collect option in dataParsoid (becomes data-parsoid later on)
		// for faithful serialization.
		$dataParsoid->optList[] = $opt;

		// Collect source wikitext for image options for possible template expansion.
		$maybeOpt = !isset( $this->getUsed()[$opt['ck']] );
		$expOpt = null;
		// Links more often than not show up as arrays here because they're
		// tokenized as `autourl`. To avoid unnecessarily considering them
		// expanded, we'll use a more restrictive test, at the cost of
		// perhaps missing some edgy behaviour.
		if ( $opt['ck'] === 'link' ) {
			$expOpt = is_array( $origOptSrc ) &&
				$this->hasTransclusion( $origOptSrc );
		} else {
			$expOpt = is_array( $origOptSrc );
		}
		if ( $maybeOpt || $expOpt ) {
			$val = [];
			if ( $expOpt ) {
				$hasExpandableOpt = true;
				$val['html'] = $origOptSrc;
				$val['srcOffsets'] = $oContent->valueOffset();
				$val = PipelineUtils::expandAttrValueToDOM(
					$env, $manager->getFrame(), $val,
					$this->options['expandTemplates'],
					$this->options['inTemplate']
				);
			}

			// This is a bit of an abuse of the "txt" property since
			// `optInfo.v` isn't necessarily wikitext from source.
			// It's a result of the specialized stringifying above, which
			// if interpreted as wikitext upon serialization will result
			// in some (acceptable) normalization.
			//
			// We're storing these options in data-mw because they aren't
			// guaranteed to apply to all media types and we'd like to
			// avoid the need to back them out later.
			//
			// Note that the caption in the legacy parser depends on the
			// exact set of options parsed, which we aren't attempting to
			// try and replicate after fetching the media info, since we
			// consider that more of a bug than a feature. It prevents anyone
			// from ever safely adding media options in the future.
			//
			// See T163582
			if ( $maybeOpt ) {
				$val['txt'] = $optInfo['v'];
			}
			$dataMw->attribs ??= [];
			$dataMw->attribs[] = new DataMwAttrib( $opt['ck'], $val );
		}
	}

	// Add the last caption in the right position if there is one
	if ( isset( $opts['caption'] ) ) {
		// Wrap the caption opt in an array since the option itself is an array!
		// Without the wrapping, the splicing will flatten the value.
		array_splice( $dataParsoid->optList, $opts['caption']['pos'], 0, [ [
				'ck' => 'caption',
				'ak' => $opts['caption']['src']
			] ]
		);
	}

	$format = self::getFormat( $opts );

	// Handle image default sizes and upright option after extracting all
	// options
	if ( $format === 'framed' || $format === 'manualthumb' ) {
		// width and height is ignored for framed and manualthumb images
		// https://phabricator.wikimedia.org/T64258
		$opts['size']['v'] = [ 'width' => null, 'height' => null ];
		// Mark any definitions as bogus
		foreach ( $dataParsoid->optList as &$value ) {
			if ( $value['ck'] === 'width' ) {
				$value['ck'] = 'bogus';
			}
		}
		// Drop the reference so that no alias into optList outlives the loop
		unset( $value );
	} elseif ( $format ) {
		if ( !$opts['size']['v']['height'] && !$opts['size']['v']['width'] ) {
			$defaultWidth = $env->getSiteConfig()->widthOption();
			if ( isset( $opts['upright'] ) ) {
				if ( $opts['upright']['v'] === 'upright' ) { // Simple option
					$defaultWidth *= 0.75;
				} else {
					$defaultWidth *= $opts['upright']['v'];
				}
				// round to nearest 10 pixels
				$defaultWidth = 10 * round( $defaultWidth / 10 );
			}
			$opts['size']['v']['width'] = $defaultWidth;
		}
	}

	$rdfaType = 'mw:File';

	// If the format is something we *recognize*, add the subtype
	switch ( $format ) {
		case 'manualthumb': // FIXME(T305759): Does it deserve its own type?
		case 'thumbnail':
			$rdfaType .= '/Thumb';
			break;
		case 'framed':
			$rdfaType .= '/Frame';
			break;
		case 'frameless':
			$rdfaType .= '/Frameless';
			break;
	}

	// Tell VE that it shouldn't try to edit this
	if ( !empty( $dataParsoid->uneditable ) ) {
		$rdfaType .= ' mw:Placeholder';
	} else {
		unset( $dataParsoid->src );
	}

	$wrapperInfo = self::getWrapperInfo( $opts );

	$isInline = $wrapperInfo['isInline'];
	$containerName = $isInline ? 'span' : 'figure';

	$classes = $wrapperInfo['classes'];
	if ( !empty( $opts['class'] ) ) {
		PHPUtils::pushArray( $classes, explode( ' ', $opts['class']['v'] ) );
	}

	$attribs = [ new KV( 'typeof', $rdfaType ) ];
	if ( count( $classes ) > 0 ) {
		array_unshift( $attribs, new KV( 'class', implode( ' ', $classes ) ) );
	}

	$container = new TagTk( $containerName, $attribs, $dataParsoid );
	$containerClose = new EndTagTk( $containerName );

	if ( $hasExpandableOpt ) {
		$container->addAttribute( 'about', $env->newAboutId() );
		$container->addSpaceSeparatedAttribute( 'typeof', 'mw:ExpandedAttrs' );
	} elseif ( preg_match( '/\bmw:ExpandedAttrs\b/', $token->getAttributeV( 'typeof' ) ?? '' ) ) {
		$container->addSpaceSeparatedAttribute( 'typeof', 'mw:ExpandedAttrs' );
	}

	$span = new TagTk( 'span', [ new KV( 'class', 'mw-file-element mw-broken-media' ) ] );

	// "resource" and "lang" are allowed attributes on spans
	$span->addNormalizedAttribute( 'resource', $opts['title']['v'], $opts['title']['src'] );
	if ( isset( $opts['lang'] ) ) {
		$span->addNormalizedAttribute( 'lang', $opts['lang']['v'], $opts['lang']['src'] );
	}

	// Token's KV attributes only accept strings, Tokens or arrays of those.
	$size = $opts['size']['v'];
	if ( !empty( $size['width'] ) ) {
		$span->addAttribute( 'data-width', (string)$size['width'] );
	}
	if ( !empty( $size['height'] ) ) {
		$span->addAttribute( 'data-height', (string)$size['height'] );
	}

	$anchor = new TagTk( 'a' );
	$anchor->setAttribute( 'href', $this->specialFilePath( $target->title ) );

	$tokens = [
		$container,
		$anchor,
		$span,
		$target->title->getPrefixedText(),
		new EndTagTk( 'span' ),
		new EndTagTk( 'a' )
	];

	$optsCaption = $opts['caption'] ?? null;
	if ( $isInline ) {
		if ( $optsCaption ) {
			if ( !is_array( $optsCaption['v'] ) ) {
				$opts['caption']['v'] = $optsCaption['v'] = [ $optsCaption['v'] ];
			}
			// Parse the caption
			$captionDOM = PipelineUtils::processContentInPipeline(
				$this->env,
				$this->manager->getFrame(),
				array_merge( $optsCaption['v'], [ new EOFTk() ] ),
				[
					'pipelineType' => 'expanded-tokens-to-fragment',
					'pipelineOpts' => [
						'inlineContext' => true,
						'expandTemplates' => $this->options['expandTemplates'],
						'inTemplate' => $this->options['inTemplate']
					],
					'srcOffsets' => $optsCaption['srcOffsets'] ?? null,
					'sol' => true
				]
			);

			// Use parsed DOM given in `captionDOM`
			// FIXME: Does this belong in `dataMw.attribs`?
			// FIXME: This should use a rich attribute with a fragment type
			$dataMw->caption = ContentUtils::ppToXML( $captionDOM, [
				'innerXML' => true,
				'fragment' => true,
			] );
		}
	} else {
		// We always add a figcaption for blocks
		$tsr = $optsCaption['srcOffsets'] ?? null;
		$dp = new DataParsoid;
		$dp->tsr = $tsr;
		$tokens[] = new TagTk( 'figcaption', [], $dp );
		if ( $optsCaption ) {
			if ( is_string( $optsCaption['v'] ) ) {
				$tokens[] = $optsCaption['v'];
			} else {
				$tokens[] = PipelineUtils::getDOMFragmentToken(
					$optsCaption['v'],
					$tsr,
					[ 'inlineContext' => true, 'token' => $token ]
				);
			}
		}
		$tokens[] = new EndTagTk( 'figcaption' );
	}

	if ( !$dataMw->isEmpty() ) {
		$container->dataMw = $dataMw;
	}

	$tokens[] = $containerClose;
	return new TokenHandlerResult( $tokens );
}
1607 | |
/**
 * Build the relative "./Special:FilePath/..." path for a title.
 *
 * @param Title $title Title whose DB key is sanitized and appended to the path
 * @return string
 */
private function specialFilePath( Title $title ): string {
	// NOTE(review): the meaning of the `false` flag is defined by
	// Sanitizer::sanitizeTitleURI, which is not visible from here.
	$sanitizedKey = Sanitizer::sanitizeTitleURI( $title->getDBkey(), false );
	return './Special:FilePath/' . $sanitizedKey;
}
1612 | |
/**
 * Produce the token stream for a media link: an `<a>` start tag, the link
 * content, and the matching end tag.
 *
 * The anchor gets `rel="mw:MediaLink"`, an `href` taken from `$info['url']`
 * (or the Special:FilePath path when no url is available), a `resource`
 * built via the env's makeLink(), and a `title` derived from the last path
 * segment of the href. When `$errs` is non-empty, the anchor is also marked
 * as `mw:Error` and the errors are recorded in its data-mw.
 *
 * If adding the link attributes throws an InternalException, the result of
 * bailTokens() is returned instead.
 *
 * @param Token $token
 * @param stdClass $target
 * @param list<DataMwError> $errs
 * @param ?array{url?:string} $info
 * @return TokenHandlerResult
 */
private function linkToMedia( Token $token, stdClass $target, array $errs, ?array $info ): TokenHandlerResult {
	// Media links point at the original file url, never at the thumburl.
	// The fallback mirrors getPath.
	$href = $info['url'] ?? $this->specialFilePath( $target->title );

	$anchor = new TagTk( 'a' );

	try {
		$content = $this->addLinkAttributesAndGetContent( $anchor, $token, $target );
	} catch ( InternalException $e ) {
		return new TokenHandlerResult( self::bailTokens( $this->manager, $token ) );
	}

	// Overwrite the rel value with mw:MediaLink
	$relKV = $anchor->getAttributeKV( 'rel' );
	$relKV->v = 'mw:MediaLink';

	$anchor->addNormalizedAttribute( 'href', $href, $target->hrefSrc );

	// html2wt relies on the resource instead of trying to parse the href.
	$resource = $this->env->makeLink( $target->title );
	$anchor->addNormalizedAttribute( 'resource', $resource, $target->hrefSrc );

	// The title is the href's last path segment, with underscores turned
	// into spaces to match what the PHP parser currently does.
	$fileName = preg_replace( '#.*/#', '', $href, 1 );
	$anchor->setAttribute( 'title', str_replace( '_', ' ', $fileName ) );

	if ( $errs !== [] ) {
		// Flag the link with the mw:Error RDFa type so that VE and other
		// clients can take client-specific action on it.
		if ( !TokenUtils::hasTypeOf( $anchor, 'mw:Error' ) ) {
			$anchor->addSpaceSeparatedAttribute( 'typeof', 'mw:Error' );
		}

		// Record the errors in data-mw, extending an existing list if any
		$dataMw = $token->dataMw ?? new DataMw;
		if ( !is_array( $dataMw->errors ?? null ) ) {
			$dataMw->errors = $errs;
		} else {
			array_push( $dataMw->errors, ...$errs );
		}
		$anchor->dataMw = $dataMw;
	}

	return new TokenHandlerResult(
		array_merge( [ $anchor ], $content, [ new EndTagTk( 'a' ) ] )
	);
}
1669 | |
1670 | // FIXME: The media request here is only used to determine if this is a |
1671 | // redlink and deserves to be handled in the redlink post-processing pass. |
1672 | |
/**
 * Fetch file info for the target title and delegate to linkToMedia(),
 * passing along an error when the lookup returns nothing or reports a
 * `thumberror`.
 *
 * @param Token $token
 * @param stdClass $target
 * @return TokenHandlerResult
 */
private function renderMedia( Token $token, stdClass $target ): TokenHandlerResult {
	$dataAccess = $this->env->getDataAccess();
	$pageConfig = $this->env->getPageConfig();

	// A single file request with no size constraints; only the first
	// (and only) result entry is of interest.
	$request = [ $target->title->getDBkey(), [ 'height' => null, 'width' => null ] ];
	$info = $dataAccess->getFileInfo( $pageConfig, [ $request ] )[0];

	if ( !$info ) {
		$errs = [
			new DataMwError( 'apierror-filedoesnotexist', [], 'This image does not exist.' )
		];
	} elseif ( isset( $info['thumberror'] ) ) {
		$errs = [
			new DataMwError( 'apierror-unknownerror', [], $info['thumberror'] )
		];
	} else {
		$errs = [];
	}

	return $this->linkToMedia( $token, $target, $errs, $info );
}
1693 | |
/** @inheritDoc */
public function onTag( Token $token ): ?TokenHandlerResult {
	$name = $token->getName();

	// Only wikilink and redirect tokens are handled here; for any other
	// token name this returns null.
	if ( $name === 'wikilink' ) {
		return $this->onWikiLink( $token );
	}
	if ( $name === 'mw:redirect' ) {
		return $this->onRedirect( $token );
	}
	return null;
}
1705 | } |