Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
4.47% |
34 / 760 |
|
0.00% |
0 / 25 |
CRAP | |
0.00% |
0 / 1 |
WikiLinkHandler | |
4.47% |
34 / 760 |
|
0.00% |
0 / 25 |
46746.00 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
hrefParts | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
getWikiLinkTargetInfo | |
55.74% |
34 / 61 |
|
0.00% |
0 / 1 |
38.20 | |||
onRedirect | |
0.00% |
0 / 34 |
|
0.00% |
0 / 1 |
30 | |||
bailTokens | |
0.00% |
0 / 26 |
|
0.00% |
0 / 1 |
6 | |||
onWikiLink | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
56 | |||
wikiLinkHandler | |
0.00% |
0 / 26 |
|
0.00% |
0 / 1 |
156 | |||
buildLinkAttrs | |
0.00% |
0 / 24 |
|
0.00% |
0 / 1 |
156 | |||
addLinkAttributesAndGetContent | |
0.00% |
0 / 65 |
|
0.00% |
0 / 1 |
756 | |||
renderWikiLink | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
6 | |||
renderCategory | |
0.00% |
0 / 35 |
|
0.00% |
0 / 1 |
42 | |||
renderLanguageLink | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
30 | |||
renderInterwikiLink | |
0.00% |
0 / 26 |
|
0.00% |
0 / 1 |
30 | |||
getWrapperInfo | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
72 | |||
getOptionInfo | |
0.00% |
0 / 25 |
|
0.00% |
0 / 1 |
42 | |||
isWikitextOpt | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
12 | |||
stringifyOptionTokens | |
0.00% |
0 / 63 |
|
0.00% |
0 / 1 |
650 | |||
getFormat | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
getUsed | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
6 | |||
hasTransclusion | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
renderFile | |
0.00% |
0 / 242 |
|
0.00% |
0 / 1 |
4290 | |||
specialFilePath | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
linkToMedia | |
0.00% |
0 / 24 |
|
0.00% |
0 / 1 |
30 | |||
renderMedia | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
12 | |||
onTag | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
20 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | /** |
5 | * Simple link handler. |
6 | * |
7 | * TODO: keep round-trip information in meta tag or the like |
8 | */ |
9 | |
10 | namespace Wikimedia\Parsoid\Wt2Html\TT; |
11 | |
12 | use stdClass; |
13 | use Wikimedia\Assert\Assert; |
14 | use Wikimedia\Parsoid\Config\Env; |
15 | use Wikimedia\Parsoid\Core\DomSourceRange; |
16 | use Wikimedia\Parsoid\Core\InternalException; |
17 | use Wikimedia\Parsoid\Core\Sanitizer; |
18 | use Wikimedia\Parsoid\Language\Language; |
19 | use Wikimedia\Parsoid\NodeData\DataMw; |
20 | use Wikimedia\Parsoid\NodeData\DataMwAttrib; |
21 | use Wikimedia\Parsoid\NodeData\DataMwError; |
22 | use Wikimedia\Parsoid\NodeData\DataParsoid; |
23 | use Wikimedia\Parsoid\NodeData\TempData; |
24 | use Wikimedia\Parsoid\Tokens\EndTagTk; |
25 | use Wikimedia\Parsoid\Tokens\EOFTk; |
26 | use Wikimedia\Parsoid\Tokens\KV; |
27 | use Wikimedia\Parsoid\Tokens\SelfclosingTagTk; |
28 | use Wikimedia\Parsoid\Tokens\SourceRange; |
29 | use Wikimedia\Parsoid\Tokens\TagTk; |
30 | use Wikimedia\Parsoid\Tokens\Token; |
31 | use Wikimedia\Parsoid\Utils\ContentUtils; |
32 | use Wikimedia\Parsoid\Utils\DOMCompat; |
33 | use Wikimedia\Parsoid\Utils\DOMUtils; |
34 | use Wikimedia\Parsoid\Utils\PHPUtils; |
35 | use Wikimedia\Parsoid\Utils\PipelineUtils; |
36 | use Wikimedia\Parsoid\Utils\Title; |
37 | use Wikimedia\Parsoid\Utils\TitleException; |
38 | use Wikimedia\Parsoid\Utils\TokenUtils; |
39 | use Wikimedia\Parsoid\Utils\Utils; |
40 | use Wikimedia\Parsoid\Wikitext\Consts; |
41 | use Wikimedia\Parsoid\Wt2Html\PegTokenizer; |
42 | use Wikimedia\Parsoid\Wt2Html\TokenTransformManager; |
43 | |
44 | class WikiLinkHandler extends TokenHandler { |
/** @var PegTokenizer Tokenizer kept around for URL parsing (see constructor) */
private $urlParser;

/** @inheritDoc */
public function __construct( TokenTransformManager $manager, array $options ) {
	parent::__construct( $manager, $options );

	// Nothing to do if a parser has already been set up.
	if ( $this->urlParser ) {
		return;
	}

	// This is really the regular tokenizer; it is only ever meant to be
	// invoked with the url rule (used for image options).
	$this->urlParser = new PegTokenizer( $this->env );
}
61 | |
/**
 * Split a link target at its first colon.
 *
 * The part before the colon must be non-empty; the part after it may be
 * empty but cannot contain a newline.
 *
 * @param string $str
 * @return ?array [ 'prefix' => string, 'title' => string ], or null when
 *   $str does not have that shape.
 */
private static function hrefParts( string $str ): ?array {
	$m = [];
	if ( !preg_match( '/^([^:]+):(.*)$/D', $str, $m ) ) {
		return null;
	}
	[ , $prefix, $title ] = $m;
	return [ 'prefix' => $prefix, 'title' => $title ];
}
69 | |
/**
 * Normalize a wikilink target and work out what kind of thing it points at.
 *
 * The returned object has these properties:
 * - href: The target string, with colon escapes and any consumed
 *   interwiki prefix removed
 * - hrefSrc: The original target wikitext
 * - title: A title object (only set for local page links) *or*
 * - language: An interwikiInfo array (language links) *or*
 * - interwiki: An interwikiInfo array (interwiki links)
 * - localprefix: The local-interwiki prefix (or colon-joined prefixes)
 *   that were stripped from the target, if any
 * - fromColonEscapedText: True if the target was colon-escaped ([[:en:foo]])
 * - prefix: The text before the first colon of the (unescaped) target;
 *   only set when the target contains such a prefix
 *
 * @param Token $token Used for source offsets when recording lints
 * @param string $href
 * @param string $hrefSrc
 * @return stdClass The target info.
 * @throws InternalException When the target starts with more than one colon
 */
private function getWikiLinkTargetInfo( Token $token, string $href, string $hrefSrc ): stdClass {
	$env = $this->env;
	$siteConfig = $env->getSiteConfig();

	// All nullable fields are populated up front so that consumers can
	// read them without isset checks.
	$info = new stdClass;
	$info->href = $href;
	$info->hrefSrc = $hrefSrc;
	$info->interwiki = null;
	$info->language = null;
	$info->localprefix = null;
	$info->fromColonEscapedText = null;

	// One leading colon (optionally preceded by whitespace) is an escape.
	$unpadded = ltrim( $info->href );
	if ( str_starts_with( $unpadded, ':' ) ) {
		$info->fromColonEscapedText = true;
		$info->href = substr( $unpadded, 1 );
	}

	// A colon that is still in first position means the target had
	// several of them: lint (if enabled) and reject.
	if ( str_starts_with( $info->href, ':' ) ) {
		if ( $env->linting( 'multi-colon-escape' ) ) {
			$dsr = DomSourceRange::fromTsr( $token->dataParsoid->tsr );
			$templateInfo = null;
			if ( $this->options['inTemplate'] ) {
				// Mirrors Linter.findEnclosingTemplateName(): turn the
				// frame title into an href with env.makeLink first.
				$frameTitle = $this->manager->getFrame()->getTitle();
				$templateInfo = [
					'name' => PHPUtils::stripPrefix( $env->makeLink( $frameTitle ), './' )
				];
				// TODO(arlolra): Pass tsr info to the frame
				$dsr = new DomSourceRange( 0, 0, null, null );
			}
			$env->recordLint( 'multi-colon-escape', [
				'dsr' => $dsr,
				'params' => [ 'href' => ':' . $info->href ],
				'templateInfo' => $templateInfo
			] );
		}
		// The caller catches this and treats the target as invalid.
		throw new InternalException( 'Multiple colons prefixing href.' );
	}

	$resolved = $env->resolveTitle( Utils::decodeURIComponent( $info->href ) );
	$parts = self::hrefParts( $info->href );

	// No "prefix:" at all => ordinary local title.
	if ( !$parts ) {
		$info->title = $env->makeTitleFromURLDecodedStr( $resolved );
		return $info;
	}

	$prefix = $parts['prefix'];
	$rest = $parts['title'];
	$info->prefix = $prefix;
	$normalized = Utils::normalizeNamespaceName( trim( $prefix ) );
	$iwInfo = $siteConfig->interwikiMapNoNamespaces()[$normalized] ?? null;

	// A known namespace wins over any interwiki interpretation.
	if ( $siteConfig->namespaceId( $normalized ) !== null ) {
		$info->title = $env->makeTitleFromURLDecodedStr( $resolved );
		return $info;
	}

	if ( isset( $iwInfo['localinterwiki'] ) ) {
		if ( $rest === '' ) {
			// Empty title => main page (T66167)
			$info->title = Title::newFromLinkTarget(
				$siteConfig->mainPageLinkTarget(), $siteConfig
			);
			return $info;
		}
		// Analyze what follows the local prefix. If the remainder has a
		// prefix of its own, a colon is prepended, which the recursive
		// call processes as a colon escape.
		$innerHref = str_contains( $rest, ':' ) ? ':' . $rest : $rest;
		$inner = $this->getWikiLinkTargetInfo( $token, $innerHref, $info->hrefSrc );
		$inner->localprefix = $prefix .
			( $inner->localprefix ? ( ':' . $inner->localprefix ) : '' );
		return $inner;
	}

	// Unknown prefix (no interwiki url) => it is just part of a local title.
	if ( empty( $iwInfo['url'] ) ) {
		$info->title = $env->makeTitleFromURLDecodedStr( $resolved );
		return $info;
	}

	$info->href = $rest;
	// Validate the title and keep it for later use. It deliberately goes on
	// the interwiki info rather than $info->title, since the latter
	// signals a wikilink.
	$iwInfo['title'] = $env->makeTitleFromURLDecodedStr( $resolved );

	// Only an unescaped target whose prefix carries language info is a
	// language link; [[:en:Foo]] and plain interwiki prefixes are not.
	$isLanguageLink = !$info->fromColonEscapedText &&
		( isset( $iwInfo['language'] ) || isset( $iwInfo['extralanglink'] ) );
	if ( $isLanguageLink ) {
		$info->language = $iwInfo;
		return $info;
	}

	$info->interwiki = $iwInfo;
	// Drop a colon escape that follows the interwiki prefix
	$unpaddedRest = ltrim( $info->href );
	if ( str_starts_with( $unpaddedRest, ':' ) ) {
		$info->href = substr( $unpaddedRest, 1 );
	}
	return $info;
}
187 | |
/**
 * Handle mw:redirect tokens.
 *
 * Rather than duplicating link processing, the wikilink token embedded in
 * the redirect is run through onWikiLink(); the href of the resulting link
 * is then copied onto a <link rel="mw:PageProp/redirect"> token.
 *
 * @param Token $token
 * @return TokenHandlerResult
 * @throws InternalException
 */
private function onRedirect( Token $token ): TokenHandlerResult {
	$rlink = new SelfclosingTagTk(
		'link',
		Utils::clone( $token->attribs ),
		clone $token->dataParsoid,
		$token->dataMw ? clone $token->dataMw : null
	);
	$wikiLinkTk = $rlink->dataParsoid->linkTk;
	$rlink->setAttribute( 'rel', 'mw:PageProp/redirect' );

	// The redirect link itself carries neither the nested wikilink token
	// nor the cloned href attribute.
	unset( $rlink->dataParsoid->linkTk );
	$rlink->removeAttribute( 'href' );

	// The href may have been template-expanded earlier in the pipeline,
	// so hand the current attributes back to the wikilink token.
	$wikiLinkTk->attribs = Utils::clone( $token->attribs );

	// The "redirect" attribute tells the link handler to treat image and
	// category links as plain links.
	$wikiLinkTk->setAttribute( 'redirect', 'true' );

	// Render the wikilink (interwiki links included) and inspect the result.
	$rendered = $this->onWikiLink( $wikiLinkTk );
	$head = $rendered->tokens[0] ?? null;
	$rendersAsLink = false;
	if ( $head instanceof Token ) {
		$tagName = $head->getName();
		$rendersAsLink = $tagName === 'a' || $tagName === 'link';
	}

	if ( !$rendersAsLink ) {
		// Bail! Emit tokens as if they were parsed as a list item:
		// #REDIRECT....
		$src = $rlink->dataParsoid->src;
		$tsr = $rlink->dataParsoid->tsr;
		preg_match( '/^([^#]*)(#)/', $src, $srcMatch );
		$leading = $srcMatch[1];

		$bailed = [];
		if ( $leading !== '' ) {
			$bailed[] = $leading;
		}

		// Source range covering just the '#' character
		$hashPos = $tsr->start + strlen( $leading );
		$hashTsr = new SourceRange( $hashPos, $hashPos + 1 );
		$liDp = new DataParsoid;
		$liDp->tsr = $hashTsr;
		$bailed[] = new TagTk(
			'listItem',
			[ new KV( 'bullets', [ '#' ], $hashTsr->expandTsrV() ) ],
			$liDp
		);

		// Whatever follows the '#' in the source, then the link output
		$bailed[] = substr( $src, strlen( $srcMatch[0] ) );
		PHPUtils::pushArray( $bailed, $rendered->tokens );
		return new TokenHandlerResult( $bailed );
	}

	// Transfer the rendered link's href (and its source form) to rlink.
	$linkDp = $rendered->tokens[0]->dataParsoid;
	$rlink->addNormalizedAttribute( 'href', $linkDp->a['href'], $linkDp->sa['href'] );
	return new TokenHandlerResult( [ $rlink ] );
}
252 | |
/**
 * Produce replacement tokens for a link that cannot be rendered as a link.
 *
 * The link's source text is looked up in the frame source via the token's
 * tsr. Its first character is dropped (assumed to be the opening '[',
 * since a literal '[' is prepended to the result) and the remainder is
 * re-tokenized in a 'wikitext-to-expanded-tokens' pipeline.
 *
 * @param TokenTransformManager $manager
 * @param Token $token The link token being bailed on
 * @return array A literal '[' followed by the tokens of the re-processed
 *   remainder (EOF token stripped); or just the raw link source if the
 *   remainder could not be determined.
 */
public static function bailTokens( TokenTransformManager $manager, Token $token ): array {
	$frame = $manager->getFrame();
	$tsr = $token->dataParsoid->tsr;
	$frameSrc = $frame->getSrcText();
	$linkSrc = $tsr->substr( $frameSrc );
	$src = substr( $linkSrc, 1 );
	if ( $src === false ) {
		$manager->getEnv()->log(
			'error', 'Unable to determine link source.',
			"frame: $frameSrc", 'tsr: ', $tsr,
			"link: $linkSrc"
		);
		return [ $linkSrc ]; // Forget about trying to tokenize this
	}
	// The re-tokenized source starts one character after the link start
	$startOffset = $tsr->start + 1;
	// Class name spelled exactly as imported (was "PipeLineUtils")
	$toks = PipelineUtils::processContentInPipeline(
		$manager->getEnv(), $frame, $src, [
			'sol' => false,
			'pipelineType' => 'wikitext-to-expanded-tokens',
			'srcOffsets' => new SourceRange( $startOffset, $startOffset + strlen( $src ) ),
			'pipelineOpts' => [
				'expandTemplates' => $manager->getOptions()['expandTemplates'],
				'inTemplate' => $manager->getOptions()['inTemplate'],
			],
		]
	);
	TokenUtils::stripEOFTkfromTokens( $toks );
	return array_merge( [ '[' ], $toks );
}
282 | |
/**
 * Handle a mw:WikiLink token.
 *
 * Targets that cannot be rendered as a link (protocol in the target,
 * nowiki / extension / sealed-fragment content in the target, a pipe in
 * the expanded target, or an invalid title) are turned back into plain
 * tokens via bailTokens(). Everything else is passed to wikiLinkHandler().
 *
 * @param Token $token
 * @return TokenHandlerResult
 * @throws InternalException
 */
private function onWikiLink( Token $token ): TokenHandlerResult {
	$hrefKV = $token->getAttributeKV( 'href' );
	$hrefStr = TokenUtils::tokensToString( $hrefKV->v );

	// Don't allow internal links to pages containing PROTO:
	// See Parser::handleInternalLinks2()
	$hasProtocol = $this->env->getSiteConfig()->hasValidProtocol( $hrefStr );
	if ( $hasProtocol ) {
		return new TokenHandlerResult( self::bailTokens( $this->manager, $token ) );
	}

	// Xmlish tags in title position are invalid. The preprocessor ABNF
	// allows them, but later stages of the legacy parser
	// (handleInternalLinks) do not.
	if ( is_array( $hrefKV->v ) ) {
		// The fragment will already have been released when expanding to
		// DOM, so inspect the expanded attribute value rather than
		// trying to unpackDOMFragments.
		$expandedHtml = $token->fetchExpandedAttrValue( 'href' ) ?? '';
		$typedNodes = DOMCompat::querySelectorAll(
			DOMUtils::parseHTML( $expandedHtml ), '[typeof]'
		);
		foreach ( $typedNodes as $el ) {
			$opaqueType = DOMUtils::matchTypeOf(
				$el, '#^mw:(Nowiki|Extension|DOMFragment/sealed)#'
			);
			if ( $opaqueType !== null ) {
				return new TokenHandlerResult( self::bailTokens( $this->manager, $token ) );
			}
		}
	}

	// A pipe in the expanded href means the 'href' was templated and the
	// template also produced further pipe-separated parameters. There is
	// currently no sensible way to handle that, so make it uneditable.
	// See T226523
	// TODO: add useful debugging info for editors ('if you would like to
	// make this content editable, then fix template X..')
	// TODO: also check other parameters for pipes!
	if ( str_contains( $hrefStr, '|' ) ) {
		return new TokenHandlerResult( self::bailTokens( $this->manager, $token ) );
	}

	try {
		$target = $this->getWikiLinkTargetInfo( $token, $hrefStr, $hrefKV->vsrc );
	} catch ( TitleException | InternalException $e ) {
		// Invalid title
		return new TokenHandlerResult( self::bailTokens( $this->manager, $token ) );
	}

	// The href is usable: dispatch to the renderer matching the target.
	return $this->wikiLinkHandler(
		$token, $target, (bool)$token->getAttributeV( 'redirect' )
	);
}
340 | |
/**
 * Pick the renderer for a WikiLink token based on the structure of its
 * target and invoke it.
 *
 * @param Token $token
 * @param stdClass $target Target info from getWikiLinkTargetInfo()
 * @param bool $isRedirect Whether the link is the target of a redirect;
 *   such links are always rendered as plain wikilinks when they have a title
 * @return TokenHandlerResult
 * @throws InternalException If the target is neither a title, nor an
 *   interwiki or language link
 */
private function wikiLinkHandler(
	Token $token, stdClass $target, bool $isRedirect
): TokenHandlerResult {
	$title = $target->title ?? null;

	if ( !$title ) {
		// language and interwiki links
		if ( $target->interwiki ) {
			return $this->renderInterwikiLink( $token, $target );
		}
		if ( !$target->language ) {
			// Neither a title, nor a language or interwiki. Should not happen.
			throw new InternalException( 'Unknown link type' );
		}
		// Language links degrade to interwiki links on talk pages and
		// when interwiki magic is off.
		$contextNs = $this->env->getContextTitle()->getNamespace();
		$asInterwiki = $this->env->getSiteConfig()->namespaceIsTalk( $contextNs ) ||
			!$this->env->getSiteConfig()->interwikiMagic();
		if ( !$asInterwiki ) {
			return $this->renderLanguageLink( $token, $target );
		}
		$target->interwiki = $target->language;
		return $this->renderInterwikiLink( $token, $target );
	}

	if ( $isRedirect ) {
		return $this->renderWikiLink( $token, $target );
	}

	$siteConfig = $this->env->getSiteConfig();
	$nsId = $title->getNamespace();

	// Media: namespace => media link.
	if ( $nsId === $siteConfig->canonicalNamespaceId( 'media' ) ) {
		return $this->renderMedia( $token, $target );
	}

	// File and category handling only applies to targets that are not
	// colon-escaped and are not pure fragment links (which would otherwise
	// misfire on pages that live in these namespaces).
	$specialNsAllowed = !$target->fromColonEscapedText &&
		( $target->href[0] ?? '' ) !== '#';
	if ( $specialNsAllowed && $nsId === $siteConfig->canonicalNamespaceId( 'file' ) ) {
		return $this->renderFile( $token, $target );
	}
	if ( $specialNsAllowed && $nsId === $siteConfig->canonicalNamespaceId( 'category' ) ) {
		// Category membership
		return $this->renderCategory( $token, $target );
	}

	// Everything else is a plain wiki link.
	return $this->renderWikiLink( $token, $target );
}
404 | |
/**
 * Collect the pieces needed to build a link token from an attribute list.
 *
 * In a single pass over $attrs this
 * - gathers the link-text attributes (key "mw:maybeContent"), but only
 *   when $getLinkText is set,
 * - appends any existing typeof value to $rdfaType, and
 * - picks up the about id.
 *
 * The new attribute list is typeof, about (each only if non-empty)
 * followed by $linkAttrs.
 *
 * @param array $attrs
 * @param bool $getLinkText
 * @param ?string $rdfaType
 * @param ?array $linkAttrs
 * @return array With keys 'attribs' (new attribute list), 'contentKVs'
 *   (link-text attributes) and 'hasRdfaType' (bool)
 */
public static function buildLinkAttrs(
	array $attrs, bool $getLinkText, ?string $rdfaType,
	?array $linkAttrs
): array {
	$contentKVs = [];
	$about = null;

	// about && typeof usually sit at the end of the array, if present at all
	foreach ( $attrs as $kv ) {
		$key = $kv->k;

		if ( $getLinkText && $key === 'mw:maybeContent' ) {
			$contentKVs[] = $kv;
			continue;
		}
		if ( !is_string( $key ) || !$key ) {
			continue;
		}

		$name = trim( $key );
		if ( $name === 'typeof' ) {
			$rdfaType = $rdfaType ? $rdfaType . ' ' . $kv->v : $kv->v;
		} elseif ( $name === 'about' ) {
			$about = $kv->v;
		}
	}

	$collated = [];
	if ( $rdfaType ) {
		$collated[] = new KV( 'typeof', $rdfaType );
	}
	if ( $about ) {
		$collated[] = new KV( 'about', $about );
	}
	if ( $linkAttrs ) {
		PHPUtils::pushArray( $collated, $linkAttrs );
	}

	return [
		'attribs' => $collated,
		'contentKVs' => $contentKVs,
		'hasRdfaType' => $rdfaType !== null
	];
}
464 | |
/**
 * Set up the generic wikilink attributes and data on $newTk, based on the
 * original wikilink token and its target, and return the link content.
 *
 * For piped links the content is built from the mw:maybeContent
 * attributes (joined with '|', with nested <a> tags stripped); otherwise
 * it is a label derived from the target.
 *
 * @param Token $newTk Token to populate (attribs, dataParsoid, dataMw)
 * @param Token $token The original wikilink token
 * @param stdClass $target
 * @param bool $buildDOMFragment Wrap piped content in a DOM fragment token
 * @return array The link content tokens
 * @throws InternalException On media or wikilink syntax nested in the
 *   link content
 */
private function addLinkAttributesAndGetContent(
	Token $newTk, Token $token, stdClass $target, bool $buildDOMFragment = false
): array {
	$srcDp = $token->dataParsoid;
	$srcDataMw = $token->dataMw;
	$built = self::buildLinkAttrs(
		$token->attribs, true, null, [ new KV( 'rel', 'mw:WikiLink' ) ]
	);
	$contentKVs = $built['contentKVs'];
	$env = $this->env;

	// Attributes and (cloned) data for the new token
	$newTk->attribs = $built['attribs'];
	$newTk->dataParsoid = clone $srcDp;
	$newTk->dataMw = $srcDataMw !== null ? clone $srcDataMw : null;
	// No need to keep the src string: this token can be serialized
	unset( $newTk->dataParsoid->src );

	// Note: Link tails are handled on the DOM in handleLinkNeighbours, so
	// there is nothing to do about them here.
	$partCount = count( $contentKVs );

	if ( $partCount === 0 ) {
		// Unpiped link: the label comes from the target itself.
		$newTk->dataParsoid->stx = 'simple';
		$label = Utils::decodeURIComponent( $target->href );

		// Try to match labeling in core
		$contextNs = $env->getContextTitle()->getNamespace();
		if ( $env->getSiteConfig()->namespaceHasSubpages( $contextNs ) ) {
			$match = [];
			// Subpage links with a trailing slash get the trailing
			// slashes stripped.
			// See https://gerrit.wikimedia.org/r/173431
			if ( preg_match( '#^((\.\./)+|/)(?!\.\./)(.*?[^/])/+$#D', $label, $match ) ) {
				$label = $match[3];
			} elseif ( str_starts_with( $label, '../' ) ) {
				// Subpages on interwiki / language links aren't valid,
				// so $target->title should always be present here
				$label = $target->title->getPrefixedText();
			}
		}

		// Interwiki links show their interwiki prefix in the link text ...
		if ( $target->interwiki ) {
			$label = $target->prefix . ':' . $label;
		}
		// ... and local links show their local prefix.
		if ( $target->localprefix ) {
			$label = $target->localprefix . ':' . $label;
		}

		return [ $label ];
	}

	// Piped link: re-join the content parts.
	$newTk->dataParsoid->stx = 'piped';
	$out = [];
	$dropEmptyStrings = static function ( $t ) {
		return $t !== '';
	};
	foreach ( $contentKVs as $i => $kv ) {
		$toks = is_array( $kv->v ) ? $kv->v : [ $kv->v ];
		$toks = array_values( array_filter( $toks, $dropEmptyStrings ) );

		// This is already a link, so autolinks in the content are stripped.
		// FIXME: Maybe add a stop in the grammar so that autolinks
		// aren't tokenized in link content to begin with?
		foreach ( $toks as $j => $t ) {
			if ( $t instanceof TagTk ) {
				$tagName = $t->getName();

				// Bail on media-syntax in wikilink-syntax scenarios,
				// since the legacy parser explodes on [[, last one wins.
				// Without this, anchor tags in media output would be
				// stripped and the structure would be wrong by the time
				// the dom pass adds media info.
				if (
					( $tagName === 'figure' || $tagName === 'span' ) &&
					TokenUtils::matchTypeOf( $t, '#^mw:File($|/)#D' ) !== null
				) {
					throw new InternalException( 'Media-in-link' );
				}

				if ( $tagName === 'a' ) {
					// Bail on wikilink-syntax in wikilink-syntax
					// scenarios, for the same legacy parser reason.
					// ISBN links don't use wikilink-syntax but get the
					// same "rel", so they are exempt.
					$isWikiLinkRel = preg_match(
						'#^mw:WikiLink(/Interwiki)?$#D',
						$t->getAttributeV( 'rel' ) ?? ''
					);
					if ( $isWikiLinkRel && ( $t->dataParsoid->stx ?? '' ) !== 'magiclink' ) {
						throw new InternalException( 'Link-in-link' );
					}

					// Autonumbered links show up as an <a> tag with no
					// content; since nested links aren't allowed, they
					// are emitted as plaintext instead.
					$next = $toks[$j + 1] ?? null;
					if ( $next instanceof EndTagTk && $next->getName() === 'a' ) {
						$out[] = '[' . $t->getAttributeV( 'href' ) . ']';
					}
					// suppress <a>
					continue;
				}
			} elseif ( $t instanceof EndTagTk && $t->getName() === 'a' ) {
				// suppress </a>
				continue;
			}

			$out[] = $t;
		}

		// Restore the pipe between parts (but not after the last one)
		if ( $i < $partCount - 1 ) {
			$out[] = '|';
		}
	}

	if ( !$buildDOMFragment ) {
		return $out;
	}

	// content = [part 0, .. part l-1]
	// offsets = [start(part-0), end(part l-1)]
	$offsets = null;
	if ( isset( $srcDp->tsr ) ) {
		$first = $contentKVs[0];
		$last = $contentKVs[$partCount - 1];
		$offsets = new SourceRange(
			$first->srcOffsets->value->start,
			$last->srcOffsets->value->end
		);
	}
	return [
		PipelineUtils::getDOMFragmentToken(
			$out, $offsets, [ 'inlineContext' => true, 'token' => $token ]
		)
	];
}
609 | |
/**
 * Render a plain wiki link as <a> ... </a>, with the link content wrapped
 * in between. Falls back to bailTokens() if the content is not allowed
 * inside a link.
 *
 * @param Token $token
 * @param stdClass $target
 * @return TokenHandlerResult
 */
private function renderWikiLink( Token $token, stdClass $target ): TokenHandlerResult {
	$anchor = new TagTk( 'a' );
	try {
		$content = $this->addLinkAttributesAndGetContent( $anchor, $token, $target, true );
	} catch ( InternalException $e ) {
		return new TokenHandlerResult( self::bailTokens( $this->manager, $token ) );
	}

	$href = $this->env->makeLink( $target->title );
	$anchor->addNormalizedAttribute( 'href', $href, $target->hrefSrc );
	$anchor->setAttribute( 'title', $target->title->getPrefixedText() );

	$tokens = array_merge( [ $anchor ], $content );
	$tokens[] = new EndTagTk( 'a' );
	return new TokenHandlerResult( $tokens );
}
632 | |
/**
 * Render a category 'link'. Categories are really page properties (they
 * normally show up in a box at the bottom of an article), so the output
 * is a single <link rel="mw:PageProp/Category"> token, and the category
 * is also recorded in the page metadata.
 *
 * @param Token $token
 * @param stdClass $target
 * @return TokenHandlerResult
 */
private function renderCategory( Token $token, stdClass $target ): TokenHandlerResult {
	$link = new SelfclosingTagTk( 'link' );
	try {
		$content = $this->addLinkAttributesAndGetContent( $link, $token, $target );
	} catch ( InternalException $e ) {
		return new TokenHandlerResult( self::bailTokens( $this->manager, $token ) );
	}
	$env = $this->env;

	// Replace the generic wikilink rel with mw:PageProp/Category
	$link->getAttributeKV( 'rel' )->v = 'mw:PageProp/Category';

	$link->addNormalizedAttribute(
		'href', $env->makeLink( $target->title ), $target->hrefSrc
	);

	// A sort key, if present, is appended to the href as a fragment
	// (without touching the rt info). The empty string means default
	// sorting.
	$sortKey = '';
	$flatContent = str_replace( "\n", '', TokenUtils::tokensToString( $content ) );
	$hasSortKey = $flatContent !== '' && $flatContent !== $target->href;
	if ( $hasSortKey ) {
		$sortKey = $flatContent;
		$hrefKV = $link->getAttributeKV( 'href' );
		$hrefKV->v .= '#';
		$escapedKey = Sanitizer::sanitizeTitleURI( $sortKey, false );
		$hrefKV->v .= str_replace( '#', '%23', $escapedKey );
	}

	if ( count( $content ) !== 1 ) {
		// Deal with sort keys that come from generated content (transclusions, etc.)
		$sortKeyOffsets = $token->getAttributeKV( 'mw:maybeContent' )->valueOffset();
		$expanded = PipelineUtils::expandAttrValueToDOM(
			$this->env,
			$this->manager->getFrame(),
			[ 'html' => $content, 'srcOffsets' => $sortKeyOffsets ],
			$this->options['expandTemplates'],
			$this->options['inTemplate']
		);
		$attr = new DataMwAttrib( [ 'txt' => 'mw:sortKey' ], $expanded );

		$dataMw = $link->dataMw;
		if ( !$dataMw ) {
			$dataMw = new DataMw( [ 'attribs' => [ $attr ] ] );
		} else {
			$dataMw->attribs[] = $attr;
		}

		// Mark token as having expanded attrs
		$link->addAttribute( 'about', $env->newAboutId() );
		$link->addSpaceSeparatedAttribute( 'typeof', 'mw:ExpandedAttrs' );
		$link->dataMw = $dataMw;
	}

	$this->env->getMetadata()->addCategory( $target->title, $sortKey );
	return new TokenHandlerResult( [ $link ] );
}
694 | |
/**
 * Render a language link. These normally appear in the sidebar list of
 * alternate languages for an article, so they are really a page property:
 * the output is a single <link rel="mw:PageProp/Language"> token, and the
 * link is also recorded in the page metadata.
 *
 * @param Token $token
 * @param stdClass $target
 * @return TokenHandlerResult
 */
private function renderLanguageLink( Token $token, stdClass $target ): TokenHandlerResult {
	// The prefix is listed in the interwiki map

	// TODO: If $target->language['deprecated'] is set and
	// $target->language['extralanglink'] is *not* set, then the
	// normalized language name/prefix (from 'deprecated') should be used
	// when calling ContentMetadataCollector::addLanguageLink() here
	// (which we should eventually be doing)

	// TODO: might also want to add the language *code* here, which would
	// be the language['bcp47'] property (added in change
	// I82465261bc66f0b0cd30d361c299f08066494762) for an extralanglink, or
	// the interwiki prefix otherwise; the latter is mediawiki-internal
	// and maybe not BCP-47 compliant.
	// This is for clients of the MediaWiki DOM spec HTML: the WMF domain
	// prefix, the MediaWiki internal language code, and the actual
	// *language* (ie bcp-47 code) can all differ from each other, due to
	// various historical infelicities. Perhaps a `lang` attribute on the
	// `link` would be appropriate.

	$link = new SelfclosingTagTk( 'link', [], $token->dataParsoid );
	try {
		$this->addLinkAttributesAndGetContent( $link, $token, $target );
	} catch ( InternalException $e ) {
		return new TokenHandlerResult( self::bailTokens( $this->manager, $token ) );
	}

	$lang = $target->language;

	// The title attribute gives the presentation name of an "extra
	// language link".
	// T329303: the 'linktext' comes from the system message
	// `interlanguage-link-$prefix` and should be set in integrated mode
	// using the localization features; the integrated-mode SiteConfig
	// currently never sets the `linktext` property in
	// SiteConfig::interwikiMap().
	// I52d50e2f75942a849908c6be7fc5169f00a5983a has some partial work
	// on this.
	$hasLinkText = isset( $lang['extralanglink'] ) && !empty( $lang['linktext'] );
	if ( $hasLinkText ) {
		// XXX in standalone mode, this is user-interface-language text,
		// not "content language" text.
		$link->addNormalizedAttribute( 'title', $lang['linktext'], null );
	}

	// The href is an absolute link to the article in the other wiki/language
	$decodedHref = Utils::decodeURIComponent( $target->href );
	$absHref = str_replace(
		'$1', Sanitizer::sanitizeTitleURI( $decodedHref, false ), $lang['url']
	);
	if ( isset( $lang['protorel'] ) ) {
		// Protocol-relative: drop the leading scheme
		$absHref = preg_replace( '/^https?:/', '', $absHref, 1 );
	}
	$link->addNormalizedAttribute( 'href', $absHref, $target->hrefSrc );

	// Replace the generic wikilink rel with mw:PageProp/Language
	$link->getAttributeKV( 'rel' )->v = 'mw:PageProp/Language';

	// Add language link(s) to metadata
	$this->env->getMetadata()->addLanguageLink( $lang['title'] );

	return new TokenHandlerResult( [ $link ] );
}
764 | |
765 | /** |
766 | * Build the tokens for a link whose prefix is listed in the interwiki map. |
767 | * |
768 | * @param Token $token |
769 | * @param stdClass $target |
770 | * @return TokenHandlerResult |
771 | */ |
772 | private function renderInterwikiLink( Token $token, stdClass $target ): TokenHandlerResult { |
773 | $anchor = new TagTk( 'a', [], $token->dataParsoid ); |
774 | try { |
775 | $linkContent = $this->addLinkAttributesAndGetContent( $anchor, $token, $target, true ); |
776 | } catch ( InternalException $e ) { |
777 | // Could not build the link attributes: emit the bail tokens instead |
778 | return new TokenHandlerResult( self::bailTokens( $this->manager, $token ) ); |
779 | } |
780 |
781 | // The href is an absolute link to the article in the other wiki/language. |
782 | // The second sanitizeTitleURI() argument is true unless the interwiki |
783 | // entry is flagged 'local'. |
784 | $hrefNoWs = trim( $target->href ); |
785 | $pageUri = Sanitizer::sanitizeTitleURI( |
786 | Utils::decodeURIComponent( $hrefNoWs ), |
787 | empty( $target->interwiki['local'] ) |
788 | ); |
789 | $absHref = str_replace( '$1', $pageUri, $target->interwiki['url'] ); |
790 | if ( isset( $target->interwiki['protorel'] ) ) { |
791 | // Strip the scheme so that the link is protocol-relative |
792 | $absHref = preg_replace( '/^https?:/', '', $absHref, 1 ); |
793 | } |
794 | $anchor->addNormalizedAttribute( 'href', $absHref, $target->hrefSrc ); |
795 |
796 | // Change the rel to be mw:WikiLink/Interwiki |
797 | $anchor->getAttributeKV( 'rel' )->v = 'mw:WikiLink/Interwiki'; |
798 | // Remember that this was using wikitext syntax though |
799 | $anchor->dataParsoid->isIW = true; |
800 | // Add title unless it's just a fragment (and trim off fragment) |
801 | // (The normalization here is similar to what Title#getPrefixedDBKey() does.) |
802 | $fragmentOnly = $target->href !== '' && $target->href[0] === '#'; |
803 | if ( !$fragmentOnly ) { |
804 | $pageName = preg_replace( '/#.*/s', '', $hrefNoWs, 1 ); |
805 | $readable = Utils::decodeURIComponent( str_replace( '_', ' ', $pageName ) ); |
806 | $anchor->setAttribute( 'title', $target->interwiki['prefix'] . ':' . $readable ); |
807 | } |
808 |
809 | $tokens = [ $anchor ]; |
810 | PHPUtils::pushArray( $tokens, $linkContent ); |
811 | $tokens[] = new EndTagTk( 'a' ); |
812 | return new TokenHandlerResult( $tokens ); |
813 | } |
814 | |
815 | private static $horizontalAligns = [ |
816 | // PHP parser wraps in <div class="floatleft"> |
817 | 'left', |
818 | // PHP parser wraps in <div class="floatright"> |
819 | 'right', |
820 | // PHP parser wraps in <div class="center"><div class="floatnone"> |
821 | 'center', |
822 | // PHP parser wraps in <div class="floatnone"> |
823 | 'none', |
824 | ]; |
825 | private static $verticalAligns = [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle', |
826 | 'bottom', 'text-bottom' ]; |
827 | |
828 | /** |
829 | * Compute the wrapper classes for a media element and whether it stays inline. |
830 | * |
831 | * @param array $opts The option hash from renderFile. |
832 | * @return array With keys 'classes' (list of classes for the wrapper) and |
833 | * 'isInline' (bool, whether the media is inline after handling options). |
834 | */ |
835 | private static function getWrapperInfo( array $opts ) { |
836 | $format = self::getFormat( $opts ); |
837 | // Framed and manualthumb images aren't scaled |
838 | $unscaled = in_array( $format, [ 'manualthumb', 'framed' ], true ); |
839 | // 'thumbnail' and the unscaled formats start out as not inline |
840 | $isInline = !( $unscaled || $format === 'thumbnail' ); |
841 | $wrapperClasses = []; |
842 |
843 | if ( !$unscaled && !isset( $opts['size']['src'] ) ) { |
844 | // No explicit size on a format that is scaled |
845 | $wrapperClasses[] = 'mw-default-size'; |
846 | } |
847 |
848 | // Border isn't applicable to 'thumbnail', 'manualthumb', or 'framed' formats |
849 | // Using $isInline as a shorthand for that here (see above), |
850 | // but this isn't about being *inline* per se |
851 | if ( isset( $opts['border'] ) && $isInline ) { |
852 | $wrapperClasses[] = 'mw-image-border'; |
853 | } |
854 |
855 | // A recognized horizontal alignment always takes the media out of line |
856 | $halign = $opts['halign']['v'] ?? null; |
857 | if ( in_array( $halign, self::$horizontalAligns, true ) ) { |
858 | $wrapperClasses[] = "mw-halign-$halign"; |
859 | $isInline = false; |
860 | } |
861 |
862 | // Vertical alignment is only considered for media that is still inline |
863 | $valign = $isInline ? ( $opts['valign']['v'] ?? null ) : null; |
864 | if ( in_array( $valign, self::$verticalAligns, true ) ) { |
865 | $wrapperClasses[] = str_replace( '_', '-', "mw-valign-$valign" ); |
866 | } |
867 |
868 | return [ 'classes' => $wrapperClasses, 'isInline' => $isInline ]; |
869 | } |
870 | |
871 | /** |
872 | * Determine the name of an option. |
873 | * |
874 | * @param string $optStr |
875 | * @param Env $env |
876 | * @return array|null Null if $optStr is not a recognized option, otherwise: |
877 | * ck Canonical key for the image option. |
878 | * v Value of the option. |
879 | * ak The option text as passed in ($optStr). |
880 | * s True for a simple option, false for one with a value. |
881 | */ |
882 | private static function getOptionInfo( string $optStr, Env $env ): ?array { |
883 | $siteConfig = $env->getSiteConfig(); |
884 | $matchPrefixed = $siteConfig->getMediaPrefixParameterizedAliasMatcher(); |
885 | $trimmedOpt = trim( $optStr ); |
886 | // Strips the 'img_' / 'timedmedia_' prefix from canonical names |
887 | $prefixRe = '/^(img|timedmedia)_/'; |
888 |
889 | // $trimmedOpt holds the localized name of this option. The |
890 | // canonical option names (from mediawiki upstream) are in |
891 | // English and carry an '(img|timedmedia)_' prefix, which is |
892 | // dropped before the name goes into data-parsoid to save space. |
893 | $canonicalOption = $siteConfig->getMagicWordForMediaOption( $trimmedOpt ) ?? ''; |
894 |
895 | // $simpleGroup is the key we'd put in opts; it names the 'group' |
896 | // for the option, and doesn't have an img_ prefix. |
897 | $simpleGroup = Consts::$Media['SimpleOptions'][$canonicalOption] ?? null; |
898 | if ( !empty( $simpleGroup ) ) { |
899 | return [ |
900 | 'ck' => $simpleGroup, |
901 | 'v' => preg_replace( $prefixRe, '', $canonicalOption, 1 ), |
902 | 'ak' => $optStr, |
903 | 's' => true |
904 | ]; |
905 | } |
906 |
907 | // If there isn't a literal match for the option, look for a |
908 | // prefix match (ie, img_width => `$1px`) |
909 | // |
910 | // *Note* that the legacy parser doesn't have a "principled" |
911 | // precedence here (T372935), it just so happens that members |
912 | // of Consts::PrefixOptions like |
913 | // img_width/img_page/img_lang/timedmedia_* are added last (as |
914 | // handler parameters), and other prefixed options like |
915 | // img_link/img_alt/img_class *happen* to be last in the |
916 | // $internalParamMap. But the possibility for conflicts |
917 | // between prefixed parameters and literal options still |
918 | // exists in the legacy parser. |
919 | $match = $matchPrefixed( $trimmedOpt ); |
920 | if ( !$match ) { |
921 | return null; |
922 | } |
923 |
924 | // $prefixedName is the canonical English option name (from |
925 | // mediawiki upstream) with a prefix; $group is the parsoid 'group' |
926 | // for the option (the key we'd put in opts), without a prefix. |
927 | // The match *used to* also carry the localized name of the prefix |
928 | // option (see SiteConfig::getMediaPrefixParameterizedAliasMatcher, |
929 | // this was dropped in the port from JS); the value is in $match['v']. |
930 | $prefixedName = mb_strtolower( trim( $match['k'] ) ); |
931 | $group = Consts::$Media['PrefixOptions'][$prefixedName] ?? null; |
932 | if ( !$group ) { |
933 | return null; |
934 | } |
935 |
936 | // Note that we deliberately do entity decoding |
937 | // *after* splitting so that HTML-encoded pipes don't |
938 | // separate options. This matches PHP, whether or |
939 | // not it's a good idea. |
940 | return [ |
941 | 'ck' => preg_replace( $prefixRe, '', $prefixedName, 1 ), |
942 | 'v' => Utils::decodeWtEntities( $match['v'] ), |
943 | 'ak' => $optStr, |
944 | 's' => false |
945 | ]; |
946 | } |
946 | |
947 | private static function isWikitextOpt( |
948 | Env $env, ?array &$optInfo, string $prefix, string $resultStr |
949 | ): bool { |
950 | // Only link and alt may hold arbitrary wikitext (only strings really work). |
951 | // FIXME(SSS): Is this actually true of all options rather than |
952 | // just link and alt? |
953 | $optInfo ??= self::getOptionInfo( $prefix . $resultStr, $env ); |
954 | if ( $optInfo === null ) { |
955 | return false; |
956 | } |
957 | return in_array( $optInfo['ck'], [ 'link', 'alt' ], true ); |
958 | } |
959 | |
960 | /** |
961 | * Make option token streams into a stringy thing that we can recognize. |
962 | * |
963 | * @param array $tstream |
964 | * @param string $prefix Anything that came before this part of the recursive call stack. |
965 | * @param Env $env |
966 | * @return string|null Null when the stream can't be an option (most likely a caption). |
967 | */ |
968 | private static function stringifyOptionTokens( array $tstream, string $prefix, Env $env ) { |
969 | // Seems like this should be a more general "stripTags"-like function? |
970 | $tokenType = null; |
971 | $tkHref = null; |
972 | $nextResult = null; |
973 | $skipToEndOf = null; |
974 | $optInfo = null; |
975 | $resultStr = ''; |
976 |
977 | for ( $i = 0; $i < count( $tstream ); $i++ ) { |
978 | $currentToken = $tstream[$i]; |
979 |
980 | if ( $skipToEndOf ) { |
981 | if ( $currentToken instanceof EndTagTk && $currentToken->getName() === $skipToEndOf ) { |
982 | $skipToEndOf = null; |
983 | } |
984 | continue; |
985 | } |
986 |
987 | if ( is_string( $currentToken ) ) { |
988 | $resultStr .= $currentToken; |
989 | } elseif ( is_array( $currentToken ) ) { |
990 | $nextResult = self::stringifyOptionTokens( $currentToken, $prefix . $resultStr, $env ); |
991 |
992 | if ( $nextResult === null ) { |
993 | return null; |
994 | } |
995 |
996 | $resultStr .= $nextResult; |
997 | } elseif ( !( $currentToken instanceof EndTagTk ) ) { |
998 | // This is actually a token |
999 | if ( TokenUtils::hasDOMFragmentType( $currentToken ) ) { |
1000 | if ( self::isWikitextOpt( $env, $optInfo, $prefix, $resultStr ) ) { |
1001 | $str = TokenUtils::tokensToString( [ $currentToken ], false, [ |
1002 | // These tokens haven't been expanded to DOM yet |
1003 | // so unpacking them here is justifiable |
1004 | // FIXME: It's a little convoluted to figure out |
1005 | // that this is actually the case in the |
1006 | // AttributeExpander, but it seems like only |
1007 | // target/href ever gets expanded to DOM and |
1008 | // the rest of the wikilink_content/options |
1009 | // become mw:maybeContent that gets expanded |
1010 | // below where $hasExpandableOpt is set. |
1011 | 'unpackDOMFragments' => true, |
1012 | // FIXME: Sneaking in `env` to avoid changing the signature |
1013 | 'env' => $env |
1014 | ] |
1015 | ); |
1016 | // Entity encode pipes since we wouldn't have split on |
1017 | // them from fragments and we're about to attempt to |
1018 | // when this function returns. Every pipe is encoded |
1019 | // (as &vert;), so none of them can act as a separator. |
1020 | $resultStr .= str_replace( '|', '&vert;', $str ); |
1021 | $optInfo = null; // might change the nature of opt |
1022 | continue; |
1023 | } else { |
1024 | // if this is a nowiki, we must be in a caption |
1025 | return null; |
1026 | } |
1027 | } |
1028 | if ( $currentToken->getName() === 'mw-quote' ) { |
1029 | if ( self::isWikitextOpt( $env, $optInfo, $prefix, $resultStr ) ) { |
1030 | // just recurse inside |
1031 | $optInfo = null; // might change the nature of opt |
1032 | continue; |
1033 | } |
1034 | return null; |
1035 | } |
1036 | // Similar to TokenUtils.tokensToString()'s includeEntities |
1037 | if ( TokenUtils::isEntitySpanToken( $currentToken ) ) { |
1038 | $resultStr .= $currentToken->dataParsoid->src; |
1039 | $skipToEndOf = 'span'; |
1040 | continue; |
1041 | } |
1042 | if ( $currentToken->getName() === 'a' ) { |
1043 | if ( $optInfo === null ) { |
1044 | $optInfo = self::getOptionInfo( $prefix . $resultStr, $env ); |
1045 | if ( $optInfo === null ) { |
1046 | // An <a> tag before a valid option? |
1047 | // This is most likely a caption. |
1048 | return null; |
1049 | } |
1050 | } |
1051 |
1052 | if ( self::isWikitextOpt( $env, $optInfo, $prefix, $resultStr ) ) { |
1053 | $tokenType = $currentToken->getAttributeV( 'rel' ); |
1054 | // Using the shadow since entities (think pipes) would |
1055 | // have already been decoded. |
1056 | $tkHref = $currentToken->getAttributeShadowInfo( 'href' )['value']; |
1057 | $isLink = $optInfo && $optInfo['ck'] === 'link'; |
1058 | // Reset the optInfo since we're changing the nature of it |
1059 | $optInfo = null; |
1060 | // Figure out the proper string to put here and break. |
1061 | if ( |
1062 | $tokenType === 'mw:ExtLink' && |
1063 | ( $currentToken->dataParsoid->stx ?? '' ) === 'url' |
1064 | ) { |
1065 | // Add the URL |
1066 | $resultStr .= $tkHref; |
1067 | // Tell our loop to skip to the end of this tag |
1068 | $skipToEndOf = 'a'; |
1069 | } elseif ( $tokenType === 'mw:WikiLink/Interwiki' ) { |
1070 | if ( $isLink ) { |
1071 | $resultStr .= $currentToken->getAttributeV( 'href' ); |
1072 | $i += 2; |
1073 | continue; |
1074 | } |
1075 | // Nothing to do -- the link content will be |
1076 | // captured by walking the rest of the tokens. |
1077 | } elseif ( $tokenType === 'mw:WikiLink' || $tokenType === 'mw:MediaLink' ) { |
1078 |
1079 | // Nothing to do -- the link content will be |
1080 | // captured by walking the rest of the tokens. |
1081 | } else { |
1082 | // There shouldn't be any other kind of link... |
1083 | // This is likely a caption. |
1084 | return null; |
1085 | } |
1086 | } else { |
1087 | // Why would there be an a tag without a link? |
1088 | return null; |
1089 | } |
1090 | } |
1091 | } |
1092 | } |
1093 |
1094 | return $resultStr; |
1095 | } |
1096 | |
1097 | /** |
1098 | * Get the format for media. |
1099 | * |
1100 | * @param array $opts Option hash; 'manualthumb' and 'format' are read if present. |
1101 | * @return string|null 'manualthumb', else the value of the format option, else null. |
1102 | */ |
1103 | private static function getFormat( array $opts ): ?string { |
1104 | if ( !empty( $opts['manualthumb'] ) ) { |
1105 | return 'manualthumb'; |
1106 | } |
1107 | return $opts['format']['v'] ?? null; |
1108 | } |
1109 | |
1110 | private $used; |
1111 |
1112 | /** |
1113 | * Lazily build, and cache in $this->used, the set of file options that |
1114 | * apply to the container rather than the media element itself (or that |
1115 | * apply generically to a span). Other options depend on the fetched |
1116 | * media type and won't necessarily be applied. |
1117 | * |
1118 | * @return array |
1119 | */ |
1120 | private function getUsed(): array { |
1121 | if ( !$this->used ) { |
1122 | $containerOpts = [ |
1123 | 'lang', 'width', 'class', 'upright', |
1124 | 'border', 'frameless', 'framed', 'thumbnail', |
1125 | ]; |
1126 | $this->used = PHPUtils::makeSet( |
1127 | array_merge( |
1128 | $containerOpts, |
1129 | self::$horizontalAligns, |
1130 | self::$verticalAligns |
1131 | ) |
1132 | ); |
1133 | } |
1134 | return $this->used; |
1135 | } |
1136 | |
1137 | private function hasTransclusion( array $toks ): bool { |
1138 | foreach ( $toks as $tok ) { |
1139 | if ( !( $tok instanceof SelfclosingTagTk ) ) { |
1140 | continue; // only self-closing tokens are inspected |
1141 | } |
1142 | if ( TokenUtils::hasTypeOf( $tok, 'mw:Transclusion' ) ) { |
1143 | return true; |
1144 | } |
1145 | } |
1146 | return false; // no token is typed mw:Transclusion |
1147 | } |
1148 | |
1149 | /** |
1150 | * Render a file. This can be an image, a sound, a PDF etc. Parses the media |
1151 | * options and emits a span/figure wrapper around a placeholder link and span. |
1152 | * @param Token $token |
1153 | * @param stdClass $target |
1154 | * @return TokenHandlerResult |
1155 | */ |
1156 | private function renderFile( Token $token, stdClass $target ): TokenHandlerResult { |
1157 | $manager = $this->manager; |
1158 | $env = $this->env; |
1159 |
1160 | // FIXME: Re-enable use of media cache and figure out how that fits |
1161 | // into this new processing model. See T98995 |
1162 | // const cachedMedia = env.mediaCache[token.dataParsoid.src]; |
1163 |
1164 | $dataParsoid = clone $token->dataParsoid; |
1165 | $dataParsoid->optList = []; |
1166 |
1167 | // Account for the possibility of an expanded target |
1168 | $dataMw = $token->dataMw ?? new DataMw(); |
1169 |
1170 | $opts = [ |
1171 | 'title' => [ |
1172 | 'v' => $env->makeLink( $target->title ), |
1173 | 'src' => $token->getAttributeKV( 'href' )->vsrc |
1174 | ], |
1175 | 'size' => [ |
1176 | 'v' => [ |
1177 | 'height' => null, |
1178 | 'width' => null |
1179 | ] |
1180 | ], |
1181 | // Initialize these properties to avoid isset checks |
1182 | 'caption' => null, |
1183 | 'format' => null, |
1184 | 'manualthumb' => null, |
1185 | 'class' => null |
1186 | ]; |
1187 |
1188 | $hasExpandableOpt = false; |
1189 |
1190 | $optKVs = self::buildLinkAttrs( $token->attribs, true, null, null )['contentKVs']; |
1191 | while ( count( $optKVs ) > 0 ) { |
1192 | $oContent = array_shift( $optKVs ); |
1193 | Assert::invariant( $oContent instanceof KV, 'bad type' ); |
1194 |
1195 | $origOptSrc = $oContent->v; |
1196 | if ( is_array( $origOptSrc ) && count( $origOptSrc ) === 1 ) { |
1197 | $origOptSrc = $origOptSrc[0]; |
1198 | } |
1199 |
1200 | $oText = TokenUtils::tokensToString( $origOptSrc, true, [ 'includeEntities' => true ] ); |
1201 |
1202 | if ( !is_string( $oText ) ) { |
1203 | // Might be that this is a valid option whose value is just |
1204 | // complicated. Try to figure it out, step through all tokens. |
1205 | $maybeOText = self::stringifyOptionTokens( $oText, '', $env ); |
1206 | if ( $maybeOText !== null ) { |
1207 | $oText = $maybeOText; |
1208 | } |
1209 | } |
1210 |
1211 | $optInfo = null; |
1212 | if ( is_string( $oText ) ) { |
1213 | if ( str_contains( $oText, '|' ) ) { |
1214 | // Split the pipe-separated string into pieces |
1215 | // and convert each one into a KV obj and add them |
1216 | // to the beginning of the array. Note that this is |
1217 | // a hack to support templates that provide multiple |
1218 | // image options as a pipe-separated string. We aren't |
1219 | // really providing editing support for this yet, or |
1220 | // ever, maybe. |
1221 | // |
1222 | // TODO(arlolra): Tables in captions suppress breaking on |
1223 | // "linkdesc" pipes so `stringifyOptionTokens` should account |
1224 | // for pipes in table cell content. For the moment, breaking |
1225 | // here is acceptable since it matches the php implementation |
1226 | // bug for bug. |
1227 | $pieces = array_map( static function ( $s ) { |
1228 | return new KV( 'mw:maybeContent', $s ); |
1229 | }, explode( '|', $oText ) ); |
1230 | $optKVs = array_merge( $pieces, $optKVs ); |
1231 |
1232 | // Record the fact that we won't provide editing support for this. |
1233 | $dataParsoid->uneditable = true; |
1234 | continue; |
1235 | } else { |
1236 | // We're being overly accepting of media options at this point, |
1237 | // since we don't know the type yet. After the info request, |
1238 | // we'll filter out those that aren't appropriate. |
1239 | $optInfo = self::getOptionInfo( $oText, $env ); |
1240 | } |
1241 | } |
1242 |
1243 | $recordCaption = static function () use ( $oContent, $oText, $dataParsoid, &$opts ) { |
1244 | $optsCaption = [ |
1245 | 'v' => $oContent->v, |
1246 | 'src' => $oContent->vsrc ?? $oText, |
1247 | 'srcOffsets' => $oContent->valueOffset(), |
1248 | // remember the position |
1249 | 'pos' => count( $dataParsoid->optList ) |
1250 | ]; |
1251 | // if there was a 'caption' previously, round-trip it as a |
1252 | // "bogus option". |
1253 | if ( !empty( $opts['caption'] ) ) { |
1254 | // Wrap the caption opt in an array since the option itself is an array! |
1255 | // Without the wrapping, the splicing will flatten the value. |
1256 | array_splice( $dataParsoid->optList, $opts['caption']['pos'], 0, [ [ |
1257 | 'ck' => 'bogus', |
1258 | 'ak' => $opts['caption']['src'] |
1259 | ] ] |
1260 | ); |
1261 | $optsCaption['pos']++; |
1262 | } |
1263 | $opts['caption'] = $optsCaption; |
1264 | }; |
1265 |
1266 | // For the values of the caption and options, see |
1267 | // getOptionInfo's documentation above. |
1268 | // |
1269 | // If there are multiple captions, this code always |
1270 | // picks the last entry. This is the spec; see |
1271 | // "Image with multiple captions" parserTest. |
1272 | if ( !is_string( $oText ) || $optInfo === null || |
1273 | // Deprecated options |
1274 | in_array( $optInfo['ck'], [ 'disablecontrols' ], true ) |
1275 | ) { |
1276 | // No valid option found!? |
1277 | // Record for RT-ing |
1278 | $recordCaption(); |
1279 | continue; |
1280 | } |
1281 |
1282 | // First option wins, the rest are 'bogus' |
1283 | // FIXME: For now, see T305628 |
1284 | if ( isset( $opts[$optInfo['ck']] ) || ( |
1285 | // All the formats are simple options with the key "format" |
1286 | // except for "manualthumb", so check if the format has been set |
1287 | in_array( $optInfo['ck'], [ 'format', 'manualthumb' ], true ) && ( |
1288 | self::getFormat( $opts ) || |
1289 | ( $this->options['extTagOpts']['suppressMediaFormats'] ?? false ) |
1290 | ) |
1291 | ) ) { |
1292 | $dataParsoid->optList[] = [ |
1293 | 'ck' => 'bogus', |
1294 | 'ak' => $optInfo['ak'] |
1295 | ]; |
1296 | continue; |
1297 | } |
1298 |
1299 | $opt = [ |
1300 | 'ck' => $optInfo['v'], |
1301 | 'ak' => $oContent->vsrc ?? $optInfo['ak'] |
1302 | ]; |
1303 |
1304 | if ( $optInfo['s'] === true ) { |
1305 | // Default: Simple image option |
1306 | $opts[$optInfo['ck']] = [ 'v' => $optInfo['v'] ]; |
1307 | } else { |
1308 | // Map short canonical name to the localized version used. |
1309 | $opt['ck'] = $optInfo['ck']; |
1310 |
1311 | // The MediaWiki magic word for image dimensions is called 'width' |
1312 | // for historical reasons |
1313 | // Unlike other options, use last-specified width. |
1314 | if ( $optInfo['ck'] === 'width' ) { |
1315 | // We support a trailing 'px' here for historical reasons |
1316 | // (T15500, T53628, T207032) |
1317 | $maybeDim = Utils::parseMediaDimensions( |
1318 | $env->getSiteConfig(), $optInfo['v'], false, true |
1319 | ); |
1320 | if ( $maybeDim !== null ) { |
1321 | if ( $maybeDim['bogusPx'] ) { |
1322 | // Lint away redundant unit (T207032) |
1323 | $dataParsoid->setTempFlag( TempData::BOGUS_PX ); |
1324 | } |
1325 | $opts['size']['v'] = [ |
1326 | 'width' => Utils::validateMediaParam( $maybeDim['x'] ) ? $maybeDim['x'] : null, |
1327 | 'height' => array_key_exists( 'y', $maybeDim ) && |
1328 | Utils::validateMediaParam( $maybeDim['y'] ) ? $maybeDim['y'] : null |
1329 | ]; |
1330 | // Only round-trip a valid size |
1331 | $opts['size']['src'] = $oContent->vsrc ?? $optInfo['ak']; |
1332 | // check for duplicated options (indexed write, so no by-reference loop variable) |
1333 | foreach ( $dataParsoid->optList as $idx => $prevOpt ) { |
1334 | if ( $prevOpt['ck'] === 'width' ) { |
1335 | $dataParsoid->optList[$idx]['ck'] = 'bogus'; // mark the previous definition as bogus, last one wins |
1336 | break; |
1337 | } |
1338 | } |
1339 | } else { |
1340 | $recordCaption(); |
1341 | continue; |
1342 | } |
1343 | // Lang is a global attribute and can be applied to all media elements |
1344 | // for editing and roundtripping. However, not all file handlers will |
1345 | // make use of it. This param validation is from the SVG handler but |
1346 | // seems generally applicable. |
1347 | } elseif ( $optInfo['ck'] === 'lang' && !Language::isValidInternalCode( $optInfo['v'] ) ) { |
1348 | $opt['ck'] = 'bogus'; |
1349 | } elseif ( |
1350 | $optInfo['ck'] === 'upright' && |
1351 | ( !is_numeric( $optInfo['v'] ) || $optInfo['v'] <= 0 ) |
1352 | ) { |
1353 | $opt['ck'] = 'bogus'; |
1354 | } else { |
1355 | $opts[$optInfo['ck']] = [ |
1356 | 'v' => $optInfo['v'], |
1357 | 'src' => $oContent->vsrc ?? $optInfo['ak'], |
1358 | 'srcOffsets' => $oContent->valueOffset(), |
1359 | ]; |
1360 | } |
1361 | } |
1362 |
1363 | // Collect option in dataParsoid (becomes data-parsoid later on) |
1364 | // for faithful serialization. |
1365 | $dataParsoid->optList[] = $opt; |
1366 |
1367 | // Collect source wikitext for image options for possible template expansion. |
1368 | $maybeOpt = !isset( $this->getUsed()[$opt['ck']] ); |
1369 | $expOpt = null; |
1370 | // Links more often than not show up as arrays here because they're |
1371 | // tokenized as `autourl`. To avoid unnecessarily considering them |
1372 | // expanded, we'll use a more restrictive test, at the cost of |
1373 | // perhaps missing some edgy behaviour. |
1374 | if ( $opt['ck'] === 'link' ) { |
1375 | $expOpt = is_array( $origOptSrc ) && |
1376 | $this->hasTransclusion( $origOptSrc ); |
1377 | } else { |
1378 | $expOpt = is_array( $origOptSrc ); |
1379 | } |
1380 | if ( $maybeOpt || $expOpt ) { |
1381 | $val = []; |
1382 | if ( $expOpt ) { |
1383 | $hasExpandableOpt = true; |
1384 | $val['html'] = $origOptSrc; |
1385 | $val['srcOffsets'] = $oContent->valueOffset(); |
1386 | $val = PipelineUtils::expandAttrValueToDOM( |
1387 | $env, $manager->getFrame(), $val, |
1388 | $this->options['expandTemplates'], |
1389 | $this->options['inTemplate'] |
1390 | ); |
1391 | } |
1392 |
1393 | // This is a bit of an abuse of the "txt" property since |
1394 | // `optInfo.v` isn't necessarily wikitext from source. |
1395 | // It's a result of the specialized stringifying above, which |
1396 | // if interpreted as wikitext upon serialization will result |
1397 | // in some (acceptable) normalization. |
1398 | // |
1399 | // We're storing these options in data-mw because they aren't |
1400 | // guaranteed to apply to all media types and we'd like to |
1401 | // avoid the need to back them out later. |
1402 | // |
1403 | // Note that the caption in the legacy parser depends on the |
1404 | // exact set of options parsed, which we aren't attempting to |
1405 | // try and replicate after fetching the media info, since we |
1406 | // consider that more of bug than a feature. It prevent anyone |
1407 | // from ever safely adding media options in the future. |
1408 | // |
1409 | // See T163582 |
1410 | if ( $maybeOpt ) { |
1411 | $val['txt'] = $optInfo['v']; |
1412 | } |
1413 | $dataMw->attribs ??= []; |
1414 | $dataMw->attribs[] = new DataMwAttrib( $opt['ck'], $val ); |
1415 | } |
1416 | } |
1417 |
1418 | // Add the last caption in the right position if there is one |
1419 | if ( isset( $opts['caption'] ) ) { |
1420 | // Wrap the caption opt in an array since the option itself is an array! |
1421 | // Without the wrapping, the splicing will flatten the value. |
1422 | array_splice( $dataParsoid->optList, $opts['caption']['pos'], 0, [ [ |
1423 | 'ck' => 'caption', |
1424 | 'ak' => $opts['caption']['src'] |
1425 | ] ] |
1426 | ); |
1427 | } |
1428 |
1429 | $format = self::getFormat( $opts ); |
1430 |
1431 | // Handle image default sizes and upright option after extracting all |
1432 | // options |
1433 | if ( $format === 'framed' || $format === 'manualthumb' ) { |
1434 | // width and height is ignored for framed and manualthumb images |
1435 | // https://phabricator.wikimedia.org/T64258 |
1436 | $opts['size']['v'] = [ 'width' => null, 'height' => null ]; |
1437 | // Mark any definitions as bogus |
1438 | foreach ( $dataParsoid->optList as $idx => $prevOpt ) { |
1439 | if ( $prevOpt['ck'] === 'width' ) { |
1440 | $dataParsoid->optList[$idx]['ck'] = 'bogus'; |
1441 | } |
1442 | } |
1443 | } elseif ( $format ) { |
1444 | if ( !$opts['size']['v']['height'] && !$opts['size']['v']['width'] ) { |
1445 | $defaultWidth = $env->getSiteConfig()->widthOption(); |
1446 | if ( isset( $opts['upright'] ) ) { |
1447 | if ( $opts['upright']['v'] === 'upright' ) { // Simple option |
1448 | $defaultWidth *= 0.75; |
1449 | } else { |
1450 | $defaultWidth *= $opts['upright']['v']; |
1451 | } |
1452 | // round to nearest 10 pixels |
1453 | $defaultWidth = 10 * round( $defaultWidth / 10 ); |
1454 | } |
1455 | $opts['size']['v']['width'] = $defaultWidth; |
1456 | } |
1457 | } |
1458 |
1459 | $rdfaType = 'mw:File'; |
1460 |
1461 | // If the format is something we *recognize*, add the subtype |
1462 | switch ( $format ) { |
1463 | case 'manualthumb': // FIXME(T305759): Does it deserve its own type? |
1464 | case 'thumbnail': |
1465 | $rdfaType .= '/Thumb'; |
1466 | break; |
1467 | case 'framed': |
1468 | $rdfaType .= '/Frame'; |
1469 | break; |
1470 | case 'frameless': |
1471 | $rdfaType .= '/Frameless'; |
1472 | break; |
1473 | } |
1474 |
1475 | // Tell VE that it shouldn't try to edit this |
1476 | if ( !empty( $dataParsoid->uneditable ) ) { |
1477 | $rdfaType .= ' mw:Placeholder'; |
1478 | } else { |
1479 | unset( $dataParsoid->src ); |
1480 | } |
1481 |
1482 | $wrapperInfo = self::getWrapperInfo( $opts ); |
1483 |
1484 | $isInline = $wrapperInfo['isInline']; |
1485 | $containerName = $isInline ? 'span' : 'figure'; |
1486 |
1487 | $classes = $wrapperInfo['classes']; |
1488 | if ( !empty( $opts['class'] ) ) { |
1489 | PHPUtils::pushArray( $classes, explode( ' ', $opts['class']['v'] ) ); |
1490 | } |
1491 |
1492 | $attribs = [ new KV( 'typeof', $rdfaType ) ]; |
1493 | if ( count( $classes ) > 0 ) { |
1494 | array_unshift( $attribs, new KV( 'class', implode( ' ', $classes ) ) ); |
1495 | } |
1496 |
1497 | $container = new TagTk( $containerName, $attribs, $dataParsoid ); |
1498 | $containerClose = new EndTagTk( $containerName ); |
1499 |
1500 | if ( $hasExpandableOpt ) { |
1501 | $container->addAttribute( 'about', $env->newAboutId() ); |
1502 | $container->addSpaceSeparatedAttribute( 'typeof', 'mw:ExpandedAttrs' ); |
1503 | } elseif ( preg_match( '/\bmw:ExpandedAttrs\b/', $token->getAttributeV( 'typeof' ) ?? '' ) ) { |
1504 | $container->addSpaceSeparatedAttribute( 'typeof', 'mw:ExpandedAttrs' ); |
1505 | } |
1506 |
1507 | $span = new TagTk( 'span', [ new KV( 'class', 'mw-file-element mw-broken-media' ) ] ); |
1508 |
1509 | // "resource" and "lang" are allowed attributes on spans |
1510 | $span->addNormalizedAttribute( 'resource', $opts['title']['v'], $opts['title']['src'] ); |
1511 | if ( isset( $opts['lang'] ) ) { |
1512 | $span->addNormalizedAttribute( 'lang', $opts['lang']['v'], $opts['lang']['src'] ); |
1513 | } |
1514 |
1515 | // Token's KV attributes only accept strings, Tokens or arrays of those. |
1516 | $size = $opts['size']['v']; |
1517 | if ( !empty( $size['width'] ) ) { |
1518 | $span->addAttribute( 'data-width', (string)$size['width'] ); |
1519 | } |
1520 | if ( !empty( $size['height'] ) ) { |
1521 | $span->addAttribute( 'data-height', (string)$size['height'] ); |
1522 | } |
1523 |
1524 | $anchor = new TagTk( 'a' ); |
1525 | $anchor->setAttribute( 'href', $this->specialFilePath( $target->title ) ); |
1526 |
1527 | $tokens = [ |
1528 | $container, |
1529 | $anchor, |
1530 | $span, |
1531 | $target->title->getPrefixedText(), |
1532 | new EndTagTk( 'span' ), |
1533 | new EndTagTk( 'a' ) |
1534 | ]; |
1535 |
1536 | $optsCaption = $opts['caption'] ?? null; |
1537 | if ( $isInline ) { |
1538 | if ( $optsCaption ) { |
1539 | if ( !is_array( $optsCaption['v'] ) ) { |
1540 | $opts['caption']['v'] = $optsCaption['v'] = [ $optsCaption['v'] ]; |
1541 | } |
1542 | // Parse the caption |
1543 | $captionDOM = PipelineUtils::processContentInPipeline( |
1544 | $this->env, |
1545 | $this->manager->getFrame(), |
1546 | array_merge( $optsCaption['v'], [ new EOFTk() ] ), |
1547 | [ |
1548 | 'pipelineType' => 'expanded-tokens-to-fragment', |
1549 | 'pipelineOpts' => [ |
1550 | 'inlineContext' => true, |
1551 | 'expandTemplates' => $this->options['expandTemplates'], |
1552 | 'inTemplate' => $this->options['inTemplate'] |
1553 | ], |
1554 | 'srcOffsets' => $optsCaption['srcOffsets'] ?? null, |
1555 | 'sol' => true |
1556 | ] |
1557 | ); |
1558 |
1559 | // Use parsed DOM given in `captionDOM` |
1560 | // FIXME: Does this belong in `dataMw.attribs`? |
1561 | $dataMw->caption = ContentUtils::ppToXML( |
1562 | $captionDOM, [ 'innerXML' => true ] |
1563 | ); |
1564 | } |
1565 | } else { |
1566 | // We always add a figcaption for blocks |
1567 | $tsr = $optsCaption['srcOffsets'] ?? null; |
1568 | $dp = new DataParsoid; |
1569 | $dp->tsr = $tsr; |
1570 | $tokens[] = new TagTk( 'figcaption', [], $dp ); |
1571 | if ( $optsCaption ) { |
1572 | if ( is_string( $optsCaption['v'] ) ) { |
1573 | $tokens[] = $optsCaption['v']; |
1574 | } else { |
1575 | $tokens[] = PipelineUtils::getDOMFragmentToken( |
1576 | $optsCaption['v'], |
1577 | $tsr, |
1578 | [ 'inlineContext' => true, 'token' => $token ] |
1579 | ); |
1580 | } |
1581 | } |
1582 | $tokens[] = new EndTagTk( 'figcaption' ); |
1583 | } |
1584 |
1585 | if ( !$dataMw->isEmpty() ) { |
1586 | $container->dataMw = $dataMw; |
1587 | } |
1588 |
1589 | $tokens[] = $containerClose; |
1590 | return new TokenHandlerResult( $tokens ); |
1591 | } |
1592 | |
/**
 * Build the relative Special:FilePath URL for a file title.
 *
 * @param Title $title Title whose DB key is sanitized and appended to the path
 * @return string Path of the form "./Special:FilePath/<sanitized DB key>"
 */
private function specialFilePath( Title $title ): string {
	return './Special:FilePath/' . Sanitizer::sanitizeTitleURI( $title->getDBkey(), false );
}
1597 | |
/**
 * Emit the tokens for an anchor that links to a media file.
 *
 * The anchor receives its attributes and content from
 * addLinkAttributesAndGetContent(); afterwards its rel is overwritten with
 * mw:MediaLink and its href, resource and title are set. When $errs is
 * non-empty, the anchor is additionally typed as mw:Error and the errors
 * are recorded in its data-mw.
 *
 * @param Token $token
 * @param stdClass $target
 * @param list<DataMwError> $errs
 * @param ?array{url?:string} $info
 * @return TokenHandlerResult
 */
private function linkToMedia( Token $token, stdClass $target, array $errs, ?array $info ): TokenHandlerResult {
	// Only pass in the url, since media links should not link to the thumburl
	$href = $info['url'] ?? $this->specialFilePath( $target->title ); // Copied from getPath
	// Strip everything up to and including the last slash.
	$fileName = preg_replace( '#.*/#', '', $href, 1 );

	$anchor = new TagTk( 'a' );

	try {
		$content = $this->addLinkAttributesAndGetContent( $anchor, $token, $target );
	} catch ( InternalException $e ) {
		// Could not build the link; fall back to the bail-out tokens.
		return new TokenHandlerResult( self::bailTokens( $this->manager, $token ) );
	}

	// Change the rel to be mw:MediaLink
	$anchor->getAttributeKV( 'rel' )->v = 'mw:MediaLink';

	$anchor->setAttribute( 'href', $href );

	// html2wt will use the resource rather than try to parse the href.
	$resource = $this->env->makeLink( $target->title );
	$anchor->addNormalizedAttribute( 'resource', $resource, $target->hrefSrc );

	// Normalize title according to how PHP parser does it currently
	$anchor->setAttribute( 'title', str_replace( '_', ' ', $fileName ) );

	if ( $errs !== [] ) {
		// Set RDFa type to mw:Error so VE and other clients
		// can use this to do client-specific action on these.
		if ( !TokenUtils::hasTypeOf( $anchor, 'mw:Error' ) ) {
			$anchor->addSpaceSeparatedAttribute( 'typeof', 'mw:Error' );
		}

		// Update data-mw: append to an existing error list, or start one.
		$dataMw = $token->dataMw ?? new DataMw;
		if ( !is_array( $dataMw->errors ?? null ) ) {
			$dataMw->errors = $errs;
		} else {
			array_push( $dataMw->errors, ...$errs );
		}
		$anchor->dataMw = $dataMw;
	}

	return new TokenHandlerResult(
		array_merge( [ $anchor ], $content, [ new EndTagTk( 'a' ) ] )
	);
}
1654 | |
1655 | // FIXME: The media request here is only used to determine if this is a |
1656 | // redlink and deserves to be handled in the redlink post-processing pass. |
1657 | |
/**
 * Fetch file info for the media target and emit a media link for it.
 *
 * A missing info record or a reported thumbnail error is turned into a
 * DataMwError and handed to linkToMedia() together with the info.
 *
 * @param Token $token
 * @param stdClass $target
 * @return TokenHandlerResult
 */
private function renderMedia( Token $token, stdClass $target ): TokenHandlerResult {
	$dataAccess = $this->env->getDataAccess();
	$pageConfig = $this->env->getPageConfig();
	// Single-file request with null height and width.
	$request = [ [ $target->title->getDBkey(), [ 'height' => null, 'width' => null ] ] ];
	$info = $dataAccess->getFileInfo( $pageConfig, $request )[0];

	if ( !$info ) {
		$errs = [ new DataMwError( 'apierror-filedoesnotexist', [], 'This image does not exist.' ) ];
	} elseif ( isset( $info['thumberror'] ) ) {
		$errs = [ new DataMwError( 'apierror-unknownerror', [], $info['thumberror'] ) ];
	} else {
		$errs = [];
	}

	return $this->linkToMedia( $token, $target, $errs, $info );
}
1678 | |
/**
 * Dispatch 'wikilink' and 'mw:redirect' tokens to their handlers; any other
 * token name yields null.
 *
 * @inheritDoc
 */
public function onTag( Token $token ): ?TokenHandlerResult {
	$name = $token->getName();

	if ( $name === 'wikilink' ) {
		return $this->onWikiLink( $token );
	}
	if ( $name === 'mw:redirect' ) {
		return $this->onRedirect( $token );
	}

	return null;
}
1690 | } |