Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 792 |
|
0.00% |
0 / 25 |
CRAP | |
0.00% |
0 / 1 |
WikiLinkHandler | |
0.00% |
0 / 792 |
|
0.00% |
0 / 25 |
60762 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
hrefParts | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
getWikiLinkTargetInfo | |
0.00% |
0 / 61 |
|
0.00% |
0 / 1 |
272 | |||
onRedirect | |
0.00% |
0 / 32 |
|
0.00% |
0 / 1 |
20 | |||
bailTokens | |
0.00% |
0 / 26 |
|
0.00% |
0 / 1 |
6 | |||
onWikiLink | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
42 | |||
wikiLinkHandler | |
0.00% |
0 / 25 |
|
0.00% |
0 / 1 |
132 | |||
buildLinkAttrs | |
0.00% |
0 / 26 |
|
0.00% |
0 / 1 |
182 | |||
addLinkAttributesAndGetContent | |
0.00% |
0 / 63 |
|
0.00% |
0 / 1 |
702 | |||
renderWikiLink | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
6 | |||
renderCategory | |
0.00% |
0 / 36 |
|
0.00% |
0 / 1 |
42 | |||
renderLanguageLink | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
30 | |||
renderInterwikiLink | |
0.00% |
0 / 26 |
|
0.00% |
0 / 1 |
30 | |||
getWrapperInfo | |
0.00% |
0 / 57 |
|
0.00% |
0 / 1 |
552 | |||
getOptionInfo | |
0.00% |
0 / 25 |
|
0.00% |
0 / 1 |
56 | |||
isWikitextOpt | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
12 | |||
stringifyOptionTokens | |
0.00% |
0 / 63 |
|
0.00% |
0 / 1 |
650 | |||
getFormat | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
getUsed | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
6 | |||
hasTransclusion | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
renderFile | |
0.00% |
0 / 241 |
|
0.00% |
0 / 1 |
4422 | |||
specialFilePath | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
linkToMedia | |
0.00% |
0 / 25 |
|
0.00% |
0 / 1 |
42 | |||
renderMedia | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
12 | |||
onTag | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
20 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | /** |
5 | * Simple link handler. |
6 | * |
7 | * TODO: keep round-trip information in meta tag or the like |
8 | */ |
9 | |
10 | namespace Wikimedia\Parsoid\Wt2Html\TT; |
11 | |
12 | use stdClass; |
13 | use Wikimedia\Assert\Assert; |
14 | use Wikimedia\Parsoid\Config\Env; |
15 | use Wikimedia\Parsoid\Core\DomSourceRange; |
16 | use Wikimedia\Parsoid\Core\InternalException; |
17 | use Wikimedia\Parsoid\Core\Sanitizer; |
18 | use Wikimedia\Parsoid\Language\Language; |
19 | use Wikimedia\Parsoid\NodeData\DataParsoid; |
20 | use Wikimedia\Parsoid\NodeData\TempData; |
21 | use Wikimedia\Parsoid\Tokens\EndTagTk; |
22 | use Wikimedia\Parsoid\Tokens\EOFTk; |
23 | use Wikimedia\Parsoid\Tokens\KV; |
24 | use Wikimedia\Parsoid\Tokens\SelfclosingTagTk; |
25 | use Wikimedia\Parsoid\Tokens\SourceRange; |
26 | use Wikimedia\Parsoid\Tokens\TagTk; |
27 | use Wikimedia\Parsoid\Tokens\Token; |
28 | use Wikimedia\Parsoid\Utils\ContentUtils; |
29 | use Wikimedia\Parsoid\Utils\PHPUtils; |
30 | use Wikimedia\Parsoid\Utils\PipelineUtils; |
31 | use Wikimedia\Parsoid\Utils\Title; |
32 | use Wikimedia\Parsoid\Utils\TitleException; |
33 | use Wikimedia\Parsoid\Utils\TokenUtils; |
34 | use Wikimedia\Parsoid\Utils\Utils; |
35 | use Wikimedia\Parsoid\Wikitext\Consts; |
36 | use Wikimedia\Parsoid\Wt2Html\PegTokenizer; |
37 | use Wikimedia\Parsoid\Wt2Html\TokenTransformManager; |
38 | |
39 | class WikiLinkHandler extends TokenHandler { |
/**
 * Tokenizer instance created by the constructor.
 *
 * @var PegTokenizer
 */
private $urlParser;

/**
 * Build the handler and make sure a tokenizer instance is available.
 *
 * @inheritDoc
 */
public function __construct( TokenTransformManager $manager, array $options ) {
	parent::__construct( $manager, $options );

	if ( $this->urlParser ) {
		// A parser is already present; keep it.
		return;
	}

	// Create a new peg parser for image options. This is actually the
	// regular tokenizer, but it is meant to be called with the url rule only.
	$this->urlParser = new PegTokenizer( $this->env );
}
56 | |
/**
 * Split a link target at its first colon.
 *
 * @param string $str
 * @return ?array [ 'prefix' => non-empty text before the first colon,
 *   'title' => the text after that colon ], or null when the pattern does
 *   not match (e.g. no colon at all, or nothing before the first colon).
 */
private static function hrefParts( string $str ): ?array {
	$parts = [];
	if ( !preg_match( '/^([^:]+):(.*)$/D', $str, $parts ) ) {
		return null;
	}

	return [
		'prefix' => $parts[1],
		'title' => $parts[2],
	];
}
64 | |
/**
 * Normalize and analyze a wikilink target.
 *
 * The returned object carries:
 * - href: The expanded target string (colon escape / interwiki prefix removed)
 * - hrefSrc: The original target wikitext
 * - exactly one of
 *   - title: A title object
 *   - language: An interwikiInfo array
 *   - interwiki: An interwikiInfo array
 * - localprefix: Set if the link had a localinterwiki prefix (or prefixes)
 * - fromColonEscapedText: Target was colon-escaped ([[:en:foo]])
 * - prefix: The original namespace or language/interwiki prefix without a
 *   colon escape (only set when the target contains a prefix).
 *
 * @param Token $token
 * @param string $href
 * @param string $hrefSrc
 * @return stdClass The target info.
 * @throws InternalException When the href is prefixed by multiple colons.
 */
private function getWikiLinkTargetInfo( Token $token, string $href, string $hrefSrc ): stdClass {
	$env = $this->env;
	$siteConfig = $env->getSiteConfig();

	// All optional properties start out as null so that callers do not
	// need isset checks.
	$info = (object)[
		'href' => $href,
		'hrefSrc' => $hrefSrc,
		'interwiki' => null,
		'language' => null,
		'localprefix' => null,
		'fromColonEscapedText' => null
	];

	// Strip a single leading colon escape (ignoring leading whitespace).
	$leftTrimmed = ltrim( $info->href );
	if ( str_starts_with( $leftTrimmed, ':' ) ) {
		$info->fromColonEscapedText = true;
		$info->href = substr( $leftTrimmed, 1 );
	}

	// A second colon directly after the escape makes the target invalid.
	if ( str_starts_with( $info->href, ':' ) ) {
		if ( $env->linting( 'multi-colon-escape' ) ) {
			$lint = [
				'dsr' => DomSourceRange::fromTsr( $token->dataParsoid->tsr ),
				'params' => [ 'href' => ':' . $info->href ],
				'templateInfo' => null
			];
			if ( $this->options['inTemplate'] ) {
				// Match Linter.findEnclosingTemplateName(), by first
				// converting the title to an href using env.makeLink
				$frameTitle = $this->manager->getFrame()->getTitle();
				$name = PHPUtils::stripPrefix( $env->makeLink( $frameTitle ), './' );
				$lint['templateInfo'] = [ 'name' => $name ];
				// TODO(arlolra): Pass tsr info to the frame
				$lint['dsr'] = new DomSourceRange( 0, 0, null, null );
			}
			$env->recordLint( 'multi-colon-escape', $lint );
		}
		// This will get caught by the caller, and mark the target as invalid
		throw new InternalException( 'Multiple colons prefixing href.' );
	}

	$title = $env->resolveTitle( Utils::decodeURIComponent( $info->href ) );
	$hrefBits = self::hrefParts( $info->href );
	if ( !$hrefBits ) {
		// No prefix at all: a plain local title.
		$info->title = $env->makeTitleFromURLDecodedStr( $title );
		return $info;
	}

	$nsPrefix = $hrefBits['prefix'];
	$info->prefix = $nsPrefix;
	$nnn = Utils::normalizeNamespaceName( trim( $nsPrefix ) );
	$interwikiInfo = $siteConfig->interwikiMapNoNamespaces()[$nnn] ?? null;
	$ns = $siteConfig->namespaceId( $nnn );

	// A namespace prefix wins over any interwiki / language prefix.
	if ( $ns !== null ) {
		$info->title = $env->makeTitleFromURLDecodedStr( $title );
		return $info;
	}

	if ( isset( $interwikiInfo['localinterwiki'] ) ) {
		$remainder = $hrefBits['title'];
		if ( $remainder === '' ) {
			// Empty title => main page (T66167)
			$info->title = Title::newFromLinkTarget(
				$siteConfig->mainPageLinkTarget(), $siteConfig
			);
			return $info;
		}

		// Drop the local prefix and analyze what is left; a remainder
		// that still contains a colon is re-escaped with a leading colon.
		$innerHref = str_contains( $remainder, ':' ) ? ':' . $remainder : $remainder;
		$info = $this->getWikiLinkTargetInfo( $token, $innerHref, $info->hrefSrc );
		$innerPrefix = $info->localprefix ? ( ':' . $info->localprefix ) : '';
		$info->localprefix = $nsPrefix . $innerPrefix;
		return $info;
	}

	// Requiring a url here also protects against [[constructor:foo]]
	if ( empty( $interwikiInfo['url'] ) ) {
		$info->title = $env->makeTitleFromURLDecodedStr( $title );
		return $info;
	}

	$info->href = $hrefBits['title'];
	// Ensure a valid title, even though we're discarding the result
	$env->makeTitleFromURLDecodedStr( $title );

	// Interwiki or language link? It is only a language link when the
	// target was not colon-escaped (unlike [[:en:Foo]]) and the interwiki
	// entry carries language info.
	$isLanguageLink = !$info->fromColonEscapedText && (
		isset( $interwikiInfo['language'] ) || isset( $interwikiInfo['extralanglink'] )
	);
	if ( $isLanguageLink ) {
		$info->language = $interwikiInfo;
		return $info;
	}

	$info->interwiki = $interwikiInfo;
	// Remove the colon escape after an interwiki prefix
	$leftTrimmed = ltrim( $info->href );
	if ( str_starts_with( $leftTrimmed, ':' ) ) {
		$info->href = substr( $leftTrimmed, 1 );
	}

	return $info;
}
181 | |
/**
 * Handle mw:redirect tokens.
 *
 * The embedded wikilink token is run through onWikiLink(); when that yields
 * an <a> or <link> token, its href is moved onto a
 * <link rel="mw:PageProp/redirect"> token. Otherwise the source is
 * re-emitted as if it had been parsed as a list item (#REDIRECT....).
 *
 * @param Token $token
 * @return TokenHandlerResult
 * @throws InternalException
 */
private function onRedirect( Token $token ): TokenHandlerResult {
	$rlink = new SelfclosingTagTk(
		'link',
		Utils::clone( $token->attribs ),
		$token->dataParsoid->clone()
	);
	$wikiLinkTk = $rlink->dataParsoid->linkTk;
	$rlink->setAttribute( 'rel', 'mw:PageProp/redirect' );

	// Remove the nested wikiLinkTk token and the cloned href attribute
	unset( $rlink->dataParsoid->linkTk );
	$rlink->removeAttribute( 'href' );

	// Hand the (possibly template-expanded) attributes back to the
	// wikilink token before rendering it.
	$wikiLinkTk->attribs = Utils::clone( $token->attribs );

	// The "redirect" attribute tells the wikilink code that image and
	// category links should be handled as plain links.
	$wikiLinkTk->setAttribute( 'redirect', 'true' );

	// Render the wikilink (including interwiki links, etc).
	$r = $this->onWikiLink( $wikiLinkTk );
	$firstToken = $r->tokens[0] ?? null;

	if (
		$firstToken instanceof Token &&
		in_array( $firstToken->getName(), [ 'a', 'link' ], true )
	) {
		// Transfer the resulting href to rlink.
		$linkDp = $firstToken->dataParsoid;
		$rlink->addNormalizedAttribute( 'href', $linkDp->a['href'], $linkDp->sa['href'] );
		return new TokenHandlerResult( [ $rlink ] );
	}

	// Bail! Emit tokens as if they were parsed as a list item:
	// #REDIRECT....
	$src = $rlink->dataParsoid->src;
	$tsr = $rlink->dataParsoid->tsr;
	preg_match( '/^([^#]*)(#)/', $src, $srcMatch );

	// Text in front of the '#', if any, is emitted verbatim.
	$beforeHashLen = strlen( $srcMatch[1] );
	$ntokens = $beforeHashLen ? [ $srcMatch[1] ] : [];

	$hashPos = $tsr->start + $beforeHashLen;
	$hashTsr = new SourceRange( $hashPos, $hashPos + 1 );
	$liDp = new DataParsoid;
	$liDp->tsr = $hashTsr;
	$ntokens[] = new TagTk(
		'listItem',
		[ new KV( 'bullets', [ '#' ], $hashTsr->expandTsrV() ) ],
		$liDp
	);

	// Whatever follows the '#', then the tokens the wikilink produced.
	$ntokens[] = substr( $src, strlen( $srcMatch[0] ) );
	PHPUtils::pushArray( $ntokens, $r->tokens );
	return new TokenHandlerResult( $ntokens );
}
244 | |
/**
 * Give up on treating a token as a link: re-emit its source as a literal
 * '[' followed by the tokens obtained by re-processing the rest of the
 * source in a nested pipeline.
 *
 * @param TokenTransformManager $manager
 * @param Token $token Token whose dataParsoid->tsr locates the link source
 *   in the frame's source text.
 * @return array Tokens (and strings) to emit in place of $token.
 */
public static function bailTokens( TokenTransformManager $manager, Token $token ): array {
	$env = $manager->getEnv();
	$frame = $manager->getFrame();
	$tsr = $token->dataParsoid->tsr;
	$frameSrc = $frame->getSrcText();
	$linkSrc = $tsr->substr( $frameSrc );

	// An empty link source cannot be re-tokenized. Checking $linkSrc
	// directly (rather than testing substr()'s result against false)
	// behaves the same on PHP 7 and PHP 8: since PHP 8.0 substr() returns
	// '' instead of false when the start offset is out of range.
	if ( $linkSrc === '' ) {
		$env->log(
			'error', 'Unable to determine link source.',
			"frame: $frameSrc", 'tsr: ', $tsr,
			"link: $linkSrc"
		);
		return [ $linkSrc ]; // Forget about trying to tokenize this
	}

	// Skip the first character of the source; it is emitted below as a
	// literal '[' and everything after it is re-processed.
	$src = substr( $linkSrc, 1 );
	$startOffset = $tsr->start + 1;
	$managerOpts = $manager->getOptions();
	$toks = PipelineUtils::processContentInPipeline(
		$env, $frame, $src, [
			'sol' => false,
			'pipelineType' => 'text/x-mediawiki',
			'srcOffsets' => new SourceRange( $startOffset, $startOffset + strlen( $src ) ),
			'pipelineOpts' => [
				'expandTemplates' => $managerOpts['expandTemplates'],
				'inTemplate' => $managerOpts['inTemplate'],
			],
		]
	);
	TokenUtils::stripEOFTkfromTokens( $toks );
	return array_merge( [ '[' ], $toks );
}
274 | |
/**
 * Handle a mw:WikiLink token.
 *
 * Targets that cannot be rendered as a link are replaced by the output of
 * bailTokens(); everything else is dispatched through wikiLinkHandler().
 *
 * @param Token $token
 * @return TokenHandlerResult
 * @throws InternalException
 */
private function onWikiLink( Token $token ): TokenHandlerResult {
	$hrefKV = $token->getAttributeKV( 'href' );
	$hrefTokenStr = TokenUtils::tokensToString( $hrefKV->v );

	// Don't allow internal links to pages containing PROTO:
	// See Parser::handleInternalLinks2()
	$unusable = $this->env->getSiteConfig()->hasValidProtocol( $hrefTokenStr );

	// Xmlish tags in title position are invalid. Not according to the
	// preprocessor ABNF but at later stages in the legacy parser,
	// namely handleInternalLinks.
	if ( !$unusable && is_array( $hrefKV->v ) ) {
		// Use the expanded attr instead of trying to unpackDOMFragments
		// since the fragment will have been released when expanding to DOM
		$expandedVal = $token->fetchExpandedAttrValue( 'href' );
		$unusable = (bool)preg_match(
			'#mw:(Nowiki|Extension|DOMFragment/sealed)#', $expandedVal ?? ''
		);
	}

	// A pipe in the expanded href means the 'href' was templated and also
	// returned other parameters separated by a pipe. There is no sensible
	// way to handle such a construct currently, so prevent people from
	// editing it. See T226523
	// TODO: add useful debugging info for editors ('if you would like to
	// make this content editable, then fix template X..')
	// TODO: also check other parameters for pipes!
	$unusable = $unusable || str_contains( $hrefTokenStr, '|' );

	$target = null;
	if ( !$unusable ) {
		try {
			$target = $this->getWikiLinkTargetInfo( $token, $hrefTokenStr, $hrefKV->vsrc );
		} catch ( TitleException | InternalException $e ) {
			// Invalid title
			$unusable = true;
		}
	}

	if ( $unusable ) {
		return new TokenHandlerResult( self::bailTokens( $this->manager, $token ) );
	}

	// Ok, it looks like we have a sensible href. Figure out which handler to use.
	return $this->wikiLinkHandler(
		$token, $target, (bool)$token->getAttributeV( 'redirect' )
	);
}
329 | |
/**
 * Pick the renderer for a WikiLink token based on the structure of its
 * target. Override this method to add new handlers or swap out existing
 * handlers.
 *
 * @param Token $token
 * @param stdClass $target
 * @param bool $isRedirect
 * @return TokenHandlerResult
 * @throws InternalException When the target is neither a title nor a
 *   language or interwiki link.
 */
private function wikiLinkHandler(
	Token $token, stdClass $target, bool $isRedirect
): TokenHandlerResult {
	$title = $target->title ?? null;

	if ( !$title ) {
		// language and interwiki links
		if ( $target->interwiki ) {
			return $this->renderInterwikiLink( $token, $target );
		}
		if ( !$target->language ) {
			// Neither a title, nor a language or interwiki. Should not happen.
			throw new InternalException( 'Unknown link type' );
		}

		$ns = $this->env->getContextTitle()->getNamespace();
		$noLanguageLinks = $this->env->getSiteConfig()->namespaceIsTalk( $ns ) ||
			!$this->env->getSiteConfig()->interwikiMagic();
		if ( !$noLanguageLinks ) {
			return $this->renderLanguageLink( $token, $target );
		}

		// Language links are disabled here: render as an interwiki link.
		$target->interwiki = $target->language;
		return $this->renderInterwikiLink( $token, $target );
	}

	// Redirect targets are always rendered as plain wiki links.
	if ( $isRedirect ) {
		return $this->renderWikiLink( $token, $target );
	}

	$siteConfig = $this->env->getSiteConfig();
	$nsId = $title->getNamespace();

	if ( $nsId === $siteConfig->canonicalNamespaceId( 'media' ) ) {
		// Render as a media link.
		return $this->renderMedia( $token, $target );
	}

	// File and category handling only applies without a colon escape.
	if ( !$target->fromColonEscapedText ) {
		if ( $nsId === $siteConfig->canonicalNamespaceId( 'file' ) ) {
			// Render as a file.
			return $this->renderFile( $token, $target );
		}
		if ( $nsId === $siteConfig->canonicalNamespaceId( 'category' ) ) {
			// Render as a category membership.
			return $this->renderCategory( $token, $target );
		}
	}

	// Render as plain wiki links.
	return $this->renderWikiLink( $token, $target );
}
389 | |
/**
 * This (overloaded) function does three different things:
 * - Extracts link text from attrs (entries whose key is "mw:maybeContent").
 *   As a performance micro-opt, only does so if asked to (getLinkText)
 * - Updates existing rdfa type with an additional rdf-type,
 *   if one is provided (rdfaType)
 * - Collates data-mw, typeof, about, and linkAttrs into a new attr. array
 *
 * @param array $attrs
 * @param bool $getLinkText
 * @param ?string $rdfaType
 * @param ?array $linkAttrs
 * @return array With keys 'attribs' (the new attribute array),
 *   'contentKVs' (the collected link-text KVs) and 'hasRdfaType'
 *   (whether the resulting rdfa type is non-null).
 */
public static function buildLinkAttrs(
	array $attrs, bool $getLinkText, ?string $rdfaType,
	?array $linkAttrs
): array {
	$collected = [];
	$contentKVs = [];
	$about = null;

	// Single pass over the attributes to pick up link text, typeof, about
	// and data-mw. about && typeof are usually at the end of the array if
	// at all present.
	foreach ( $attrs as $kv ) {
		$k = $kv->k;
		$v = $kv->v;

		if ( $getLinkText && $k === 'mw:maybeContent' ) {
			$contentKVs[] = $kv;
			continue;
		}
		if ( !is_string( $k ) || !$k ) {
			continue;
		}

		switch ( trim( $k ) ) {
			case 'typeof':
				// Append to a type that is already present.
				$rdfaType = $rdfaType ? $rdfaType . ' ' . $v : $v;
				break;
			case 'about':
				$about = $v;
				break;
			case 'data-mw':
				$collected[] = $kv;
				break;
		}
	}

	if ( $rdfaType ) {
		$collected[] = new KV( 'typeof', $rdfaType );
	}
	if ( $about ) {
		$collected[] = new KV( 'about', $about );
	}
	if ( $linkAttrs ) {
		PHPUtils::pushArray( $collected, $linkAttrs );
	}

	return [
		'attribs' => $collected,
		'contentKVs' => $contentKVs,
		'hasRdfaType' => $rdfaType !== null
	];
}
451 | |
/**
 * Generic wiki link attribute setup on a passed-in new token based on the
 * wikilink token and target. As a side effect, this method also extracts
 * the link content tokens and returns them.
 *
 * For piped links the content parts are re-joined with '|' (and optionally
 * wrapped in a DOM fragment token); otherwise the link text is derived
 * from the target href.
 *
 * @param Token $newTk Token receiving the attributes and data-parsoid
 * @param Token $token The original wikilink token
 * @param stdClass $target
 * @param bool $buildDOMFragment
 * @return array The link content
 * @throws InternalException On media or wikilink syntax nested in the
 *   link content.
 */
private function addLinkAttributesAndGetContent(
	Token $newTk, Token $token, stdClass $target, bool $buildDOMFragment = false
): array {
	$srcDp = $token->dataParsoid;
	$attrData = self::buildLinkAttrs(
		$token->attribs, true, null, [ new KV( 'rel', 'mw:WikiLink' ) ]
	);
	$contentKVs = $attrData['contentKVs'];

	// Set attribs and dataParsoid
	$newTk->attribs = $attrData['attribs'];
	$newTk->dataParsoid = $srcDp->clone();
	unset( $newTk->dataParsoid->src ); // clear src string since we can serialize this

	// Note: Link tails are handled on the DOM in handleLinkNeighbours, so no
	// need to handle them here.
	$numParts = count( $contentKVs );

	if ( $numParts === 0 ) {
		// No explicit link text: derive it from the target.
		$newTk->dataParsoid->stx = 'simple';
		$env = $this->env;
		$label = Utils::decodeURIComponent( $target->href );

		// Try to match labeling in core
		$contextNs = $env->getContextTitle()->getNamespace();
		if ( $env->getSiteConfig()->namespaceHasSubpages( $contextNs ) ) {
			// subpage links with a trailing slash get the trailing slashes stripped.
			// See https://gerrit.wikimedia.org/r/173431
			if ( preg_match( '#^((\.\./)+|/)(?!\.\./)(.*?[^/])/+$#D', $label, $match ) ) {
				$label = $match[3];
			} elseif ( str_starts_with( $label, '../' ) ) {
				// Subpages on interwiki / language links aren't valid,
				// so $target->title should always be present here
				$label = $target->title->getPrefixedText();
			}
		}

		// for interwiki links, include the interwiki prefix in the link text
		if ( $target->interwiki ) {
			$label = $target->prefix . ':' . $label;
		}

		// for local links, include the local prefix in the link text
		if ( $target->localprefix ) {
			$label = $target->localprefix . ':' . $label;
		}

		return [ $label ];
	}

	$newTk->dataParsoid->stx = 'piped';
	$out = [];

	// re-join content bits
	foreach ( $contentKVs as $partIdx => $kv ) {
		$rawToks = $kv->v;
		if ( !is_array( $rawToks ) ) {
			$rawToks = [ $rawToks ];
		}

		// Drop empty strings, keeping a contiguous list.
		$toks = [];
		foreach ( $rawToks as $candidate ) {
			if ( $candidate !== '' ) {
				$toks[] = $candidate;
			}
		}

		// since this is already a link, strip autolinks from content
		// FIXME: Maybe add a stop in the grammar so that autolinks
		// aren't tokenized in link content to begin with?
		foreach ( $toks as $j => $t ) {
			if ( $t instanceof TagTk ) {
				$tagName = $t->getName();

				// Bail on media-syntax in wikilink-syntax scenarios,
				// since the legacy parser explodes on [[, last one wins.
				// Note that without this, anchors tags in media output
				// will be stripped and we won't have the right structure
				// when we get to the dom pass to add media info.
				if (
					( $tagName === 'figure' || $tagName === 'span' ) &&
					TokenUtils::matchTypeOf( $t, '#^mw:File($|/)#D' ) !== null
				) {
					throw new InternalException( 'Media-in-link' );
				}

				if ( $tagName === 'a' ) {
					// Bail on wikilink-syntax in wikilink-syntax scenarios,
					// since the legacy parser explodes on [[, last one wins.
					// ISBN links don't use wikilink-syntax but still
					// get the same "rel", so should be ignored
					$isNestedWikiLink = preg_match(
						'#^mw:WikiLink(/Interwiki)?$#D',
						$t->getAttributeV( 'rel' ) ?? ''
					) && ( $t->dataParsoid->stx ?? '' ) !== 'magiclink';
					if ( $isNestedWikiLink ) {
						throw new InternalException( 'Link-in-link' );
					}

					$following = $toks[$j + 1] ?? null;
					if ( $following instanceof EndTagTk && $following->getName() === 'a' ) {
						// autonumbered links in the stream get rendered
						// as an <a> tag with no content -- but these ought
						// to be treated as plaintext since we don't allow
						// nested links.
						$out[] = '[' . $t->getAttributeV( 'href' ) . ']';
					}
					// suppress <a>
					continue;
				}
			}

			if ( $t instanceof EndTagTk && $t->getName() === 'a' ) {
				continue; // suppress </a>
			}

			$out[] = $t;
		}

		// Separator between parts, but not after the last one.
		if ( $partIdx < $numParts - 1 ) {
			$out[] = '|';
		}
	}

	if ( !$buildDOMFragment ) {
		return $out;
	}

	// content = [part 0, .. part l-1]
	// offsets = [start(part-0), end(part l-1)]
	$offsets = null;
	if ( isset( $srcDp->tsr ) ) {
		$offsets = new SourceRange(
			$contentKVs[0]->srcOffsets->value->start,
			$contentKVs[$numParts - 1]->srcOffsets->value->end
		);
	}
	return [
		PipelineUtils::getDOMFragmentToken(
			$out, $offsets, [ 'inlineContext' => true, 'token' => $token ]
		)
	];
}
594 | |
/**
 * Render a plain wiki link as an <a> ... </a> token sequence.
 *
 * Falls back to bailTokens() when the link content cannot be used.
 *
 * @param Token $token
 * @param stdClass $target
 * @return TokenHandlerResult
 */
private function renderWikiLink( Token $token, stdClass $target ): TokenHandlerResult {
	$anchor = new TagTk( 'a' );
	try {
		$content = $this->addLinkAttributesAndGetContent( $anchor, $token, $target, true );
	} catch ( InternalException $e ) {
		return new TokenHandlerResult( self::bailTokens( $this->manager, $token ) );
	}

	$anchor->addNormalizedAttribute(
		'href', $this->env->makeLink( $target->title ), $target->hrefSrc
	);
	$anchor->setAttribute( 'title', $target->title->getPrefixedText() );

	// Opening tag, link content, closing tag.
	$tokens = array_merge( [ $anchor ], $content );
	$tokens[] = new EndTagTk( 'a' );
	return new TokenHandlerResult( $tokens );
}
617 | |
/**
 * Render a category 'link'. Categories are really page properties, and are
 * normally rendered in a box at the bottom of an article.
 *
 * Emits a single <link rel="mw:PageProp/Category"> token and records the
 * category (with its sort key) in the page metadata.
 *
 * @param Token $token
 * @param stdClass $target
 * @return TokenHandlerResult
 */
private function renderCategory( Token $token, stdClass $target ): TokenHandlerResult {
	$linkTk = new SelfclosingTagTk( 'link' );
	try {
		$content = $this->addLinkAttributesAndGetContent( $linkTk, $token, $target );
	} catch ( InternalException $e ) {
		return new TokenHandlerResult( self::bailTokens( $this->manager, $token ) );
	}
	$env = $this->env;

	// Change the rel to be mw:PageProp/Category
	$linkTk->getAttributeKV( 'rel' )->v = 'mw:PageProp/Category';

	$linkTk->addNormalizedAttribute( 'href', $env->makeLink( $target->title ), $target->hrefSrc );

	// Change the href to include the sort key, if any (but don't update the rt info)
	// Fallback to empty string for default sorting
	$sortKey = '';
	$flatContent = str_replace( "\n", '', TokenUtils::tokensToString( $content ) );
	if ( $flatContent !== '' && $flatContent !== $target->href ) {
		$sortKey = $flatContent;
		$escapedKey = str_replace( '#', '%23', Sanitizer::sanitizeTitleURI( $sortKey, false ) );
		$linkTk->getAttributeKV( 'href' )->v .= '#' . $escapedKey;
	}

	if ( count( $content ) !== 1 ) {
		// Deal with sort keys that come from generated content (transclusions, etc.)
		$contentKV = $token->getAttributeKV( 'mw:maybeContent' );
		$expanded = PipelineUtils::expandAttrValueToDOM(
			$env,
			$this->manager->getFrame(),
			[ 'html' => $content, 'srcOffsets' => $contentKV->valueOffset() ],
			$this->options['expandTemplates'],
			$this->options['inTemplate']
		);
		$attr = [ [ 'txt' => 'mw:sortKey' ], $expanded ];

		// Add the sort key to existing data-mw, or start a fresh one.
		$dataMW = $linkTk->getAttributeV( 'data-mw' );
		if ( $dataMW ) {
			$dataMW = PHPUtils::jsonDecode( $dataMW, false );
			$dataMW->attribs[] = $attr;
		} else {
			$dataMW = (object)[ 'attribs' => [ $attr ] ];
		}

		// Mark token as having expanded attrs
		$linkTk->addAttribute( 'about', $env->newAboutId() );
		$linkTk->addSpaceSeparatedAttribute( 'typeof', 'mw:ExpandedAttrs' );
		$linkTk->addAttribute( 'data-mw', PHPUtils::jsonEncode( $dataMW ) );
	}

	$env->getMetadata()->addCategory( $target->title, $sortKey );
	return new TokenHandlerResult( [ $linkTk ] );
}
680 | |
/**
 * Render a language link. Those normally appear in the list of alternate
 * languages for an article in the sidebar, so are really a page property.
 *
 * Emits a single <link rel="mw:PageProp/Language"> token whose href is the
 * absolute URL of the article on the other wiki.
 *
 * @param Token $token
 * @param stdClass $target Target info; its prefix is listed in the
 *   interwiki map and the matching entry is in $target->language.
 * @return TokenHandlerResult
 */
private function renderLanguageLink( Token $token, stdClass $target ): TokenHandlerResult {
	// TODO: If $target->language['deprecated'] is set and
	// $target->language['extralanglink'] is *not* set, then we
	// should use the normalized language name/prefix (from
	// 'deprecated') when calling
	// ContentMetadataCollector::addLanguageLink() here (which
	// we should eventually be doing)

	// TODO: might also want to add the language *code* here,
	// which would be the language['bcp47'] property (added in
	// change I82465261bc66f0b0cd30d361c299f08066494762) for an
	// extralanglink, or the interwiki prefix otherwise; the
	// latter is mediawiki-internal and maybe not BCP-47 compliant.
	// This is for clients of the MediaWiki DOM spec HTML: the
	// WMF domain prefix, the MediaWiki internal language code,
	// and the actual *language* (ie bcp-47 code) can all differ
	// from each other, due to various historical infelicities.
	// Perhaps a `lang` attribute on the `link` would be appropriate.

	$linkTk = new SelfclosingTagTk( 'link', [], $token->dataParsoid );
	try {
		// Only the attribute setup is needed; the content is discarded.
		$this->addLinkAttributesAndGetContent( $linkTk, $token, $target );
	} catch ( InternalException $e ) {
		return new TokenHandlerResult( self::bailTokens( $this->manager, $token ) );
	}

	$langInfo = $target->language;

	// add title attribute giving the presentation name of the
	// "extra language link"
	// T329303: the 'linktext' comes from the system message
	// `interlanguage-link-$prefix` and should be set in integrated mode
	// using the localization features; the integrated-mode SiteConfig
	// currently never sets the `linktext` property in
	// SiteConfig::interwikiMap().
	// I52d50e2f75942a849908c6be7fc5169f00a5983a has some partial work
	// on this.
	if ( isset( $langInfo['extralanglink'] ) && !empty( $langInfo['linktext'] ) ) {
		// XXX in standalone mode, this is user-interface-language text,
		// not "content language" text.
		$linkTk->addNormalizedAttribute( 'title', $langInfo['linktext'], null );
	}

	// We set an absolute link to the article in the other wiki/language
	$encodedTitle = Sanitizer::sanitizeTitleURI(
		Utils::decodeURIComponent( $target->href ), false
	);
	$absHref = str_replace( '$1', $encodedTitle, $langInfo['url'] );
	if ( isset( $langInfo['protorel'] ) ) {
		// Protocol-relative: drop a leading http: / https:
		$absHref = preg_replace( '/^https?:/', '', $absHref, 1 );
	}
	$linkTk->addNormalizedAttribute( 'href', $absHref, $target->hrefSrc );

	// Change the rel to be mw:PageProp/Language
	$linkTk->getAttributeKV( 'rel' )->v = 'mw:PageProp/Language';

	return new TokenHandlerResult( [ $linkTk ] );
}
747 | |
/**
 * Render an interwiki link, i.e. a link whose prefix is listed in the
 * interwiki map.
 *
 * The result is an `<a>` start tag carrying an absolute `href` into the
 * other wiki, followed by the link content tokens and the closing tag.
 * If the link attributes cannot be built (InternalException), the result
 * of self::bailTokens() is returned instead.
 *
 * @param Token $token
 * @param stdClass $target
 * @return TokenHandlerResult
 */
private function renderInterwikiLink( Token $token, stdClass $target ): TokenHandlerResult {
	$anchor = new TagTk( 'a', [], $token->dataParsoid );
	try {
		$linkContent = $this->addLinkAttributesAndGetContent( $anchor, $token, $target, true );
	} catch ( InternalException $e ) {
		return new TokenHandlerResult( self::bailTokens( $this->manager, $token ) );
	}

	// We set an absolute link to the article in the other wiki/language.
	$trimmedHref = trim( $target->href );
	$isLocal = !empty( $target->interwiki['local'] );
	$decodedHref = Utils::decodeURIComponent( $trimmedHref );
	$title = Sanitizer::sanitizeTitleURI( $decodedHref, !$isLocal );
	$absHref = str_replace( '$1', $title, $target->interwiki['url'] );
	if ( isset( $target->interwiki['protorel'] ) ) {
		// Protocol-relative target: drop the leading scheme
		$absHref = preg_replace( '/^https?:/', '', $absHref, 1 );
	}
	$anchor->addNormalizedAttribute( 'href', $absHref, $target->hrefSrc );

	// Change the rel to be mw:WikiLink/Interwiki
	$anchor->getAttributeKV( 'rel' )->v = 'mw:WikiLink/Interwiki';
	// Remember that this was using wikitext syntax though
	$anchor->dataParsoid->isIW = true;

	// Add title unless it's just a fragment (and trim off fragment)
	// (The normalization here is similar to what Title#getPrefixedDBKey() does.)
	$isFragmentOnly = $target->href !== '' && $target->href[0] === '#';
	if ( !$isFragmentOnly ) {
		$withoutFragment = preg_replace( '/#.*/s', '', $trimmedHref, 1 );
		$spaced = str_replace( '_', ' ', $withoutFragment );
		$anchor->setAttribute(
			'title',
			$target->interwiki['prefix'] . ':' . Utils::decodeURIComponent( $spaced )
		);
	}

	$result = [ $anchor ];
	PHPUtils::pushArray( $result, $linkContent );
	$result[] = new EndTagTk( 'a' );
	return new TokenHandlerResult( $result );
}
797 | |
/**
 * Get the class list for an image's wrapper element, and whether the
 * image is still inline once the alignment options have been applied.
 *
 * @param array $opts The option hash from renderFile.
 * @return array An array with two keys, both always present:
 *   - classes (string[]): The list of classes for the wrapper.
 *   - isInline (bool): Whether the image is inline after handling options.
 */
private static function getWrapperInfo( array $opts ) {
	$format = self::getFormat( $opts );
	$isInline = !in_array( $format, [ 'thumbnail', 'manualthumb', 'framed' ], true );
	$classes = [];

	if (
		!isset( $opts['size']['src'] ) &&
		// Framed and manualthumb images aren't scaled
		!in_array( $format, [ 'manualthumb', 'framed' ], true )
	) {
		$classes[] = 'mw-default-size';
	}

	// Border isn't applicable to 'thumbnail', 'manualthumb', or 'framed' formats
	// Using $isInline as a shorthand for that here (see above),
	// but this isn't about being *inline* per se
	if ( $isInline && isset( $opts['border'] ) ) {
		$classes[] = 'mw-image-border';
	}

	// Any explicit horizontal alignment takes the image out of the inline
	// flow. For reference, the PHP parser wraps these in:
	//   none   => <div class="floatnone">
	//   center => <div class="center"><div class="floatnone">
	//   left   => <div class="floatleft">
	//   right  => <div class="floatright">
	$halign = $opts['halign']['v'] ?? null;
	if ( in_array( $halign, [ 'none', 'center', 'left', 'right' ], true ) ) {
		$isInline = false;
		$classes[] = 'mw-halign-' . $halign;
	}

	// Vertical alignment is only emitted for images that are still inline
	if ( $isInline ) {
		$valign = $opts['valign']['v'] ?? null;
		$knownValigns = [
			'middle', 'baseline', 'sub', 'super',
			'top', 'text_top', 'bottom', 'text_bottom'
		];
		if ( in_array( $valign, $knownValigns, true ) ) {
			// Option names use underscores, class names use hyphens
			// (e.g. 'text_top' => 'mw-valign-text-top')
			$classes[] = 'mw-valign-' . str_replace( '_', '-', $valign );
		}
	}

	return [ 'classes' => $classes, 'isInline' => $isInline ];
}
902 | |
/**
 * Determine the name of an option.
 *
 * The trimmed option text is looked up both as a simple option (via the
 * site config's media-option magic words) and as a prefix option (via the
 * parameterized alias matcher). A recognized prefix option takes
 * precedence over a simple one.
 *
 * @param string $optStr
 * @param Env $env
 * @return array|null Null when the text is not a recognized option, else
 *   ck Canonical key for the image option.
 *   v Value of the option.
 *   ak The option string exactly as passed in ($optStr).
 *   s Whether it's a simple option or one with a value.
 */
private static function getOptionInfo( string $optStr, Env $env ): ?array {
	// The trimmed text contains the localized name of this option.
	$trimmed = trim( $optStr );
	$siteConfig = $env->getSiteConfig();
	$matchPrefixOption = $siteConfig->getMediaPrefixParameterizedAliasMatcher();

	// The canonical option names (from mediawiki upstream) are in
	// English and contain an '(img|timedmedia)_' prefix.
	$canonicalName = $siteConfig->getMagicWordForMediaOption( $trimmed ) ?? '';
	// The key we'd put in opts; it names the 'group' for the option,
	// and doesn't have an img_ prefix.
	$simpleGroup = Consts::$Media['SimpleOptions'][$canonicalName] ?? null;

	// For prefix options, $bits['k'] holds the option name and
	// $bits['v'] the value. $canonicalPrefixName is the canonical English
	// option name (from mediawiki upstream) with a prefix; $prefixGroup
	// is the parsoid 'group' for the option.
	$bits = $matchPrefixOption( $trimmed );
	$canonicalPrefixName = null;
	$prefixGroup = null;
	if ( $bits ) {
		$canonicalPrefixName = mb_strtolower( trim( $bits['k'] ) );
		$prefixGroup = Consts::$Media['PrefixOptions'][$canonicalPrefixName] ?? null;
	}

	if ( $prefixGroup === null && !empty( $simpleGroup ) ) {
		// Simple option. We drop the prefix from the canonical name
		// before stuffing it in data-parsoid in order to save space.
		return [
			'ck' => $simpleGroup,
			'v' => preg_replace( '/^(img|timedmedia)_/', '', $canonicalName, 1 ),
			'ak' => $optStr,
			's' => true
		];
	}

	if ( !$bits || !$prefixGroup ) {
		return null;
	}

	// Note that we deliberately do entity decoding
	// *after* splitting so that HTML-encoded pipes don't
	// separate options. This matches PHP, whether or
	// not it's a good idea.
	return [
		'ck' => preg_replace( '/^(img|timedmedia)_/', '', $canonicalPrefixName, 1 ),
		'v' => Utils::decodeWtEntities( $bits['v'] ),
		'ak' => $optStr,
		's' => false
	];
}
965 | |
/**
 * Whether the option being stringified is one whose value is allowed to
 * contain wikitext (`link` or `alt`).
 *
 * @param Env $env
 * @param ?array &$optInfo Cached option info. When null, it is computed
 *   from `$prefix . $resultStr` and written back to the caller (it may
 *   still be null afterwards if no option is recognized).
 * @param string $prefix
 * @param string $resultStr
 * @return bool
 */
private static function isWikitextOpt(
	Env $env, ?array &$optInfo, string $prefix, string $resultStr
): bool {
	// link and alt options are allowed to contain arbitrary
	// wikitext (even though only strings are supported in reality)
	// FIXME(SSS): Is this actually true of all options rather than
	// just link and alt?
	$optInfo ??= self::getOptionInfo( $prefix . $resultStr, $env );
	if ( $optInfo === null ) {
		return false;
	}
	return in_array( $optInfo['ck'], [ 'link', 'alt' ], true );
}
978 | |
/**
 * Make option token streams into a stringy thing that we can recognize.
 *
 * Strings are concatenated, nested arrays are stringified recursively and
 * a small set of tokens (DOM fragments, quotes, entity spans, links) is
 * unpacked when the text collected so far identifies a `link` or `alt`
 * option. Anything else makes the stream look like a caption, which is
 * signalled by returning null.
 *
 * @param array $tstream
 * @param string $prefix Anything that came before this part of the recursive call stack.
 * @param Env $env
 * @return string|null The stringified option, or null if the stream
 *   cannot be an option (most likely a caption).
 */
private static function stringifyOptionTokens( array $tstream, string $prefix, Env $env ) {
	// Seems like this should be a more general "stripTags"-like function?
	$skipToEndOf = null;
	$optInfo = null;
	$resultStr = '';

	// An index loop is required: the interwiki `link` case below skips
	// ahead by bumping $i.
	$numTokens = count( $tstream );
	for ( $i = 0; $i < $numTokens; $i++ ) {
		$currentToken = $tstream[$i];

		if ( $skipToEndOf ) {
			if ( $currentToken instanceof EndTagTk && $currentToken->getName() === $skipToEndOf ) {
				$skipToEndOf = null;
			}
			continue;
		}

		if ( is_string( $currentToken ) ) {
			$resultStr .= $currentToken;
			continue;
		}

		if ( is_array( $currentToken ) ) {
			$nextResult = self::stringifyOptionTokens( $currentToken, $prefix . $resultStr, $env );
			if ( $nextResult === null ) {
				return null;
			}
			$resultStr .= $nextResult;
			continue;
		}

		if ( $currentToken instanceof EndTagTk ) {
			continue;
		}

		// This is actually a token
		if ( TokenUtils::hasDOMFragmentType( $currentToken ) ) {
			if ( !self::isWikitextOpt( $env, $optInfo, $prefix, $resultStr ) ) {
				// if this is a nowiki, we must be in a caption
				return null;
			}
			$str = TokenUtils::tokensToString( [ $currentToken ], false, [
					// These tokens haven't been expanded to DOM yet
					// so unpacking them here is justifiable
					// FIXME: It's a little convoluted to figure out
					// that this is actually the case in the
					// AttributeExpander, but it seems like only
					// target/href ever gets expanded to DOM and
					// the rest of the wikilink_content/options
					// become mw:maybeContent that gets expanded
					// below where $hasExpandableOpt is set.
					'unpackDOMFragments' => true,
					// FIXME: Sneaking in `env` to avoid changing the signature
					'env' => $env
				]
			);
			// Entity encode pipes since we wouldn't have split on
			// them from fragments and we're about to attempt to
			// when this function returns.
			// This is similar to getting the shadow "href" below.
			// NOTE(review): the limit of 1 only encodes the first pipe;
			// confirm whether later pipes are meant to stay splittable.
			$resultStr .= preg_replace( '/\|/', '&vert;', $str, 1 );
			$optInfo = null; // might change the nature of opt
			continue;
		}

		if ( $currentToken->getName() === 'mw-quote' ) {
			if ( self::isWikitextOpt( $env, $optInfo, $prefix, $resultStr ) ) {
				// just recurse inside
				$optInfo = null; // might change the nature of opt
				continue;
			}
			return null;
		}

		// Similar to TokenUtils.tokensToString()'s includeEntities
		if ( TokenUtils::isEntitySpanToken( $currentToken ) ) {
			$resultStr .= $currentToken->dataParsoid->src;
			$skipToEndOf = 'span';
			continue;
		}

		if ( $currentToken->getName() !== 'a' ) {
			// Any other token contributes nothing to the string
			continue;
		}

		if ( $optInfo === null ) {
			$optInfo = self::getOptionInfo( $prefix . $resultStr, $env );
			if ( $optInfo === null ) {
				// An <a> tag before a valid option?
				// This is most likely a caption.
				return null;
			}
		}

		if ( !self::isWikitextOpt( $env, $optInfo, $prefix, $resultStr ) ) {
			// Why would there be an a tag without a link?
			return null;
		}

		$tokenType = $currentToken->getAttributeV( 'rel' );
		// Using the shadow since entities (think pipes) would
		// have already been decoded.
		$tkHref = $currentToken->getAttributeShadowInfo( 'href' )['value'];
		$isLink = $optInfo && $optInfo['ck'] === 'link';
		// Reset the optInfo since we're changing the nature of it
		$optInfo = null;

		// Figure out the proper string to put here and break.
		if (
			$tokenType === 'mw:ExtLink' &&
			( $currentToken->dataParsoid->stx ?? '' ) === 'url'
		) {
			// Add the URL
			$resultStr .= $tkHref;
			// Tell our loop to skip to the end of this tag
			$skipToEndOf = 'a';
		} elseif ( $tokenType === 'mw:WikiLink/Interwiki' ) {
			if ( $isLink ) {
				$resultStr .= $currentToken->getAttributeV( 'href' );
				// Skip the two tokens following the start tag
				$i += 2;
				continue;
			}
			// Nothing to do -- the link content will be
			// captured by walking the rest of the tokens.
		} elseif ( $tokenType === 'mw:WikiLink' || $tokenType === 'mw:MediaLink' ) {
			// Nothing to do -- the link content will be
			// captured by walking the rest of the tokens.
		} else {
			// There shouldn't be any other kind of link...
			// This is likely a caption.
			return null;
		}
	}

	return $resultStr;
}
1115 | |
/**
 * Get the format for media.
 *
 * A truthy `manualthumb` option takes precedence over the `format`
 * option. Missing keys are treated as "not set" rather than raising an
 * "Undefined array key" warning.
 *
 * @param array $opts The option hash from renderFile.
 * @return string|null 'manualthumb', the value of the `format` option,
 *   or null when neither is set.
 */
private static function getFormat( array $opts ): ?string {
	if ( !empty( $opts['manualthumb'] ) ) {
		return 'manualthumb';
	}
	return $opts['format']['v'] ?? null;
}
1128 | |
/** @var array|null Lazily built result of getUsed(); unset until first use. */
private $used;

/**
 * The set of file options that apply to the container, rather than to
 * the media element itself (or that apply generically to a span).
 * Other options depend on the fetched media type and won't necessarily
 * be applied.
 *
 * The set is built once via PHPUtils::makeSet() and cached on the
 * instance.
 *
 * @return array
 */
private function getUsed(): array {
	if ( !$this->used ) {
		$containerOptions = [
			'lang', 'width', 'class', 'upright',
			'border', 'frameless', 'framed', 'thumbnail',
			'left', 'right', 'center', 'none',
			'baseline', 'sub', 'super', 'top', 'text_top', 'middle', 'bottom', 'text_bottom'
		];
		$this->used = PHPUtils::makeSet( $containerOptions );
	}
	return $this->used;
}
1152 | |
/**
 * Whether any token in the list is a self-closing tag with the
 * `mw:Transclusion` type.
 *
 * @param array $toks
 * @return bool
 */
private function hasTransclusion( array $toks ): bool {
	foreach ( $toks as $tok ) {
		if ( !( $tok instanceof SelfclosingTagTk ) ) {
			continue;
		}
		if ( TokenUtils::hasTypeOf( $tok, 'mw:Transclusion' ) ) {
			return true;
		}
	}
	return false;
}
1164 | |
/**
 * Render a file. This can be an image, a sound, a PDF etc.
 *
 * The media options found in the token's attributes are parsed one by
 * one: recognized options are stored in a local option hash and recorded
 * in `optList` of a cloned data-parsoid for round-tripping; anything
 * unrecognized is treated as a caption, and the last caption wins. The
 * result is a `span` (inline) or `figure` (block) container wrapping a
 * placeholder link and span for the media, plus a `figcaption` for
 * blocks. Inline captions are stored in data-mw instead.
 *
 * @param Token $token
 * @param stdClass $target
 * @return TokenHandlerResult
 */
private function renderFile( Token $token, stdClass $target ): TokenHandlerResult {
	$manager = $this->manager;
	$env = $this->env;

	// FIXME: Re-enable use of media cache and figure out how that fits
	// into this new processing model. See T98995
	// const cachedMedia = env.mediaCache[token.dataParsoid.src];

	$dataParsoid = $token->dataParsoid->clone();
	$dataParsoid->optList = [];

	// Account for the possibility of an expanded target
	$dataMwAttr = $token->getAttributeV( 'data-mw' );
	$dataMw = $dataMwAttr ? PHPUtils::jsonDecode( $dataMwAttr, false ) : new stdClass;

	$opts = [
		'title' => [
			'v' => $env->makeLink( $target->title ),
			'src' => $token->getAttributeKV( 'href' )->vsrc
		],
		'size' => [
			'v' => [
				'height' => null,
				'width' => null
			]
		],
		// Initialize these properties to avoid isset checks
		'caption' => null,
		'format' => null,
		'manualthumb' => null,
		'class' => null
	];

	$hasExpandableOpt = false;

	$optKVs = self::buildLinkAttrs( $token->attribs, true, null, null )['contentKVs'];
	while ( count( $optKVs ) > 0 ) {
		$oContent = array_shift( $optKVs );
		Assert::invariant( $oContent instanceof KV, 'bad type' );

		$origOptSrc = $oContent->v;
		if ( is_array( $origOptSrc ) && count( $origOptSrc ) === 1 ) {
			$origOptSrc = $origOptSrc[0];
		}

		$oText = TokenUtils::tokensToString( $origOptSrc, true, [ 'includeEntities' => true ] );

		if ( !is_string( $oText ) ) {
			// Might be that this is a valid option whose value is just
			// complicated. Try to figure it out, step through all tokens.
			$maybeOText = self::stringifyOptionTokens( $oText, '', $env );
			if ( $maybeOText !== null ) {
				$oText = $maybeOText;
			}
		}

		$optInfo = null;
		if ( is_string( $oText ) ) {
			if ( str_contains( $oText, '|' ) ) {
				// Split the pipe-separated string into pieces
				// and convert each one into a KV obj and add them
				// to the beginning of the array. Note that this is
				// a hack to support templates that provide multiple
				// image options as a pipe-separated string. We aren't
				// really providing editing support for this yet, or
				// ever, maybe.
				//
				// TODO(arlolra): Tables in captions suppress breaking on
				// "linkdesc" pipes so `stringifyOptionTokens` should account
				// for pipes in table cell content. For the moment, breaking
				// here is acceptable since it matches the php implementation
				// bug for bug.
				$pieces = array_map( static function ( $s ) {
					return new KV( 'mw:maybeContent', $s );
				}, explode( '|', $oText ) );
				$optKVs = array_merge( $pieces, $optKVs );

				// Record the fact that we won't provide editing support for this.
				$dataParsoid->uneditable = true;
				continue;
			} else {
				// We're being overly accepting of media options at this point,
				// since we don't know the type yet. After the info request,
				// we'll filter out those that aren't appropriate.
				$optInfo = self::getOptionInfo( $oText, $env );
			}
		}

		$recordCaption = static function () use ( $oContent, $oText, $dataParsoid, &$opts ) {
			$optsCaption = [
				'v' => $oContent->v,
				'src' => $oContent->vsrc ?? $oText,
				'srcOffsets' => $oContent->valueOffset(),
				// remember the position
				'pos' => count( $dataParsoid->optList )
			];
			// if there was a 'caption' previously, round-trip it as a
			// "bogus option".
			if ( !empty( $opts['caption'] ) ) {
				// Wrap the caption opt in an array since the option itself is an array!
				// Without the wrapping, the splicing will flatten the value.
				array_splice( $dataParsoid->optList, $opts['caption']['pos'], 0, [ [
						'ck' => 'bogus',
						'ak' => $opts['caption']['src']
					] ]
				);
				$optsCaption['pos']++;
			}
			$opts['caption'] = $optsCaption;
		};

		// For the values of the caption and options, see
		// getOptionInfo's documentation above.
		//
		// If there are multiple captions, this code always
		// picks the last entry. This is the spec; see
		// "Image with multiple captions" parserTest.
		if ( !is_string( $oText ) || $optInfo === null ||
			// Deprecated options
			in_array( $optInfo['ck'], [ 'disablecontrols' ], true )
		) {
			// No valid option found!?
			// Record for RT-ing
			$recordCaption();
			continue;
		}

		// First option wins, the rest are 'bogus'
		// FIXME: For now, see T305628
		if ( isset( $opts[$optInfo['ck']] ) || (
			// All the formats are simple options with the key "format"
			// except for "manualthumb", so check if the format has been set
			in_array( $optInfo['ck'], [ 'format', 'manualthumb' ], true ) && (
				self::getFormat( $opts ) ||
				( $this->options['extTagOpts']['suppressMediaFormats'] ?? false )
			)
		) ) {
			$dataParsoid->optList[] = [
				'ck' => 'bogus',
				'ak' => $optInfo['ak']
			];
			continue;
		}

		$opt = [
			'ck' => $optInfo['v'],
			'ak' => $oContent->vsrc ?? $optInfo['ak']
		];

		if ( $optInfo['s'] === true ) {
			// Default: Simple image option
			$opts[$optInfo['ck']] = [ 'v' => $optInfo['v'] ];
		} else {
			// Map short canonical name to the localized version used.
			$opt['ck'] = $optInfo['ck'];

			// The MediaWiki magic word for image dimensions is called 'width'
			// for historical reasons
			// Unlike other options, use last-specified width.
			if ( $optInfo['ck'] === 'width' ) {
				// We support a trailing 'px' here for historical reasons
				// (T15500, T53628, T207032)
				$maybeDim = Utils::parseMediaDimensions( $optInfo['v'] );
				if ( $maybeDim !== null ) {
					if ( $maybeDim['bogusPx'] ) {
						// Lint away redundant unit (T207032)
						$dataParsoid->setTempFlag( TempData::BOGUS_PX );
					}
					$opts['size']['v'] = [
						'width' => Utils::validateMediaParam( $maybeDim['x'] ) ? $maybeDim['x'] : null,
						'height' => array_key_exists( 'y', $maybeDim ) &&
							Utils::validateMediaParam( $maybeDim['y'] ) ? $maybeDim['y'] : null
					];
					// Only round-trip a valid size
					$opts['size']['src'] = $oContent->vsrc ?? $optInfo['ak'];
					// check for duplicated options
					foreach ( $dataParsoid->optList as &$value ) {
						if ( $value['ck'] === 'width' ) {
							$value['ck'] = 'bogus'; // mark the previous definition as bogus, last one wins
							break;
						}
					}
					// Don't leave a reference into optList dangling
					unset( $value );
				} else {
					$recordCaption();
					continue;
				}
			// Lang is a global attribute and can be applied to all media elements
			// for editing and roundtripping. However, not all file handlers will
			// make use of it. This param validation is from the SVG handler but
			// seems generally applicable.
			} elseif ( $optInfo['ck'] === 'lang' && !Language::isValidInternalCode( $optInfo['v'] ) ) {
				$opt['ck'] = 'bogus';
			} elseif (
				$optInfo['ck'] === 'upright' &&
				( !is_numeric( $optInfo['v'] ) || $optInfo['v'] <= 0 )
			) {
				$opt['ck'] = 'bogus';
			} else {
				$opts[$optInfo['ck']] = [
					'v' => $optInfo['v'],
					'src' => $oContent->vsrc ?? $optInfo['ak'],
					'srcOffsets' => $oContent->valueOffset(),
				];
			}
		}

		// Collect option in dataParsoid (becomes data-parsoid later on)
		// for faithful serialization.
		$dataParsoid->optList[] = $opt;

		// Collect source wikitext for image options for possible template expansion.
		$maybeOpt = !isset( $this->getUsed()[$opt['ck']] );
		$expOpt = null;
		// Links more often than not show up as arrays here because they're
		// tokenized as `autourl`. To avoid unnecessarily considering them
		// expanded, we'll use a more restrictive test, at the cost of
		// perhaps missing some edgy behaviour.
		if ( $opt['ck'] === 'link' ) {
			$expOpt = is_array( $origOptSrc ) &&
				$this->hasTransclusion( $origOptSrc );
		} else {
			$expOpt = is_array( $origOptSrc );
		}
		if ( $maybeOpt || $expOpt ) {
			$val = [];
			if ( $expOpt ) {
				$hasExpandableOpt = true;
				$val['html'] = $origOptSrc;
				$val['srcOffsets'] = $oContent->valueOffset();
				$val = PipelineUtils::expandAttrValueToDOM(
					$env, $manager->getFrame(), $val,
					$this->options['expandTemplates'],
					$this->options['inTemplate']
				);
			}

			// This is a bit of an abuse of the "txt" property since
			// `optInfo.v` isn't necessarily wikitext from source.
			// It's a result of the specialized stringifying above, which
			// if interpreted as wikitext upon serialization will result
			// in some (acceptable) normalization.
			//
			// We're storing these options in data-mw because they aren't
			// guaranteed to apply to all media types and we'd like to
			// avoid the need to back them out later.
			//
			// Note that the caption in the legacy parser depends on the
			// exact set of options parsed, which we aren't attempting to
			// try and replicate after fetching the media info, since we
			// consider that more of a bug than a feature. It prevents
			// anyone from ever safely adding media options in the future.
			//
			// See T163582
			if ( $maybeOpt ) {
				$val['txt'] = $optInfo['v'];
			}
			$dataMw->attribs ??= [];
			$dataMw->attribs[] = [ $opt['ck'], $val ];
		}
	}

	// Add the last caption in the right position if there is one
	if ( isset( $opts['caption'] ) ) {
		// Wrap the caption opt in an array since the option itself is an array!
		// Without the wrapping, the splicing will flatten the value.
		array_splice( $dataParsoid->optList, $opts['caption']['pos'], 0, [ [
				'ck' => 'caption',
				'ak' => $opts['caption']['src']
			] ]
		);
	}

	$format = self::getFormat( $opts );

	// Handle image default sizes and upright option after extracting all
	// options
	if ( $format === 'framed' || $format === 'manualthumb' ) {
		// width and height is ignored for framed and manualthumb images
		// https://phabricator.wikimedia.org/T64258
		$opts['size']['v'] = [ 'width' => null, 'height' => null ];
		// Mark any definitions as bogus
		foreach ( $dataParsoid->optList as &$value ) {
			if ( $value['ck'] === 'width' ) {
				$value['ck'] = 'bogus';
			}
		}
		// Don't leave a reference into optList dangling
		unset( $value );
	} elseif ( $format ) {
		if ( !$opts['size']['v']['height'] && !$opts['size']['v']['width'] ) {
			$defaultWidth = $env->getSiteConfig()->widthOption();
			if ( isset( $opts['upright'] ) ) {
				if ( $opts['upright']['v'] === 'upright' ) { // Simple option
					$defaultWidth *= 0.75;
				} else {
					$defaultWidth *= $opts['upright']['v'];
				}
				// round to nearest 10 pixels
				$defaultWidth = 10 * round( $defaultWidth / 10 );
			}
			$opts['size']['v']['width'] = $defaultWidth;
		}
	}

	$rdfaType = 'mw:File';

	// If the format is something we *recognize*, add the subtype
	switch ( $format ) {
		case 'manualthumb': // FIXME(T305759): Does it deserve its own type?
		case 'thumbnail':
			$rdfaType .= '/Thumb';
			break;
		case 'framed':
			$rdfaType .= '/Frame';
			break;
		case 'frameless':
			$rdfaType .= '/Frameless';
			break;
	}

	// Tell VE that it shouldn't try to edit this
	if ( !empty( $dataParsoid->uneditable ) ) {
		$rdfaType .= ' mw:Placeholder';
	} else {
		unset( $dataParsoid->src );
	}

	$wrapperInfo = self::getWrapperInfo( $opts );

	$isInline = $wrapperInfo['isInline'];
	$containerName = $isInline ? 'span' : 'figure';

	$classes = $wrapperInfo['classes'];
	if ( !empty( $opts['class'] ) ) {
		PHPUtils::pushArray( $classes, explode( ' ', $opts['class']['v'] ) );
	}

	$attribs = [ new KV( 'typeof', $rdfaType ) ];
	if ( count( $classes ) > 0 ) {
		array_unshift( $attribs, new KV( 'class', implode( ' ', $classes ) ) );
	}

	$container = new TagTk( $containerName, $attribs, $dataParsoid );
	$containerClose = new EndTagTk( $containerName );

	if ( $hasExpandableOpt ) {
		$container->addAttribute( 'about', $env->newAboutId() );
		$container->addSpaceSeparatedAttribute( 'typeof', 'mw:ExpandedAttrs' );
	} elseif ( preg_match( '/\bmw:ExpandedAttrs\b/', $token->getAttributeV( 'typeof' ) ?? '' ) ) {
		$container->addSpaceSeparatedAttribute( 'typeof', 'mw:ExpandedAttrs' );
	}

	$span = new TagTk( 'span', [ new KV( 'class', 'mw-file-element mw-broken-media' ) ] );

	// "resource" and "lang" are allowed attributes on spans
	$span->addNormalizedAttribute( 'resource', $opts['title']['v'], $opts['title']['src'] );
	if ( isset( $opts['lang'] ) ) {
		$span->addNormalizedAttribute( 'lang', $opts['lang']['v'], $opts['lang']['src'] );
	}

	// Token's KV attributes only accept strings, Tokens or arrays of those.
	$size = $opts['size']['v'];
	if ( !empty( $size['width'] ) ) {
		$span->addAttribute( 'data-width', (string)$size['width'] );
	}
	if ( !empty( $size['height'] ) ) {
		$span->addAttribute( 'data-height', (string)$size['height'] );
	}

	$anchor = new TagTk( 'a' );
	$anchor->setAttribute( 'href', $this->specialFilePath( $target->title ) );

	$tokens = [
		$container,
		$anchor,
		$span,
		// FIXME: The php parser seems to put the link text here instead.
		// The title can go on the `anchor` as the "title" attribute.
		$target->title->getPrefixedText(),
		new EndTagTk( 'span' ),
		new EndTagTk( 'a' )
	];

	$optsCaption = $opts['caption'] ?? null;
	if ( $isInline ) {
		if ( $optsCaption ) {
			if ( !is_array( $optsCaption['v'] ) ) {
				$opts['caption']['v'] = $optsCaption['v'] = [ $optsCaption['v'] ];
			}
			// Parse the caption
			$captionDOM = PipelineUtils::processContentInPipeline(
				$this->env,
				$this->manager->getFrame(),
				array_merge( $optsCaption['v'], [ new EOFTk() ] ),
				[
					'pipelineType' => 'tokens/x-mediawiki/expanded',
					'pipelineOpts' => [
						'inlineContext' => true,
						'expandTemplates' => $this->options['expandTemplates'],
						'inTemplate' => $this->options['inTemplate']
					],
					'srcOffsets' => $optsCaption['srcOffsets'] ?? null,
					'sol' => true
				]
			);

			// Use parsed DOM given in `captionDOM`
			// FIXME: Does this belong in `dataMw.attribs`?
			$dataMw->caption = ContentUtils::ppToXML(
				$captionDOM, [ 'innerXML' => true ]
			);
		}
	} else {
		// We always add a figcaption for blocks
		$tsr = $optsCaption['srcOffsets'] ?? null;
		$dp = new DataParsoid;
		$dp->tsr = $tsr;
		$tokens[] = new TagTk( 'figcaption', [], $dp );
		if ( $optsCaption ) {
			if ( is_string( $optsCaption['v'] ) ) {
				$tokens[] = $optsCaption['v'];
			} else {
				$tokens[] = PipelineUtils::getDOMFragmentToken(
					$optsCaption['v'],
					$tsr,
					[ 'inlineContext' => true, 'token' => $token ]
				);
			}
		}
		$tokens[] = new EndTagTk( 'figcaption' );
	}

	// Only emit data-mw when there is something in it
	if ( (array)$dataMw !== [] ) {
		$container->addAttribute( 'data-mw', PHPUtils::jsonEncode( $dataMw ) );
	}

	$tokens[] = $containerClose;
	return new TokenHandlerResult( $tokens );
}
1609 | |
/**
 * Build the relative `Special:FilePath` URL for a file title.
 *
 * The title's key is run through Sanitizer::sanitizeTitleURI() and
 * appended to the fixed "./Special:FilePath/" prefix.
 *
 * @param Title $title Title of the file to link to.
 * @return string Relative URL of the form "./Special:FilePath/<key>".
 */
private function specialFilePath( Title $title ): string {
	// NOTE(review): the meaning of the `false` flag is defined by
	// Sanitizer::sanitizeTitleURI(); confirm against that helper.
	return './Special:FilePath/' . Sanitizer::sanitizeTitleURI( $title->getKey(), false );
}
1614 | |
/**
 * Build the token stream for a media link: an `<a rel="mw:MediaLink">`
 * start tag, the link content, and the matching end tag.
 *
 * When $errs is non-empty the anchor is additionally marked with
 * typeof="mw:Error" and the errors are recorded in its data-mw attribute
 * (merged with any errors already present in the token's data-mw).
 *
 * @param Token $token The token being handled; its data-mw is read on error.
 * @param stdClass $target Link target info; `title` and `hrefSrc` are used here.
 * @param array $errs Error entries to report (may be empty).
 * @param ?array $info File info, or null; only its 'url' entry is read here.
 * @return TokenHandlerResult The link tokens, or the bail-out tokens if
 *   building the link attributes throws an InternalException.
 */
private function linkToMedia( Token $token, stdClass $target, array $errs, ?array $info ): TokenHandlerResult {
	// Media links point at the file url itself, never at the thumburl;
	// fall back to Special:FilePath when no url is known.
	$href = $info['url'] ?? $this->specialFilePath( $target->title ); // Copied from getPath

	$anchor = new TagTk( 'a' );

	try {
		$linkContent = $this->addLinkAttributesAndGetContent( $anchor, $token, $target );
	} catch ( InternalException $e ) {
		return new TokenHandlerResult( self::bailTokens( $this->manager, $token ) );
	}

	// Overwrite the existing rel value so it reads mw:MediaLink
	$anchor->getAttributeKV( 'rel' )->v = 'mw:MediaLink';

	$anchor->setAttribute( 'href', $href );

	// html2wt will use the resource rather than try to parse the href.
	$anchor->addNormalizedAttribute(
		'resource',
		$this->env->makeLink( $target->title ),
		$target->hrefSrc
	);

	// Normalize title according to how PHP parser does it currently:
	// keep only what follows the last slash and turn underscores into spaces.
	$fileName = preg_replace( '#.*/#', '', $href, 1 );
	$anchor->setAttribute( 'title', str_replace( '_', ' ', $fileName ) );

	if ( $errs !== [] ) {
		// Set RDFa type to mw:Error so VE and other clients
		// can use this to do client-specific action on these.
		if ( !TokenUtils::hasTypeOf( $anchor, 'mw:Error' ) ) {
			$anchor->addSpaceSeparatedAttribute( 'typeof', 'mw:Error' );
		}

		// Start from the token's existing data-mw, if it has one
		$rawDataMw = $token->getAttributeV( 'data-mw' );
		if ( $rawDataMw ) {
			$dataMw = PHPUtils::jsonDecode( $rawDataMw, false );
		} else {
			$dataMw = new stdClass;
		}

		// Append to an existing error list, otherwise start one
		if ( !is_array( $dataMw->errors ?? null ) ) {
			$dataMw->errors = $errs;
		} else {
			PHPUtils::pushArray( $dataMw->errors, $errs );
		}
		$anchor->setAttribute( 'data-mw', PHPUtils::jsonEncode( $dataMw ) );
	}

	return new TokenHandlerResult(
		array_merge( [ $anchor ], $linkContent, [ new EndTagTk( 'a' ) ] )
	);
}
1665 | |
// FIXME: The media request here is only used to determine whether this is
// a redlink and should be handled in the redlink post-processing pass.

/**
 * Look up file info for the target title and render it as a media link.
 *
 * A missing file, or a file whose info carries a 'thumberror', produces
 * a single error entry that is forwarded to linkToMedia().
 *
 * @param Token $token
 * @param stdClass $target
 * @return TokenHandlerResult
 */
private function renderMedia( Token $token, stdClass $target ): TokenHandlerResult {
	// Request info for exactly one file, with no size constraints
	$fileInfos = $this->env->getDataAccess()->getFileInfo(
		$this->env->getPageConfig(),
		[ [ $target->title->getKey(), [ 'height' => null, 'width' => null ] ] ]
	);
	$info = $fileInfos[0];

	if ( !$info ) {
		$errs = [
			[ 'key' => 'apierror-filedoesnotexist', 'message' => 'This image does not exist.' ]
		];
	} elseif ( isset( $info['thumberror'] ) ) {
		$errs = [
			[ 'key' => 'apierror-unknownerror', 'message' => $info['thumberror'] ]
		];
	} else {
		$errs = [];
	}

	return $this->linkToMedia( $token, $target, $errs, $info );
}
1689 | |
/**
 * Dispatch `wikilink` and `mw:redirect` tokens to their handlers; every
 * other tag name yields null.
 *
 * @inheritDoc
 */
public function onTag( Token $token ): ?TokenHandlerResult {
	$name = $token->getName();

	if ( $name === 'wikilink' ) {
		return $this->onWikiLink( $token );
	}
	if ( $name === 'mw:redirect' ) {
		return $this->onRedirect( $token );
	}

	// Not a tag this handler deals with
	return null;
}
1701 | } |