Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 139 |
|
0.00% |
0 / 7 |
CRAP | |
0.00% |
0 / 1 |
ExternalLinkHandler | |
0.00% |
0 / 139 |
|
0.00% |
0 / 7 |
1892 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
imageExtensions | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
90 | |||
arraySome | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
12 | |||
hasImageLink | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
30 | |||
onUrlLink | |
0.00% |
0 / 40 |
|
0.00% |
0 / 1 |
20 | |||
onExtLink | |
0.00% |
0 / 68 |
|
0.00% |
0 / 1 |
272 | |||
onTag | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
20 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Wt2Html\TT; |
5 | |
6 | use Wikimedia\Parsoid\Core\Sanitizer; |
7 | use Wikimedia\Parsoid\NodeData\DataParsoid; |
8 | use Wikimedia\Parsoid\Tokens\EndTagTk; |
9 | use Wikimedia\Parsoid\Tokens\KV; |
10 | use Wikimedia\Parsoid\Tokens\SelfclosingTagTk; |
11 | use Wikimedia\Parsoid\Tokens\TagTk; |
12 | use Wikimedia\Parsoid\Tokens\Token; |
13 | use Wikimedia\Parsoid\Tokens\XMLTagTk; |
14 | use Wikimedia\Parsoid\Utils\PipelineUtils; |
15 | use Wikimedia\Parsoid\Utils\TokenUtils; |
16 | use Wikimedia\Parsoid\Wt2Html\PegTokenizer; |
17 | |
/**
 * Token handler for external links.
 *
 * Handles two kinds of tokens (see onTag()):
 *  - 'urllink': a bare URL appearing in the text
 *  - 'extlink': a bracketed external link, including ISBN/RFC/PMID magic links
 *
 * Depending on the site's external image settings, a link whose target looks
 * like an image URL is emitted as an <img> token instead of an <a> token.
 */
class ExternalLinkHandler extends TokenHandler {
	/**
	 * File extensions that mark a URL as an image candidate.
	 * The comparison is exact, i.e. case-sensitive.
	 */
	private const IMAGE_EXTENSIONS = [ 'avif', 'gif', 'jpeg', 'jpg', 'png', 'svg', 'webp' ];

	/** @var PegTokenizer */
	private $urlParser;

	/** @inheritDoc */
	public function __construct( object $manager, array $options ) {
		parent::__construct( $manager, $options );

		// Tokenizer used to validate URLs. This is actually the regular
		// tokenizer, but we only ever call it with the url rule
		// (see the tokenizeURL() calls in onExtLink()).
		$this->urlParser = new PegTokenizer( $this->env );
	}

	/**
	 * Check whether a file extension is one of the recognized image extensions.
	 *
	 * @param string $str Extension without the leading dot
	 * @return bool
	 */
	private static function imageExtensions( string $str ): bool {
		return in_array( $str, self::IMAGE_EXTENSIONS, true );
	}

	/**
	 * Check whether the callback returns a truthy value for at least one
	 * element of the array. Stops at the first match.
	 *
	 * @param array $array
	 * @param callable $fn Invoked with each value in turn
	 * @return bool False for an empty array
	 */
	private static function arraySome( array $array, callable $fn ): bool {
		foreach ( $array as $value ) {
			if ( $fn( $value ) ) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Check whether a URL should be rendered as an external image.
	 *
	 * This requires that the URL starts with http:// or https://, that the
	 * text after its last '.' is a recognized image extension, and that the
	 * URL starts with one of the site's allowed external image prefixes.
	 *
	 * @param string $href
	 * @return bool
	 */
	private function hasImageLink( string $href ): bool {
		$allowedPrefixes = $this->env->getSiteConfig()->allowedExternalImagePrefixes();
		$bits = explode( '.', $href );
		$hasImageExtension = count( $bits ) > 1 &&
			self::imageExtensions( end( $bits ) ) &&
			preg_match( '#^https?://#i', $href );
		if ( !$hasImageExtension ) {
			return false;
		}

		// Typical settings for mediawiki configuration variables
		// $wgAllowExternalImages and $wgAllowExternalImagesFrom will
		// result in values like these:
		//  allowedPrefixes = []; // no external images
		//  allowedPrefixes = ['']; // allow all external images
		//  allowedPrefixes = ['http://127.0.0.1/', 'http://example.com'];
		// Note that the values include the http:// or https:// protocol.
		// See https://phabricator.wikimedia.org/T53092
		//
		// True if some prefix in the list matches href.
		return self::arraySome(
			$allowedPrefixes,
			static function ( string $prefix ) use ( $href ): bool {
				return $prefix === '' || str_starts_with( $href, $prefix );
			}
		);
	}

	/**
	 * Bare URL link
	 *
	 * Emits a self-closing <img> token when the URL qualifies as an external
	 * image; otherwise emits an <a> start tag, the cleaned URL as the link
	 * text, and an </a> end tag.
	 *
	 * @param Token $token
	 * @return ?array<string|Token>
	 */
	private function onUrlLink( Token $token ): ?array {
		$env = $this->env;
		$origHref = $token->getAttributeV( 'href' );
		$href = TokenUtils::tokensToString( $origHref );
		$dataParsoid = clone $token->dataParsoid;
		$dataMw = $token->dataMw ? clone $token->dataMw : null;

		if ( $this->hasImageLink( $href ) ) {
			// The alt text is the last path segment of the URL.
			$checkAlt = explode( '/', $href );
			$imgAttrs = [
				new KV( 'src', $href ),
				new KV( 'alt', end( $checkAlt ) ),
				new KV( 'rel', 'mw:externalImage' )
			];

			// combine with existing rdfa attrs
			$imgAttrs = WikiLinkHandler::buildLinkAttrs(
				$token->attribs, false, null, $imgAttrs )['attribs'];
			return [ new SelfclosingTagTk( 'img', $imgAttrs, $dataParsoid, $dataMw ) ];
		}

		$linkAttrs = [
			new KV( 'rel', 'mw:ExtLink' )
		];

		// combine with existing rdfa attrs
		// href is set explicitly below
		$linkAttrs = WikiLinkHandler::buildLinkAttrs(
			$token->attribs, false, null, $linkAttrs )['attribs'];
		$builtTag = new TagTk( 'a', $linkAttrs, $dataParsoid, $dataMw );
		$dataParsoid->stx = 'url';

		if ( !$this->options['inTemplate'] ) {
			// Since we messed with the text of the link, we need
			// to preserve the original in the RT data. Or else.
			$builtTag->addNormalizedAttribute(
				'href', $href, $token->getWTSource( $this->manager->getFrame() )
			);
		} else {
			$builtTag->addAttribute( 'href', $href );
		}

		// The end tag gets its own data-parsoid, with a tsr derived from
		// the link's tsr.
		$endDp = new DataParsoid;
		$endDp->tsr = $dataParsoid->tsr->expandTsrK()->value;

		return [
			$builtTag,
			// Make sure there are no IDN-ignored characters in the text so
			// the user doesn't accidentally copy any.
			Sanitizer::cleanUrl( $env->getSiteConfig(), $href, '' ), // mode could be 'wikilink'
			new EndTagTk( 'a', [], $endDp )
		];
	}

	/**
	 * Bracketed external link
	 *
	 * Three outcomes are possible:
	 *  - magic links (ISBN/RFC/PMID typeof) become an <a> wrapping the
	 *    original content tokens;
	 *  - links with a usable href become an <a> wrapping a DOM fragment token
	 *    built from the content (the content itself is replaced by an <img>
	 *    token when it is a single string that qualifies as an external image);
	 *  - anything else is handed to WikiLinkHandler::bailTokens().
	 *
	 * @param Token $token
	 * @return ?array<string|Token>
	 */
	private function onExtLink( Token $token ): ?array {
		$env = $this->env;
		$origHref = $token->getAttributeV( 'href' );
		$hasExpandedAttrs = TokenUtils::hasTypeOf( $token, 'mw:ExpandedAttrs' );
		$href = TokenUtils::tokensToString( $origHref );
		$hrefWithEntities = TokenUtils::tokensToString( $origHref, false, [
				'includeEntities' => true
			]
		);
		$content = $token->getAttributeV( 'mw:content' );
		$dataParsoid = clone $token->dataParsoid;
		$dataMw = $token->dataMw ? clone $token->dataMw : null;
		$magLinkType = TokenUtils::matchTypeOf(
			$token, '#^mw:(Ext|Wiki)Link/(ISBN|RFC|PMID)$#'
		);

		if ( $magLinkType ) {
			$newHref = $href;
			$newRel = 'mw:ExtLink';
			if ( str_ends_with( $magLinkType, '/ISBN' ) ) {
				$newHref = $env->getSiteConfig()->relativeLinkPrefix() . $href;
				// ISBNs use mw:WikiLink instead of mw:ExtLink
				$newRel = 'mw:WikiLink';
			}
			$newAttrs = [
				new KV( 'href', $newHref ),
				new KV( 'rel', $newRel )
			];
			$token->removeAttribute( 'typeof' );

			// SSS FIXME: Right now, Parsoid does not support templating
			// of ISBN attributes.  So, "ISBN {{1x|1234567890}}" will not
			// parse as you might expect it to.  As a result, this code below
			// that attempts to combine rdf attrs from earlier is unnecessary
			// right now.  But, it will become necessary if Parsoid starts
			// supporting templating of ISBN attributes.
			//
			// combine with existing rdfa attrs
			$newAttrs = WikiLinkHandler::buildLinkAttrs(
				$token->attribs, false, null, $newAttrs )['attribs'];
			$aStart = new TagTk( 'a', $newAttrs, $dataParsoid, $dataMw );
			return array_merge(
				[ $aStart ],
				is_array( $content ) ? $content : [ $content ],
				[ new EndTagTk( 'a' ) ]
			);
		}

		// A plain string href that did not come from expanded attributes is
		// accepted as is; anything else has to tokenize as a URL.
		$isLink = ( !$hasExpandedAttrs && is_string( $origHref ) ) ||
			$this->urlParser->tokenizeURL( $hrefWithEntities ) !== false;
		if ( !$isLink ) {
			// Not a link, convert href to plain text.
			return WikiLinkHandler::bailTokens( $this->manager, $token );
		}

		if ( is_array( $content ) && count( $content ) === 1 && is_string( $content[0] ) ) {
			$src = $content[0];
			if ( $env->getSiteConfig()->hasValidProtocol( $src ) &&
				$this->urlParser->tokenizeURL( $src ) !== false &&
				$this->hasImageLink( $src )
			) {
				// The link text is itself an allowed external image URL:
				// replace it with an <img> whose alt is the last path segment.
				$checkAlt = explode( '/', $src );
				$imgDp = new DataParsoid;
				$imgDp->type = 'extlink';
				$content = [ new SelfclosingTagTk( 'img', [
						new KV( 'src', $src ),
						new KV( 'alt', end( $checkAlt ) )
					], $imgDp
				) ];
			}
		}

		$newAttrs = [ new KV( 'rel', 'mw:ExtLink' ) ];
		// combine with existing rdfa attrs
		// href is set explicitly below
		$newAttrs = WikiLinkHandler::buildLinkAttrs(
			$token->attribs, false, null, $newAttrs )['attribs'];
		$aStart = new TagTk( 'a', $newAttrs, $dataParsoid, $dataMw );

		if ( !$this->options['inTemplate'] ) {
			// If we are from a top-level page, add normalized attr info for
			// accurate roundtripping of original content.
			//
			// extLinkContentOffsets->start covers all spaces before content
			// and we need src without those spaces.
			$tsr0a = $dataParsoid->tsr->start + 1;
			$tsr1a = $dataParsoid->tmp->extLinkContentOffsets->start -
				strlen( $token->getAttributeV( 'spaces' ) ?? '' );
			$length = $tsr1a - $tsr0a;
			$aStart->addNormalizedAttribute( 'href', $href,
				substr( $this->manager->getFrame()->getSrcText(), $tsr0a, $length ) );
		} else {
			$aStart->addAttribute( 'href', $href );
		}

		$content = PipelineUtils::getDOMFragmentToken(
			$content,
			$dataParsoid->tsr ? $dataParsoid->tmp->extLinkContentOffsets : null,
			[ 'inlineContext' => true, 'token' => $token ]
		);

		return [ $aStart, $content, new EndTagTk( 'a' ) ];
	}

	/**
	 * Dispatch 'urllink' and 'extlink' tokens to their handlers.
	 * Returns null for every other tag name.
	 *
	 * @inheritDoc
	 */
	public function onTag( XMLTagTk $token ): ?array {
		switch ( $token->getName() ) {
			case 'urllink':
				return $this->onUrlLink( $token );
			case 'extlink':
				return $this->onExtLink( $token );
			default:
				return null;
		}
	}
}