Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 137 |
|
0.00% |
0 / 7 |
CRAP | |
0.00% |
0 / 1 |
ExternalLinkHandler | |
0.00% |
0 / 137 |
|
0.00% |
0 / 7 |
1482 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
imageExtensions | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
42 | |||
arraySome | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
12 | |||
hasImageLink | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
30 | |||
onUrlLink | |
0.00% |
0 / 41 |
|
0.00% |
0 / 1 |
12 | |||
onExtLink | |
0.00% |
0 / 68 |
|
0.00% |
0 / 1 |
240 | |||
onTag | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
20 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Wt2Html\TT; |
5 | |
6 | use Wikimedia\Parsoid\Core\Sanitizer; |
7 | use Wikimedia\Parsoid\NodeData\DataParsoid; |
8 | use Wikimedia\Parsoid\Tokens\EndTagTk; |
9 | use Wikimedia\Parsoid\Tokens\KV; |
10 | use Wikimedia\Parsoid\Tokens\SelfclosingTagTk; |
11 | use Wikimedia\Parsoid\Tokens\TagTk; |
12 | use Wikimedia\Parsoid\Tokens\Token; |
13 | use Wikimedia\Parsoid\Utils\PipelineUtils; |
14 | use Wikimedia\Parsoid\Utils\TokenUtils; |
15 | use Wikimedia\Parsoid\Wt2Html\PegTokenizer; |
16 | |
/**
 * Token handler for external links.
 *
 * Handles two token names (see onTag()):
 *  - `urllink`: turned into an `<a rel="mw:ExtLink">` or, when the URL
 *    qualifies as an external image, an `<img rel="mw:externalImage">`.
 *  - `extlink`: bracketed external links, including ISBN/RFC/PMID magic links.
 */
class ExternalLinkHandler extends TokenHandler {
	/** @var PegTokenizer Regular tokenizer; this class only calls tokenizeURL() on it */
	private $urlParser;

	/** @inheritDoc */
	public function __construct( object $manager, array $options ) {
		parent::__construct( $manager, $options );

		// This is the regular tokenizer, but this handler only ever
		// invokes it through tokenizeURL().
		if ( $this->urlParser === null ) {
			$this->urlParser = new PegTokenizer( $this->env );
		}
	}

	/**
	 * Whether the given string is one of the recognised image file extensions.
	 *
	 * The comparison is case-sensitive and accepts exactly
	 * 'jpg', 'png', 'gif' and 'svg'.
	 *
	 * @param string $str Extension, without the leading dot
	 * @return bool
	 */
	private static function imageExtensions( string $str ): bool {
		return in_array( $str, [ 'jpg', 'png', 'gif', 'svg' ], true );
	}

	/**
	 * Whether the callback returns a truthy value for at least one element.
	 *
	 * Iteration stops at the first truthy result. An empty array yields false.
	 *
	 * @param array $array Values to test
	 * @param callable $fn Predicate invoked with each value
	 * @return bool
	 */
	private static function arraySome( array $array, callable $fn ): bool {
		foreach ( $array as $value ) {
			if ( $fn( $value ) ) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Whether $href should be rendered as an external image.
	 *
	 * This requires all of the following:
	 *  - the text after the last '.' is a recognised image extension,
	 *  - $href starts with http:// or https:// (case-insensitive),
	 *  - some configured allowed prefix is empty or is a prefix of $href.
	 *
	 * @param string $href
	 * @return bool
	 */
	private function hasImageLink( string $href ): bool {
		$allowedPrefixes = $this->env->getSiteConfig()->allowedExternalImagePrefixes();
		$bits = explode( '.', $href );
		$hasImageExtension = count( $bits ) > 1 &&
			self::imageExtensions( end( $bits ) ) &&
			preg_match( '#^https?://#i', $href ) === 1;
		if ( !$hasImageExtension ) {
			return false;
		}

		// Typical settings for mediawiki configuration variables
		// $wgAllowExternalImages and $wgAllowExternalImagesFrom will
		// result in values like these:
		// allowedPrefixes = []; // no external images (presumably; verify
		//                       // against SiteConfig::allowedExternalImagePrefixes)
		// allowedPrefixes = ['']; // allow all external images
		// allowedPrefixes = ['http://127.0.0.1/', 'http://example.com'];
		// Note that the values include the http:// or https:// protocol.
		// See https://phabricator.wikimedia.org/T53092
		//
		// True if some prefix in the list matches href.
		return self::arraySome(
			$allowedPrefixes,
			static function ( string $prefix ) use ( $href ): bool {
				return $prefix === '' || str_starts_with( $href, $prefix );
			}
		);
	}

	/**
	 * Bare URL link
	 *
	 * Emits a single self-closing `img` token when the href qualifies as an
	 * external image; otherwise emits an `a` start tag, the cleaned URL as
	 * link text, and an `a` end tag.
	 *
	 * @param Token $token
	 * @return TokenHandlerResult|null
	 */
	private function onUrlLink( Token $token ): ?TokenHandlerResult {
		$env = $this->env;
		$origHref = $token->getAttributeV( 'href' );
		$href = TokenUtils::tokensToString( $origHref );
		$dataParsoid = $token->dataParsoid->clone();

		if ( $this->hasImageLink( $href ) ) {
			// The last path segment of the URL doubles as the alt text.
			$checkAlt = explode( '/', $href );
			$tagAttrs = [
				new KV( 'src', $href ),
				new KV( 'alt', end( $checkAlt ) ),
				new KV( 'rel', 'mw:externalImage' )
			];

			// combine with existing rdfa attrs
			$tagAttrs = WikiLinkHandler::buildLinkAttrs(
				$token->attribs, false, null, $tagAttrs )['attribs'];
			return new TokenHandlerResult(
				[ new SelfclosingTagTk( 'img', $tagAttrs, $dataParsoid ) ] );
		}

		$tagAttrs = [
			new KV( 'rel', 'mw:ExtLink' )
		];

		// combine with existing rdfa attrs
		// href is set explicitly below
		$tagAttrs = WikiLinkHandler::buildLinkAttrs(
			$token->attribs, false, null, $tagAttrs )['attribs'];
		$builtTag = new TagTk( 'a', $tagAttrs, $dataParsoid );
		$dataParsoid->stx = 'url';

		if ( !$this->options['inTemplate'] ) {
			// Since we messed with the text of the link, we need
			// to preserve the original in the RT data. Or else.
			$builtTag->addNormalizedAttribute(
				'href', $href, $token->getWTSource( $this->manager->getFrame() )
			);
		} else {
			$builtTag->addAttribute( 'href', $href );
		}

		// The end tag gets its own data-parsoid whose tsr is derived
		// from the start tag's tsr.
		$dp = new DataParsoid;
		$dp->tsr = $dataParsoid->tsr->expandTsrK()->value;
		return new TokenHandlerResult( [
			$builtTag,
			// Make sure there are no IDN-ignored characters in the text so
			// the user doesn't accidentally copy any.
			Sanitizer::cleanUrl( $env->getSiteConfig(), $href, '' ), // mode could be 'wikilink'
			new EndTagTk( 'a', [], $dp )
		] );
	}

	/**
	 * Bracketed external link
	 *
	 * Three outcomes:
	 *  - magic links (typeof matching mw:(Ext|Wiki)Link/(ISBN|RFC|PMID)) become
	 *    an `a` tag wrapped directly around the content;
	 *  - links whose href is a plain string without expanded attrs, or whose
	 *    href the URL tokenizer accepts, become an `a` tag around a DOM
	 *    fragment token built from the content;
	 *  - anything else is bailed out via WikiLinkHandler::bailTokens().
	 *
	 * @param Token $token
	 * @return TokenHandlerResult|null
	 */
	private function onExtLink( Token $token ): ?TokenHandlerResult {
		$env = $this->env;
		$origHref = $token->getAttributeV( 'href' );
		$hasExpandedAttrs = TokenUtils::hasTypeOf( $token, 'mw:ExpandedAttrs' );
		$href = TokenUtils::tokensToString( $origHref );
		$hrefWithEntities = TokenUtils::tokensToString(
			$origHref, false, [ 'includeEntities' => true ]
		);
		$content = $token->getAttributeV( 'mw:content' );
		$dataParsoid = $token->dataParsoid->clone();
		$magLinkType = TokenUtils::matchTypeOf(
			$token, '#^mw:(Ext|Wiki)Link/(ISBN|RFC|PMID)$#'
		);

		if ( $magLinkType ) {
			$newHref = $href;
			$newRel = 'mw:ExtLink';
			if ( str_ends_with( $magLinkType, '/ISBN' ) ) {
				$newHref = $env->getSiteConfig()->relativeLinkPrefix() . $href;
				// ISBNs use mw:WikiLink instead of mw:ExtLink
				$newRel = 'mw:WikiLink';
			}
			$newAttrs = [
				new KV( 'href', $newHref ),
				new KV( 'rel', $newRel )
			];
			$token->removeAttribute( 'typeof' );

			// SSS FIXME: Right now, Parsoid does not support templating
			// of ISBN attributes. So, "ISBN {{1x|1234567890}}" will not
			// parse as you might expect it to. As a result, this code below
			// that attempts to combine rdf attrs from earlier is unnecessary
			// right now. But, it will become necessary if Parsoid starts
			// supporting templating of ISBN attributes.
			//
			// combine with existing rdfa attrs
			$newAttrs = WikiLinkHandler::buildLinkAttrs(
				$token->attribs, false, null, $newAttrs )['attribs'];
			$aStart = new TagTk( 'a', $newAttrs, $dataParsoid );
			$tokens = array_merge(
				[ $aStart ],
				is_array( $content ) ? $content : [ $content ],
				[ new EndTagTk( 'a' ) ]
			);
			return new TokenHandlerResult( $tokens );
		}

		// The URL tokenizer is only consulted when the cheap check fails.
		$isLink = ( !$hasExpandedAttrs && is_string( $origHref ) ) ||
			$this->urlParser->tokenizeURL( $hrefWithEntities ) !== false;
		if ( !$isLink ) {
			// Not a link, convert href to plain text.
			return new TokenHandlerResult( WikiLinkHandler::bailTokens( $this->manager, $token ) );
		}

		// If the link text is itself a single URL string that qualifies as
		// an external image, replace the content with an img token.
		if ( is_array( $content ) && count( $content ) === 1 && is_string( $content[0] ) ) {
			$src = $content[0];
			if ( $env->getSiteConfig()->hasValidProtocol( $src ) &&
				$this->urlParser->tokenizeURL( $src ) !== false &&
				$this->hasImageLink( $src )
			) {
				$checkAlt = explode( '/', $src );
				$dp = new DataParsoid;
				$dp->type = 'extlink';
				$content = [
					new SelfclosingTagTk(
						'img',
						[
							new KV( 'src', $src ),
							new KV( 'alt', end( $checkAlt ) )
						],
						$dp
					)
				];
			}
		}

		$newAttrs = [ new KV( 'rel', 'mw:ExtLink' ) ];
		// combine with existing rdfa attrs
		// href is set explicitly below
		$newAttrs = WikiLinkHandler::buildLinkAttrs(
			$token->attribs, false, null, $newAttrs )['attribs'];
		$aStart = new TagTk( 'a', $newAttrs, $dataParsoid );

		if ( !$this->options['inTemplate'] ) {
			// If we are from a top-level page, add normalized attr info for
			// accurate roundtripping of original content.
			//
			// extLinkContentOffsets->start covers all spaces before content
			// and we need src without those spaces.
			// NOTE(review): the +1 presumably skips the opening '[' — confirm.
			$tsr0a = $dataParsoid->tsr->start + 1;
			$tsr1a = $dataParsoid->extLinkContentOffsets->start -
				strlen( $token->getAttributeV( 'spaces' ) ?? '' );
			$length = $tsr1a - $tsr0a;
			$aStart->addNormalizedAttribute(
				'href',
				$href,
				substr( $this->manager->getFrame()->getSrcText(), $tsr0a, $length )
			);
		} else {
			$aStart->addAttribute( 'href', $href );
		}

		$content = PipelineUtils::getDOMFragmentToken(
			$content,
			$dataParsoid->tsr ? $dataParsoid->extLinkContentOffsets : null,
			[ 'inlineContext' => true, 'token' => $token ]
		);

		return new TokenHandlerResult( [ $aStart, $content, new EndTagTk( 'a' ) ] );
	}

	/**
	 * Dispatch `urllink` and `extlink` tokens to their handlers; any other
	 * token name yields null.
	 *
	 * @inheritDoc
	 */
	public function onTag( Token $token ): ?TokenHandlerResult {
		switch ( $token->getName() ) {
			case 'urllink':
				return $this->onUrlLink( $token );
			case 'extlink':
				return $this->onExtLink( $token );
			default:
				return null;
		}
	}
}