Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
80.00% |
32 / 40 |
|
75.00% |
3 / 4 |
CRAP | |
0.00% |
0 / 1 |
ExtractBody | |
80.00% |
32 / 40 |
|
75.00% |
3 / 4 |
16.80 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
shouldRun | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
expandRelativeAttrs | |
100.00% |
18 / 18 |
|
100.00% |
1 / 1 |
6 | |||
transformText | |
55.56% |
10 / 18 |
|
0.00% |
0 / 1 |
11.30 |
1 | <?php |
2 | |
3 | namespace MediaWiki\OutputTransform\Stages; |
4 | |
5 | use MediaWiki\Config\ServiceOptions; |
6 | use MediaWiki\Html\HtmlHelper; |
7 | use MediaWiki\OutputTransform\ContentTextTransformStage; |
8 | use MediaWiki\Parser\Parser; |
9 | use MediaWiki\Parser\ParserOptions; |
10 | use MediaWiki\Parser\ParserOutput; |
11 | use MediaWiki\Parser\Parsoid\ParsoidParser; |
12 | use MediaWiki\Utils\UrlUtils; |
13 | use Psr\Log\LoggerInterface; |
14 | use Wikimedia\RemexHtml\Serializer\SerializerNode; |
15 | |
/**
 * Applies Parsoid's <base href> to relative links, and strips everything but the <body>.
 *
 * This stage only runs when the caller flags the content as Parsoid content
 * (see shouldRun()).
 *
 * @internal
 */
class ExtractBody extends ContentTextTransformStage {

	/** Used to expand the rewritten, base-prefixed URLs. */
	private UrlUtils $urlUtils;

	/** Optional mobile context; when null, no mobile URL rewriting is attempted. */
	// @phan-suppress-next-line PhanUndeclaredTypeProperty
	private ?\MobileContext $mobileContext;

	/**
	 * @param ServiceOptions $options Forwarded to the parent constructor
	 * @param LoggerInterface $logger Forwarded to the parent constructor
	 * @param UrlUtils $urlUtils Used to expand relative links
	 * @param \MobileContext|null $mobileContext If non-null, consulted to rewrite
	 *   the base href to its mobile equivalent when the mobile domain is in use
	 */
	public function __construct(
		ServiceOptions $options, LoggerInterface $logger, UrlUtils $urlUtils,
		// @phan-suppress-next-line PhanUndeclaredTypeParameter
		?\MobileContext $mobileContext
	) {
		parent::__construct( $options, $logger );
		$this->urlUtils = $urlUtils;
		$this->mobileContext = $mobileContext;
	}

	/**
	 * Run only when the 'isParsoidContent' option is set; defaults to false
	 * when the option is absent.
	 *
	 * @param ParserOutput $po Unused here
	 * @param ParserOptions|null $popts Unused here
	 * @param array $options Transform options; only 'isParsoidContent' is read
	 * @return bool
	 */
	public function shouldRun( ParserOutput $po, ?ParserOptions $popts, array $options = [] ): bool {
		return ( $options['isParsoidContent'] ?? false );
	}

	/**
	 * Element names whose relative attributes are rewritten, keyed by name
	 * so membership can be tested with isset().
	 */
	private const EXPAND_ELEMENTS = [
		'a' => true, 'img' => true, 'video' => true, 'audio' => true,
	];

	/**
	 * Rewrite relative ('./'-prefixed) link attributes in an HTML string.
	 *
	 * For <a> elements the 'href' attribute is examined; for the other
	 * elements in EXPAND_ELEMENTS the 'resource' attribute is examined.
	 * Attributes that do not start with './' are left alone.
	 *
	 * @param string $text HTML to process
	 * @param string $baseHref Prefix prepended to a relative value before expansion
	 * @param string $pageFragmentPrefix Prefix ending in '#' that identifies links
	 *   to a fragment of the current page
	 * @param UrlUtils $urlUtils Used to expand the base-prefixed URL
	 * @return string The result of HtmlHelper::modifyElements() on $text
	 */
	private static function expandRelativeAttrs(
		string $text,
		string $baseHref,
		string $pageFragmentPrefix,
		UrlUtils $urlUtils
	): string {
		// T350952: Expand relative links
		// What we should be doing here is parsing as a title and then
		// using Title::getLocalURL()
		return HtmlHelper::modifyElements(
			$text,
			// Selects elements of interest whose attribute starts with './'
			static function ( SerializerNode $node ): bool {
				if ( !isset( self::EXPAND_ELEMENTS[$node->name] ) ) {
					return false;
				}
				$attr = $node->name === 'a' ? 'href' : 'resource';
				return str_starts_with( $node->attrs[$attr] ?? '', './' );
			},
			// Rewrites the attribute of a selected element
			static function ( SerializerNode $node ) use ( $baseHref, $pageFragmentPrefix, $urlUtils ): SerializerNode {
				$attr = $node->name === 'a' ? 'href' : 'resource';
				$href = $node->attrs[$attr];
				// Convert page fragment urls to true fragment urls
				// This ensures that those fragments include any URL query params
				// and resolve internally. (Ex: on pages with ?useparsoid=1,
				// cite link fragments should not take you to a different page).
				if ( $pageFragmentPrefix && str_starts_with( $href, $pageFragmentPrefix ) ) {
					// The "- 1" keeps the prefix's trailing '#', so the result is "#fragment"
					$node->attrs[$attr] = substr( $href, strlen( $pageFragmentPrefix ) - 1 );
				} else {
					$href = $baseHref . $href;
					// A null result from expand() is stored as false
					$node->attrs[$attr] = $urlUtils->expand( $href, PROTO_RELATIVE ) ?? false;
				}
				return $node;
			}
		);
	}

	/**
	 * Expand relative links in the page indicators and in the body text,
	 * and reduce the text to what Parser::extractBody() returns.
	 *
	 * Side effects: every indicator on $po is replaced with its expanded
	 * version, and an error is logged if $po carries no Parsoid title.
	 *
	 * @param string $text Full HTML, expected to contain a <base href> element
	 * @param ParserOutput $po Source of the Parsoid title and the indicators
	 * @param ParserOptions|null $popts Unused here
	 * @param array &$options Unused here
	 * @return string The extracted body with relative links expanded
	 */
	protected function transformText( string $text, ParserOutput $po, ?ParserOptions $popts, array &$options ): string {
		// T350952: temporary fix for subpage paths: use Parsoid's
		// <base href> to expand relative links
		$baseHref = '';
		// The tail is `[^>]*` rather than `[^>]+` so that a plain
		// <base href="..."> (nothing between the closing quote and '>')
		// is matched as well as the self-closing <base href="..."/> form.
		if ( preg_match( '{<base href=["\']([^"\']+)["\'][^>]*>}', $text, $matches ) === 1 ) {
			$baseHref = $matches[1];
			// @phan-suppress-next-line PhanUndeclaredClassMethod
			if ( $this->mobileContext !== null && $this->mobileContext->usingMobileDomain() ) {
				// @phan-suppress-next-line PhanUndeclaredClassMethod
				$mobileUrl = $this->mobileContext->getMobileUrl( $baseHref );
				// Keep the original base href if no mobile URL is available
				if ( $mobileUrl !== false ) {
					$baseHref = $mobileUrl;
				}
			}
		}
		$title = $po->getExtensionData( ParsoidParser::PARSOID_TITLE_KEY );
		if ( !$title ) {
			// We don't think this should ever trigger, but being conservative
			$this->logger->error( __METHOD__ . ": Missing title information in ParserOutput" );
		}
		// NOTE(review): with a missing title this becomes "./#", which is still
		// truthy, so the fragment branch in expandRelativeAttrs() stays active.
		$pageFragmentPrefix = "./" . $title . "#";
		// Indicators are stored separately from the body text, so they are
		// processed with the same base href and fragment prefix.
		foreach ( $po->getIndicators() as $name => $html ) {
			$po->setIndicator(
				$name,
				self::expandRelativeAttrs( $html, $baseHref, $pageFragmentPrefix, $this->urlUtils )
			);
		}
		$text = Parser::extractBody( $text );
		return self::expandRelativeAttrs( $text, $baseHref, $pageFragmentPrefix, $this->urlUtils );
	}
}