Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
93.55% |
29 / 31 |
|
75.00% |
3 / 4 |
CRAP | |
0.00% |
0 / 1 |
ExtractBody | |
93.55% |
29 / 31 |
|
75.00% |
3 / 4 |
12.04 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
shouldRun | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
expandRelativeAttrs | |
100.00% |
18 / 18 |
|
100.00% |
1 / 1 |
6 | |||
transformText | |
81.82% |
9 / 11 |
|
0.00% |
0 / 1 |
4.10 |
1 | <?php |
2 | |
3 | namespace MediaWiki\OutputTransform\Stages; |
4 | |
5 | use MediaWiki\Html\HtmlHelper; |
6 | use MediaWiki\OutputTransform\ContentTextTransformStage; |
7 | use MediaWiki\Parser\Parser; |
8 | use MediaWiki\Parser\ParserOutput; |
9 | use MediaWiki\Parser\Parsoid\ParsoidParser; |
10 | use ParserOptions; |
11 | use Psr\Log\LoggerInterface; |
12 | use Wikimedia\RemexHtml\Serializer\SerializerNode; |
13 | |
/**
 * Applies the base href, and strips everything but the <body>.
 *
 * This stage only runs for Parsoid content (see shouldRun()). It expands
 * the relative "./..." URLs found on a fixed set of elements, both in the
 * page text and in the page indicators, and rewrites links that point at
 * a fragment of the current page into plain "#fragment" links.
 *
 * @internal
 */
class ExtractBody extends ContentTextTransformStage {

	/**
	 * Elements whose relative URL attribute gets expanded.
	 * Used as a set: only the keys matter.
	 */
	private const EXPAND_ELEMENTS = [
		'a' => true, 'img' => true, 'video' => true, 'audio' => true,
	];

	/**
	 * Matches a <base href="..."> tag; capture group 1 is the href value.
	 * Trailing attributes and a self-closing slash are optional, so both
	 * <base href="x"> and <base href="x"/> are recognised.
	 */
	private const BASE_HREF_REGEX = '{<base href=["\']([^"\']+)["\'][^>]*>}';

	private LoggerInterface $logger;

	/**
	 * @param LoggerInterface $logger Receives an error when the ParserOutput
	 *   carries no title information.
	 */
	public function __construct( LoggerInterface $logger ) {
		$this->logger = $logger;
	}

	/**
	 * This stage runs only when the caller flags the content as Parsoid content.
	 *
	 * @param ParserOutput $po
	 * @param ParserOptions|null $popts
	 * @param array $options Reads the 'isParsoidContent' key; absent means false.
	 * @return bool
	 */
	public function shouldRun( ParserOutput $po, ?ParserOptions $popts, array $options = [] ): bool {
		// Explicit cast: the option is caller-supplied and not guaranteed to be
		// a real boolean, while the return type is strictly bool.
		return (bool)( $options['isParsoidContent'] ?? false );
	}

	/**
	 * Name of the attribute that holds the relative URL for an element:
	 * 'href' for <a>, 'resource' for every other element.
	 *
	 * @param string $elementName
	 * @return string
	 */
	private static function urlAttributeFor( string $elementName ): string {
		return $elementName === 'a' ? 'href' : 'resource';
	}

	/**
	 * Expand the relative ("./"-prefixed) URLs of the elements listed in
	 * EXPAND_ELEMENTS.
	 *
	 * @param string $text HTML to process
	 * @param string $baseHref Prefix prepended to a relative URL before expansion
	 * @param string $pageFragmentPrefix URL prefix ("./<title>#") identifying links
	 *   to a fragment of the current page; those are shortened to "#fragment".
	 *   An empty string disables this rewriting.
	 * @return string The modified HTML
	 */
	private static function expandRelativeAttrs( string $text, string $baseHref, string $pageFragmentPrefix ): string {
		// T350952: Expand relative links
		// What we should be doing here is parsing as a title and then
		// using Title::getLocalURL()
		return HtmlHelper::modifyElements(
			$text,
			// Selects the elements to rewrite: a known element whose URL
			// attribute starts with "./".
			static function ( SerializerNode $node ): bool {
				if ( !isset( self::EXPAND_ELEMENTS[$node->name] ) ) {
					return false;
				}
				$attr = self::urlAttributeFor( $node->name );
				return str_starts_with( $node->attrs[$attr] ?? '', './' );
			},
			// Rewrites the URL attribute of each selected element.
			static function ( SerializerNode $node ) use ( $baseHref, $pageFragmentPrefix ): SerializerNode {
				$attr = self::urlAttributeFor( $node->name );
				$href = $node->attrs[$attr];
				// Convert page fragment urls to true fragment urls
				// This ensures that those fragments include any URL query params
				// and resolve internally. (Ex: on pages with ?useparsoid=1,
				// cite link fragments should not take you to a different page).
				if ( $pageFragmentPrefix !== '' && str_starts_with( $href, $pageFragmentPrefix ) ) {
					// The "- 1" keeps the trailing "#" of the prefix, so that
					// "./Title#frag" becomes "#frag".
					$node->attrs[$attr] = substr( $href, strlen( $pageFragmentPrefix ) - 1 );
				} else {
					$node->attrs[$attr] = wfExpandUrl( $baseHref . $href, PROTO_RELATIVE );
				}
				return $node;
			}
		);
	}

	/**
	 * Expand relative links in the indicators and in the text, and reduce the
	 * text to what Parser::extractBody() returns.
	 *
	 * Side effect: every indicator of $po is replaced by its expanded version.
	 *
	 * @param string $text Full HTML document
	 * @param ParserOutput $po
	 * @param ParserOptions|null $popts
	 * @param array &$options
	 * @return string
	 */
	protected function transformText( string $text, ParserOutput $po, ?ParserOptions $popts, array &$options ): string {
		// T350952: temporary fix for subpage paths: use Parsoid's
		// <base href> to expand relative links
		$baseHref = '';
		if ( preg_match( self::BASE_HREF_REGEX, $text, $matches ) === 1 ) {
			$baseHref = $matches[1];
		}

		$title = $po->getExtensionData( ParsoidParser::PARSOID_TITLE_KEY );
		// Compare explicitly rather than with !$title: "0" is a valid page
		// title but is falsy in PHP.
		if ( $title === null || $title === '' ) {
			// We don't think this should ever trigger, but being conservative
			$this->logger->error( __METHOD__ . ": Missing title information in ParserOutput" );
			// Without a title there is no meaningful "./<title>#" prefix;
			// an empty prefix turns the fragment rewriting off.
			$pageFragmentPrefix = '';
		} else {
			$pageFragmentPrefix = "./" . $title . "#";
		}

		foreach ( $po->getIndicators() as $name => $html ) {
			$po->setIndicator( $name, self::expandRelativeAttrs( $html, $baseHref, $pageFragmentPrefix ) );
		}

		return self::expandRelativeAttrs( Parser::extractBody( $text ), $baseHref, $pageFragmentPrefix );
	}
}