Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 135 |
|
0.00% |
0 / 3 |
CRAP | |
0.00% |
0 / 1 |
AddMetaData | |
0.00% |
0 / 135 |
|
0.00% |
0 / 3 |
380 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 26 |
|
0.00% |
0 / 1 |
2 | |||
updateBodyClasslist | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
2 | |||
run | |
0.00% |
0 / 97 |
|
0.00% |
0 / 1 |
306 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Wt2Html\DOM\Processors; |
5 | |
6 | use Closure; |
7 | use DateTime; |
8 | use Wikimedia\Parsoid\Config\Env; |
9 | use Wikimedia\Parsoid\DOM\Element; |
10 | use Wikimedia\Parsoid\DOM\Node; |
11 | use Wikimedia\Parsoid\Parsoid; |
12 | use Wikimedia\Parsoid\Utils\DOMCompat; |
13 | use Wikimedia\Parsoid\Utils\DOMUtils; |
14 | use Wikimedia\Parsoid\Utils\PHPUtils; |
15 | use Wikimedia\Parsoid\Utils\Utils; |
16 | use Wikimedia\Parsoid\Wt2Html\DOMPostProcessor; |
17 | use Wikimedia\Parsoid\Wt2Html\Wt2HtmlDOMProcessor; |
18 | |
/**
 * DOM processor that decorates the output document with page-level metadata:
 * RDFa prefixes, <meta>/<link> elements describing the page and revision,
 * a <base> element, the document title, language/direction information and
 * the CSS classes expected on <body>.
 */
class AddMetaData implements Wt2HtmlDOMProcessor {
	/**
	 * Map from MediaWiki metadata names to the attributes of the <meta> or
	 * <link> element emitted for them. Each attribute value is either a
	 * literal string, a sprintf-style format applied to the metadata value,
	 * or a Closure receiving the full array of revision properties.
	 */
	private array $metadataMap;

	/** Pipeline whose time profile is dumped when profiling is enabled (may be null). */
	private ?DOMPostProcessor $parentPipeline;

	/**
	 * @param ?DOMPostProcessor $domPP Parent pipeline, used only to obtain
	 *   the time profile when the environment has profiling enabled.
	 */
	public function __construct( ?DOMPostProcessor $domPP ) {
		$this->parentPipeline = $domPP;

		// map from mediawiki metadata names to RDFa property names
		$this->metadataMap = [
			'ns' => [
				'property' => 'mw:pageNamespace',
				'content' => '%d',
			],
			'id' => [
				'property' => 'mw:pageId',
				'content' => '%d',
			],

			// DO NOT ADD rev_user, rev_userid, and rev_comment (See T125266)

			// 'rev_revid' is used to set the overall subject of the document, we don't
			// need to add a specific <meta> or <link> element for it.

			'rev_parentid' => [
				'rel' => 'dc:replaces',
				'resource' => 'mwr:revision/%d',
			],
			'rev_timestamp' => [
				'property' => 'dc:modified',
				'content' => static function ( $m ) {
					# Convert from TS_MW ("mediawiki timestamp") format.
					# The output below is labelled as UTC (trailing "Z"), so the
					# input must be parsed as UTC too. Parsing in the process
					# default timezone would shift timestamps that fall into a
					# DST gap of that timezone.
					$dt = DateTime::createFromFormat(
						'YmdHis', $m['rev_timestamp'], new \DateTimeZone( 'UTC' )
					);
					if ( $dt === false ) {
						// createFromFormat() signals a parse failure with false;
						// fail with a message that names the offending value.
						throw new \UnexpectedValueException(
							'Invalid revision timestamp: ' . $m['rev_timestamp']
						);
					}
					# Note that DateTime::ISO8601 is not actually ISO8601, alas.
					return $dt->format( 'Y-m-d\TH:i:s.000\Z' );
				},
			],
			'rev_sha1' => [
				'property' => 'mw:revisionSHA1',
				'content' => '%s',
			]
		];
	}

	/**
	 * Set the direction, CSS classes and version attributes on <body>.
	 *
	 * @param Element $body The document's <body> element
	 * @param Env $env Environment supplying the page language direction
	 */
	private function updateBodyClasslist( Element $body, Env $env ): void {
		$dir = $env->getPageConfig()->getPageLanguageDir();
		$bodyCL = DOMCompat::getClassList( $body );
		$bodyCL->add( 'mw-content-' . $dir );
		$bodyCL->add( 'sitedir-' . $dir );
		$bodyCL->add( $dir );
		$body->setAttribute( 'dir', $dir );

		// Set 'mw-body-content' directly on the body.
		// This is the designated successor for #bodyContent in core skins.
		$bodyCL->add( 'mw-body-content' );
		// Set 'parsoid-body' to add the desired layout styling from Vector.
		$bodyCL->add( 'parsoid-body' );
		// Also, add the 'mediawiki' class.
		// Some MediaWiki:Common.css seem to target this selector.
		$bodyCL->add( 'mediawiki' );
		// Set 'mw-parser-output' directly on the body.
		// Templates target this class as part of the TemplateStyles RFC
		// FIXME: This isn't expected to be found on the same element as the
		// body class above, since some css targets it as a descendant.
		// In visual diff'ing, we migrate the body contents to a wrapper div
		// with this class to reduce visual differences. Consider getting
		// rid of it.
		$bodyCL->add( 'mw-parser-output' );

		// Set the parsoid version on the body, for consistency with
		// the wrapper div.
		$body->setAttribute( 'data-mw-parsoid-version', Parsoid::version() );
		$body->setAttribute( 'data-mw-html-version', Parsoid::defaultHTMLVersion() );
	}

	/**
	 * Add page and revision metadata to the document that owns $root.
	 *
	 * The $options and $atTopLevel arguments are part of the processor
	 * interface and are not consulted here.
	 *
	 * @inheritDoc
	 */
	public function run(
		Env $env, Node $root, array $options = [], bool $atTopLevel = false
	): void {
		$title = $env->getContextTitle();
		$document = $root->ownerDocument;
		$siteConfig = $env->getSiteConfig();
		$pageConfig = $env->getPageConfig();
		// Unmodified base URI; used verbatim for the href attributes below.
		$siteBaseURI = $siteConfig->baseURI();

		// Set the charset in the <head> first.
		// This also adds the <head> element if it was missing.
		DOMUtils::appendToHead( $document, 'meta', [ 'charset' => 'utf-8' ] );

		// add mw: and mwr: RDFa prefixes
		$prefixes = [
			'dc: http://purl.org/dc/terms/',
			'mw: http://mediawiki.org/rdf/'
		];
		$document->documentElement->setAttribute( 'prefix', implode( ' ', $prefixes ) );

		// (From wfParseUrl in core:)
		// Protocol-relative URLs are handled really badly by parse_url().
		// It's so bad that the easiest way to handle them is to just prepend
		// 'https:'.
		$baseURI = $siteBaseURI;
		if ( substr( $baseURI, 0, 2 ) === '//' ) {
			$baseURI = "https:$baseURI";
		}
		// add 'https://' to baseURI if it was missing
		$pu = parse_url( $baseURI );
		$mwrPrefix = ( !empty( $pu['scheme'] ) ? '' : 'https://' ) .
			$baseURI . 'Special:Redirect/';

		( DOMCompat::getHead( $document ) )->setAttribute( 'prefix', 'mwr: ' . $mwrPrefix );

		// add <head> content based on page meta data:

		// Add page / revision metadata to the <head>
		// PORT-FIXME: We will need to do some refactoring to eliminate
		// this hardcoding. Probably even merge this into metadataMap
		$revProps = [
			'id' => $pageConfig->getPageId(),
			'ns' => $title->getNamespace(),
			'rev_parentid' => $pageConfig->getParentRevisionId(),
			'rev_revid' => $pageConfig->getRevisionId(),
			'rev_sha1' => $pageConfig->getRevisionSha1(),
			'rev_timestamp' => $pageConfig->getRevisionTimestamp()
		];
		foreach ( $revProps as $key => $value ) {
			// Skip properties without a value and those with no mapping
			// (e.g. 'rev_revid', which is handled separately below).
			if ( $value === null || $value === '' || !isset( $this->metadataMap[$key] ) ) {
				continue;
			}

			/** FIXME: The JS side has a bunch of other checks here */

			// generate proper attributes for the <meta> or <link> tag
			$attrs = [];
			foreach ( $this->metadataMap[$key] as $attrName => $attrValue ) {
				// evaluate a function, or perform sprintf-style formatting, or
				// use string directly, depending on value in metadataMap
				if ( $attrValue instanceof Closure ) {
					$attrValue = $attrValue( $revProps );
				} elseif ( strpos( $attrValue, '%' ) !== false ) {
					// @phan-suppress-next-line PhanPluginPrintfVariableFormatString
					$attrValue = sprintf( $attrValue, $value );
				}
				$attrs[$attrName] = $attrValue;
			}

			// <link> is used if there's a resource or href attribute.
			$tagName = ( isset( $attrs['resource'] ) || isset( $attrs['href'] ) ) ? 'link' : 'meta';
			DOMUtils::appendToHead( $document, $tagName, $attrs );
		}

		// The revision id becomes the overall subject of the document.
		if ( $revProps['rev_revid'] ) {
			$document->documentElement->setAttribute(
				'about', $mwrPrefix . 'revision/' . $revProps['rev_revid']
			);
		}

		// Normalize before comparison
		if ( $title->isSameLinkAs( $siteConfig->mainPageLinkTarget() ) ) {
			DOMUtils::appendToHead( $document, 'meta', [
				'property' => 'isMainPage',
				'content' => 'true' /* HTML attribute values should be strings */
			] );
		}

		// Set the parsoid content-type strings
		// FIXME: Should we be using http-equiv for this?
		$htmlVersion = $env->getOutputContentVersion();
		DOMUtils::appendToHead( $document, 'meta', [
			'property' => 'mw:htmlVersion',
			'content' => $htmlVersion
		] );
		// Temporary backward compatibility for clients
		// This could be skipped if we support a version downgrade path
		// with a major version bump.
		DOMUtils::appendToHead( $document, 'meta', [
			'property' => 'mw:html:version',
			'content' => $htmlVersion
		] );

		// Percent-encode each path component of the title separately so
		// that the '/' separators themselves stay unencoded.
		$expTitle = array_map(
			static fn ( $comp ) => PHPUtils::encodeURIComponent( $comp ),
			explode( '/', $title->getPrefixedDBKey() )
		);

		DOMUtils::appendToHead( $document, 'link', [
			'rel' => 'dc:isVersionOf',
			'href' => $siteBaseURI . implode( '/', $expTitle )
		] );

		// Add base href pointing to the wiki root
		DOMUtils::appendToHead( $document, 'base', [
			'href' => $siteBaseURI
		] );

		// PageConfig guarantees language will always be non-null.
		$lang = $pageConfig->getPageLanguageBcp47();
		$body = DOMCompat::getBody( $document );
		$body->setAttribute( 'lang', $lang->toBcp47Code() );
		$this->updateBodyClasslist( $body, $env );
		// T324431: Note that this is *not* the displaytitle, and that
		// the title element contents are plaintext *not* HTML
		$prefixedText = $title->getPrefixedText();
		DOMCompat::setTitle( $document, $prefixedText );
		$siteConfig->exportMetadataToHeadBcp47(
			$document, $env->getMetadata(), $prefixedText, $lang
		);

		// Indicate whether LanguageConverter is enabled, so that downstream
		// caches can split on variant (if necessary)
		DOMUtils::appendToHead( $document, 'meta', [
			'http-equiv' => 'content-language',
			// Note that this is "wrong": we should be returning
			// $env->htmlContentLanguageBcp47()->toBcp47Code() directly
			// but for back-compat we'll return the "old" mediawiki-internal
			// code for now
			'content' => Utils::bcp47ToMwCode( # T323052: remove this call
				$env->htmlContentLanguageBcp47()->toBcp47Code()
			),
		] );
		DOMUtils::appendToHead( $document, 'meta', [
			'http-equiv' => 'vary',
			'content' => $env->htmlVary()
		] );

		if ( $env->profiling() && $this->parentPipeline ) {
			// Append the pipeline's time profile as a trailing comment in
			// <body>, surrounded by newlines.
			$body->appendChild( $document->createTextNode( "\n" ) );
			$body->appendChild(
				$document->createComment( $this->parentPipeline->getTimeProfile() )
			);
			$body->appendChild( $document->createTextNode( "\n" ) );
		}

		if ( $env->hasDumpFlag( 'wt2html:limits' ) ) {
			/*
			 * PORT-FIXME: Not yet implemented
			$env->printWt2HtmlResourceUsage( [
				'HTML Size' => strlen( DOMCompat::getOuterHTML( $document->documentElement ) )
			] );
			 */
		}
	}
}