MediaWiki REL1_39
RemexRemoveTagHandler.php
Go to the documentation of this file.
1<?php
2
3namespace MediaWiki\Parser;
4
5use Sanitizer;
6use Wikimedia\RemexHtml\Tokenizer\Attributes;
7use Wikimedia\RemexHtml\Tokenizer\PlainAttributes;
8use Wikimedia\RemexHtml\Tokenizer\RelayTokenHandler;
9use Wikimedia\RemexHtml\Tokenizer\TokenHandler;
10
/**
 * Helper class for Sanitizer::removeSomeTags().
 *
 * Sits between the tokenizer and the next token handler. Tags whose
 * lowercased name is a key of the "htmlelements" map are relayed (with
 * their attributes passed through Sanitizer::validateTagAttributes());
 * every other tag is re-emitted as plain text taken from the original
 * source. Comments are dropped.
 */
class RemexRemoveTagHandler extends RelayTokenHandler {
	/**
	 * Original source text; start/length ranges of it are handed to
	 * characters() when a tag is re-emitted as text.
	 */
	private $source;

	/**
	 * Map keyed by lowercase tag name, looked up with isset(). A tag
	 * present here keeps its self-closing flag.
	 */
	private $htmlsingle;

	/**
	 * Map keyed by lowercase tag name, looked up with isset(). A
	 * self-closed tag present here is relayed as-is instead of being
	 * expanded into a start tag followed by an end tag.
	 */
	private $htmlsingleonly;

	/**
	 * Map keyed by lowercase tag name, looked up with isset(). Only tags
	 * present here are relayed to the next handler.
	 */
	private $htmlelements;

	/**
	 * Optional callback applied to the attributes of every allowed start
	 * tag. It receives the Attributes object followed by the callback
	 * arguments, and its return value is used as the new attributes.
	 */
	private $attrCallback;

	/**
	 * Extra arguments appended to each $attrCallback invocation.
	 */
	private $callbackArgs;

	/**
	 * @param TokenHandler $nextHandler Handler that receives the relayed tokens
	 * @param string $source Source text the token offsets refer to
	 * @param array $tagData Must provide the keys 'htmlsingle',
	 *   'htmlsingleonly' and 'htmlelements', each a map keyed by tag name
	 * @param callable|null $attrCallback Attribute callback; it must return
	 *   an Attributes instance, since the result is passed to validateTag()
	 * @param array|null $callbackArgs Additional callback arguments; null is
	 *   treated as an empty list
	 */
	public function __construct(
		TokenHandler $nextHandler,
		string $source,
		array $tagData,
		?callable $attrCallback,
		?array $callbackArgs
	) {
		parent::__construct( $nextHandler );
		$this->source = $source;
		$this->htmlelements = $tagData['htmlelements'];
		$this->htmlsingle = $tagData['htmlsingle'];
		$this->htmlsingleonly = $tagData['htmlsingleonly'];
		$this->attrCallback = $attrCallback;
		$this->callbackArgs = $callbackArgs ?? [];
	}

	/**
	 * Swallow comment tokens: nothing is forwarded to the next handler.
	 *
	 * @inheritDoc
	 */
	public function comment( $text, $sourceStart, $sourceLength ) {
		// Intentionally empty, comments are not relayed.
	}

	/**
	 * Check element-specific attribute requirements.
	 *
	 * Only <meta> and <link> are restricted; any other element passes.
	 *
	 * @param string $element Lowercase element name
	 * @param Attributes $attrs Attributes of the start tag
	 * @return bool False when a <meta> or <link> lacks a required attribute
	 */
	private static function validateTag( string $element, Attributes $attrs ): bool {
		if ( $element !== 'meta' && $element !== 'link' ) {
			return true;
		}

		$values = $attrs->getValues();
		if ( !isset( $values['itemprop'] ) ) {
			// <meta> and <link> must have an itemprop="" otherwise they are
			// not valid or safe in content
			return false;
		}

		// <meta> must have a content="" for the itemprop, while <link>
		// must have an associated href=""
		$requiredAttr = $element === 'meta' ? 'content' : 'href';
		return isset( $values[$requiredAttr] );
	}

	/**
	 * Pass a range of the original source to the next handler as a text
	 * node.
	 *
	 * @param int $sourceStart Offset of the range within the source
	 * @param int $sourceLength Length of the range
	 */
	private function relayAsText( $sourceStart, $sourceLength ): void {
		$this->nextHandler->characters(
			$this->source,
			$sourceStart,
			$sourceLength,
			$sourceStart,
			$sourceLength
		);
	}

	/**
	 * Handle a start tag from the tokenizer: either relay it to the next
	 * stage, or re-emit it as raw text.
	 *
	 * @inheritDoc
	 */
	public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
		$t = strtolower( $name );

		if ( !isset( $this->htmlelements[$t] ) ) {
			// Not an allowed element, emit it as a text node instead.
			$this->relayAsText( $sourceStart, $sourceLength );
			return;
		}

		if ( $this->attrCallback !== null ) {
			$attrs = ( $this->attrCallback )( $attrs, ...$this->callbackArgs );
		}

		if ( $selfClose && !isset( $this->htmlsingle[$t] ) && !isset( $this->htmlsingleonly[$t] ) ) {
			// Remove the self-closing slash, to be consistent with
			// HTML5 semantics. T134423
			$selfClose = false;
		}

		// The element-specific check looks at the attributes as they are
		// before sanitization; sanitization itself runs either way.
		$tagIsValid = self::validateTag( $t, $attrs );
		$attrs = new PlainAttributes(
			Sanitizer::validateTagAttributes( $attrs->getValues(), $t )
		);

		if ( !$tagIsValid ) {
			// Emit this as a text node instead.
			$this->relayAsText( $sourceStart, $sourceLength );
			return;
		}

		// Interpret self-closing tags as empty tags even when HTML5 would
		// interpret them as start tags. Such input is commonly seen on
		// Wikimedia wikis with this intention.
		$expandToPair = $selfClose && !isset( $this->htmlsingleonly[$t] );

		$this->nextHandler->startTag(
			$name,
			$attrs,
			$expandToPair ? false : $selfClose,
			$sourceStart,
			$sourceLength
		);
		if ( $expandToPair ) {
			// Synthesize a zero-length end tag right after the start tag.
			$this->nextHandler->endTag( $name, $sourceStart + $sourceLength, 0 );
		}
	}

	/**
	 * Handle an end tag from the tokenizer: either relay it to the next
	 * stage, or re-emit it as raw text.
	 *
	 * @inheritDoc
	 */
	public function endTag( $name, $sourceStart, $sourceLength ) {
		if ( !isset( $this->htmlelements[strtolower( $name )] ) ) {
			// Emit this as a text node instead.
			$this->relayAsText( $sourceStart, $sourceLength );
			return;
		}

		// This is a good tag, relay it.
		$this->nextHandler->endTag( $name, $sourceStart, $sourceLength );
	}

}
if(!defined('MW_SETUP_CALLBACK'))
The persistent session ID (if any) loaded at startup.
Definition WebStart.php:82
Helper class for Sanitizer::removeSomeTags().
endTag( $name, $sourceStart, $sourceLength)
comment( $text, $sourceStart, $sourceLength)
__construct(TokenHandler $nextHandler, string $source, array $tagData, ?callable $attrCallback, ?array $callbackArgs)
startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength)
HTML sanitizer for MediaWiki.
Definition Sanitizer.php:41
$source