MediaWiki master
RemexRemoveTagHandler.php
Go to the documentation of this file.
1<?php
2
3namespace MediaWiki\Parser;
4
5use Wikimedia\RemexHtml\Tokenizer\Attributes;
6use Wikimedia\RemexHtml\Tokenizer\PlainAttributes;
7use Wikimedia\RemexHtml\Tokenizer\RelayTokenHandler;
8use Wikimedia\RemexHtml\Tokenizer\TokenHandler;
9
/**
 * Helper class for Sanitizer::removeSomeTags().
 *
 * Token handler that sits in front of another handler and decides, token by
 * token, whether a tag is relayed onwards as markup or re-emitted as literal
 * text taken from the original source string. Comments are relayed only when
 * they match an optional regular expression.
 */
class RemexRemoveTagHandler extends RelayTokenHandler {
	/**
	 * @var string The source text; slices of it are emitted as character
	 *   data when a tag is rejected
	 */
	private $source;

	/**
	 * @var array Keyed by lower-case tag name: tags whose self-closing flag
	 *   is preserved rather than cleared
	 */
	private $htmlsingle;

	/**
	 * @var array Keyed by lower-case tag name: tags whose self-closing flag
	 *   is preserved and which are relayed as a single start tag instead of
	 *   being expanded into a start/end pair
	 */
	private $htmlsingleonly;

	/**
	 * @var array Keyed by lower-case tag name: tags which are relayed to the
	 *   next handler; anything else is turned into text
	 */
	private $htmlelements;

	/**
	 * @var string|null Pattern a comment must match to be relayed; when null
	 *   every comment is dropped
	 */
	private $commentRegex;

	/**
	 * @var callable|null Invoked with the tag's attributes followed by
	 *   $callbackArgs; its return value replaces the attributes
	 */
	private $attrCallback;

	/**
	 * @var array Extra arguments appended to every $attrCallback invocation
	 */
	private $callbackArgs;

	/**
	 * @param TokenHandler $nextHandler Handler that receives the filtered tokens
	 * @param string $source Text the token offsets refer to
	 * @param array $tagData Must contain the keys 'htmlsingle',
	 *   'htmlsingleonly' and 'htmlelements'
	 * @param callable|null $attrCallback Optional attribute rewriting callback;
	 *   its result is used as an Attributes object
	 * @param array|null $callbackArgs Extra arguments for $attrCallback; null
	 *   is treated as an empty list
	 * @param array $options May contain 'commentRegex'
	 */
	public function __construct(
		TokenHandler $nextHandler,
		string $source,
		array $tagData,
		?callable $attrCallback,
		?array $callbackArgs,
		array $options
	) {
		parent::__construct( $nextHandler );

		// Tag tables
		$this->htmlelements = $tagData['htmlelements'];
		$this->htmlsingle = $tagData['htmlsingle'];
		$this->htmlsingleonly = $tagData['htmlsingleonly'];

		// Attribute callback and its extra arguments
		$this->attrCallback = $attrCallback;
		$this->callbackArgs = $callbackArgs ?? [];

		$this->commentRegex = $options['commentRegex'] ?? null;
		$this->source = $source;
	}

	/**
	 * Relay a comment only if a comment pattern is configured and the
	 * comment text matches it; otherwise the comment is silently dropped.
	 *
	 * @inheritDoc
	 */
	public function comment( $text, $sourceStart, $sourceLength ) {
		if ( $this->commentRegex === null ) {
			return;
		}
		if ( !preg_match( $this->commentRegex, $text ) ) {
			return;
		}
		$this->nextHandler->comment( $text, $sourceStart, $sourceLength );
	}

	/**
	 * Check the attribute requirements of <meta> and <link>.
	 *
	 * Both need an itemprop attribute, otherwise they are not valid or safe
	 * in content. In addition <meta> needs content (the value for the
	 * itemprop) and <link> needs an associated href. Every other element
	 * passes unconditionally.
	 *
	 * @param string $element Lower-case element name
	 * @param Attributes $attrs Attributes of the tag
	 * @return bool True if the tag may be relayed as markup
	 */
	private static function validateTag( string $element, Attributes $attrs ): bool {
		// Restricted element => attribute it needs besides itemprop
		$companionAttr = [
			'meta' => 'content',
			'link' => 'href',
		];

		if ( !isset( $companionAttr[$element] ) ) {
			return true;
		}

		$params = $attrs->getValues();
		return isset( $params['itemprop'] ) && isset( $params[$companionAttr[$element]] );
	}

	/**
	 * Handle a start tag from the tokenizer: either relay it to the next
	 * stage, or re-emit it as raw text.
	 *
	 * @inheritDoc
	 */
	public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
		$tag = strtolower( $name );

		if ( !isset( $this->htmlelements[$tag] ) ) {
			// Not in the table of relayed elements
			$this->emitSourceAsText( $sourceStart, $sourceLength );
			return;
		}

		if ( $this->attrCallback ) {
			$attrs = ( $this->attrCallback )( $attrs, ...$this->callbackArgs );
		}

		$singleOnly = isset( $this->htmlsingleonly[$tag] );
		if ( $selfClose && !$singleOnly && !isset( $this->htmlsingle[$tag] ) ) {
			// Remove the self-closing slash, to be consistent with
			// HTML5 semantics. T134423
			$selfClose = false;
		}

		// The <meta>/<link> check looks at the attributes as they are
		// before Sanitizer::validateTagAttributes() has processed them.
		$acceptable = self::validateTag( $tag, $attrs );
		$attrs = new PlainAttributes(
			Sanitizer::validateTagAttributes( $attrs->getValues(), $tag )
		);

		if ( !$acceptable ) {
			$this->emitSourceAsText( $sourceStart, $sourceLength );
			return;
		}

		if ( !$selfClose || $singleOnly ) {
			$this->nextHandler->startTag( $name, $attrs, $selfClose, $sourceStart, $sourceLength );
			return;
		}

		// Interpret self-closing tags as empty tags even when
		// HTML5 would interpret them as start tags. Such input
		// is commonly seen on Wikimedia wikis with this intention.
		// The synthetic end tag is zero-length and placed right after
		// the start tag in the source.
		$this->nextHandler->startTag( $name, $attrs, false, $sourceStart, $sourceLength );
		$this->nextHandler->endTag( $name, $sourceStart + $sourceLength, 0 );
	}

	/**
	 * Handle an end tag from the tokenizer: either relay it to the next
	 * stage, or re-emit it as raw text.
	 *
	 * @inheritDoc
	 */
	public function endTag( $name, $sourceStart, $sourceLength ) {
		if ( !isset( $this->htmlelements[strtolower( $name )] ) ) {
			$this->emitSourceAsText( $sourceStart, $sourceLength );
			return;
		}

		// This is a good tag, relay it.
		$this->nextHandler->endTag( $name, $sourceStart, $sourceLength );
	}

	/**
	 * Pass the part of the source covered by a token to the next handler as
	 * a text node instead of markup.
	 *
	 * @param int $sourceStart Offset of the token within the source
	 * @param int $sourceLength Length of the token within the source
	 */
	private function emitSourceAsText( $sourceStart, $sourceLength ) {
		$this->nextHandler->characters(
			$this->source,
			$sourceStart,
			$sourceLength,
			$sourceStart,
			$sourceLength
		);
	}

}
if(!defined('MW_SETUP_CALLBACK'))
Definition WebStart.php:69
Helper class for Sanitizer::removeSomeTags().
__construct(TokenHandler $nextHandler, string $source, array $tagData, ?callable $attrCallback, ?array $callbackArgs, array $options)
endTag( $name, $sourceStart, $sourceLength)
comment( $text, $sourceStart, $sourceLength)
startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength)
static validateTagAttributes(array $attribs, string $element)
Take an array of attribute names and values and normalize or discard illegal values for the given element type.
$source