MediaWiki master
RemexRemoveTagHandler.php
Go to the documentation of this file.
1<?php
2
3namespace MediaWiki\Parser;
4
5use Wikimedia\RemexHtml\Tokenizer\Attributes;
6use Wikimedia\RemexHtml\Tokenizer\PlainAttributes;
7use Wikimedia\RemexHtml\Tokenizer\RelayTokenHandler;
8use Wikimedia\RemexHtml\Tokenizer\TokenHandler;
9
/**
 * Helper class for Sanitizer::removeSomeTags().
 *
 * Sits between the tokenizer and the next token handler. Start and end tags
 * whose lowercased name is listed in the allowed-element map are relayed
 * (start tags with sanitized attributes); every other tag is re-emitted as
 * plain text taken from the original source. Comments are dropped.
 */
class RemexRemoveTagHandler extends RelayTokenHandler {
	/**
	 * @var string The original input text. Rejected tags are re-emitted as
	 *  character data by pointing back into this string.
	 */
	private $source;

	/**
	 * @var array Map keyed by lowercase tag name. A self-closing slash on one
	 *  of these tags is honoured: the tag is relayed as an empty element.
	 */
	private $htmlsingle;

	/**
	 * @var array Map keyed by lowercase tag name. A self-closing start tag for
	 *  one of these is relayed unchanged, without a synthetic end tag.
	 */
	private $htmlsingleonly;

	/**
	 * @var array Map keyed by lowercase tag name of the elements that are
	 *  relayed to the next handler. Only key presence is checked.
	 */
	private $htmlelements;

	/**
	 * @var callable|null Optional callback applied to the attributes of every
	 *  allowed start tag before validation. Its return value replaces the
	 *  attributes, so it must return an Attributes object.
	 */
	private $attrCallback;

	/**
	 * @var array Extra arguments passed to $attrCallback after the attributes.
	 */
	private $callbackArgs;

	/**
	 * @param TokenHandler $nextHandler Handler that receives the filtered token stream
	 * @param string $source The text being tokenized
	 * @param array $tagData Must contain the keys 'htmlsingle', 'htmlsingleonly'
	 *  and 'htmlelements', each a map keyed by lowercase tag name
	 * @param callable|null $attrCallback Invoked as
	 *  $attrCallback( Attributes $attrs, ...$callbackArgs ) for each allowed start tag
	 * @param array|null $callbackArgs Extra arguments for $attrCallback;
	 *  null is treated as an empty list
	 */
	public function __construct(
		TokenHandler $nextHandler,
		string $source,
		array $tagData,
		?callable $attrCallback,
		?array $callbackArgs
	) {
		parent::__construct( $nextHandler );
		$this->source = $source;
		$this->htmlsingle = $tagData['htmlsingle'];
		$this->htmlsingleonly = $tagData['htmlsingleonly'];
		$this->htmlelements = $tagData['htmlelements'];
		$this->attrCallback = $attrCallback;
		$this->callbackArgs = $callbackArgs ?? [];
	}

	/**
	 * Swallow comment tokens: they are never passed to the next handler.
	 *
	 * @inheritDoc
	 */
	public function comment( $text, $sourceStart, $sourceLength ) {
		// Don't relay comments.
	}

	/**
	 * Check the extra attribute requirements of <meta> and <link>.
	 *
	 * Any other element is always accepted. <meta> and <link> need an
	 * itemprop attribute; on top of that <meta> needs content and <link>
	 * needs href.
	 *
	 * @param string $element Lowercase element name
	 * @param Attributes $attrs Attributes of the start tag
	 * @return bool True if the tag may be relayed, false if it must be rejected
	 */
	private static function validateTag( string $element, Attributes $attrs ): bool {
		if ( $element !== 'meta' && $element !== 'link' ) {
			return true;
		}

		$params = $attrs->getValues();
		if ( !isset( $params['itemprop'] ) ) {
			// <meta> and <link> must have an itemprop="" otherwise they are not valid or safe in content
			return false;
		}

		// <meta> must have a content="" for the itemprop,
		// <link> must have an associated href=""
		$required = $element === 'meta' ? 'content' : 'href';
		return isset( $params[$required] );
	}

	/**
	 * Re-emit a span of the original source as character data.
	 *
	 * @param int $sourceStart Offset of the span within the source text
	 * @param int $sourceLength Length of the span
	 */
	private function emitSourceAsText( $sourceStart, $sourceLength ): void {
		$this->nextHandler->characters(
			$this->source, $sourceStart, $sourceLength, $sourceStart, $sourceLength
		);
	}

	/**
	 * Handle a start tag from the tokenizer: either relay it to the next
	 * stage with sanitized attributes, or re-emit it as raw text.
	 *
	 * @inheritDoc
	 */
	public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
		$t = strtolower( $name );
		if ( !isset( $this->htmlelements[$t] ) ) {
			// Not an allowed element: emit this as a text node instead.
			$this->emitSourceAsText( $sourceStart, $sourceLength );
			return;
		}

		if ( $this->attrCallback ) {
			$attrs = ( $this->attrCallback )( $attrs, ...$this->callbackArgs );
		}

		if ( !self::validateTag( $t, $attrs ) ) {
			// Allowed element, but unsafe in this form. There is no point in
			// sanitizing attributes that are about to be thrown away.
			$this->emitSourceAsText( $sourceStart, $sourceLength );
			return;
		}

		$attrs = new PlainAttributes(
			Sanitizer::validateTagAttributes( $attrs->getValues(), $t )
		);

		$isSingleOnly = isset( $this->htmlsingleonly[$t] );
		if ( $selfClose && !$isSingleOnly && !isset( $this->htmlsingle[$t] ) ) {
			// Remove the self-closing slash, to be consistent with
			// HTML5 semantics. T134423
			$selfClose = false;
		}

		if ( $selfClose && !$isSingleOnly ) {
			// Interpret self-closing tags as empty tags even when
			// HTML5 would interpret them as start tags. Such input
			// is commonly seen on Wikimedia wikis with this intention.
			$this->nextHandler->startTag( $name, $attrs, false, $sourceStart, $sourceLength );
			// Zero-length synthetic end tag positioned right after the start tag.
			$this->nextHandler->endTag( $name, $sourceStart + $sourceLength, 0 );
			return;
		}

		$this->nextHandler->startTag( $name, $attrs, $selfClose, $sourceStart, $sourceLength );
	}

	/**
	 * Handle an end tag from the tokenizer: either relay it to the next
	 * stage, or re-emit it as raw text.
	 *
	 * @inheritDoc
	 */
	public function endTag( $name, $sourceStart, $sourceLength ) {
		if ( isset( $this->htmlelements[strtolower( $name )] ) ) {
			// This is a good tag, relay it.
			$this->nextHandler->endTag( $name, $sourceStart, $sourceLength );
			return;
		}

		// Emit this as a text node instead.
		$this->emitSourceAsText( $sourceStart, $sourceLength );
	}

}
array $params
The job parameters.
if(!defined('MW_SETUP_CALLBACK'))
Definition WebStart.php:81
Helper class for Sanitizer::removeSomeTags().
endTag( $name, $sourceStart, $sourceLength)
comment( $text, $sourceStart, $sourceLength)
__construct(TokenHandler $nextHandler, string $source, array $tagData, ?callable $attrCallback, ?array $callbackArgs)
startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength)
static validateTagAttributes(array $attribs, string $element)
Take an array of attribute names and values and normalize or discard illegal values for the given element type.
$source