MediaWiki  master
RemexRemoveTagHandler.php
Go to the documentation of this file.
1 <?php
2 
3 namespace MediaWiki\Parser;
4 
5 use Sanitizer;
6 use Wikimedia\RemexHtml\Tokenizer\Attributes;
7 use Wikimedia\RemexHtml\Tokenizer\PlainAttributes;
8 use Wikimedia\RemexHtml\Tokenizer\RelayTokenHandler;
9 use Wikimedia\RemexHtml\Tokenizer\TokenHandler;
10 
/**
 * Helper class for Sanitizer::removeSomeTags().
 *
 * Sits between the tokenizer and the next token handler. Start and end tags
 * whose name is listed in the allowed-element set are relayed (start tags
 * with sanitized attributes); every other tag is handed to the next handler
 * as character data taken from the original source. Comments are dropped.
 */
class RemexRemoveTagHandler extends RelayTokenHandler {
	/**
	 * @var string Source string passed to characters() on the next handler
	 *  when a tag is re-emitted as text.
	 */
	private $source;

	/**
	 * @var array Keyed by lowercase tag name (only key presence is checked).
	 *  A self-closing slash is kept for these tags.
	 */
	private $htmlsingle;

	/**
	 * @var array Keyed by lowercase tag name (only key presence is checked).
	 *  A self-closing slash is kept for these tags, and a self-closed start
	 *  tag is relayed as-is instead of being expanded to a start/end pair.
	 */
	private $htmlsingleonly;

	/**
	 * @var array Keyed by lowercase tag name (only key presence is checked).
	 *  Tags that may be relayed to the next handler.
	 */
	private $htmlelements;

	/**
	 * @var callable|null Optional callback invoked with the Attributes object
	 *  followed by $callbackArgs; its return value replaces the attributes.
	 */
	private $attrCallback;

	/**
	 * @var array Extra arguments appended to every $attrCallback invocation.
	 */
	private $callbackArgs;

	/**
	 * @param TokenHandler $nextHandler Handler receiving the filtered tokens
	 * @param string $source Source string used when re-emitting tags as text
	 * @param array $tagData Must provide the keys 'htmlsingle',
	 *  'htmlsingleonly' and 'htmlelements'
	 * @param callable|null $attrCallback Optional attribute callback
	 * @param array|null $callbackArgs Extra callback arguments; null is
	 *  treated as an empty list
	 */
	public function __construct(
		TokenHandler $nextHandler,
		string $source,
		array $tagData,
		?callable $attrCallback,
		?array $callbackArgs
	) {
		parent::__construct( $nextHandler );

		$this->source = $source;
		$this->attrCallback = $attrCallback;
		$this->callbackArgs = $callbackArgs ?? [];

		$this->htmlsingle = $tagData['htmlsingle'];
		$this->htmlsingleonly = $tagData['htmlsingleonly'];
		$this->htmlelements = $tagData['htmlelements'];
	}

	/**
	 * Swallow comment tokens: nothing is passed to the next handler.
	 *
	 * @param string $text
	 * @param int $sourceStart
	 * @param int $sourceLength
	 */
	public function comment( $text, $sourceStart, $sourceLength ) {
		// Intentionally empty: comments are not relayed.
	}

	/**
	 * Check the extra attribute requirements of <meta> and <link>.
	 *
	 * Both need an itemprop="" attribute, otherwise they are not valid or
	 * safe in content. In addition <meta> needs content="" for the itemprop
	 * and <link> needs an associated href="". Every other element passes.
	 *
	 * @param string $element Lowercase element name
	 * @param Attributes $attrs Attributes of the start tag
	 * @return bool False when a <meta> or <link> lacks a required attribute
	 */
	private static function validateTag( string $element, Attributes $attrs ): bool {
		// Attribute each restricted element needs in addition to itemprop.
		$companionAttr = [
			'meta' => 'content',
			'link' => 'href',
		];

		if ( !isset( $companionAttr[$element] ) ) {
			return true;
		}

		$values = $attrs->getValues();

		return isset( $values['itemprop'] ) && isset( $values[ $companionAttr[$element] ] );
	}

	/**
	 * Handle a start tag from the tokenizer: either relay it to the next
	 * stage, or re-emit it as raw text.
	 *
	 * @param string $name
	 * @param Attributes $attrs
	 * @param bool $selfClose
	 * @param int $sourceStart
	 * @param int $sourceLength
	 */
	public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
		$tagName = strtolower( $name );

		if ( !isset( $this->htmlelements[$tagName] ) ) {
			// Not an allowed element.
			$this->emitSourceAsText( $sourceStart, $sourceLength );
			return;
		}

		if ( $this->attrCallback ) {
			$attrs = ( $this->attrCallback )( $attrs, ...$this->callbackArgs );
		}

		if (
			$selfClose
			&& !isset( $this->htmlsingle[$tagName] )
			&& !isset( $this->htmlsingleonly[$tagName] )
		) {
			// Remove the self-closing slash, to be consistent with
			// HTML5 semantics. T134423
			$selfClose = false;
		}

		// The <meta>/<link> check looks at the attributes as they are
		// prior to sanitization; sanitization then runs in either case.
		$tagIsValid = self::validateTag( $tagName, $attrs );
		$sanitized = Sanitizer::validateTagAttributes( $attrs->getValues(), $tagName );
		$attrs = new PlainAttributes( $sanitized );

		if ( !$tagIsValid ) {
			$this->emitSourceAsText( $sourceStart, $sourceLength );
			return;
		}

		// Interpret self-closing tags as empty tags even when HTML5 would
		// interpret them as start tags. Such input is commonly seen on
		// Wikimedia wikis with this intention.
		$expandToPair = $selfClose && !isset( $this->htmlsingleonly[$tagName] );

		$this->nextHandler->startTag(
			$name,
			$attrs,
			$expandToPair ? false : $selfClose,
			$sourceStart,
			$sourceLength
		);
		if ( $expandToPair ) {
			// Zero-length end tag positioned right after the start tag.
			$this->nextHandler->endTag( $name, $sourceStart + $sourceLength, 0 );
		}
	}

	/**
	 * Handle an end tag from the tokenizer: either relay it to the next
	 * stage, or re-emit it as raw text.
	 *
	 * @param string $name
	 * @param int $sourceStart
	 * @param int $sourceLength
	 */
	public function endTag( $name, $sourceStart, $sourceLength ) {
		if ( !isset( $this->htmlelements[ strtolower( $name ) ] ) ) {
			// Not an allowed element.
			$this->emitSourceAsText( $sourceStart, $sourceLength );
			return;
		}

		// This is a good tag, relay it.
		$this->nextHandler->endTag( $name, $sourceStart, $sourceLength );
	}

	/**
	 * Pass a range of the original source to the next handler as a text node.
	 *
	 * @param int $sourceStart Offset of the range within the source string
	 * @param int $sourceLength Length of the range
	 */
	private function emitSourceAsText( $sourceStart, $sourceLength ): void {
		$this->nextHandler->characters(
			$this->source,
			$sourceStart,
			$sourceLength,
			$sourceStart,
			$sourceLength
		);
	}

}
if(!defined('MW_SETUP_CALLBACK'))
The persistent session ID (if any) loaded at startup.
Definition: WebStart.php:82
Helper class for Sanitizer::removeSomeTags().
__construct(TokenHandler $nextHandler, string $source, array $tagData, ?callable $attrCallback, ?array $callbackArgs)
endTag( $name, $sourceStart, $sourceLength)
comment( $text, $sourceStart, $sourceLength)
startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength)
HTML sanitizer for MediaWiki.
Definition: Sanitizer.php:41
static validateTagAttributes( $attribs, $element)
Take an array of attribute names and values and normalize or discard illegal values for the given ele...
Definition: Sanitizer.php:525
$source