MediaWiki  master
RemexStripTagHandler.php
Go to the documentation of this file.
1 <?php
2 
3 namespace MediaWiki\Parser;
4 
5 use Wikimedia\RemexHtml\Tokenizer\Attributes;
6 use Wikimedia\RemexHtml\Tokenizer\NullTokenHandler;
7 
/**
 * Helper class for Sanitizer::stripAllTags().
 *
 * Acts as a token handler that accumulates the character data it is given
 * into a plain string, skipping text that appears inside non-visible tags
 * (style, script) and inserting a single space at the start and end of
 * block-level tags so that adjacent words are not glued together.
 */
class RemexStripTagHandler extends NullTokenHandler {
	/**
	 * Tags around which a separating space is injected.
	 *
	 * Per https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
	 * retrieved on sept 12, 2018. <br> is not block level but was added anyways.
	 * The following is a complete list of all HTML block level elements
	 * (although "block-level" is not technically defined for elements that are
	 * new in HTML5).
	 *
	 * Structured as tag => true to allow O(1) membership test.
	 */
	private const BLOCK_LEVEL_TAGS = [
		'address' => true,
		'article' => true,
		'aside' => true,
		'blockquote' => true,
		'br' => true,
		'canvas' => true,
		'dd' => true,
		'div' => true,
		'dl' => true,
		'dt' => true,
		'fieldset' => true,
		'figcaption' => true,
		'figure' => true,
		'footer' => true,
		'form' => true,
		'h1' => true,
		'h2' => true,
		'h3' => true,
		'h4' => true,
		'h5' => true,
		'h6' => true,
		'header' => true,
		'hgroup' => true,
		'hr' => true,
		'li' => true,
		'main' => true,
		'nav' => true,
		'noscript' => true,
		'ol' => true,
		'output' => true,
		'p' => true,
		'pre' => true,
		'section' => true,
		'table' => true,
		'td' => true,
		'tfoot' => true,
		'th' => true,
		'tr' => true,
		'ul' => true,
		'video' => true,
	];

	/**
	 * Tags whose text content must not appear in the result.
	 * Structured as tag => true, like BLOCK_LEVEL_TAGS.
	 */
	private const NON_VISIBLE_TAGS = [
		'style' => true,
		'script' => true,
	];

	/** @var bool True between the start and end of a non-visible tag */
	private $insideNonVisibleTag = false;

	/** @var string Text collected so far */
	private $text = '';

	/**
	 * @return string The text accumulated from the tokens seen so far
	 */
	public function getResult() {
		return $this->text;
	}

	/**
	 * Append a run of character data to the result, unless it occurs
	 * inside a non-visible tag.
	 *
	 * @param string $text Buffer containing the characters
	 * @param int $start Offset of the run within $text
	 * @param int $length Length of the run
	 * @param int $sourceStart Unused here
	 * @param int $sourceLength Unused here
	 */
	public function characters( $text, $start, $length, $sourceStart, $sourceLength ) {
		if ( $this->insideNonVisibleTag ) {
			// Content of style/script is dropped entirely.
			return;
		}

		$this->text .= substr( $text, $start, $length );
	}

	/**
	 * Handle an opening tag: start suppressing text if the tag is
	 * non-visible, and pad the output if the tag is block-level.
	 *
	 * @param string $name Tag name
	 * @param Attributes $attrs Unused here
	 * @param bool $selfClose Unused here
	 * @param int $sourceStart Unused here
	 * @param int $sourceLength Unused here
	 */
	public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
		if ( $this->isNonVisibleTag( $name ) ) {
			$this->insideNonVisibleTag = true;
		}

		$this->appendBlockSeparator( $name );
	}

	/**
	 * Handle a closing tag: stop suppressing text if the tag is
	 * non-visible, and pad the output if the tag is block-level.
	 *
	 * @param string $name Tag name
	 * @param int $sourceStart Unused here
	 * @param int $sourceLength Unused here
	 */
	public function endTag( $name, $sourceStart, $sourceLength ) {
		if ( $this->isNonVisibleTag( $name ) ) {
			$this->insideNonVisibleTag = false;
		}

		$this->appendBlockSeparator( $name );
	}

	/**
	 * Inject whitespace for typical block-level tags to
	 * prevent merging unrelated<br>words.
	 *
	 * @param string $name Tag name being opened or closed
	 */
	private function appendBlockSeparator( $name ) {
		if ( !$this->isBlockLevelTag( $name ) ) {
			return;
		}

		$this->text .= ' ';
	}

	/**
	 * Detect block level tags.
	 *
	 * The name is trimmed and lower-cased before the lookup.
	 *
	 * @param string $tagName HTML tag name
	 * @return bool True when the tag is listed in BLOCK_LEVEL_TAGS
	 */
	private function isBlockLevelTag( $tagName ) {
		return isset( self::BLOCK_LEVEL_TAGS[ strtolower( trim( $tagName ) ) ] );
	}

	/**
	 * Detect block tags which by default are non-visible items.
	 *
	 * The name is trimmed and lower-cased before the lookup.
	 *
	 * @param string $tagName HTML tag name
	 * @return bool True when the tag is listed in NON_VISIBLE_TAGS
	 */
	private function isNonVisibleTag( $tagName ) {
		return isset( self::NON_VISIBLE_TAGS[ strtolower( trim( $tagName ) ) ] );
	}

}
Helper class for Sanitizer::stripAllTags().
isNonVisibleTag( $tagName)
Detect block tags which by default are non-visible items.
isBlockLevelTag( $tagName)
Detect block level tags.
characters( $text, $start, $length, $sourceStart, $sourceLength)
endTag( $name, $sourceStart, $sourceLength)
startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength)