MediaWiki  1.34.0
RemexStripTagHandler.php
Go to the documentation of this file.
1 <?php
2 
3 use RemexHtml\Tokenizer\Attributes;
4 use RemexHtml\Tokenizer\TokenHandler;
5 use RemexHtml\Tokenizer\Tokenizer;
6 
/**
 * Token handler that reduces an HTML token stream to plain text.
 *
 * Character data is accumulated and exposed through getResult(); all markup
 * is dropped. A single space is emitted at the start and end of every
 * block-level tag (and <br>) so that words separated only by markup do not
 * get fused together. Character data found between a <script> or <style>
 * start tag and the matching end tag is discarded, because it is source code
 * rather than text a reader would see.
 */
class RemexStripTagHandler implements TokenHandler {
	/** @var string Plain text collected so far */
	private $text = '';

	/** @var bool True while between a <script>/<style> start tag and its end tag */
	private $insideNonVisibleTag = false;

	/**
	 * Get the text accumulated from all tokens handled so far.
	 *
	 * @return string
	 */
	public function getResult() {
		return $this->text;
	}

	/**
	 * Called at the start of the document; nothing needs to be set up.
	 *
	 * @param Tokenizer $t
	 * @param mixed $fns Unused
	 * @param mixed $fn Unused
	 */
	public function startDocument( Tokenizer $t, $fns, $fn ) {
		// Do nothing.
	}

	/**
	 * Called at the end of the document; the result is already complete.
	 *
	 * @param int $pos Unused
	 */
	public function endDocument( $pos ) {
		// Do nothing.
	}

	/**
	 * Parse errors are deliberately ignored; they do not affect the text.
	 *
	 * @param string $text Unused
	 * @param int $pos Unused
	 */
	public function error( $text, $pos ) {
		// Do nothing.
	}

	/**
	 * Append a run of character data to the result.
	 *
	 * Only the slice of $text described by $start and $length is used.
	 * Nothing is appended while inside a <script> or <style> element.
	 *
	 * @param string $text String containing the character data
	 * @param int $start Offset of the data within $text
	 * @param int $length Length of the data within $text
	 * @param int $sourceStart Unused
	 * @param int $sourceLength Unused
	 */
	public function characters( $text, $start, $length, $sourceStart, $sourceLength ) {
		if ( $this->insideNonVisibleTag ) {
			return;
		}
		$this->text .= substr( $text, $start, $length );
	}

	/**
	 * Handle a start tag.
	 *
	 * @param string $name Tag name
	 * @param Attributes $attrs Unused
	 * @param bool $selfClose Unused
	 * @param int $sourceStart Unused
	 * @param int $sourceLength Unused
	 */
	public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
		if ( $this->isNonVisibleTag( $name ) ) {
			// Suppress character data until the matching end tag is seen.
			$this->insideNonVisibleTag = true;
		}
		// Inject whitespace for typical block-level tags to
		// prevent merging unrelated<br>words.
		if ( $this->isBlockLevelTag( $name ) ) {
			$this->text .= ' ';
		}
	}

	/**
	 * Handle an end tag.
	 *
	 * @param string $name Tag name
	 * @param int $sourceStart Unused
	 * @param int $sourceLength Unused
	 */
	public function endTag( $name, $sourceStart, $sourceLength ) {
		if ( $this->isNonVisibleTag( $name ) ) {
			$this->insideNonVisibleTag = false;
		}
		// Inject whitespace for typical block-level tags to
		// prevent merging unrelated<br>words.
		if ( $this->isBlockLevelTag( $name ) ) {
			$this->text .= ' ';
		}
	}

	/**
	 * Doctype declarations contribute no text.
	 *
	 * @param string $name Unused
	 * @param string $public Unused
	 * @param string $system Unused
	 * @param bool $quirks Unused
	 * @param int $sourceStart Unused
	 * @param int $sourceLength Unused
	 */
	public function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
		// Do nothing.
	}

	/**
	 * Comments contribute no text.
	 *
	 * @param string $text Unused
	 * @param int $sourceStart Unused
	 * @param int $sourceLength Unused
	 */
	public function comment( $text, $sourceStart, $sourceLength ) {
		// Do nothing.
	}

	/**
	 * Tags that get a space injected at their boundaries.
	 *
	 * Per https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
	 * retrieved on Sept 12, 2018. <br> is not block-level but was added anyway.
	 * This is the list of HTML block-level elements given there (although
	 * "block-level" is not technically defined for elements that are new in
	 * HTML5).
	 * Structured as tag => true to allow O(1) membership test.
	 *
	 * @var array<string,bool>
	 */
	private static $BLOCK_LEVEL_TAGS = [
		'address' => true,
		'article' => true,
		'aside' => true,
		'blockquote' => true,
		'br' => true,
		'canvas' => true,
		'dd' => true,
		'div' => true,
		'dl' => true,
		'dt' => true,
		'fieldset' => true,
		'figcaption' => true,
		'figure' => true,
		'footer' => true,
		'form' => true,
		'h1' => true,
		'h2' => true,
		'h3' => true,
		'h4' => true,
		'h5' => true,
		'h6' => true,
		'header' => true,
		'hgroup' => true,
		'hr' => true,
		'li' => true,
		'main' => true,
		'nav' => true,
		'noscript' => true,
		'ol' => true,
		'output' => true,
		'p' => true,
		'pre' => true,
		'section' => true,
		'table' => true,
		'td' => true,
		'tfoot' => true,
		'th' => true,
		'tr' => true,
		'ul' => true,
		'video' => true,
	];

	/**
	 * Tags whose character data is source code rather than readable text.
	 * Structured as tag => true, like $BLOCK_LEVEL_TAGS.
	 *
	 * @var array<string,bool>
	 */
	private static $NON_VISIBLE_TAGS = [
		'script' => true,
		'style' => true,
	];

	/**
	 * Detect block level tags.
	 *
	 * The name is trimmed and lower-cased before the lookup.
	 *
	 * @param string $tagName HTML tag name
	 * @return bool True when the tag is listed in $BLOCK_LEVEL_TAGS
	 */
	private function isBlockLevelTag( $tagName ) {
		$key = strtolower( trim( $tagName ) );
		return isset( self::$BLOCK_LEVEL_TAGS[$key] );
	}

	/**
	 * Detect tags whose contents must not appear in the result.
	 *
	 * The name is trimmed and lower-cased before the lookup.
	 *
	 * @param string $tagName HTML tag name
	 * @return bool True when the tag is listed in $NON_VISIBLE_TAGS
	 */
	private function isNonVisibleTag( $tagName ) {
		$key = strtolower( trim( $tagName ) );
		return isset( self::$NON_VISIBLE_TAGS[$key] );
	}
}
RemexStripTagHandler\error
error( $text, $pos)
Definition: RemexStripTagHandler.php:25
RemexStripTagHandler\isBlockLevelTag
isBlockLevelTag( $tagName)
Detect block level tags.
Definition: RemexStripTagHandler.php:113
RemexStripTagHandler\endTag
endTag( $name, $sourceStart, $sourceLength)
Definition: RemexStripTagHandler.php:41
RemexStripTagHandler\comment
comment( $text, $sourceStart, $sourceLength)
Definition: RemexStripTagHandler.php:53
RemexStripTagHandler
Definition: RemexStripTagHandler.php:10
RemexStripTagHandler\characters
characters( $text, $start, $length, $sourceStart, $sourceLength)
Definition: RemexStripTagHandler.php:29
RemexStripTagHandler\startDocument
startDocument(Tokenizer $t, $fns, $fn)
Definition: RemexStripTagHandler.php:17
RemexStripTagHandler\$BLOCK_LEVEL_TAGS
static $BLOCK_LEVEL_TAGS
Definition: RemexStripTagHandler.php:63
RemexStripTagHandler\$text
$text
Definition: RemexStripTagHandler.php:11
RemexStripTagHandler\doctype
doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength)
Definition: RemexStripTagHandler.php:49
$t
$t
Definition: make-normalization-table.php:143
RemexStripTagHandler\startTag
startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength)
Definition: RemexStripTagHandler.php:33
RemexStripTagHandler\endDocument
endDocument( $pos)
Definition: RemexStripTagHandler.php:21
RemexStripTagHandler\getResult
getResult()
Definition: RemexStripTagHandler.php:13