MediaWiki  master
RemexStripTagHandler.php
Go to the documentation of this file.
1 <?php
2 
3 use RemexHtml\Tokenizer\Attributes;
4 use RemexHtml\Tokenizer\NullTokenHandler;
5 
/**
 * Token handler that reduces a token stream to plain text.
 *
 * Character data is appended verbatim to an internal buffer; tags themselves
 * contribute nothing except a single space when their name is listed in
 * BLOCK_LEVEL_TAGS. Any other token type is left to the parent
 * NullTokenHandler (presumably a no-op there; that class is not shown here).
 */
class RemexStripTagHandler extends NullTokenHandler {
	/**
	 * Tag names that get a space injected around them.
	 *
	 * Taken from
	 * https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
	 * as retrieved on Sept 12, 2018, where it was given as the complete list
	 * of HTML block-level elements (although "block-level" is not technically
	 * defined for elements that are new in HTML5). <br> is not block-level
	 * but was added anyway.
	 *
	 * Stored as tag => true so that membership is a constant-time key lookup.
	 */
	private const BLOCK_LEVEL_TAGS = [
		// Sectioning and generic containers
		'address' => true,
		'article' => true,
		'aside' => true,
		'blockquote' => true,
		'div' => true,
		'footer' => true,
		'header' => true,
		'hgroup' => true,
		'main' => true,
		'nav' => true,
		'section' => true,

		// Headings
		'h1' => true,
		'h2' => true,
		'h3' => true,
		'h4' => true,
		'h5' => true,
		'h6' => true,

		// Paragraph-like content and separators
		'br' => true,
		'hr' => true,
		'p' => true,
		'pre' => true,

		// Lists
		'dd' => true,
		'dl' => true,
		'dt' => true,
		'li' => true,
		'ol' => true,
		'ul' => true,

		// Tables
		'table' => true,
		'td' => true,
		'tfoot' => true,
		'th' => true,
		'tr' => true,

		// Forms, figures and embedded content
		'canvas' => true,
		'fieldset' => true,
		'figcaption' => true,
		'figure' => true,
		'form' => true,
		'noscript' => true,
		'output' => true,
		'video' => true,
	];

	/** @var string Plain text collected so far */
	private $text = '';

	/**
	 * Get the text accumulated from all tokens handled so far.
	 *
	 * @return string
	 */
	public function getResult() {
		return $this->text;
	}

	/**
	 * Append a run of character data to the result.
	 *
	 * @param string $text Buffer containing the characters
	 * @param int $start Offset of the run within $text
	 * @param int $length Length of the run within $text
	 * @param int $sourceStart Unused
	 * @param int $sourceLength Unused
	 */
	public function characters( $text, $start, $length, $sourceStart, $sourceLength ) {
		$chunk = substr( $text, $start, $length );
		$this->text .= $chunk;
	}

	/**
	 * Handle an opening tag: the tag is dropped, but a space is emitted for
	 * block-level tags so that text on either side does not run together
	 * (e.g. "unrelated<br>words" must not become "unrelatedwords").
	 *
	 * @param string $name Tag name
	 * @param Attributes $attrs Unused
	 * @param bool $selfClose Unused
	 * @param int $sourceStart Unused
	 * @param int $sourceLength Unused
	 */
	public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
		if ( !$this->isBlockLevelTag( $name ) ) {
			return;
		}

		$this->text .= ' ';
	}

	/**
	 * Handle a closing tag: the tag is dropped, but a space is emitted for
	 * block-level tags so that text on either side does not run together
	 * (e.g. "unrelated<br>words" must not become "unrelatedwords").
	 *
	 * @param string $name Tag name
	 * @param int $sourceStart Unused
	 * @param int $sourceLength Unused
	 */
	public function endTag( $name, $sourceStart, $sourceLength ) {
		if ( !$this->isBlockLevelTag( $name ) ) {
			return;
		}

		$this->text .= ' ';
	}

	/**
	 * Detect block level tags.
	 *
	 * The name is trimmed and lower-cased before being looked up in
	 * BLOCK_LEVEL_TAGS.
	 *
	 * @param string $tagName HTML tag name
	 * @return bool True when the normalised name is a key of BLOCK_LEVEL_TAGS
	 */
	private function isBlockLevelTag( $tagName ) {
		$trimmed = trim( $tagName );
		$normalised = strtolower( $trimmed );

		return isset( self::BLOCK_LEVEL_TAGS[$normalised] );
	}
}
RemexStripTagHandler\isBlockLevelTag
isBlockLevelTag( $tagName)
Detect block level tags.
Definition: RemexStripTagHandler.php:92
RemexStripTagHandler\endTag
endTag( $name, $sourceStart, $sourceLength)
Definition: RemexStripTagHandler.php:28
RemexStripTagHandler\BLOCK_LEVEL_TAGS
const BLOCK_LEVEL_TAGS
Definition: RemexStripTagHandler.php:42
RemexStripTagHandler
Definition: RemexStripTagHandler.php:9
RemexStripTagHandler\characters
characters( $text, $start, $length, $sourceStart, $sourceLength)
Definition: RemexStripTagHandler.php:16
RemexStripTagHandler\$text
$text
Definition: RemexStripTagHandler.php:10
RemexStripTagHandler\startTag
startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength)
Definition: RemexStripTagHandler.php:20
RemexStripTagHandler\getResult
getResult()
Definition: RemexStripTagHandler.php:12