Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
100.00% |
22 / 22 |
|
100.00% |
1 / 1 |
CRAP | |
100.00% |
1 / 1 |
| HtmlHelper | |
100.00% |
22 / 22 |
|
100.00% |
1 / 1 |
4 | |
100.00% |
1 / 1 |
| modifyElements | |
100.00% |
22 / 22 |
|
100.00% |
1 / 1 |
4 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace MediaWiki\Html; |
| 4 | |
| 5 | use MediaWiki\Tidy\RemexCompatFormatter; |
| 6 | use Wikimedia\RemexHtml\HTMLData; |
| 7 | use Wikimedia\RemexHtml\Serializer\HtmlFormatter; |
| 8 | use Wikimedia\RemexHtml\Serializer\Serializer; |
| 9 | use Wikimedia\RemexHtml\Tokenizer\Tokenizer; |
| 10 | use Wikimedia\RemexHtml\TreeBuilder\Dispatcher; |
| 11 | use Wikimedia\RemexHtml\TreeBuilder\TreeBuilder; |
| 12 | |
/**
 * Static helpers for working with strings of HTML.
 */
class HtmlHelper {

	/**
	 * Rewrite selected elements of an HTML fragment using caller-supplied callbacks.
	 *
	 * @param string $htmlFragment The HTML fragment to process. It must be valid HTML
	 *   (i.e. produced by the parser rather than supplied by a user).
	 * @param callable $shouldModifyCallback Receives a single
	 *   RemexHtml\Serializer\SerializerNode and returns true when that node should be modified.
	 * @param callable $modifyCallback Receives a single
	 *   RemexHtml\Serializer\SerializerNode and carries out the modification on it.
	 *   It must return either the new node (the original node object is allowed)
	 *   or a string, which is treated as the outerHTML of a replacement.
	 * @param bool $html5format When true (the default), the parsed HTML is serialized
	 *   as standard HTML5. When false, a serialization that is more compatible with the
	 *   output of the legacy parser is used instead; see RemexCompatFormatter for details.
	 *   In that mode, attributes and text nodes contain unexpanded character
	 *   references (entities).
	 * @return string
	 */
	public static function modifyElements(
		string $htmlFragment,
		callable $shouldModifyCallback,
		callable $modifyCallback,
		bool $html5format = true
	) {
		// Choose the formatter and the matching parser options in one place, so the
		// two serialization modes are easy to compare side by side.
		if ( !$html5format ) {
			$formatter = new class( [], $shouldModifyCallback, $modifyCallback ) extends RemexCompatFormatter {
				use HtmlHelperTrait;
			};
			$treeBuilderOptions = [
				'ignoreErrors' => true,
				'ignoreNulls' => true,
			];
			// RemexCompatFormatter expects 'ignoreCharRefs' to be used (T354361). The other options are
			// for consistency with RemexDriver and supposedly improve performance.
			$tokenizerOptions = [
				'ignoreErrors' => true,
				'ignoreCharRefs' => true,
				'ignoreNulls' => true,
				'skipPreprocess' => true,
			];
		} else {
			$formatter = new class( [], $shouldModifyCallback, $modifyCallback ) extends HtmlFormatter {
				use HtmlHelperTrait;
			};
			$treeBuilderOptions = [];
			$tokenizerOptions = [];
		}

		// Wire up the pipeline: tokenizer -> dispatcher -> tree builder -> serializer -> formatter.
		$serializer = new Serializer( $formatter );
		$dispatcher = new Dispatcher( new TreeBuilder( $serializer, $treeBuilderOptions ) );
		$tokenizer = new Tokenizer( $dispatcher, $htmlFragment, $tokenizerOptions );

		// Run the tokenizer with 'body' in the HTML namespace as the fragment context.
		$tokenizer->execute( [
			'fragmentNamespace' => HTMLData::NS_HTML,
			'fragmentName' => 'body',
		] );

		return $serializer->getResult();
	}

}