MediaWiki REL1_39
RemexDriver.php
Go to the documentation of this file.
1<?php
2
3namespace MediaWiki\Tidy;
4
7use Wikimedia\RemexHtml\HTMLData;
8use Wikimedia\RemexHtml\Serializer\Serializer;
9use Wikimedia\RemexHtml\Serializer\SerializerWithTracer;
10use Wikimedia\RemexHtml\Tokenizer\Tokenizer;
11use Wikimedia\RemexHtml\TreeBuilder\Dispatcher;
12use Wikimedia\RemexHtml\TreeBuilder\TreeBuilder;
13use Wikimedia\RemexHtml\TreeBuilder\TreeMutationTracer;
14
// Switches consulted by tidy(). The first four are copied from the tidy config
// array in the constructor; the last one comes from a separate option.

// When truthy, tidy() wraps the handler chain in a TreeMutationTracer (config default: false).
16 private $treeMutationTrace;
// When truthy, tidy() uses SerializerWithTracer instead of Serializer (config default: false).
17 private $serializerTrace;
// Passed as the second argument to RemexCompatMunger (config default: false).
18 private $mungerTrace;
// When truthy, tidy() inserts a RemexCompatMunger in front of the serializer (config default: true).
19 private $pwrap;
// When truthy, tidy() builds the tree with TreeBuilder; otherwise with RemexCompatBuilder.
// Only assigned when the constructor receives an options object, not a plain array.
20 private $enableLegacyMediaDOM;
21
/**
 * Names of the options this driver needs; the constructor passes this list
 * to assertRequiredOptions() before reading the options.
 *
 * NOTE(review): the two entries were missing from this listing (the source
 * numbering skips two lines here). They are restored from the two options
 * the constructor actually reads — confirm against the upstream file.
 */
public const CONSTRUCTOR_OPTIONS = [
	MainConfigNames::TidyConfig,
	MainConfigNames::ParserEnableLegacyMediaDOM,
];
27
/**
 * Read the tidy configuration and remember the tracing / wrapping switches.
 *
 * @param array|object $options Either a plain config array (deprecated since
 *   1.36, triggers wfDeprecated()), or an options object exposing
 *   assertRequiredOptions() and get() — presumably a ServiceOptions; confirm.
 */
public function __construct( $options ) {
	if ( is_array( $options ) ) {
		wfDeprecated( __METHOD__ . " with array argument", '1.36' );
		$config = $options;
		// The array form carries no ParserEnableLegacyMediaDOM value. Make the
		// fallback explicit instead of leaving the property unset (null);
		// tidy() picks the same builder for false as it did for null.
		$this->enableLegacyMediaDOM = false;
	} else {
		$options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
		$config = $options->get( MainConfigNames::TidyConfig );
		$this->enableLegacyMediaDOM = $options->get( MainConfigNames::ParserEnableLegacyMediaDOM );
	}

	// Array union: keys already present in $config win, missing ones get
	// these defaults.
	$config += [
		'treeMutationTrace' => false,
		'serializerTrace' => false,
		'mungerTrace' => false,
		'pwrap' => true
	];

	$this->treeMutationTrace = $config['treeMutationTrace'];
	$this->serializerTrace = $config['serializerTrace'];
	$this->mungerTrace = $config['mungerTrace'];
	$this->pwrap = $config['pwrap'];

	// The parent receives the config with defaults already merged in.
	parent::__construct( $config );
}
52
/**
 * Clean up an HTML fragment by running it through the RemexHtml
 * tokenizer / tree builder / serializer pipeline.
 *
 * @param string $text Input handed to the tokenizer
 * @param callable|null $textProcessor Forwarded to RemexCompatFormatter
 *   under the 'textProcessor' key
 * @return string The corrected HTML output (whatever the serializer collected)
 */
public function tidy( $text, ?callable $textProcessor = null ) {
	// Shared sink for every tracer that may be enabled below.
	$logTrace = static function ( $msg ) {
		wfDebug( "RemexHtml: $msg" );
	};

	// Output end of the pipeline.
	$formatter = new RemexCompatFormatter( [ 'textProcessor' => $textProcessor ] );
	$serializer = $this->serializerTrace
		? new SerializerWithTracer( $formatter, null, $logTrace )
		: new Serializer( $formatter );

	// Optional stages are stacked in front of the serializer, innermost first.
	$handler = $serializer;
	if ( $this->pwrap ) {
		$handler = new RemexCompatMunger( $handler, $this->mungerTrace );
	}
	if ( $this->treeMutationTrace ) {
		$handler = new TreeMutationTracer( $handler, $logTrace );
	}

	$builderOptions = [
		'ignoreErrors' => true,
		'ignoreNulls' => true,
	];
	if ( $this->enableLegacyMediaDOM ) {
		$treeBuilder = new TreeBuilder( $handler, $builderOptions );
	} else {
		$treeBuilder = new RemexCompatBuilder( $handler, $builderOptions );
	}

	$tokenizer = new Tokenizer( new Dispatcher( $treeBuilder ), $text, [
		'ignoreErrors' => true,
		'ignoreCharRefs' => true,
		'ignoreNulls' => true,
		'skipPreprocess' => true,
	] );

	// Parse as a fragment whose context element is an HTML <body>.
	$tokenizer->execute( [
		'fragmentNamespace' => HTMLData::NS_HTML,
		'fragmentName' => 'body'
	] );

	// Read from the serializer itself, not from whatever wraps it.
	return $serializer->getResult();
}
93}
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Logs a warning that a deprecated feature was used.
A class for passing options to services.
A class containing constants representing the names of configuration variables.
const TidyConfig
Name constant for the TidyConfig setting, for use with Config::get()
const ParserEnableLegacyMediaDOM
Name constant for the ParserEnableLegacyMediaDOM setting, for use with Config::get()
tidy( $text, ?callable $textProcessor=null)
Clean up HTML. Returns string: the corrected HTML output.
Base class for HTML cleanup utilities.
return true
Definition router.php:92