MediaWiki master
RemexDriver.php
Go to the documentation of this file.
1<?php
2
3namespace MediaWiki\Tidy;
4
7use Wikimedia\RemexHtml\HTMLData;
8use Wikimedia\RemexHtml\Serializer\Serializer;
9use Wikimedia\RemexHtml\Serializer\SerializerWithTracer;
10use Wikimedia\RemexHtml\Tokenizer\Tokenizer;
11use Wikimedia\RemexHtml\TreeBuilder\Dispatcher;
12use Wikimedia\RemexHtml\TreeBuilder\TreeBuilder;
13use Wikimedia\RemexHtml\TreeBuilder\TreeMutationTracer;
14
/**
 * @var bool When truthy, tidy() wraps the handler chain in a
 *   TreeMutationTracer. Defaults to false.
 */
private $treeMutationTrace;

/**
 * @var bool When truthy, tidy() serializes with SerializerWithTracer
 *   instead of the plain Serializer. Defaults to false.
 */
private $serializerTrace;

/**
 * @var bool Passed as the second constructor argument of
 *   RemexCompatMunger. Defaults to false.
 */
private $mungerTrace;

/**
 * @var bool When truthy, tidy() inserts a RemexCompatMunger stage in
 *   front of the serializer. Defaults to true.
 */
private $pwrap;

/**
 * @var bool When truthy, tidy() builds the tree with the stock TreeBuilder
 *   rather than RemexCompatBuilder. Presumably a boolean; the value comes
 *   straight from the ParserEnableLegacyMediaDOM option.
 */
private $enableLegacyMediaDOM;
26
/**
 * Option keys that a ServiceOptions instance must carry to construct this
 * driver. The constructor asserts the supplied options against this list,
 * so every option the driver reads has to be named here.
 */
public const CONSTRUCTOR_OPTIONS = [
	MainConfigNames::TidyConfig,
	MainConfigNames::ParserEnableLegacyMediaDOM,
];
32
/**
 * Load the tidy configuration from the service options, fill in defaults
 * for any keys it does not set, and cache the flags that tidy() consults.
 *
 * @param ServiceOptions $options Checked against self::CONSTRUCTOR_OPTIONS
 *   before any value is read.
 */
public function __construct( ServiceOptions $options ) {
	$options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );

	// The configuration array has to exist before defaults are merged into
	// it; using `+=` on an unassigned variable fails on PHP 8.
	$config = $options->get( MainConfigNames::TidyConfig );
	$this->enableLegacyMediaDOM = $options->get( MainConfigNames::ParserEnableLegacyMediaDOM );

	// Array union keeps keys already present in $config, so explicitly
	// configured values take precedence over these defaults.
	$config += [
		'treeMutationTrace' => false,
		'serializerTrace' => false,
		'mungerTrace' => false,
		'pwrap' => true
	];

	$this->treeMutationTrace = $config['treeMutationTrace'];
	$this->serializerTrace = $config['serializerTrace'];
	$this->mungerTrace = $config['mungerTrace'];
	$this->pwrap = $config['pwrap'];

	// The parent receives the configuration with defaults applied.
	parent::__construct( $config );
}
49
/**
 * Clean up HTML by tokenizing it as a fragment inside a <body> context and
 * re-serializing the resulting tree.
 *
 * The pipeline is assembled back to front: formatter, serializer, then the
 * optional munger and mutation tracer, then the tree builder, dispatcher
 * and tokenizer.
 *
 * @param string $text HTML to clean up
 * @param callable|null $textProcessor Handed to RemexCompatFormatter as its
 *   'textProcessor' option
 * @return string The corrected HTML output
 */
public function tidy( $text, ?callable $textProcessor = null ) {
	// Shared sink for both tracers: forwards every message to the debug log.
	$logTrace = static function ( $msg ) {
		wfDebug( "RemexHtml: $msg" );
	};

	$formatter = new RemexCompatFormatter( [ 'textProcessor' => $textProcessor ] );
	$serializer = $this->serializerTrace
		? new SerializerWithTracer( $formatter, null, $logTrace )
		: new Serializer( $formatter );

	// Start from the serializer and wrap it with each enabled stage; the
	// outermost wrapper is what the tree builder talks to.
	$handler = $serializer;
	if ( $this->pwrap ) {
		$handler = new RemexCompatMunger( $handler, $this->mungerTrace );
	}
	if ( $this->treeMutationTrace ) {
		$handler = new TreeMutationTracer( $handler, $logTrace );
	}

	$builderOptions = [
		'ignoreErrors' => true,
		'ignoreNulls' => true,
	];
	if ( $this->enableLegacyMediaDOM ) {
		$treeBuilder = new TreeBuilder( $handler, $builderOptions );
	} else {
		$treeBuilder = new RemexCompatBuilder( $handler, $builderOptions );
	}

	$tokenizerOptions = [
		'ignoreErrors' => true,
		'ignoreCharRefs' => true,
		'ignoreNulls' => true,
		'skipPreprocess' => true,
	];
	$tokenizer = new Tokenizer( new Dispatcher( $treeBuilder ), $text, $tokenizerOptions );

	// Parse as the contents of an HTML <body> element.
	$tokenizer->execute( [
		'fragmentNamespace' => HTMLData::NS_HTML,
		'fragmentName' => 'body'
	] );

	// The result is read from the serializer itself, not from any wrapper.
	return $serializer->getResult();
}
90}
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
A class for passing options to services.
assertRequiredOptions(array $expectedKeys)
Assert that the list of options provided in this instance exactly match $expectedKeys,...
A class containing constants representing the names of configuration variables.
const TidyConfig
Name constant for the TidyConfig setting, for use with Config::get()
const ParserEnableLegacyMediaDOM
Name constant for the ParserEnableLegacyMediaDOM setting, for use with Config::get()
__construct(ServiceOptions $options)
tidy( $text, ?callable $textProcessor=null)
Clean up HTML. Returns string: the corrected HTML output.
Base class for HTML cleanup utilities.