MediaWiki  master
RemexDriver.php
Go to the documentation of this file.
1 <?php
2 
3 namespace MediaWiki\Tidy;
4 
7 use Wikimedia\RemexHtml\HTMLData;
8 use Wikimedia\RemexHtml\Serializer\Serializer;
9 use Wikimedia\RemexHtml\Serializer\SerializerWithTracer;
10 use Wikimedia\RemexHtml\Tokenizer\Tokenizer;
11 use Wikimedia\RemexHtml\TreeBuilder\Dispatcher;
12 use Wikimedia\RemexHtml\TreeBuilder\TreeBuilder;
13 use Wikimedia\RemexHtml\TreeBuilder\TreeMutationTracer;
14 
/**
 * Tidy driver that cleans up HTML fragments with the RemexHtml library.
 *
 * NOTE(review): ServiceOptions and MainConfigNames are referenced below but are
 * not among the visible `use` statements of this file — confirm they are imported.
 */
class RemexDriver extends TidyDriverBase {
	/** @var bool Whether tree mutation events are logged through a TreeMutationTracer */
	private $treeMutationTrace;
	/** @var bool Whether serializer events are logged through a SerializerWithTracer */
	private $serializerTrace;
	/** @var bool Trace flag handed to RemexCompatMunger */
	private $mungerTrace;
	/** @var bool Whether RemexCompatMunger is inserted in front of the serializer */
	private $pwrap;
	/** Value of the ParserEnableLegacyMediaDOM option; selects the tree builder class in tidy() */
	private $enableLegacyMediaDOM;

	/**
	 * Names of the options that must be present in the ServiceOptions
	 * instance passed to the constructor. These are exactly the options the
	 * constructor reads.
	 */
	public const CONSTRUCTOR_OPTIONS = [
		MainConfigNames::TidyConfig,
		MainConfigNames::ParserEnableLegacyMediaDOM,
	];

	/**
	 * @param ServiceOptions $options Must provide the options listed in
	 *   self::CONSTRUCTOR_OPTIONS.
	 */
	public function __construct( ServiceOptions $options ) {
		$options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
		// $config has to be fetched before the defaults are merged in; using
		// "+=" on an unassigned variable would fail.
		$config = $options->get( MainConfigNames::TidyConfig );
		$this->enableLegacyMediaDOM = $options->get( MainConfigNames::ParserEnableLegacyMediaDOM );
		// Array union: keys already present in $config win over these defaults.
		$config += [
			'treeMutationTrace' => false,
			'serializerTrace' => false,
			'mungerTrace' => false,
			'pwrap' => true
		];
		$this->treeMutationTrace = $config['treeMutationTrace'];
		$this->serializerTrace = $config['serializerTrace'];
		$this->mungerTrace = $config['mungerTrace'];
		$this->pwrap = $config['pwrap'];
		parent::__construct( $config );
	}

	/**
	 * Clean up HTML.
	 *
	 * Builds the pipeline tokenizer -> dispatcher -> tree builder ->
	 * (optional mutation tracer) -> (optional munger) -> serializer, runs it
	 * over $text and returns what the serializer produced.
	 *
	 * @param string $text HTML document fragment to clean up
	 * @param callable|null $textProcessor Callback forwarded to
	 *   RemexCompatFormatter as its 'textProcessor' option
	 * @return string Result reported by the serializer
	 */
	public function tidy( $text, ?callable $textProcessor = null ) {
		$traceCallback = static function ( $msg ) {
			wfDebug( "RemexHtml: $msg" );
		};
		$formatter = new RemexCompatFormatter( [ 'textProcessor' => $textProcessor ] );
		if ( $this->serializerTrace ) {
			$serializer = new SerializerWithTracer( $formatter, null, $traceCallback );
		} else {
			$serializer = new Serializer( $formatter );
		}
		// Each optional stage wraps the previous one; when a stage is disabled
		// the previous handler is passed through unchanged.
		if ( $this->pwrap ) {
			$munger = new RemexCompatMunger( $serializer, $this->mungerTrace );
		} else {
			$munger = $serializer;
		}
		if ( $this->treeMutationTrace ) {
			$tracer = new TreeMutationTracer( $munger, $traceCallback );
		} else {
			$tracer = $munger;
		}
		// Legacy media DOM uses the stock TreeBuilder, otherwise RemexCompatBuilder.
		$treeBuilderClass = $this->enableLegacyMediaDOM ? TreeBuilder::class : RemexCompatBuilder::class;
		$treeBuilder = new $treeBuilderClass( $tracer, [
			'ignoreErrors' => true,
			'ignoreNulls' => true,
		] );
		$dispatcher = new Dispatcher( $treeBuilder );
		$tokenizer = new Tokenizer( $dispatcher, $text, [
			'ignoreErrors' => true,
			'ignoreCharRefs' => true,
			'ignoreNulls' => true,
			'skipPreprocess' => true,
		] );

		// Tokenize $text as a fragment whose context element is an HTML <body>.
		$tokenizer->execute( [
			'fragmentNamespace' => HTMLData::NS_HTML,
			'fragmentName' => 'body'
		] );
		// The result is read from the serializer itself, not from any wrapper
		// stage placed in front of it.
		return $serializer->getResult();
	}
}
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
A class for passing options to services.
assertRequiredOptions(array $expectedKeys)
Assert that the list of options provided in this instance exactly match $expectedKeys,...
A class containing constants representing the names of configuration variables.
const TidyConfig
Name constant for the TidyConfig setting, for use with Config::get()
const ParserEnableLegacyMediaDOM
Name constant for the ParserEnableLegacyMediaDOM setting, for use with Config::get()
__construct(ServiceOptions $options)
Definition: RemexDriver.php:31
tidy( $text, ?callable $textProcessor=null)
Clean up HTML. $text: HTML document fragment to clean up. $textProcessor: A callback to run on the contents of ...
Definition: RemexDriver.php:49
Base class for HTML cleanup utilities.
return true
Definition: router.php:90