MediaWiki  master
RemexDriver.php
Go to the documentation of this file.
1 <?php
2 
3 namespace MediaWiki\Tidy;
4 
6 use Wikimedia\RemexHtml\HTMLData;
7 use Wikimedia\RemexHtml\Serializer\Serializer;
8 use Wikimedia\RemexHtml\Serializer\SerializerWithTracer;
9 use Wikimedia\RemexHtml\Tokenizer\Tokenizer;
10 use Wikimedia\RemexHtml\TreeBuilder\Dispatcher;
11 use Wikimedia\RemexHtml\TreeBuilder\TreeBuilder;
12 use Wikimedia\RemexHtml\TreeBuilder\TreeMutationTracer;
13 
/**
 * Tidy driver that cleans up an HTML fragment by feeding it through a
 * RemexHtml pipeline: Tokenizer -> Dispatcher -> TreeBuilder ->
 * (optional TreeMutationTracer) -> (optional RemexCompatMunger) ->
 * Serializer backed by a RemexCompatFormatter.
 */
class RemexDriver extends TidyDriverBase {
	/** @var bool When truthy, tree builder events are routed through a TreeMutationTracer */
	private $treeMutationTrace;
	/** @var bool When truthy, a SerializerWithTracer is used instead of a plain Serializer */
	private $serializerTrace;
	/** @var bool Passed as the second constructor argument of RemexCompatMunger */
	private $mungerTrace;
	/** @var bool When truthy, a RemexCompatMunger is placed in front of the serializer */
	private $pwrap;

	/**
	 * Option names checked via assertRequiredOptions() when the constructor
	 * receives an options object rather than an array.
	 */
	public const CONSTRUCTOR_OPTIONS = [
		'TidyConfig',
	];

	/**
	 * @param \MediaWiki\Config\ServiceOptions|array $options Either an options
	 *   object providing 'TidyConfig', or (deprecated since 1.36) the tidy
	 *   configuration array itself.
	 */
	public function __construct( $options ) {
		if ( is_array( $options ) ) {
			wfDeprecated( __METHOD__ . " with array argument", '1.36' );
			$config = $options;
		} else {
			$options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
			$config = $options->get( 'TidyConfig' );
		}
		// Array union: only keys absent from the supplied config get these defaults.
		$config += [
			'treeMutationTrace' => false,
			'serializerTrace' => false,
			'mungerTrace' => false,
			'pwrap' => true
		];
		$this->treeMutationTrace = $config['treeMutationTrace'];
		$this->serializerTrace = $config['serializerTrace'];
		$this->mungerTrace = $config['mungerTrace'];
		$this->pwrap = $config['pwrap'];
		// The parent receives the configuration with defaults already merged in.
		parent::__construct( $config );
	}

	/**
	 * Clean up HTML.
	 *
	 * @param string $text HTML document fragment to clean up
	 * @param callable|null $textProcessor Handed to RemexCompatFormatter as its
	 *   'textProcessor' option
	 * @return string Whatever the serializer reports via getResult()
	 *   (NOTE(review): assumed to be the serialized HTML string; confirm
	 *   against RemexHtml's Serializer)
	 */
	public function tidy( $text, ?callable $textProcessor = null ) {
		// Shared sink for both optional tracers; writes to the debug log.
		$traceCallback = static function ( $msg ) {
			wfDebug( "RemexHtml: $msg" );
		};
		$formatter = new RemexCompatFormatter( [ 'textProcessor' => $textProcessor ] );
		if ( $this->serializerTrace ) {
			$serializer = new SerializerWithTracer( $formatter, null, $traceCallback );
		} else {
			$serializer = new Serializer( $formatter );
		}
		// Each optional stage wraps the previous handler; when a stage is
		// disabled the previous handler is passed through unchanged.
		if ( $this->pwrap ) {
			$munger = new RemexCompatMunger( $serializer, $this->mungerTrace );
		} else {
			$munger = $serializer;
		}
		if ( $this->treeMutationTrace ) {
			$tracer = new TreeMutationTracer( $munger, $traceCallback );
		} else {
			$tracer = $munger;
		}
		$treeBuilder = new TreeBuilder( $tracer, [
			'ignoreErrors' => true,
			'ignoreNulls' => true,
		] );
		$dispatcher = new Dispatcher( $treeBuilder );
		$tokenizer = new Tokenizer( $dispatcher, $text, [
			'ignoreErrors' => true,
			'ignoreCharRefs' => true,
			'ignoreNulls' => true,
			'skipPreprocess' => true,
		] );

		// Tokenize $text as a fragment in the context of an HTML <body> element.
		$tokenizer->execute( [
			'fragmentNamespace' => HTMLData::NS_HTML,
			'fragmentName' => 'body'
		] );
		// The output is read from the serializer itself, regardless of which
		// wrappers were placed in front of it.
		return $serializer->getResult();
	}
}
MediaWiki\Tidy\RemexCompatMunger
Definition: RemexCompatMunger.php:17
true
return true
Definition: router.php:90
MediaWiki\Tidy\RemexDriver\$treeMutationTrace
$treeMutationTrace
Definition: RemexDriver.php:15
MediaWiki\Tidy\RemexCompatFormatter
Definition: RemexCompatFormatter.php:13
MediaWiki\Tidy\RemexDriver\tidy
tidy( $text, ?callable $textProcessor=null)
Clean up HTML. $text: HTML document fragment to clean up. $textProcessor: A callback to run on the contents of ...
Definition: RemexDriver.php:50
MediaWiki\Tidy\RemexDriver\$pwrap
$pwrap
Definition: RemexDriver.php:18
MediaWiki\Tidy\RemexDriver
Definition: RemexDriver.php:14
MediaWiki\Config\ServiceOptions
A class for passing options to services.
Definition: ServiceOptions.php:27
wfDeprecated
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Logs a warning that a deprecated feature was used.
Definition: GlobalFunctions.php:997
MediaWiki\Tidy\RemexDriver\$serializerTrace
$serializerTrace
Definition: RemexDriver.php:16
MediaWiki\Tidy\RemexDriver\$mungerTrace
$mungerTrace
Definition: RemexDriver.php:17
wfDebug
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Definition: GlobalFunctions.php:894
MediaWiki\Tidy\TidyDriverBase
Base class for HTML cleanup utilities.
Definition: TidyDriverBase.php:8
MediaWiki\Tidy\RemexDriver\__construct
__construct( $options)
Definition: RemexDriver.php:28
MediaWiki\Tidy\RemexDriver\CONSTRUCTOR_OPTIONS
const CONSTRUCTOR_OPTIONS
Definition: RemexDriver.php:21
MediaWiki\Tidy\TidyDriverBase\$config
$config
Definition: TidyDriverBase.php:9
MediaWiki\Tidy
Definition: RemexCompatFormatter.php:3