MediaWiki  master
benchmarkSanitizer.php
Go to the documentation of this file.
1 <?php
23 
24 require_once __DIR__ . '/../includes/Benchmarker.php';
25 
/**
 * Register the script description and its single optional 'method' option.
 *
 * The 'method' option takes a value and is not required; execute() reads it
 * to decide which benchmark group(s) to run.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( 'Benchmark for Sanitizer methods.' );

	// Help text enumerating every value execute() recognises for --method.
	$methodHelp = 'One of "validateEmail", "encodeAttribute", "safeEncodeAttribute", '
		. '"internalRemoveHtmlTags", "removeSomeTags", "tidy", or "stripAllTags". '
		. 'Default: (All)';

	// Positional flags: not required (false), takes an argument (true).
	$this->addOption( 'method', $methodHelp, false, true );
}
39 
/**
 * Build the set of Sanitizer benchmarks and run them.
 *
 * Every benchmark group is registered unless the 'method' option names a
 * single group, in which case only that group is registered. The collected
 * closures, keyed by their display label, are then handed to bench().
 */
public function execute() {
	# text with no html simulates an interface message string or a title
	$textWithNoHtml = 'This could be an article title';
	$textWithHtmlSm = 'Before <wrap><in>and</in> another <unclose> <in>word</in></wrap>.';
	$textWithHtmlLg = str_repeat(
		// 28K (28 chars * 1000)
		wfRandomString( 3 ) . ' <tag>' . wfRandomString( 5 ) . '</tag> ' . wfRandomString( 7 ),
		1000
	);

	// Input-size labels shared by every group that reports "(input: N)".
	$tiny = strlen( $textWithNoHtml );
	$sm = strlen( $textWithHtmlSm );
	$lg = round( strlen( $textWithHtmlLg ) / 1000 ) . 'K';

	$method = $this->getOption( 'method' );
	$benches = [];

	if ( !$method || $method === 'validateEmail' ) {
		$benches['Sanitizer::validateEmail (valid)'] = static function () {
			Sanitizer::validateEmail( 'user@example.org' );
		};
		$benches['Sanitizer::validateEmail (invalid)'] = static function () {
			Sanitizer::validateEmail( 'username@example! org' );
		};
	}
	if ( !$method || $method === 'encodeAttribute' ) {
		$benches['Sanitizer::encodeAttribute (simple)'] = static function () {
			Sanitizer::encodeAttribute( 'simple' );
		};
		$benches['Sanitizer::encodeAttribute (special)'] = static function () {
			Sanitizer::encodeAttribute( ":'\"\n https://example" );
		};
	}
	if ( !$method || $method === 'safeEncodeAttribute' ) {
		$benches['Sanitizer::safeEncodeAttribute (simple)'] = static function () {
			// Without this call the "simple" case would time an empty closure.
			Sanitizer::safeEncodeAttribute( 'simple' );
		};
		$benches['Sanitizer::safeEncodeAttribute (special)'] = static function () {
			Sanitizer::safeEncodeAttribute( ":'\"\n https://example" );
		};
	}
	if ( !$method || $method === 'internalRemoveHtmlTags' ) {
		$benches["Sanitizer::internalRemoveHtmlTags (input: $tiny)"] = static function () use ( $textWithNoHtml ) {
			Sanitizer::internalRemoveHtmlTags( $textWithNoHtml );
		};
		$benches["Sanitizer::internalRemoveHtmlTags (input: $sm)"] = static function () use ( $textWithHtmlSm ) {
			Sanitizer::internalRemoveHtmlTags( $textWithHtmlSm );
		};
		$benches["Sanitizer::internalRemoveHtmlTags (input: $lg)"] = static function () use ( $textWithHtmlLg ) {
			Sanitizer::internalRemoveHtmlTags( $textWithHtmlLg );
		};
	}
	if ( !$method || $method === 'tidy' ) {
		# This matches what DISPLAYTITLE was previously doing to sanitize
		# title strings
		$doit = static function ( $text ) {
			// The driver is built inside the timed closure, so its
			// construction is part of what each iteration measures.
			return static function () use ( $text ) {
				$tidy = new \MediaWiki\Tidy\RemexDriver(
					new \MediaWiki\Config\ServiceOptions( [ MainConfigNames::TidyConfig ], [
						MainConfigNames::TidyConfig => [ 'pwrap' => false ],
					] ) );
				$textWithTags = $tidy->tidy( $text, [ Sanitizer::class, 'armorFrenchSpaces' ] );
				// Result is intentionally discarded; only the work is timed.
				Sanitizer::normalizeCharReferences(
					Sanitizer::internalRemoveHtmlTags( $textWithTags )
				);
			};
		};
		$benches["DISPLAYTITLE tidy (input: $tiny)"] = $doit( $textWithNoHtml );
		$benches["DISPLAYTITLE tidy (input: $sm)"] = $doit( $textWithHtmlSm );
		$benches["DISPLAYTITLE tidy (input: $lg)"] = $doit( $textWithHtmlLg );
	}
	if ( !$method || $method === 'removeSomeTags' ) {
		$benches["Sanitizer::removeSomeTags (input: $tiny)"] = static function () use ( $textWithNoHtml ) {
			Sanitizer::removeSomeTags( $textWithNoHtml );
		};
		$benches["Sanitizer::removeSomeTags (input: $sm)"] = static function () use ( $textWithHtmlSm ) {
			Sanitizer::removeSomeTags( $textWithHtmlSm );
		};
		$benches["Sanitizer::removeSomeTags (input: $lg)"] = static function () use ( $textWithHtmlLg ) {
			Sanitizer::removeSomeTags( $textWithHtmlLg );
		};
	}
	if ( !$method || $method === 'stripAllTags' ) {
		// Only the two HTML-bearing inputs are benchmarked for this group.
		$benches["Sanitizer::stripAllTags (input: $sm)"] = static function () use ( $textWithHtmlSm ) {
			Sanitizer::stripAllTags( $textWithHtmlSm );
		};
		$benches["Sanitizer::stripAllTags (input: $lg)"] = static function () use ( $textWithHtmlLg ) {
			Sanitizer::stripAllTags( $textWithHtmlLg );
		};
	}

	$this->bench( $benches );
}
140 }
141 
142 $maintClass = BenchmarkSanitizer::class;
143 require_once RUN_MAINTENANCE_IF_MAIN;
wfRandomString( $length=32)
Get a random string containing a number of pseudo-random hex characters.
Maintenance script that benchmarks Sanitizer methods.
__construct()
Default constructor.
execute()
Do the actual work.
Base class for benchmark scripts.
Definition: Benchmarker.php:40
bench(array $benchs)
Definition: Benchmarker.php:49
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
A class containing constants representing the names of configuration variables.
The MediaWiki class is the helper class for the index.php entry point.
Definition: MediaWiki.php:43
static encodeAttribute( $text)
Encode an attribute value for HTML output.
Definition: Sanitizer.php:858
static removeSomeTags(string $text, array $options=[])
Cleans up HTML, removes dangerous tags and attributes, and removes HTML comments; the result will alw...
Definition: Sanitizer.php:392
static internalRemoveHtmlTags( $text, $processCallback=null, $args=[], $extratags=[], $removetags=[])
Cleans up HTML, removes dangerous tags and attributes, and removes HTML comments; BEWARE there may be...
Definition: Sanitizer.php:317
static normalizeCharReferences( $text)
Ensure that any entities and character references are legal for XML and XHTML specifically.
Definition: Sanitizer.php:1258
static stripAllTags( $html)
Take a fragment of (potentially invalid) HTML and return a version with any tags removed,...
Definition: Sanitizer.php:1725
static validateEmail( $addr)
Does a string look like an e-mail address?
Definition: Sanitizer.php:1893
static safeEncodeAttribute( $text)
Encode an attribute value for HTML tags, with extra armoring against further wiki processing.
Definition: Sanitizer.php:900
Interface for configuration instances.
Definition: Config.php:30