MediaWiki  master
benchmarkSanitizer.php
Go to the documentation of this file.
1 <?php
24 
25 require_once __DIR__ . '/../includes/Benchmarker.php';
26 
/**
 * Set up the script: description text plus the optional "method" selector.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( 'Benchmark for Sanitizer methods.' );

	// Help text for the option, listing every accepted benchmark name.
	$methodHelp = 'One of "validateEmail", "encodeAttribute", '
		. '"safeEncodeAttribute", "internalRemoveHtmlTags", "removeSomeTags", "tidy", or "stripAllTags". '
		. 'Default: (All)';

	// The option is not required, but takes an argument when given.
	$isRequired = false;
	$takesArgument = true;
	$this->addOption( 'method', $methodHelp, $isRequired, $takesArgument );
}
40 
/**
 * Register the Sanitizer benchmarks selected by the "method" option and run them.
 *
 * When the option is absent (or falsy), every benchmark group is registered.
 * When it names one group, only that group is registered. An unrecognised
 * name aborts with an error listing the valid names, instead of silently
 * benchmarking nothing.
 */
public function execute() {
	# Text with no HTML simulates an interface message string or a title.
	$textWithNoHtml = 'This could be an article title';
	$textWithHtmlSm = 'Before <wrap><in>and</in> another <unclose> <in>word</in></wrap>.';
	$textWithHtmlLg = str_repeat(
		// 28K (28 chars * 1000)
		wfRandomString( 3 ) . ' <tag>' . wfRandomString( 5 ) . '</tag> ' . wfRandomString( 7 ),
		1000
	);

	// Input-size labels used in the benchmark names; computed once and
	// shared by every group below.
	$tiny = strlen( $textWithNoHtml );
	$sm = strlen( $textWithHtmlSm );
	$lg = round( strlen( $textWithHtmlLg ) / 1000 ) . 'K';

	$method = $this->getOption( 'method' );
	// A missing or falsy option value selects every group.
	$runAll = !$method;

	// Names accepted by the "method" option, in registration order.
	$knownMethods = [
		'validateEmail',
		'encodeAttribute',
		'safeEncodeAttribute',
		'internalRemoveHtmlTags',
		'tidy',
		'removeSomeTags',
		'stripAllTags',
	];
	if ( !$runAll && !in_array( $method, $knownMethods, true ) ) {
		// A typo would otherwise match no group and run an empty benchmark set.
		$this->fatalError(
			"Unknown method \"$method\". Expected one of: " . implode( ', ', $knownMethods )
		);
	}

	$benches = [];

	if ( $runAll || $method === 'validateEmail' ) {
		$benches['Sanitizer::validateEmail (valid)'] = static function () {
			Sanitizer::validateEmail( 'user@example.org' );
		};
		$benches['Sanitizer::validateEmail (invalid)'] = static function () {
			Sanitizer::validateEmail( 'username@example! org' );
		};
	}
	if ( $runAll || $method === 'encodeAttribute' ) {
		$benches['Sanitizer::encodeAttribute (simple)'] = static function () {
			Sanitizer::encodeAttribute( 'simple' );
		};
		$benches['Sanitizer::encodeAttribute (special)'] = static function () {
			Sanitizer::encodeAttribute( ":'\"\n https://example" );
		};
	}
	if ( $runAll || $method === 'safeEncodeAttribute' ) {
		$benches['Sanitizer::safeEncodeAttribute (simple)'] = static function () {
			Sanitizer::safeEncodeAttribute( 'simple' );
		};
		$benches['Sanitizer::safeEncodeAttribute (special)'] = static function () {
			Sanitizer::safeEncodeAttribute( ":'\"\n https://example" );
		};
	}
	if ( $runAll || $method === 'internalRemoveHtmlTags' ) {
		$benches["Sanitizer::internalRemoveHtmlTags (input: $tiny)"] = static function () use ( $textWithNoHtml ) {
			Sanitizer::internalRemoveHtmlTags( $textWithNoHtml );
		};
		$benches["Sanitizer::internalRemoveHtmlTags (input: $sm)"] = static function () use ( $textWithHtmlSm ) {
			Sanitizer::internalRemoveHtmlTags( $textWithHtmlSm );
		};
		$benches["Sanitizer::internalRemoveHtmlTags (input: $lg)"] = static function () use ( $textWithHtmlLg ) {
			Sanitizer::internalRemoveHtmlTags( $textWithHtmlLg );
		};
	}
	if ( $runAll || $method === 'tidy' ) {
		# This matches what DISPLAYTITLE was previously doing to sanitize
		# title strings
		$doit = static function ( $text ) {
			return static function () use ( $text ) {
				$tidy = new \MediaWiki\Tidy\RemexDriver(
					new \MediaWiki\Config\ServiceOptions( [ MainConfigNames::TidyConfig ], [
						MainConfigNames::TidyConfig => [ 'pwrap' => false ],
					] ) );
				$textWithTags = $tidy->tidy( $text, [ Sanitizer::class, 'armorFrenchSpaces' ] );
				// Only the work is measured; the final result is discarded.
				Sanitizer::normalizeCharReferences(
					Sanitizer::internalRemoveHtmlTags( $textWithTags )
				);
			};
		};
		$benches["DISPLAYTITLE tidy (input: $tiny)"] = $doit( $textWithNoHtml );
		$benches["DISPLAYTITLE tidy (input: $sm)"] = $doit( $textWithHtmlSm );
		$benches["DISPLAYTITLE tidy (input: $lg)"] = $doit( $textWithHtmlLg );
	}
	if ( $runAll || $method === 'removeSomeTags' ) {
		$benches["Sanitizer::removeSomeTags (input: $tiny)"] = static function () use ( $textWithNoHtml ) {
			Sanitizer::removeSomeTags( $textWithNoHtml );
		};
		$benches["Sanitizer::removeSomeTags (input: $sm)"] = static function () use ( $textWithHtmlSm ) {
			Sanitizer::removeSomeTags( $textWithHtmlSm );
		};
		$benches["Sanitizer::removeSomeTags (input: $lg)"] = static function () use ( $textWithHtmlLg ) {
			Sanitizer::removeSomeTags( $textWithHtmlLg );
		};
	}
	if ( $runAll || $method === 'stripAllTags' ) {
		$benches["Sanitizer::stripAllTags (input: $sm)"] = static function () use ( $textWithHtmlSm ) {
			Sanitizer::stripAllTags( $textWithHtmlSm );
		};
		$benches["Sanitizer::stripAllTags (input: $lg)"] = static function () use ( $textWithHtmlLg ) {
			Sanitizer::stripAllTags( $textWithHtmlLg );
		};
	}

	$this->bench( $benches );
}
141 }
142 
// Name this script's class in $maintClass, then load the file named by
// RUN_MAINTENANCE_IF_MAIN.
// NOTE(review): presumably that file runs the class named in $maintClass
// when this script is the entry point — confirm against the maintenance
// bootstrap.
143 $maintClass = BenchmarkSanitizer::class;
144 require_once RUN_MAINTENANCE_IF_MAIN;
wfRandomString( $length=32)
Get a random string containing a number of pseudo-random hex characters.
Maintenance script that benchmarks Sanitizer methods.
__construct()
Default constructor.
execute()
Do the actual work.
Base class for benchmark scripts.
Definition: Benchmarker.php:40
bench(array $benchs)
Definition: Benchmarker.php:49
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
A class containing constants representing the names of configuration variables.
HTML sanitizer for MediaWiki.
Definition: Sanitizer.php:46
The MediaWiki class is the helper class for the index.php entry point.
Definition: MediaWiki.php:50