MediaWiki master
benchmarkSanitizer.php
Go to the documentation of this file.
1<?php
24
25// @codeCoverageIgnoreStart
26require_once __DIR__ . '/../includes/Benchmarker.php';
27// @codeCoverageIgnoreEnd
28
/**
 * Register the script description and its optional --method selector.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( 'Benchmark for Sanitizer methods.' );

	// Help text for --method; it names every benchmark group execute() knows about.
	$methodHelp = 'One of "validateEmail", "encodeAttribute", "safeEncodeAttribute", '
		. '"internalRemoveHtmlTags", "removeSomeTags", "tidy", or "stripAllTags". '
		. 'Default: (All)';

	// Not required (false), but takes an argument when given (true).
	$this->addOption( 'method', $methodHelp, false, true );
}
42
/**
 * Register the Sanitizer benchmarks selected by --method and run them.
 *
 * When --method is absent (or empty) every benchmark group is registered;
 * otherwise only the named group is. An unrecognised group name aborts with
 * an error instead of silently benchmarking nothing.
 */
public function execute() {
	// Benchmark groups accepted by --method, matching the option's help text.
	$knownMethods = [
		'validateEmail',
		'encodeAttribute',
		'safeEncodeAttribute',
		'internalRemoveHtmlTags',
		'tidy',
		'removeSomeTags',
		'stripAllTags',
	];

	$method = $this->getOption( 'method' );
	if ( $method && !in_array( $method, $knownMethods, true ) ) {
		// Without this check a typo would match no group below and the
		// script would finish without measuring anything.
		$this->fatalError(
			"Unknown method \"$method\". Expected one of: " . implode( ', ', $knownMethods ) . '.'
		);
	}

	// A falsy option value selects every group; otherwise only an exact match.
	$selected = static function ( $name ) use ( $method ) {
		return !$method || $method === $name;
	};

	# text with no html simulates an interface message string or a title
	$textWithNoHtml = 'This could be an article title';
	$textWithHtmlSm = 'Before <wrap><in>and</in> another <unclose> <in>word</in></wrap>.';
	$textWithHtmlLg = str_repeat(
		// 28K (28 chars * 1000)
		wfRandomString( 3 ) . ' <tag>' . wfRandomString( 5 ) . '</tag> ' . wfRandomString( 7 ),
		1000
	);

	// Size labels used in benchmark names: byte counts for the short inputs,
	// a rounded "<n>K" figure for the large one.
	$tiny = strlen( $textWithNoHtml );
	$sm = strlen( $textWithHtmlSm );
	$lg = round( strlen( $textWithHtmlLg ) / 1000 ) . 'K';

	// [ label, text ] pairs, in the order the benchmarks are registered.
	$htmlInputs = [
		[ $sm, $textWithHtmlSm ],
		[ $lg, $textWithHtmlLg ],
	];
	$allInputs = array_merge( [ [ $tiny, $textWithNoHtml ] ], $htmlInputs );

	$benches = [];

	if ( $selected( 'validateEmail' ) ) {
		$benches['Sanitizer::validateEmail (valid)'] = static function () {
			Sanitizer::validateEmail( 'user@example.org' );
		};
		$benches['Sanitizer::validateEmail (invalid)'] = static function () {
			Sanitizer::validateEmail( 'username@example! org' );
		};
	}

	if ( $selected( 'encodeAttribute' ) ) {
		$benches['Sanitizer::encodeAttribute (simple)'] = static function () {
			Sanitizer::encodeAttribute( 'simple' );
		};
		$benches['Sanitizer::encodeAttribute (special)'] = static function () {
			Sanitizer::encodeAttribute( ":'\"\n https://example" );
		};
	}

	if ( $selected( 'safeEncodeAttribute' ) ) {
		$benches['Sanitizer::safeEncodeAttribute (simple)'] = static function () {
			Sanitizer::safeEncodeAttribute( 'simple' );
		};
		$benches['Sanitizer::safeEncodeAttribute (special)'] = static function () {
			Sanitizer::safeEncodeAttribute( ":'\"\n https://example" );
		};
	}

	if ( $selected( 'internalRemoveHtmlTags' ) ) {
		foreach ( $allInputs as [ $label, $text ] ) {
			$benches["Sanitizer::internalRemoveHtmlTags (input: $label)"] = static function () use ( $text ) {
				Sanitizer::internalRemoveHtmlTags( $text );
			};
		}
	}

	if ( $selected( 'tidy' ) ) {
		# This matches what DISPLAYTITLE was previously doing to sanitize
		# title strings
		foreach ( $allInputs as [ $label, $text ] ) {
			$benches["DISPLAYTITLE tidy (input: $label)"] = static function () use ( $text ) {
				// The driver is built inside the timed closure, so its
				// construction is part of what gets measured.
				$tidy = new \MediaWiki\Tidy\RemexDriver(
					new \MediaWiki\Config\ServiceOptions( [ MainConfigNames::TidyConfig ], [
						MainConfigNames::TidyConfig => [ 'pwrap' => false ],
					] ) );
				$textWithTags = $tidy->tidy( $text, [ Sanitizer::class, 'armorFrenchSpaces' ] );
				Sanitizer::normalizeCharReferences(
					Sanitizer::internalRemoveHtmlTags( $textWithTags )
				);
			};
		}
	}

	if ( $selected( 'removeSomeTags' ) ) {
		foreach ( $allInputs as [ $label, $text ] ) {
			$benches["Sanitizer::removeSomeTags (input: $label)"] = static function () use ( $text ) {
				Sanitizer::removeSomeTags( $text );
			};
		}
	}

	if ( $selected( 'stripAllTags' ) ) {
		// Only the two HTML-bearing inputs are benchmarked for this group.
		foreach ( $htmlInputs as [ $label, $text ] ) {
			$benches["Sanitizer::stripAllTags (input: $label)"] = static function () use ( $text ) {
				Sanitizer::stripAllTags( $text );
			};
		}
	}

	$this->bench( $benches );
}
143}
144
145// @codeCoverageIgnoreStart
146$maintClass = BenchmarkSanitizer::class;
147require_once RUN_MAINTENANCE_IF_MAIN;
148// @codeCoverageIgnoreEnd
wfRandomString( $length=32)
Get a random string containing a number of pseudo-random hex characters.
Maintenance script that benchmarks Sanitizer methods.
__construct()
Default constructor.
execute()
Do the actual work.
Base class for benchmark scripts.
bench(array $benchs)
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
A class containing constants representing the names of configuration variables.
HTML sanitizer for MediaWiki.
Definition Sanitizer.php:46
A helper class for throttling authentication attempts.