MediaWiki master
benchmarkSanitizer.php
Go to the documentation of this file.
1<?php
25
26// @codeCoverageIgnoreStart
27require_once __DIR__ . '/../includes/Benchmarker.php';
28// @codeCoverageIgnoreEnd
29
/**
 * Set the script description and declare its single `--method` option.
 */
public function __construct() {
	parent::__construct();

	// Help text for --method; lists every benchmark group execute() knows about.
	$methodHelp = 'One of "validateEmail", "encodeAttribute", "safeEncodeAttribute", '
		. '"internalRemoveHtmlTags", "removeSomeTags", "tidy", or "stripAllTags". '
		. 'Default: (All)';

	$this->addDescription( 'Benchmark for Sanitizer methods.' );
	// Not required (third argument) and takes a value (fourth argument).
	$this->addOption( 'method', $methodHelp, false, true );
}
43
/**
 * Register the benchmarks selected by the `--method` option and run them.
 *
 * When `--method` is absent (or otherwise falsy) every group is registered;
 * otherwise only the group whose name equals the option value is. A value
 * that names no group is reported through fatalError() instead of silently
 * running an empty benchmark set.
 */
public function execute() {
	// Group names accepted by --method, in the order the groups are registered below.
	$knownMethods = [
		'validateEmail',
		'encodeAttribute',
		'safeEncodeAttribute',
		'internalRemoveHtmlTags',
		'tidy',
		'removeSomeTags',
		'stripAllTags',
	];

	$method = $this->getOption( 'method' );
	if ( $method && !in_array( $method, $knownMethods, true ) ) {
		// Without this check a misspelled name matches no branch and nothing is benchmarked.
		$this->fatalError(
			"Unknown method \"$method\". Expected one of: " . implode( ', ', $knownMethods ) . '.'
		);
	}

	# text with no html simulates an interface message string or a title
	$textWithNoHtml = 'This could be an article title';
	$textWithHtmlSm = 'Before <wrap><in>and</in> another <unclose> <in>word</in></wrap>.';
	$textWithHtmlLg = str_repeat(
		// 28K (28 chars * 1000)
		wfRandomString( 3 ) . ' <tag>' . wfRandomString( 5 ) . '</tag> ' . wfRandomString( 7 ),
		1000
	);

	// Input-size labels used in the benchmark names; computed once and shared by all groups.
	$tiny = strlen( $textWithNoHtml );
	$sm = strlen( $textWithHtmlSm );
	$lg = round( strlen( $textWithHtmlLg ) / 1000 ) . 'K';

	$benches = [];

	if ( !$method || $method === 'validateEmail' ) {
		$benches['Sanitizer::validateEmail (valid)'] = static function () {
			Sanitizer::validateEmail( 'user@example.org' );
		};
		$benches['Sanitizer::validateEmail (invalid)'] = static function () {
			Sanitizer::validateEmail( 'username@example! org' );
		};
	}
	if ( !$method || $method === 'encodeAttribute' ) {
		$benches['Sanitizer::encodeAttribute (simple)'] = static function () {
			Sanitizer::encodeAttribute( 'simple' );
		};
		$benches['Sanitizer::encodeAttribute (special)'] = static function () {
			Sanitizer::encodeAttribute( ":'\"\n https://example" );
		};
	}
	if ( !$method || $method === 'safeEncodeAttribute' ) {
		$benches['Sanitizer::safeEncodeAttribute (simple)'] = static function () {
			Sanitizer::safeEncodeAttribute( 'simple' );
		};
		$benches['Sanitizer::safeEncodeAttribute (special)'] = static function () {
			Sanitizer::safeEncodeAttribute( ":'\"\n https://example" );
		};
	}
	if ( !$method || $method === 'internalRemoveHtmlTags' ) {
		$benches["Sanitizer::internalRemoveHtmlTags (input: $tiny)"] = static function () use ( $textWithNoHtml ) {
			Sanitizer::internalRemoveHtmlTags( $textWithNoHtml );
		};
		$benches["Sanitizer::internalRemoveHtmlTags (input: $sm)"] = static function () use ( $textWithHtmlSm ) {
			Sanitizer::internalRemoveHtmlTags( $textWithHtmlSm );
		};
		$benches["Sanitizer::internalRemoveHtmlTags (input: $lg)"] = static function () use ( $textWithHtmlLg ) {
			Sanitizer::internalRemoveHtmlTags( $textWithHtmlLg );
		};
	}
	if ( !$method || $method === 'tidy' ) {
		# This matches what DISPLAYTITLE was previously doing to sanitize
		# title strings
		// Factory: returns a benchmark closure bound to one input text. The
		// RemexDriver is built inside the returned closure, so its construction
		// is part of what each run measures.
		$doit = static function ( $text ) {
			return static function () use ( $text ) {
				$tidy = new \MediaWiki\Tidy\RemexDriver(
					new \MediaWiki\Config\ServiceOptions( [ MainConfigNames::TidyConfig ], [
						MainConfigNames::TidyConfig => [ 'pwrap' => false ],
					] ) );
				$textWithTags = $tidy->tidy( $text, [ Sanitizer::class, 'armorFrenchSpaces' ] );
				// Result intentionally discarded; only the work is being timed.
				Sanitizer::normalizeCharReferences(
					Sanitizer::internalRemoveHtmlTags( $textWithTags )
				);
			};
		};
		$benches["DISPLAYTITLE tidy (input: $tiny)"] = $doit( $textWithNoHtml );
		$benches["DISPLAYTITLE tidy (input: $sm)"] = $doit( $textWithHtmlSm );
		$benches["DISPLAYTITLE tidy (input: $lg)"] = $doit( $textWithHtmlLg );
	}
	if ( !$method || $method === 'removeSomeTags' ) {
		$benches["Sanitizer::removeSomeTags (input: $tiny)"] = static function () use ( $textWithNoHtml ) {
			Sanitizer::removeSomeTags( $textWithNoHtml );
		};
		$benches["Sanitizer::removeSomeTags (input: $sm)"] = static function () use ( $textWithHtmlSm ) {
			Sanitizer::removeSomeTags( $textWithHtmlSm );
		};
		$benches["Sanitizer::removeSomeTags (input: $lg)"] = static function () use ( $textWithHtmlLg ) {
			Sanitizer::removeSomeTags( $textWithHtmlLg );
		};
	}
	if ( !$method || $method === 'stripAllTags' ) {
		// Only the two HTML-bearing inputs are benchmarked for this group.
		$benches["Sanitizer::stripAllTags (input: $sm)"] = static function () use ( $textWithHtmlSm ) {
			Sanitizer::stripAllTags( $textWithHtmlSm );
		};
		$benches["Sanitizer::stripAllTags (input: $lg)"] = static function () use ( $textWithHtmlLg ) {
			Sanitizer::stripAllTags( $textWithHtmlLg );
		};
	}

	$this->bench( $benches );
}
144}
145
146// @codeCoverageIgnoreStart
// Name this file's maintenance class, then include the file that the
// RUN_MAINTENANCE_IF_MAIN constant points to. NOTE(review): presumably that
// file runs $maintClass when this script is the entry point; confirm against
// the Maintenance documentation.
147$maintClass = BenchmarkSanitizer::class;
148require_once RUN_MAINTENANCE_IF_MAIN;
149// @codeCoverageIgnoreEnd
wfRandomString( $length=32)
Get a random string containing a number of pseudo-random hex characters.
Maintenance script that benchmarks Sanitizer methods.
__construct()
Default constructor.
execute()
Do the actual work.
A class containing constants representing the names of configuration variables.
Base class for benchmark scripts.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
addDescription( $text)
Set the description text.
HTML sanitizer for MediaWiki.
Definition Sanitizer.php:46
A helper class for throttling authentication attempts.