MediaWiki  master
bench_utf8_title_check.php
Go to the documentation of this file.
1 <?php
24 require_once __DIR__ . '/../includes/Benchmarker.php';
25 
/**
 * Sample titles to benchmark against, stored URL-encoded.
 * execute() runs each entry through rawurldecode() before use.
 *
 * @var string[]
 */
33  private $data;
34 
/**
 * Result of the most recent check: whatever preg_match() or
 * mb_check_encoding() returned in one of the use_*() methods.
 */
35  private $isutf8;
36 
/**
 * Register the script description and load the sample titles.
 *
 * Samples are kept URL-encoded here; execute() decodes them with
 * rawurldecode() before passing them to the benchmarked methods.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription(
		"Benchmark for using a regexp vs. mb_check_encoding " .
		"to check for UTF-8 encoding."
	);

	// phpcs:disable Generic.Files.LineLength
	// This comes from T38839
	$longTitleList = "Acteur%7CAlbert%20Robbins%7CAnglais%7CAnn%20Donahue%7CAnthony%20E.%20Zuiker%7CCarol%20Mendelsohn%7C"
		. "Catherine%20Willows%7CDavid%20Hodges%7CDavid%20Phillips%7CGil%20Grissom%7CGreg%20Sanders%7CHodges%7C"
		. "Internet%20Movie%20Database%7CJim%20Brass%7CLady%20Heather%7C"
		. "Les%20Experts%20(s%C3%A9rie%20t%C3%A9l%C3%A9vis%C3%A9e)%7CLes%20Experts%20:%20Manhattan%7C"
		. "Les%20Experts%20:%20Miami%7CListe%20des%20personnages%20des%20Experts%7C"
		. "Liste%20des%20%C3%A9pisodes%20des%20Experts%7CMod%C3%A8le%20discussion:Palette%20Les%20Experts%7C"
		. "Nick%20Stokes%7CPersonnage%20de%20fiction%7CPersonnage%20fictif%7CPersonnage%20de%20fiction%7C"
		. "Personnages%20r%C3%A9currents%20dans%20Les%20Experts%7CRaymond%20Langston%7CRiley%20Adams%7C"
		. "Saison%201%20des%20Experts%7CSaison%2010%20des%20Experts%7CSaison%2011%20des%20Experts%7C"
		. "Saison%2012%20des%20Experts%7CSaison%202%20des%20Experts%7CSaison%203%20des%20Experts%7C"
		. "Saison%204%20des%20Experts%7CSaison%205%20des%20Experts%7CSaison%206%20des%20Experts%7C"
		. "Saison%207%20des%20Experts%7CSaison%208%20des%20Experts%7CSaison%209%20des%20Experts%7C"
		. "Sara%20Sidle%7CSofia%20Curtis%7CS%C3%A9rie%20t%C3%A9l%C3%A9vis%C3%A9e%7CWallace%20Langham%7C"
		. "Warrick%20Brown%7CWendy%20Simms%7C%C3%89tats-Unis";

	$this->data = [
		"",
		// 7bit ASCII
		"United States of America",
		"S%C3%A9rie%20t%C3%A9l%C3%A9vis%C3%A9e",
		"Acteur%7CAlbert%20Robbins%7CAnglais%7CAnn%20Donahue%7CAnthony%20E.%20Zuiker%7CCarol%20Mendelsohn",
		$longTitleList,
	];
	// phpcs:enable
}
68 
/**
 * Build the list of benchmarks and hand it to bench().
 *
 * Each sample in $this->data is URL-decoded and queued once per checking
 * method, in the order: capturing regexp, non-capturing regexp,
 * once-only regexp, mb_check_encoding.
 */
public function execute() {
	$checkers = [
		'use_regexp',
		'use_regexp_non_capturing',
		'use_regexp_once_only',
		'use_mb_check_encoding',
	];

	$queue = [];
	foreach ( $this->data as $encoded ) {
		// All four entries for a sample receive the same decoded text.
		$decoded = rawurldecode( $encoded );
		foreach ( $checkers as $method ) {
			$queue[] = [
				'function' => [ $this, $method ],
				'args' => [ $decoded ],
			];
		}
	}

	$this->bench( $queue );
}
91 
/**
 * Match $s against the UTF-8 byte-sequence regexp, using a plain
 * (capturing) group, and store the preg_match() result in $this->isutf8.
 *
 * @param string $s Text to check
 */
protected function use_regexp( $s ) {
	$pattern = '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
		'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/';

	$this->isutf8 = preg_match( $pattern, $s );
}
96 
/**
 * Variant of use_regexp() whose alternation sits in a non-capturing
 * (?:...) group. The preg_match() result is stored in $this->isutf8.
 *
 * @param string $s Text to check
 */
protected function use_regexp_non_capturing( $s ) {
	$pattern = '/^(?:[\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
		'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/';

	$this->isutf8 = preg_match( $pattern, $s );
}
102 
/**
 * Variant of use_regexp() whose alternation sits in a once-only
 * (?>...) group. The preg_match() result is stored in $this->isutf8.
 *
 * @param string $s Text to check
 */
protected function use_regexp_once_only( $s ) {
	$pattern = '/^(?>[\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
		'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/';

	$this->isutf8 = preg_match( $pattern, $s );
}
108 
/**
 * Check $s with mb_check_encoding() against 'UTF-8' and store the
 * result in $this->isutf8.
 *
 * @param string $s Text to check
 */
protected function use_mb_check_encoding( $s ) {
	$valid = mb_check_encoding( $s, 'UTF-8' );

	$this->isutf8 = $valid;
}
112 }
113 
114 $maintClass = BenchUtf8TitleCheck::class;
115 require_once RUN_MAINTENANCE_IF_MAIN;
RUN_MAINTENANCE_IF_MAIN
const RUN_MAINTENANCE_IF_MAIN
Definition: Maintenance.php:38
Maintenance\addDescription
addDescription( $text)
Set the description text.
Definition: Maintenance.php:323
BenchUtf8TitleCheck\$isutf8
$isutf8
Definition: bench_utf8_title_check.php:35
$s
$s
Definition: mergeMessageFileList.php:185
Benchmarker\bench
bench(array $benchs)
Definition: Benchmarker.php:51
BenchUtf8TitleCheck\use_regexp
use_regexp( $s)
Definition: bench_utf8_title_check.php:92
$maintClass
$maintClass
Definition: bench_utf8_title_check.php:114
BenchUtf8TitleCheck\use_regexp_non_capturing
use_regexp_non_capturing( $s)
Definition: bench_utf8_title_check.php:97
BenchUtf8TitleCheck\$data
$data
Definition: bench_utf8_title_check.php:33
BenchUtf8TitleCheck\__construct
__construct()
Default constructor.
Definition: bench_utf8_title_check.php:37
BenchUtf8TitleCheck\execute
execute()
Do the actual work.
Definition: bench_utf8_title_check.php:69
Benchmarker
Base class for benchmark scripts.
Definition: Benchmarker.php:41
BenchUtf8TitleCheck\use_regexp_once_only
use_regexp_once_only( $s)
Definition: bench_utf8_title_check.php:103
BenchUtf8TitleCheck
This little benchmark executes the regexp formerly used in Language->checkTitleEncoding() and compares it against mb_check_encoding() for checking UTF-8 encoding.
Definition: bench_utf8_title_check.php:32
BenchUtf8TitleCheck\use_mb_check_encoding
use_mb_check_encoding( $s)
Definition: bench_utf8_title_check.php:109