MediaWiki REL1_34
bench_utf8_title_check.php
Go to the documentation of this file.
1<?php
24require_once __DIR__ . '/Benchmarker.php';
25
// Sample titles (percent-encoded strings) assigned by the constructor and decoded in execute().
33 private $data;
34
// Result of the most recent check: whatever preg_match() or mb_check_encoding() returned.
35 private $isutf8;
36
/**
 * Set up the sample titles and the description text for this benchmark.
 *
 * The samples are stored percent-encoded; execute() decodes them before use.
 */
public function __construct() {
	parent::__construct();

	// phpcs:disable Generic.Files.LineLength
	// Very long percent-encoded, "%7C"-separated title list. This comes from T38839.
	$titlesFromT38839 = "Acteur%7CAlbert%20Robbins%7CAnglais%7CAnn%20Donahue%7CAnthony%20E.%20Zuiker%7CCarol%20Mendelsohn%7C"
		. "Catherine%20Willows%7CDavid%20Hodges%7CDavid%20Phillips%7CGil%20Grissom%7CGreg%20Sanders%7CHodges%7C"
		. "Internet%20Movie%20Database%7CJim%20Brass%7CLady%20Heather%7C"
		. "Les%20Experts%20(s%C3%A9rie%20t%C3%A9l%C3%A9vis%C3%A9e)%7CLes%20Experts%20:%20Manhattan%7C"
		. "Les%20Experts%20:%20Miami%7CListe%20des%20personnages%20des%20Experts%7C"
		. "Liste%20des%20%C3%A9pisodes%20des%20Experts%7CMod%C3%A8le%20discussion:Palette%20Les%20Experts%7C"
		. "Nick%20Stokes%7CPersonnage%20de%20fiction%7CPersonnage%20fictif%7CPersonnage%20de%20fiction%7C"
		. "Personnages%20r%C3%A9currents%20dans%20Les%20Experts%7CRaymond%20Langston%7CRiley%20Adams%7C"
		. "Saison%201%20des%20Experts%7CSaison%2010%20des%20Experts%7CSaison%2011%20des%20Experts%7C"
		. "Saison%2012%20des%20Experts%7CSaison%202%20des%20Experts%7CSaison%203%20des%20Experts%7C"
		. "Saison%204%20des%20Experts%7CSaison%205%20des%20Experts%7CSaison%206%20des%20Experts%7C"
		. "Saison%207%20des%20Experts%7CSaison%208%20des%20Experts%7CSaison%209%20des%20Experts%7C"
		. "Sara%20Sidle%7CSofia%20Curtis%7CS%C3%A9rie%20t%C3%A9l%C3%A9vis%C3%A9e%7CWallace%20Langham%7C"
		. "Warrick%20Brown%7CWendy%20Simms%7C%C3%89tats-Unis";

	// Ordered from the empty string up to the longest input.
	$samples = [
		"",
		"United States of America", // 7bit ASCII
		"S%C3%A9rie%20t%C3%A9l%C3%A9vis%C3%A9e",
		"Acteur%7CAlbert%20Robbins%7CAnglais%7CAnn%20Donahue%7CAnthony%20E.%20Zuiker%7CCarol%20Mendelsohn",
		$titlesFromT38839,
	];
	// phpcs:enable
	$this->data = $samples;

	$description = "Benchmark for using a regexp vs. mb_check_encoding " .
		"to check for UTF-8 encoding.";
	$this->addDescription( $description );
}
67
/**
 * Queue every checker method against every sample title, then run the benchmarks.
 *
 * For each sample the four checkers are queued in a fixed order and all
 * receive the same URL-decoded string as their single argument.
 */
public function execute() {
	// Checker methods, in the order they are queued for each sample.
	$checkers = [
		'use_regexp',
		'use_regexp_non_capturing',
		'use_regexp_once_only',
		'use_mb_check_encoding',
	];

	$benchmarks = [];
	foreach ( $this->data as $encodedTitle ) {
		// Samples are stored percent-encoded; the checkers work on the decoded bytes.
		$title = rawurldecode( $encodedTitle );
		foreach ( $checkers as $method ) {
			$benchmarks[] = [
				'function' => [ $this, $method ],
				'args' => [ $title ],
			];
		}
	}

	$this->bench( $benchmarks );
}
90
/**
 * Match $s against the byte-sequence regexp using a plain capturing group.
 *
 * The pattern accepts one or more units, each being a single byte 00-7f or a
 * lead byte (c0-df, e0-ef, f0-f7) followed by one, two or three bytes in 80-bf.
 * The preg_match() result is stored in $this->isutf8.
 *
 * @param string $s String to check
 */
protected function use_regexp( $s ) {
	$pattern = '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
		'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/';
	$this->isutf8 = preg_match( $pattern, $s );
}
95
/**
 * Match $s against the byte-sequence regexp using a non-capturing (?:...) group.
 *
 * Same alternatives as use_regexp(); only the group type differs.
 * The preg_match() result is stored in $this->isutf8.
 *
 * @param string $s String to check
 */
protected function use_regexp_non_capturing( $s ) {
	$pattern = '/^(?:[\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
		'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/';
	$this->isutf8 = preg_match( $pattern, $s );
}
101
/**
 * Match $s against the byte-sequence regexp using a once-only (?>...) group.
 *
 * Same alternatives as use_regexp(); only the group type differs.
 * The preg_match() result is stored in $this->isutf8.
 *
 * @param string $s String to check
 */
protected function use_regexp_once_only( $s ) {
	$pattern = '/^(?>[\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
		'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/';
	$this->isutf8 = preg_match( $pattern, $s );
}
107
/**
 * Check $s with mb_check_encoding() against 'UTF-8' and store the result in $this->isutf8.
 *
 * @param string $s String to check
 */
protected function use_mb_check_encoding( $s ) {
	$valid = mb_check_encoding( $s, 'UTF-8' );
	$this->isutf8 = $valid;
}
111}
112
// Record the benchmark class name in $maintClass, then load the file named by RUN_MAINTENANCE_IF_MAIN.
// NOTE(review): presumably that file instantiates and runs $maintClass when this script is the entry point — confirm.
113$maintClass = BenchUtf8TitleCheck::class;
114require_once RUN_MAINTENANCE_IF_MAIN;
const RUN_MAINTENANCE_IF_MAIN
This little benchmark executes the regexp formerly used in Language->checkTitleEncoding() and compare...
__construct()
Default constructor.
execute()
Do the actual work.
Base class for benchmark scripts.
bench(array $benchs)
addDescription( $text)
Set the description text.