MediaWiki REL1_33
bench_utf8_title_check.php
Go to the documentation of this file.
1<?php
24require_once __DIR__ . '/Benchmarker.php';
25
/** Sample titles assigned in the constructor; execute() passes each one through rawurldecode() before benchmarking. */
33 private $data;
34
/** Receives the return value of whichever check method ran last (preg_match() or mb_check_encoding() result). */
35 private $isutf8;
36
/**
 * Prepare the benchmark: call the parent constructor, store the sample
 * titles in $this->data and register the script description.
 *
 * The samples are kept percent-encoded here; execute() decodes them.
 */
public function __construct() {
	parent::__construct();

	// phpcs:disable Generic.Files.LineLength
	// This comes from T38839
	$longTitleList = "Acteur%7CAlbert%20Robbins%7CAnglais%7CAnn%20Donahue%7CAnthony%20E.%20Zuiker%7CCarol%20Mendelsohn%7C"
		. "Catherine%20Willows%7CDavid%20Hodges%7CDavid%20Phillips%7CGil%20Grissom%7CGreg%20Sanders%7CHodges%7C"
		. "Internet%20Movie%20Database%7CJim%20Brass%7CLady%20Heather%7C"
		. "Les%20Experts%20(s%C3%A9rie%20t%C3%A9l%C3%A9vis%C3%A9e)%7CLes%20Experts%20:%20Manhattan%7C"
		. "Les%20Experts%20:%20Miami%7CListe%20des%20personnages%20des%20Experts%7C"
		. "Liste%20des%20%C3%A9pisodes%20des%20Experts%7CMod%C3%A8le%20discussion:Palette%20Les%20Experts%7C"
		. "Nick%20Stokes%7CPersonnage%20de%20fiction%7CPersonnage%20fictif%7CPersonnage%20de%20fiction%7C"
		. "Personnages%20r%C3%A9currents%20dans%20Les%20Experts%7CRaymond%20Langston%7CRiley%20Adams%7C"
		. "Saison%201%20des%20Experts%7CSaison%2010%20des%20Experts%7CSaison%2011%20des%20Experts%7C"
		. "Saison%2012%20des%20Experts%7CSaison%202%20des%20Experts%7CSaison%203%20des%20Experts%7C"
		. "Saison%204%20des%20Experts%7CSaison%205%20des%20Experts%7CSaison%206%20des%20Experts%7C"
		. "Saison%207%20des%20Experts%7CSaison%208%20des%20Experts%7CSaison%209%20des%20Experts%7C"
		. "Sara%20Sidle%7CSofia%20Curtis%7CS%C3%A9rie%20t%C3%A9l%C3%A9vis%C3%A9e%7CWallace%20Langham%7C"
		. "Warrick%20Brown%7CWendy%20Simms%7C%C3%89tats-Unis";

	// Samples ordered from the empty string up to the long title list.
	$samples = [];
	$samples[] = "";
	$samples[] = "United States of America"; // 7bit ASCII
	$samples[] = "S%C3%A9rie%20t%C3%A9l%C3%A9vis%C3%A9e";
	$samples[] = "Acteur%7CAlbert%20Robbins%7CAnglais%7CAnn%20Donahue%7CAnthony%20E.%20Zuiker%7CCarol%20Mendelsohn";
	$samples[] = $longTitleList;
	$this->data = $samples;
	// phpcs:enable

	$description = "Benchmark for using a regexp vs. mb_check_encoding " .
		"to check for UTF-8 encoding.";
	$this->addDescription( $description );
}
67
/**
 * Build the list of benchmarks and hand it to bench().
 *
 * For every sample in $this->data, four entries are queued in this order:
 * use_regexp, use_regexp_non_capturing, use_regexp_once_only and
 * use_mb_check_encoding. Each entry receives the rawurldecode()d sample
 * as its single argument.
 */
public function execute() {
	$checkMethods = [
		'use_regexp',
		'use_regexp_non_capturing',
		'use_regexp_once_only',
		'use_mb_check_encoding',
	];

	$queue = [];
	foreach ( $this->data as $encodedTitle ) {
		// All four checks get the same decoded bytes for a given sample.
		$decodedTitle = rawurldecode( $encodedTitle );
		foreach ( $checkMethods as $methodName ) {
			$queue[] = [
				'function' => [ $this, $methodName ],
				'args' => [ $decodedTitle ]
			];
		}
	}

	$this->bench( $queue );
}
90
/**
 * Check $s against the byte-level regexp using a capturing group.
 *
 * The pattern accepts one or more units, each being a byte in 00-7f, or a
 * lead byte in c0-df / e0-ef / f0-f7 followed by one / two / three bytes
 * in 80-bf. The preg_match() return value is stored in $this->isutf8.
 *
 * @param string $s Bytes to test
 */
protected function use_regexp( $s ) {
	$pattern = '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|'
		. '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/';
	$matchResult = preg_match( $pattern, $s );
	$this->isutf8 = $matchResult;
}
95
/**
 * Check $s against the same byte-level regexp as use_regexp(), except
 * that the alternation is wrapped in a non-capturing (?:...) subgroup.
 * The preg_match() return value is stored in $this->isutf8.
 *
 * @param string $s Bytes to test
 */
protected function use_regexp_non_capturing( $s ) {
	$pattern = '/^(?:[\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|'
		. '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/';
	$matchResult = preg_match( $pattern, $s );
	$this->isutf8 = $matchResult;
}
101
/**
 * Check $s against the same byte-level regexp as use_regexp(), except
 * that the alternation is wrapped in a once-only (?>...) subgroup.
 * The preg_match() return value is stored in $this->isutf8.
 *
 * @param string $s Bytes to test
 */
protected function use_regexp_once_only( $s ) {
	$pattern = '/^(?>[\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|'
		. '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/';
	$matchResult = preg_match( $pattern, $s );
	$this->isutf8 = $matchResult;
}
107
/**
 * Check $s with mb_check_encoding() against 'UTF-8' and store the
 * returned value in $this->isutf8.
 *
 * @param string $s Bytes to test
 */
protected function use_mb_check_encoding( $s ) {
	$checkResult = mb_check_encoding( $s, 'UTF-8' );
	$this->isutf8 = $checkResult;
}
111}
112
// Record this script's class name in $maintClass, then include the file whose
// path is held in the RUN_MAINTENANCE_IF_MAIN constant.
// NOTE(review): presumably the included file reads $maintClass to decide which
// class to run; the constant is defined outside this file, so confirm there.
113$maintClass = BenchUtf8TitleCheck::class;
114require_once RUN_MAINTENANCE_IF_MAIN;
and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software.
This little benchmark executes the regexp formerly used in Language->checkTitleEncoding() and compares its execution time against that of mb_check_encoding().
__construct()
Default constructor.
execute()
Do the actual work.
Base class for benchmark scripts.
bench(array $benchs)
addDescription( $text)
Set the description text.
require_once RUN_MAINTENANCE_IF_MAIN