MediaWiki 1.39.10
MagicWordArray.php
Go to the documentation of this file.
1<?php
2
27
// Names of the magic words held by this array; each one is passed to the factory's get() when lookups are built.
34 public $names = [];
35
// Factory that supplies magic word objects and the content language (injected, or taken from MediaWikiServices).
37 private $factory;
38
// Cached result of getHash(); null until built, and reset to null by add()/addArray().
40 private $hash;
41
// Cached result of getBaseRegex() for capture=true and delimiter '/'; null until built or after names change.
43 private $baseRegex;
44
// Cached result of getRegex(); null until built, and reset to null by add()/addArray().
46 private $regex;
47
/**
 * @param array $names Names of the magic words to hold; each is later passed to
 *   the factory's get() method.
 * @param MagicWordFactory|null $factory Factory to use. When null, the factory is
 *   taken from MediaWikiServices::getInstance()->getMagicWordFactory().
 */
public function __construct( $names = [], ?MagicWordFactory $factory = null ) {
	$this->names = $names;
	// Explicit nullable type + null-coalescing: the fallback is meant for "no
	// factory given", not for any falsy value.
	$this->factory = $factory ?? MediaWikiServices::getInstance()->getMagicWordFactory();
}
56
/**
 * Add a magic word by name.
 *
 * @param string $name Name to append to the list
 */
public function add( $name ) {
	// Everything derived from the name list is stale now; drop it so it is
	// rebuilt on the next request.
	$this->hash = null;
	$this->baseRegex = null;
	$this->regex = null;

	$this->names[] = $name;
}
66
/**
 * Add a number of magic words by name.
 *
 * @param array $names Names to append; the keys of this array are ignored
 */
public function addArray( $names ) {
	$appended = array_values( $names );
	$this->names = array_merge( $this->names, $appended );

	// Invalidate every cache derived from the name list.
	$this->regex = null;
	$this->baseRegex = null;
	$this->hash = null;
}
76
/**
 * Get a 2-d hashtable for this array.
 *
 * Index 0 maps the lower-cased synonyms of case-insensitive magic words to
 * their names; index 1 maps the unmodified synonyms of case-sensitive magic
 * words to their names. The result is cached until the name list changes.
 *
 * @return array[]
 */
public function getHash() {
	if ( $this->hash === null ) {
		// Build into a local and publish only once complete, so that an
		// exception from the factory cannot leave a partial table cached.
		$hash = [ 0 => [], 1 => [] ];
		foreach ( $this->names as $name ) {
			$magic = $this->factory->get( $name );
			$case = intval( $magic->isCaseSensitive() );
			foreach ( $magic->getSynonyms() as $syn ) {
				if ( !$case ) {
					// Case-insensitive words are looked up by lower-cased text
					$syn = $this->factory->getContentLanguage()->lc( $syn );
				}
				$hash[$case][$syn] = $name;
			}
		}
		$this->hash = $hash;
	}
	return $this->hash;
}
97
/**
 * Get the base regex.
 *
 * @param bool $capture Whether each synonym is wrapped in a named capture group
 *   of the form "<letters>_<name>", where the letters encode the synonym index
 * @param string $delimiter Delimiter passed to preg_quote() for every synonym
 * @return string[] Undelimited pattern fragments: index 0 holds the
 *   case-insensitive alternatives wrapped in (?i:...), index 1 the
 *   case-sensitive ones. A side without synonyms becomes '(?!)', which never matches.
 * @throws MWException If two synonyms would produce the same capture group name
 */
public function getBaseRegex( bool $capture = true, string $delimiter = '/' ): array {
	// Only the default-argument variant is cached
	$cacheable = $capture && $delimiter === '/';
	if ( $cacheable && $this->baseRegex !== null ) {
		return $this->baseRegex;
	}

	$alternatives = [ 0 => [], 1 => [] ];
	$allGroups = [];
	foreach ( $this->names as $name ) {
		$magic = $this->factory->get( $name );
		$case = $magic->isCaseSensitive() ? 1 : 0;
		foreach ( $magic->getSynonyms() as $i => $syn ) {
			$quoted = preg_quote( $syn, $delimiter );
			if ( !$capture ) {
				$alternatives[$case][] = $quoted;
				continue;
			}
			// Group name must start with a non-digit in PCRE 8.34+
			$it = strtr( (string)$i, '0123456789', 'abcdefghij' );
			$groupName = $it . '_' . $name;
			// look for same group names to avoid same named subpatterns in the regex
			if ( isset( $allGroups[$groupName] ) ) {
				throw new MWException(
					__METHOD__ . ': duplicate internal name in magic word array: ' . $name
				);
			}
			$allGroups[$groupName] = true;
			$alternatives[$case][] = '(?P<' . $groupName . '>' . $quoted . ')';
		}
	}

	// Build the result in a fresh array instead of rewriting the lists through
	// a by-reference loop variable, so that no reference slot ends up in the
	// cached/returned array.
	$regex = [];
	foreach ( $alternatives as $case => $parts ) {
		$re = count( $parts ) ? implode( '|', $parts ) : '(?!)';
		$regex[$case] = $case ? $re : "(?i:{$re})";
	}

	if ( $cacheable ) {
		$this->baseRegex = $regex;
	}
	return $regex;
}
150
/**
 * Get an unanchored regex that does not match parameters.
 *
 * @return string[] Complete patterns delimited by '/': index 0 is the
 *   case-insensitive pattern (flags "Su"), index 1 the case-sensitive one (flag "S").
 *   The result is cached until the name list changes.
 */
public function getRegex() {
	if ( $this->regex === null ) {
		// Assemble locally and cache only on success; otherwise an exception
		// from getBaseRegex() would leave an empty array cached forever.
		$regex = [];
		foreach ( $this->getBaseRegex( true, '/' ) as $case => $re ) {
			$regex[$case] = "/{$re}/S";
		}
		// As a performance optimization, turn on unicode mode only for
		// case-insensitive matching.
		$regex[0] .= 'u';
		$this->regex = $regex;
	}
	return $this->regex;
}
169
/**
 * Get a regex for matching variables with parameters.
 *
 * Takes the patterns from getRegex() and replaces every quoted "$1"
 * placeholder (the text "\$1") with the lazy capture group "(.*?)".
 *
 * @return string[]
 */
public function getVariableRegex() {
	$plainPatterns = $this->getRegex();
	return str_replace( "\\$1", "(.*?)", $plainPatterns );
}
180
/**
 * Get a regex anchored to the start of the string that does not match parameters.
 *
 * @return string[] Index 0 is the case-insensitive pattern, index 1 the case-sensitive one
 */
public function getRegexStart() {
	$anchored = array_map(
		static function ( $re ) {
			return "/^(?:{$re})/S";
		},
		$this->getBaseRegex( true, '/' )
	);
	// As a performance optimization, turn on unicode mode only for
	// case-insensitive matching.
	$anchored[0] .= 'u';
	return $anchored;
}
198
/**
 * Get an anchored regex for matching variables with parameters.
 *
 * Each pattern is anchored at both ends, and every quoted "$1" placeholder
 * (the text "\$1") is replaced with the lazy capture group "(.*?)".
 *
 * @return string[] Index 0 is the case-insensitive pattern, index 1 the case-sensitive one
 */
public function getVariableStartToEndRegex() {
	$patterns = [];
	foreach ( $this->getBaseRegex( true, '/' ) as $case => $re ) {
		$anchored = '/^(?:' . $re . ')$/S';
		$patterns[$case] = str_replace( "\\$1", "(.*?)", $anchored );
	}
	// As a performance optimization, turn on unicode mode only for
	// case-insensitive matching.
	$patterns[0] .= 'u';
	return $patterns;
}
216
/**
 * Get the list of magic word names currently held by this array.
 *
 * @return array The names given to the constructor plus any added later
 *   through add() or addArray()
 */
public function getNames() {
	$names = $this->names;
	return $names;
}
224
/**
 * Parse a match array from preg_match.
 *
 * Finds the first entry, other than index 0, whose value is not the empty
 * string. Its key is expected to be a group name of the form
 * "<synonym index>_<magic word name>". The parameter value is the entry two
 * positions further on (the one after the numeric duplicate of the named
 * group), or false if the array ends before that.
 *
 * @param array $m Match array
 * @return array [ magic word name, parameter value or false ]
 * @throws MWException If the entry found has a key without an underscore, or
 *   if no suitable entry exists at all
 */
public function parseMatch( $m ) {
	$keys = array_keys( $m );
	$values = array_values( $m );
	$total = count( $keys );

	for ( $pos = 0; $pos < $total; $pos++ ) {
		$key = $keys[$pos];
		if ( $key === 0 || $values[$pos] === '' ) {
			// Whole-match entry or a group that did not participate
			continue;
		}

		$parts = explode( '_', $key, 2 );
		if ( count( $parts ) != 2 ) {
			// This shouldn't happen
			throw new MWException( __METHOD__ . ': bad parameter name' );
		}
		$magicName = $parts[1];

		$paramPos = $pos + 2;
		$paramValue = $paramPos < $total ? $values[$paramPos] : false;
		return [ $magicName, $paramValue ];
	}

	// This shouldn't happen either
	throw new MWException( __METHOD__ . ': parameter not found' );
}
256
/**
 * Match some text, with parameter capture.
 *
 * Tries each pattern from getVariableStartToEndRegex() in turn and hands the
 * first successful match to parseMatch().
 *
 * @param string $text
 * @return array [ magic word name, parameter value ] as returned by
 *   parseMatch(), or [ false, false ] if no pattern matched
 */
public function matchVariableStartToEnd( $text ) {
	foreach ( $this->getVariableStartToEndRegex() as $pattern ) {
		$groups = [];
		if ( !preg_match( $pattern, $text, $groups ) ) {
			continue;
		}
		return $this->parseMatch( $groups );
	}
	return [ false, false ];
}
277
/**
 * Match some text, without parameter capture.
 *
 * The exact text is looked up among the case-sensitive synonyms first; if it
 * is not found, its lower-cased form is looked up among the case-insensitive ones.
 *
 * @param string $text
 * @return string|false The magic word name, or false if nothing matched
 */
public function matchStartToEnd( $text ) {
	$hash = $this->getHash();
	$caseSensitive = $hash[1];
	if ( isset( $caseSensitive[$text] ) ) {
		return $caseSensitive[$text];
	}

	$folded = $this->factory->getContentLanguage()->lc( $text );
	$caseInsensitive = $hash[0];
	return $caseInsensitive[$folded] ?? false;
}
294
/**
 * Returns an associative array, ID => param value, for all items that match.
 * Removes the matched items from the input string (passed by reference).
 *
 * @param string &$text Text to search; every match of the patterns from
 *   getRegex() is removed from it
 * @return array Magic word name => parameter value as returned by parseMatch()
 * @throws Exception If preg_match_all() or preg_replace() reports a PCRE error
 */
public function matchAndRemove( &$text ) {
	$found = [];
	foreach ( $this->getRegex() as $regex ) {
		$matches = [];
		$res = preg_match_all( $regex, $text, $matches, PREG_SET_ORDER );
		if ( $res === false ) {
			$this->throwPregError( 'preg_match_all', $regex, $text );
		}
		// $matches is empty when nothing matched, so no extra check is needed
		foreach ( $matches as $m ) {
			list( $name, $param ) = $this->parseMatch( $m );
			$found[$name] = $param;
		}

		$res = preg_replace( $regex, '', $text );
		if ( $res === null ) {
			$this->throwPregError( 'preg_replace', $regex, $text );
		}
		$text = $res;
	}
	return $found;
}

/**
 * Log the last PCRE error to the 'parser' channel and throw.
 *
 * @param string $function Name of the preg_* function that failed
 * @param string $regex Pattern that was being applied
 * @param string $text Subject text that was being processed
 * @throws Exception Always
 */
private function throwPregError( $function, $regex, $text ) {
	$error = preg_last_error();
	// TODO: Remove function_exists when we require PHP8
	$errorText = function_exists( 'preg_last_error_msg' ) ? preg_last_error_msg() : '';
	LoggerFactory::getInstance( 'parser' )->warning( $function . ' error: {code} {errorText}', [
		'code' => $error,
		'regex' => $regex,
		'text' => $text,
		'errorText' => $errorText
	] );
	throw new Exception( "$function error $error: $errorText" );
}
343
/**
 * Return the ID of the magic word at the start of $text, and remove
 * the prefix from $text.
 *
 * @param string &$text Text to inspect; on a match the matched prefix is cut off
 * @return string|false The magic word name, or false (with $text untouched)
 *   if no pattern from getRegexStart() matched
 */
public function matchStartAndRemove( &$text ) {
	foreach ( $this->getRegexStart() as $pattern ) {
		if ( !preg_match( $pattern, $text, $m ) ) {
			continue;
		}
		$id = $this->parseMatch( $m )[0];
		$prefixLength = strlen( $m[0] );
		// When the match covers the whole text, set the empty string explicitly
		// rather than relying on what substr() returns at the end of the string.
		$text = $prefixLength < strlen( $text ) ? substr( $text, $prefixLength ) : '';
		return $id;
	}
	return false;
}
369}
if(!defined('MW_SETUP_CALLBACK'))
The persistent session ID (if any) loaded at startup.
Definition WebStart.php:82
MediaWiki exception.
Class for handling an array of magic words.
getBaseRegex(bool $capture=true, string $delimiter='/')
Get the base regex.
matchVariableStartToEnd( $text)
Match some text, with parameter capture. Returns an array with the magic word name in the first element and the parameter value in the second element; both elements are false if there was no match.
add( $name)
Add a magic word by name.
getVariableRegex()
Get a regex for matching variables with parameters.
__construct( $names=[], MagicWordFactory $factory=null)
parseMatch( $m)
Parse a match array from preg_match. Returns array(magic word ID, parameter value). If there is no parameter value, that element will be false.
matchStartToEnd( $text)
Match some text, without parameter capture. Returns the magic word name, or false if there was no match.
getRegexStart()
Get a regex anchored to the start of the string that does not match parameters.
matchStartAndRemove(&$text)
Return the ID of the magic word at the start of $text, and remove the prefix from $text.
getRegex()
Get an unanchored regex that does not match parameters.
getVariableStartToEndRegex()
Get an anchored regex for matching variables with parameters.
getHash()
Get a 2-d hashtable for this array.
matchAndRemove(&$text)
Returns an associative array, ID => param value, for all items that match. Removes the matched items from the input string, which is passed by reference.
addArray( $names)
Add a number of magic words by name.
A factory that stores information about MagicWords, and creates them on demand with caching.
PSR-3 logger instance factory.
Service locator for MediaWiki core services.