MediaWiki REL1_35
MagicWordArray.php
Go to the documentation of this file.
1<?php
2
27
34 public $names = [];
35
37 private $factory;
38
40 private $hash;
41
43 private $baseRegex;
44
45 private $regex;
46
/**
 * Create a magic word array from a list of magic word names.
 *
 * @param array $names Names of the magic words initially held by this array
 * @param MagicWordFactory|null $factory Factory used to look up magic words;
 *   when omitted, the factory supplied by MediaWikiServices is used
 */
public function __construct( $names = [], ?MagicWordFactory $factory = null ) {
	$this->names = $names;
	// The parameter is explicitly nullable: relying on "= null" to make a typed
	// parameter nullable is deprecated in newer PHP versions.
	$this->factory = $factory ?? MediaWikiServices::getInstance()->getMagicWordFactory();
}
55
/**
 * Add a magic word by name.
 *
 * @param string $name Name of the magic word to append
 */
public function add( $name ) {
	$this->names[] = $name;

	// The lookup table and the regexes are derived from the name list, so
	// discard them; the getters rebuild them when they find null.
	$this->regex = null;
	$this->baseRegex = null;
	$this->hash = null;
}
65
/**
 * Add a number of magic words by name.
 *
 * @param array $names Names to append; only the values are used, the keys
 *   of this array are discarded
 */
public function addArray( $names ) {
	$appended = array_values( $names );
	$this->names = array_merge( $this->names, $appended );

	// The lookup table and the regexes are derived from the name list, so
	// discard them; the getters rebuild them when they find null.
	$this->regex = null;
	$this->baseRegex = null;
	$this->hash = null;
}
75
/**
 * Get a 2-d hashtable for this array.
 *
 * Index 0 maps the synonyms of case-insensitive magic words, lower-cased
 * with the content language, to the magic word name. Index 1 maps the
 * synonyms of case-sensitive magic words, unchanged, to the magic word name.
 * The table is built on first use and then cached.
 *
 * @return array
 */
public function getHash() {
	if ( $this->hash !== null ) {
		return $this->hash;
	}

	$this->hash = [ 0 => [], 1 => [] ];
	foreach ( $this->names as $wordName ) {
		$word = $this->factory->get( $wordName );
		$bucket = intval( $word->isCaseSensitive() );
		foreach ( $word->getSynonyms() as $synonym ) {
			// Case-insensitive synonyms are stored lower-cased so that a
			// lower-cased input can be looked up directly.
			$lookupKey = $bucket
				? $synonym
				: $this->factory->getContentLanguage()->lc( $synonym );
			$this->hash[$bucket][$lookupKey] = $wordName;
		}
	}

	return $this->hash;
}
96
/**
 * Get the base regex.
 *
 * Returns two undelimited alternations of named groups, one group per
 * synonym: index 0 holds the synonyms of case-insensitive magic words,
 * index 1 those of case-sensitive ones. An entry is the empty string when
 * there are no synonyms of that kind. The result is cached.
 *
 * @return string[]
 * @throws MWException If two synonyms would produce the same group name
 */
public function getBaseRegex() : array {
	if ( $this->baseRegex !== null ) {
		return $this->baseRegex;
	}

	// Collect the groups locally and only cache the finished result, so that
	// an exception thrown part-way through does not leave a truncated regex
	// behind for later calls to return.
	$alternatives = [ 0 => [], 1 => [] ];
	$seenGroups = [];
	foreach ( $this->names as $name ) {
		$magic = $this->factory->get( $name );
		$case = intval( $magic->isCaseSensitive() );
		foreach ( $magic->getSynonyms() as $i => $syn ) {
			// Group name must start with a non-digit in PCRE 8.34+
			$it = strtr( (string)$i, '0123456789', 'abcdefghij' );
			$groupName = $it . '_' . $name;
			// look for same group names to avoid same named subpatterns in the regex
			if ( isset( $seenGroups[$groupName] ) ) {
				throw new MWException(
					__METHOD__ . ': duplicate internal name in magic word array: ' . $name
				);
			}
			$seenGroups[$groupName] = true;
			$alternatives[$case][] = '(?P<' . $groupName . '>' . preg_quote( $syn, '/' ) . ')';
		}
	}

	$this->baseRegex = [
		0 => implode( '|', $alternatives[0] ),
		1 => implode( '|', $alternatives[1] ),
	];
	return $this->baseRegex;
}
130
/**
 * Get an unanchored regex that does not match parameters.
 *
 * Index 0 is the case-insensitive regex (flags "iuS"), index 1 the
 * case-sensitive one (flag "S"). An entry is the empty string when the
 * corresponding base regex is empty. The result is cached.
 *
 * @return string[]
 */
public function getRegex() {
	if ( $this->regex === null ) {
		// Test the value returned by getBaseRegex() rather than the cached
		// property, the same way the other regex builders in this class do.
		$base = $this->getBaseRegex();
		$this->regex = [
			$base[0] !== '' ? "/{$base[0]}/iuS" : '',
			$base[1] !== '' ? "/{$base[1]}/S" : '',
		];
	}
	return $this->regex;
}
149
/**
 * Get a regex for matching variables with parameters.
 *
 * Takes the regexes from getRegex() and replaces every escaped "$1"
 * placeholder in them with a lazy capture group.
 *
 * @return string[]
 */
public function getVariableRegex() {
	$plainRegexes = $this->getRegex();
	return str_replace( "\\$1", "(.*?)", $plainRegexes );
}
158
/**
 * Get a regex anchored to the start of the string that does not match parameters.
 *
 * Index 0 is the case-insensitive regex, index 1 the case-sensitive one;
 * an entry is the empty string when the corresponding base regex is empty.
 *
 * @return string[]
 */
public function getRegexStart() {
	$base = $this->getBaseRegex();
	return [
		$base[0] === '' ? '' : "/^(?:{$base[0]})/iuS",
		$base[1] === '' ? '' : "/^(?:{$base[1]})/S",
	];
}
175
/**
 * Get an anchored regex for matching variables with parameters.
 *
 * The regexes are anchored at both ends and every escaped "$1" placeholder
 * is replaced with a lazy capture group. Index 0 is the case-insensitive
 * regex, index 1 the case-sensitive one; an entry is the empty string when
 * the corresponding base regex is empty.
 *
 * @return string[]
 */
public function getVariableStartToEndRegex() {
	$base = $this->getBaseRegex();

	$insensitive = '';
	if ( $base[0] !== '' ) {
		$insensitive = str_replace( "\\$1", "(.*?)", "/^(?:{$base[0]})$/iuS" );
	}

	$sensitive = '';
	if ( $base[1] !== '' ) {
		$sensitive = str_replace( "\\$1", "(.*?)", "/^(?:{$base[1]})$/S" );
	}

	return [ $insensitive, $sensitive ];
}
192
/**
 * Get the names of the magic words held by this array.
 *
 * @return array The names in the order they were supplied or added
 */
public function getNames() {
	$names = $this->names;
	return $names;
}
200
/**
 * Parse a match array from preg_match.
 *
 * Walks the array to the first entry, other than key 0, whose value is not
 * the empty string, and takes the part of its key after the first "_" as
 * the magic word name. The parameter value is the element two positions
 * after that entry, or false if the array ends before it. This relies on
 * preg_match listing every named group under its name and then again under
 * its numeric index.
 *
 * @param array $m Match array as filled in by preg_match
 * @return array [ magic word name, parameter value or false ]
 * @throws MWException If a key has no "_" separator or no entry qualifies
 */
public function parseMatch( $m ) {
	reset( $m );
	for ( $key = key( $m ); $key !== null; $key = key( $m ) ) {
		$value = current( $m );
		next( $m );
		if ( $key !== 0 && $value !== '' ) {
			$parts = explode( '_', $key, 2 );
			if ( count( $parts ) != 2 ) {
				// This shouldn't happen
				throw new MWException( __METHOD__ . ': bad parameter name' );
			}
			// The pointer already sits on the entry after the matched key;
			// one more step reaches the parameter capture, if there is one.
			$paramValue = next( $m );
			return [ $parts[1], $paramValue ];
		}
	}
	// This shouldn't happen either
	throw new MWException( __METHOD__ . ': parameter not found' );
}
232
/**
 * Match some text, with parameter capture.
 *
 * Tries each non-empty regex from getVariableStartToEndRegex() against the
 * text and parses the first match.
 *
 * @param string $text
 * @return array [ magic word name, parameter value ] as returned by
 *   parseMatch(), or [ false, false ] if no regex matched
 */
public function matchVariableStartToEnd( $text ) {
	foreach ( $this->getVariableStartToEndRegex() as $regex ) {
		if ( $regex === '' ) {
			continue;
		}
		$m = [];
		if ( preg_match( $regex, $text, $m ) ) {
			return $this->parseMatch( $m );
		}
	}
	return [ false, false ];
}
255
/**
 * Match some text, without parameter capture.
 *
 * Looks the text up in the case-sensitive table first, then looks its
 * content-language lower-cased form up in the case-insensitive table.
 *
 * @param string $text
 * @return string|bool The magic word name, or false if there was no match
 */
public function matchStartToEnd( $text ) {
	$table = $this->getHash();

	$sensitive = $table[1];
	if ( isset( $sensitive[$text] ) ) {
		return $sensitive[$text];
	}

	$insensitive = $table[0];
	$folded = $this->factory->getContentLanguage()->lc( $text );
	if ( isset( $insensitive[$folded] ) ) {
		return $insensitive[$folded];
	}
	return false;
}
272
/**
 * Find every magic word of this array in the text and remove the matches.
 *
 * Each non-empty regex from getRegex() is applied in turn. PCRE failures
 * are logged as warnings on the 'parser' channel; when the removal step
 * fails, the text is left as it was for that regex.
 *
 * @param string &$text Text to search; matched items are removed from it
 * @return array Associative array, magic word name => parameter value,
 *   for all items that matched
 */
public function matchAndRemove( &$text ) {
	$found = [];
	$regexes = $this->getRegex();
	foreach ( $regexes as $regex ) {
		if ( $regex === '' ) {
			continue;
		}
		$matches = [];
		$res = preg_match_all( $regex, $text, $matches, PREG_SET_ORDER );
		if ( $res === false ) {
			LoggerFactory::getInstance( 'parser' )->warning( 'preg_match_all returned false', [
				'code' => preg_last_error(),
				'regex' => $regex,
				'text' => $text,
			] );
		} elseif ( $res ) {
			foreach ( $matches as $m ) {
				list( $name, $param ) = $this->parseMatch( $m );
				$found[$name] = $param;
			}
		}
		$res = preg_replace( $regex, '', $text );
		if ( $res === null ) {
			LoggerFactory::getInstance( 'parser' )->warning( 'preg_replace returned null', [
				'code' => preg_last_error(),
				'regex' => $regex,
				'text' => $text,
			] );
			// preg_replace signals an error with null. Keep the caller's text
			// rather than overwriting it with null.
			continue;
		}
		$text = $res;
	}
	return $found;
}
314
/**
 * Return the ID of the magic word at the start of $text, and remove
 * the prefix from $text.
 *
 * Tries each non-empty regex from getRegexStart() in turn and stops at the
 * first one that matches.
 *
 * @param string &$text Text to inspect; the matched prefix is cut off
 * @return string|bool The magic word name, or false if nothing matched,
 *   in which case $text is left unchanged
 */
public function matchStartAndRemove( &$text ) {
	foreach ( $this->getRegexStart() as $regex ) {
		if ( $regex === '' || !preg_match( $regex, $text, $m ) ) {
			continue;
		}
		$parsed = $this->parseMatch( $m );
		$prefixLength = strlen( $m[0] );
		// When the match covers the whole text the remainder is set to ''
		// explicitly instead of calling substr() at the end of the string.
		$text = $prefixLength >= strlen( $text )
			? ''
			: substr( $text, $prefixLength );
		return $parsed[0];
	}
	return false;
}
343}
if(ini_get('mbstring.func_overload')) if(!defined('MW_ENTRY_POINT'))
Pre-config setup: Before loading LocalSettings.php.
Definition Setup.php:85
MediaWiki exception.
Class for handling an array of magic words.
matchVariableStartToEnd( $text)
Match some text, with parameter capture. Returns an array with the magic word name in the first element and the parameter value in the second element; both elements are false if there was no match.
add( $name)
Add a magic word by name.
getVariableRegex()
Get a regex for matching variables with parameters.
__construct( $names=[], MagicWordFactory $factory=null)
parseMatch( $m)
Parse a match array from preg_match. Returns array(magic word ID, parameter value). If there is no parameter value, the second element is false.
matchStartToEnd( $text)
Match some text, without parameter capture. Returns the magic word name, or false if there was no capture.
getRegexStart()
Get a regex anchored to the start of the string that does not match parameters.
getBaseRegex()
Get the base regex.
matchStartAndRemove(&$text)
Return the ID of the magic word at the start of $text, and remove the prefix from $text.
getRegex()
Get an unanchored regex that does not match parameters.
getVariableStartToEndRegex()
Get an anchored regex for matching variables with parameters.
getHash()
Get a 2-d hashtable for this array.
MagicWordFactory $factory
matchAndRemove(&$text)
Returns an associative array, ID => param value, for all items that match. Removes the matched items from the input string (passed by reference).
addArray( $names)
Add a number of magic words by name.
string[]|null $baseRegex
A factory that stores information about MagicWords, and creates them on demand with caching.
PSR-3 logger instance factory.
MediaWikiServices is the service locator for the application scope of MediaWiki.