MediaWiki  master
MagicWordArray.php
Go to the documentation of this file.
1 <?php
2 
27 
/** @var string[] Names of the magic words held by this array */
34  public $names = [];
35 
/** @var MagicWordFactory Factory used to look up each magic word by name */
37  private $factory;
38 
/** @var array|null Synonym lookup table built by getHash(); null until built and after the names change */
40  private $hash;
41 
/** @var string[]|null Result of getBaseRegex() for the default arguments; null until built and after the names change */
43  private $baseRegex;
44 
/** @var string[]|null Result of getRegex(); null until built and after the names change */
45  private $regex;
46 
/**
 * Create a magic word array.
 *
 * @param string[] $names Names of the magic words to start with
 * @param MagicWordFactory|null $factory Factory used to look up magic words;
 *   when null, the factory registered in MediaWikiServices is used
 */
public function __construct( $names = [], ?MagicWordFactory $factory = null ) {
	$this->names = $names;
	// The parameter is declared explicitly nullable: relying on "= null" to make
	// a typed parameter nullable is deprecated as of PHP 8.4.
	$this->factory = $factory ?? MediaWikiServices::getInstance()->getMagicWordFactory();
}
55 
/**
 * Add a magic word by name.
 *
 * @param string $name
 */
public function add( $name ) {
	// Everything derived from the name list is stale now; it is rebuilt on demand.
	$this->regex = null;
	$this->baseRegex = null;
	$this->hash = null;

	$this->names[] = $name;
}
65 
/**
 * Add a number of magic words by name.
 *
 * @param string[] $names Names to append; the keys of this array are ignored
 */
public function addArray( $names ) {
	$extra = array_values( $names );
	$combined = array_merge( $this->names, $extra );

	// Everything derived from the name list is stale now; it is rebuilt on demand.
	$this->regex = null;
	$this->baseRegex = null;
	$this->hash = null;

	$this->names = $combined;
}
75 
/**
 * Get a 2-d hashtable for this array.
 *
 * The result has two entries: index 0 maps the lower-cased synonyms of the
 * case-insensitive magic words to their names, index 1 maps the synonyms of
 * the case-sensitive magic words (unchanged) to their names.
 *
 * @return array[]
 */
public function getHash() {
	if ( $this->hash !== null ) {
		return $this->hash;
	}

	// Build into a local variable and publish only once complete, so that an
	// exception thrown while looking up a magic word cannot leave a partially
	// filled table cached for later calls.
	$hash = [ 0 => [], 1 => [] ];
	foreach ( $this->names as $name ) {
		$magic = $this->factory->get( $name );
		$case = intval( $magic->isCaseSensitive() );
		foreach ( $magic->getSynonyms() as $syn ) {
			if ( !$case ) {
				$syn = $this->factory->getContentLanguage()->lc( $syn );
			}
			$hash[$case][$syn] = $name;
		}
	}

	$this->hash = $hash;
	return $hash;
}
96 
/**
 * Get the base regex.
 *
 * The result has two entries, neither of which carries delimiters or
 * modifiers: index 0 is the alternation of the synonyms of all
 * case-insensitive magic words, wrapped in "(?i:...)"; index 1 is the
 * alternation for the case-sensitive ones. A side without any synonym
 * becomes "(?!)", which never matches.
 *
 * @param bool $capture Wrap every synonym in a named group "<index>_<name>",
 *   where <index> is the synonym index with its digits mapped to a-j
 * @param string $delimiter Delimiter the caller will use; passed to preg_quote()
 * @return string[]
 * @throws MWException If two synonyms end up with the same group name
 */
public function getBaseRegex( bool $capture = true, string $delimiter = '/' ) : array {
	// Only the default form is cached.
	$cacheable = $capture && $delimiter === '/';
	if ( $cacheable && $this->baseRegex !== null ) {
		return $this->baseRegex;
	}

	$alternatives = [ 0 => [], 1 => [] ];
	$allGroups = [];
	foreach ( $this->names as $name ) {
		$magic = $this->factory->get( $name );
		$case = $magic->isCaseSensitive() ? 1 : 0;
		foreach ( $magic->getSynonyms() as $i => $syn ) {
			$quoted = preg_quote( $syn, $delimiter );
			if ( !$capture ) {
				$alternatives[$case][] = $quoted;
				continue;
			}
			// Group name must start with a non-digit in PCRE 8.34+
			$it = strtr( (string)$i, '0123456789', 'abcdefghij' );
			$groupName = $it . '_' . $name;
			// look for same group names to avoid same named subpatterns in the regex
			if ( isset( $allGroups[$groupName] ) ) {
				throw new MWException(
					__METHOD__ . ': duplicate internal name in magic word array: ' . $name
				);
			}
			$allGroups[$groupName] = true;
			$alternatives[$case][] = '(?P<' . $groupName . '>' . $quoted . ')';
		}
	}

	// Assemble into a fresh array instead of rewriting the lists in place
	// through a by-reference loop variable, which would stay bound to the
	// last element after the loop.
	$regex = [];
	foreach ( $alternatives as $case => $list ) {
		$re = count( $list ) ? implode( '|', $list ) : '(?!)';
		if ( !$case ) {
			$re = "(?i:{$re})";
		}
		$regex[$case] = $re;
	}

	if ( $cacheable ) {
		$this->baseRegex = $regex;
	}
	return $regex;
}
149 
/**
 * Get an unanchored regex that does not match parameters.
 *
 * @return string[] Complete patterns: index 0 for the case-insensitive
 *   magic words, index 1 for the case-sensitive ones
 */
public function getRegex() {
	if ( $this->regex === null ) {
		// Build into a local variable: getBaseRegex() can throw, and assigning
		// the property up front would leave an empty array cached that later
		// calls would return as if it were the finished result.
		$regex = [];
		foreach ( $this->getBaseRegex( true, '/' ) as $case => $re ) {
			$regex[$case] = "/{$re}/S";
		}
		// As a performance optimization, turn on unicode mode only for
		// case-insensitive matching.
		$regex[0] .= 'u';
		$this->regex = $regex;
	}
	return $this->regex;
}
168 
/**
 * Get a regex for matching variables with parameters.
 *
 * @return string[] The patterns from getRegex() with every quoted "$1"
 *   replaced by a non-greedy capture group
 */
public function getVariableRegex() {
	$plain = $this->getRegex();
	// str_replace() applied to an array rewrites each element and keeps the keys.
	$withParameter = str_replace( "\\$1", "(.*?)", $plain );
	return $withParameter;
}
179 
/**
 * Get a regex anchored to the start of the string that does not match parameters.
 *
 * @return string[] Complete patterns: index 0 for the case-insensitive
 *   magic words, index 1 for the case-sensitive ones
 */
public function getRegexStart() {
	// array_map() over a single array keeps that array's keys.
	$anchored = array_map(
		static function ( $re ) {
			return '/^(?:' . $re . ')/S';
		},
		$this->getBaseRegex( true, '/' )
	);
	// Unicode mode is only switched on for the case-insensitive pattern,
	// as a performance optimization.
	$anchored[0] .= 'u';
	return $anchored;
}
197 
/**
 * Get an anchored regex for matching variables with parameters.
 *
 * @return string[] Complete patterns anchored at both ends: index 0 for the
 *   case-insensitive magic words, index 1 for the case-sensitive ones
 */
public function getVariableStartToEndRegex() {
	$anchored = [];
	foreach ( $this->getBaseRegex( true, '/' ) as $case => $re ) {
		$anchored[$case] = '/^(?:' . $re . ')$/S';
	}
	// Turn every quoted "$1" into a non-greedy capture group; str_replace()
	// applied to an array rewrites each element and keeps the keys.
	$withParameter = str_replace( "\\$1", "(.*?)", $anchored );
	// Unicode mode is only switched on for the case-insensitive pattern,
	// as a performance optimization.
	$withParameter[0] .= 'u';
	return $withParameter;
}
215 
/**
 * Get the names of the magic words in this array.
 *
 * @return string[] The names in the order they were added
 */
public function getNames() {
	$names = $this->names;
	return $names;
}
223 
/**
 * Parse a match array from preg_match.
 *
 * The first entry that is neither the whole-match entry (key 0) nor an empty
 * string identifies the magic word: its key has the form "<index>_<name>".
 * The parameter value is the entry two positions further on, which skips
 * the entry that directly follows the named one.
 *
 * @param array $m Match array
 * @return array [ magic word name, parameter value ]; the parameter value
 *   is false when the match array has no entry at that position
 * @throws MWException If a key cannot be split into index and name, or if
 *   no usable entry is found at all
 */
public function parseMatch( $m ) {
	$keys = array_keys( $m );
	$values = array_values( $m );
	$total = count( $keys );

	for ( $pos = 0; $pos < $total; $pos++ ) {
		$key = $keys[$pos];
		if ( $key === 0 || $values[$pos] === '' ) {
			continue;
		}

		$parts = explode( '_', $key, 2 );
		if ( count( $parts ) != 2 ) {
			// This shouldn't happen
			throw new MWException( __METHOD__ . ': bad parameter name' );
		}
		$magicName = $parts[1];

		$paramPos = $pos + 2;
		$paramValue = $paramPos < $total ? $values[$paramPos] : false;
		return [ $magicName, $paramValue ];
	}

	// This shouldn't happen either
	throw new MWException( __METHOD__ . ': parameter not found' );
}
255 
/**
 * Match some text, with parameter capture.
 *
 * The text has to match one of the patterns from
 * getVariableStartToEndRegex() from start to end.
 *
 * @param string $text
 * @return array [ magic word name, parameter value ] as produced by
 *   parseMatch(), or [ false, false ] when nothing matches
 */
public function matchVariableStartToEnd( $text ) {
	foreach ( $this->getVariableStartToEndRegex() as $pattern ) {
		$captures = [];
		if ( preg_match( $pattern, $text, $captures ) === 1 ) {
			return $this->parseMatch( $captures );
		}
	}
	return [ false, false ];
}
276 
/**
 * Match some text, without parameter capture.
 *
 * The case-sensitive synonyms are tried first with the text as given, then
 * the case-insensitive ones with the text lower-cased by the content language.
 *
 * @param string $text
 * @return string|false The magic word name, or false if nothing matches
 */
public function matchStartToEnd( $text ) {
	$hash = $this->getHash();

	$exact = $hash[1][$text] ?? null;
	if ( $exact !== null ) {
		return $exact;
	}

	$folded = $this->factory->getContentLanguage()->lc( $text );
	if ( isset( $hash[0][$folded] ) ) {
		return $hash[0][$folded];
	}
	return false;
}
293 
/**
 * Find every magic word of this array in $text and remove the matches from it.
 *
 * PCRE failures are logged to the 'parser' channel as warnings. If the
 * removal step fails for one pattern, $text is left as it was for that
 * pattern instead of being overwritten with null.
 *
 * @param string &$text Text to search; the matched items are removed from it
 * @return array Magic word name => parameter value (as produced by
 *   parseMatch()) for every item that matched
 */
public function matchAndRemove( &$text ) {
	$found = [];
	foreach ( $this->getRegex() as $regex ) {
		$matches = [];
		$res = preg_match_all( $regex, $text, $matches, PREG_SET_ORDER );
		if ( $res === false ) {
			// Read the error code right away, before any other call gets a
			// chance to run a regex of its own.
			$code = preg_last_error();
			LoggerFactory::getInstance( 'parser' )->warning( 'preg_match_all returned false', [
				'code' => $code,
				'regex' => $regex,
				'text' => $text,
			] );
		} elseif ( $res ) {
			foreach ( $matches as $m ) {
				list( $name, $param ) = $this->parseMatch( $m );
				$found[$name] = $param;
			}
		}

		$stripped = preg_replace( $regex, '', $text );
		if ( $stripped === null ) {
			$code = preg_last_error();
			LoggerFactory::getInstance( 'parser' )->warning( 'preg_replace returned null', [
				'code' => $code,
				'regex' => $regex,
				'text' => $text,
			] );
			// Do not hand null back to the caller (or to the next pattern):
			// keep the text unchanged when the replacement failed.
			continue;
		}
		$text = $stripped;
	}
	return $found;
}
332 
/**
 * Return the ID of the magic word at the start of $text, and remove
 * the prefix from $text.
 *
 * @param string &$text Text to examine; on a match the matched prefix is cut off
 * @return string|false The magic word ID, or false if $text does not start
 *   with any magic word of this array (in which case $text is unchanged)
 */
public function matchStartAndRemove( &$text ) {
	foreach ( $this->getRegexStart() as $pattern ) {
		if ( !preg_match( $pattern, $text, $m ) ) {
			continue;
		}
		$parsed = $this->parseMatch( $m );
		$id = $parsed[0];

		$prefixLength = strlen( $m[0] );
		// When the match covers the whole text the remainder is set to ''
		// explicitly rather than taken from substr().
		$text = $prefixLength < strlen( $text ) ? substr( $text, $prefixLength ) : '';
		return $id;
	}
	return false;
}
358 }
MagicWordArray\__construct
__construct( $names=[], MagicWordFactory $factory=null)
Definition: MagicWordArray.php:51
MagicWordArray
Class for handling an array of magic words.
Definition: MagicWordArray.php:32
MagicWordArray\getVariableStartToEndRegex
getVariableStartToEndRegex()
Get an anchored regex for matching variables with parameters.
Definition: MagicWordArray.php:204
MagicWordArray\getNames
getNames()
Definition: MagicWordArray.php:220
MagicWordArray\matchStartAndRemove
matchStartAndRemove(&$text)
Return the ID of the magic word at the start of $text, and remove the prefix from $text.
Definition: MagicWordArray.php:343
MagicWordFactory
A factory that stores information about MagicWords, and creates them on demand with caching.
Definition: MagicWordFactory.php:37
MagicWordArray\getHash
getHash()
Get a 2-d hashtable for this array.
Definition: MagicWordArray.php:80
MagicWordArray\getRegexStart
getRegexStart()
Get a regex anchored to the start of the string that does not match parameters.
Definition: MagicWordArray.php:186
MediaWiki\MediaWikiServices
MediaWikiServices is the service locator for the application scope of MediaWiki.
Definition: MediaWikiServices.php:165
MagicWordArray\$factory
MagicWordFactory $factory
Definition: MagicWordArray.php:37
$res
$res
Definition: testCompression.php:57
$base
$base
Definition: generateLocalAutoload.php:11
MagicWordArray\matchStartToEnd
matchStartToEnd( $text)
Match some text, without parameter capture. Returns the magic word name, or false if there was no match.
Definition: MagicWordArray.php:285
MagicWordArray\$baseRegex
string[]|null $baseRegex
Definition: MagicWordArray.php:43
MWException
MediaWiki exception.
Definition: MWException.php:29
MediaWiki\Logger\LoggerFactory
PSR-3 logger instance factory.
Definition: LoggerFactory.php:45
$matches
$matches
Definition: NoLocalSettings.php:24
MagicWordArray\$names
string[] $names
Definition: MagicWordArray.php:34
MagicWordArray\$hash
array $hash
Definition: MagicWordArray.php:40
MagicWordArray\add
add( $name)
Add a magic word by name.
Definition: MagicWordArray.php:61
MagicWordArray\getRegex
getRegex()
Get an unanchored regex that does not match parameters.
Definition: MagicWordArray.php:155
MagicWordArray\matchAndRemove
matchAndRemove(&$text)
Returns an associative array, ID => param value, for all items that match. Removes the matched items from the input text.
Definition: MagicWordArray.php:302
MagicWordArray\getVariableRegex
getVariableRegex()
Get a regex for matching variables with parameters.
Definition: MagicWordArray.php:176
MagicWordArray\parseMatch
parseMatch( $m)
Parse a match array from preg_match. Returns array(magic word ID, parameter value). If there is no parameter value, that element is false.
Definition: MagicWordArray.php:234
MagicWordArray\$regex
$regex
Definition: MagicWordArray.php:45
MagicWordArray\getBaseRegex
getBaseRegex(bool $capture=true, string $delimiter='/')
Get the base regex.
Definition: MagicWordArray.php:107
MagicWordArray\matchVariableStartToEnd
matchVariableStartToEnd( $text)
Match some text, with parameter capture. Returns an array with the magic word name in the first element and the parameter in the second element; both are false if nothing matches.
Definition: MagicWordArray.php:266
MagicWordArray\addArray
addArray( $names)
Add a number of magic words by name.
Definition: MagicWordArray.php:71