MediaWiki  master
MagicWordArray.php
Go to the documentation of this file.
1 <?php
2 
27 
/**
 * Names of the magic words held by this array.
 * @var string[]
 */
public $names = [];

/**
 * Factory used to look up magic words by name.
 * @var MagicWordFactory
 */
private $factory;

/**
 * Lazily built synonym lookup table; null until getHash() has built it.
 * @var array|null
 */
private $hash;

/**
 * Cached result of getBaseRegex() for the default arguments; null when not built.
 * @var string[]|null
 */
private $baseRegex;

/**
 * Cached result of getRegex(); null when not built.
 * @var string[]|null
 */
private $regex;
47 
/**
 * Create a magic word array, optionally seeded with magic word names.
 *
 * @param string[] $names Magic word names to start with
 * @param MagicWordFactory|null $factory Factory used to look up magic words.
 *   When null, the factory is taken from MediaWikiServices.
 */
public function __construct( $names = [], ?MagicWordFactory $factory = null ) {
	$this->names = $names;
	// The nullable type is spelled out: relying on "= null" to make a typed
	// parameter nullable is deprecated as of PHP 8.4.
	$this->factory = $factory ?? MediaWikiServices::getInstance()->getMagicWordFactory();
}
56 
/**
 * Add a magic word by name.
 *
 * @param string $name
 */
public function add( $name ) {
	$this->names[] = $name;

	// Every cached structure was derived from the old name list, so drop them all.
	$this->regex = null;
	$this->baseRegex = null;
	$this->hash = null;
}
66 
/**
 * Add a number of magic words by name.
 *
 * @param string[] $names Names to append; the keys of this array are ignored
 */
public function addArray( $names ) {
	$extra = array_values( $names );
	$this->names = array_merge( $this->names, $extra );

	// Every cached structure was derived from the old name list, so drop them all.
	$this->regex = null;
	$this->baseRegex = null;
	$this->hash = null;
}
76 
/**
 * Get a 2-d hashtable for this array.
 *
 * Index 0 maps lowercased synonyms of case-insensitive magic words to their
 * magic word name; index 1 maps the unmodified synonyms of case-sensitive
 * magic words to their name. The table is built on first use and cached.
 *
 * @return array[]
 */
public function getHash() {
	if ( $this->hash === null ) {
		// Build into a local and publish it only once complete, so that an
		// exception thrown while looking up a magic word cannot leave a
		// half-populated table cached for later callers.
		$hash = [ 0 => [], 1 => [] ];
		foreach ( $this->names as $name ) {
			$magic = $this->factory->get( $name );
			$case = intval( $magic->isCaseSensitive() );
			foreach ( $magic->getSynonyms() as $syn ) {
				if ( !$case ) {
					// Case-insensitive synonyms are stored lowercased.
					$syn = $this->factory->getContentLanguage()->lc( $syn );
				}
				$hash[$case][$syn] = $name;
			}
		}
		$this->hash = $hash;
	}
	return $this->hash;
}
97 
/**
 * Get the base regex.
 *
 * Returns two regex fragments without delimiters or modifiers: index 0 is
 * the alternation of all synonyms of case-insensitive magic words, wrapped
 * in "(?i:...)"; index 1 is the alternation for case-sensitive magic words.
 * The result is cached only for the default arguments.
 *
 * @param bool $capture Whether each synonym is wrapped in a named capture
 *   group, named from the synonym index and the magic word name
 * @param string $delimiter Delimiter passed to preg_quote() for each synonym
 * @return string[]
 * @throws MWException If two synonyms would produce the same group name
 */
public function getBaseRegex( bool $capture = true, string $delimiter = '/' ): array {
	$cacheable = $capture && $delimiter === '/';
	if ( $cacheable && $this->baseRegex !== null ) {
		return $this->baseRegex;
	}

	$alternatives = [ 0 => [], 1 => [] ];
	$allGroups = [];
	foreach ( $this->names as $name ) {
		$magic = $this->factory->get( $name );
		$case = $magic->isCaseSensitive() ? 1 : 0;
		foreach ( $magic->getSynonyms() as $i => $syn ) {
			$quoted = preg_quote( $syn, $delimiter );
			if ( !$capture ) {
				$alternatives[$case][] = $quoted;
				continue;
			}
			// Group name must start with a non-digit in PCRE 8.34+
			$it = strtr( $i, '0123456789', 'abcdefghij' );
			$groupName = $it . '_' . $name;
			// look for same group names to avoid same named subpatterns in the regex
			if ( isset( $allGroups[$groupName] ) ) {
				throw new MWException(
					__METHOD__ . ': duplicate internal name in magic word array: ' . $name
				);
			}
			$allGroups[$groupName] = true;
			$alternatives[$case][] = '(?P<' . $groupName . '>' . $quoted . ')';
		}
	}

	// Assemble into a fresh array instead of iterating by reference, so no
	// dangling reference to an element of the returned array is left behind.
	$regex = [];
	foreach ( $alternatives as $case => $branches ) {
		// '(?!)' is an empty negative lookahead: it never matches, which is
		// the right pattern when there are no synonyms for this case mode.
		$re = count( $branches ) ? implode( '|', $branches ) : '(?!)';
		$regex[$case] = $case ? $re : "(?i:{$re})";
	}

	if ( $cacheable ) {
		$this->baseRegex = $regex;
	}
	return $regex;
}
150 
/**
 * Get an unanchored regex that does not match parameters.
 *
 * Index 0 holds the case-insensitive regex, index 1 the case-sensitive one.
 * The result is built on first use and cached.
 *
 * @return string[]
 */
public function getRegex() {
	if ( $this->regex === null ) {
		// Assemble locally and cache only when complete. getBaseRegex() can
		// throw; assigning an empty array to the cache up front would make
		// every later call return that empty array as if it were valid.
		$regex = [];
		foreach ( $this->getBaseRegex( true, '/' ) as $case => $re ) {
			$regex[$case] = "/{$re}/S";
		}
		// As a performance optimization, turn on unicode mode only for
		// case-insensitive matching.
		$regex[0] .= 'u';
		$this->regex = $regex;
	}
	return $this->regex;
}
169 
/**
 * Get a regex for matching variables with parameters.
 *
 * @return string[] The regexes from getRegex() with every quoted "$1"
 *   placeholder turned into a lazy capture group
 */
public function getVariableRegex() {
	$plain = $this->getRegex();
	// The search string is a backslash followed by "$1", which is how
	// preg_quote() renders a literal "$1" inside a synonym.
	return str_replace( "\\$1", "(.*?)", $plain );
}
180 
/**
 * Get a regex anchored to the start of the string that does not match parameters.
 *
 * @return string[] Index 0 is the case-insensitive regex, index 1 the
 *   case-sensitive one
 */
public function getRegexStart() {
	$anchored = [];
	foreach ( $this->getBaseRegex( true, '/' ) as $case => $re ) {
		$anchored[$case] = '/^(?:' . $re . ')/S';
	}
	// As a performance optimization, turn on unicode mode only for
	// case-insensitive matching.
	$anchored[0] = $anchored[0] . 'u';
	return $anchored;
}
198 
/**
 * Get an anchored regex for matching variables with parameters.
 *
 * The regexes are anchored at both ends, and every quoted "$1" placeholder
 * is turned into a lazy capture group.
 *
 * @return string[] Index 0 is the case-insensitive regex, index 1 the
 *   case-sensitive one
 */
public function getVariableStartToEndRegex() {
	$anchored = [];
	foreach ( $this->getBaseRegex( true, '/' ) as $case => $re ) {
		$pattern = "/^(?:{$re})$/S";
		$anchored[$case] = str_replace( "\\$1", "(.*?)", $pattern );
	}
	// As a performance optimization, turn on unicode mode only for
	// case-insensitive matching.
	$anchored[0] = $anchored[0] . 'u';
	return $anchored;
}
216 
/**
 * Get the names of the magic words in this array.
 *
 * @return string[]
 */
public function getNames() {
	return $this->names;
}
224 
/**
 * Parse a match array from preg_match.
 *
 * Finds the first entry, other than key 0, whose value is not the empty
 * string. Its key is expected to be a group name of the form
 * "<synonym index>_<magic word name>".
 *
 * @param array $m Match array
 * @return array Two elements: the magic word name, and the value found two
 *   positions after the matching entry, or false if the array ends before that
 * @throws MWException If the key has no underscore, or no entry qualifies
 */
public function parseMatch( $m ) {
	$keys = array_keys( $m );
	$values = array_values( $m );

	foreach ( $keys as $pos => $key ) {
		if ( $key === 0 || $values[$pos] === '' ) {
			continue;
		}

		$parts = explode( '_', $key, 2 );
		if ( count( $parts ) != 2 ) {
			// This shouldn't happen
			throw new MWException( __METHOD__ . ': bad parameter name' );
		}

		// Skip one entry past the named group and take the one after it.
		$paramPos = $pos + 2;
		$paramValue = array_key_exists( $paramPos, $values ) ? $values[$paramPos] : false;
		return [ $parts[1], $paramValue ];
	}

	// This shouldn't happen either
	throw new MWException( __METHOD__ . ': parameter not found' );
}
256 
/**
 * Match some text, with parameter capture.
 *
 * Tries each regex from getVariableStartToEndRegex() in turn and returns
 * the parsed result of the first one that matches.
 *
 * @param string $text
 * @return array The result of parseMatch() for the first match, or
 *   [ false, false ] if nothing matched
 */
public function matchVariableStartToEnd( $text ) {
	foreach ( $this->getVariableStartToEndRegex() as $pattern ) {
		$match = [];
		if ( !preg_match( $pattern, $text, $match ) ) {
			continue;
		}
		return $this->parseMatch( $match );
	}
	return [ false, false ];
}
277 
/**
 * Match some text, without parameter capture.
 *
 * The text is first looked up as-is among the case-sensitive synonyms, then
 * lowercased with the content language and looked up among the
 * case-insensitive ones.
 *
 * @param string $text
 * @return string|bool The magic word name, or false if the text matches no synonym
 */
public function matchStartToEnd( $text ) {
	$hash = $this->getHash();
	if ( !isset( $hash[1][$text] ) ) {
		$lc = $this->factory->getContentLanguage()->lc( $text );
		return isset( $hash[0][$lc] ) ? $hash[0][$lc] : false;
	}
	return $hash[1][$text];
}
294 
/**
 * Returns an associative array, ID => param value, for all items that match.
 * Removes the matched items from the input string (passed by reference).
 *
 * PCRE failures are logged to the 'parser' channel. If the removal step
 * fails, $text is left as it was instead of being overwritten with null.
 *
 * @param string &$text Text to search; matched magic words are removed from it
 * @return array Magic word name => value returned by parseMatch() for each match
 */
public function matchAndRemove( &$text ) {
	$found = [];
	$regexes = $this->getRegex();
	foreach ( $regexes as $regex ) {
		$matches = [];
		$res = preg_match_all( $regex, $text, $matches, PREG_SET_ORDER );
		if ( $res === false ) {
			LoggerFactory::getInstance( 'parser' )->warning( 'preg_match_all returned false', [
				'code' => preg_last_error(),
				'regex' => $regex,
				'text' => $text,
			] );
		} elseif ( $res ) {
			foreach ( $matches as $m ) {
				list( $name, $param ) = $this->parseMatch( $m );
				$found[$name] = $param;
			}
		}
		$res = preg_replace( $regex, '', $text );
		if ( $res === null ) {
			LoggerFactory::getInstance( 'parser' )->warning( 'preg_replace returned null', [
				'code' => preg_last_error(),
				'regex' => $regex,
				'text' => $text,
			] );
			// preg_replace() returns null on error. Keep the caller's text
			// rather than replacing it with null.
			continue;
		}
		$text = $res;
	}
	return $found;
}
333 
/**
 * Return the ID of the magic word at the start of $text, and remove
 * the prefix from $text.
 *
 * @param string &$text Text to inspect; the matched prefix is stripped from it
 * @return string|bool The magic word ID, or false if no match was found,
 *   in which case $text is left unchanged
 */
public function matchStartAndRemove( &$text ) {
	foreach ( $this->getRegexStart() as $pattern ) {
		if ( !preg_match( $pattern, $text, $m ) ) {
			continue;
		}
		$id = $this->parseMatch( $m )[0];
		$prefixLength = strlen( $m[0] );
		// When the match covers the whole text, set the empty string
		// explicitly instead of relying on substr() at the end of the string.
		$text = $prefixLength >= strlen( $text ) ? '' : substr( $text, $prefixLength );
		return $id;
	}
	return false;
}
359 }
MagicWordArray\__construct
__construct( $names=[], MagicWordFactory $factory=null)
Definition: MagicWordArray.php:52
MagicWordArray
Class for handling an array of magic words.
Definition: MagicWordArray.php:32
MagicWordArray\getVariableStartToEndRegex
getVariableStartToEndRegex()
Get an anchored regex for matching variables with parameters.
Definition: MagicWordArray.php:205
MagicWordArray\getNames
getNames()
Definition: MagicWordArray.php:221
MagicWordArray\matchStartAndRemove
matchStartAndRemove(&$text)
Return the ID of the magic word at the start of $text, and remove the prefix from $text.
Definition: MagicWordArray.php:344
MagicWordFactory
A factory that stores information about MagicWords, and creates them on demand with caching.
Definition: MagicWordFactory.php:37
MagicWordArray\getHash
getHash()
Get a 2-d hashtable for this array.
Definition: MagicWordArray.php:81
MagicWordArray\getRegexStart
getRegexStart()
Get a regex anchored to the start of the string that does not match parameters.
Definition: MagicWordArray.php:187
MediaWiki\MediaWikiServices
MediaWikiServices is the service locator for the application scope of MediaWiki.
Definition: MediaWikiServices.php:193
MagicWordArray\$hash
array null $hash
Definition: MagicWordArray.php:40
MagicWordArray\$factory
MagicWordFactory $factory
Definition: MagicWordArray.php:37
$res
$res
Definition: testCompression.php:57
$base
$base
Definition: generateLocalAutoload.php:11
MagicWordArray\matchStartToEnd
matchStartToEnd( $text)
Match some text, without parameter capture Returns the magic word name, or false if there was no capt...
Definition: MagicWordArray.php:286
MagicWordArray\$baseRegex
string[] null $baseRegex
Definition: MagicWordArray.php:43
MWException
MediaWiki exception.
Definition: MWException.php:29
MediaWiki\Logger\LoggerFactory
PSR-3 logger instance factory.
Definition: LoggerFactory.php:45
MagicWordArray\$regex
string[] null $regex
Definition: MagicWordArray.php:46
$matches
$matches
Definition: NoLocalSettings.php:24
MagicWordArray\$names
string[] $names
Definition: MagicWordArray.php:34
MagicWordArray\add
add( $name)
Add a magic word by name.
Definition: MagicWordArray.php:62
MagicWordArray\getRegex
getRegex()
Get an unanchored regex that does not match parameters.
Definition: MagicWordArray.php:156
MagicWordArray\matchAndRemove
matchAndRemove(&$text)
Returns an associative array, ID => param value, for all items that match Removes the matched items f...
Definition: MagicWordArray.php:303
MagicWordArray\getVariableRegex
getVariableRegex()
Get a regex for matching variables with parameters.
Definition: MagicWordArray.php:177
MagicWordArray\parseMatch
parseMatch( $m)
Parse a match array from preg_match Returns array(magic word ID, parameter value) If there is no para...
Definition: MagicWordArray.php:235
MagicWordArray\getBaseRegex
getBaseRegex(bool $capture=true, string $delimiter='/')
Get the base regex.
Definition: MagicWordArray.php:108
MagicWordArray\matchVariableStartToEnd
matchVariableStartToEnd( $text)
Match some text, with parameter capture Returns an array with the magic word name in the first elemen...
Definition: MagicWordArray.php:267
MagicWordArray\addArray
addArray( $names)
Add a number of magic words by name.
Definition: MagicWordArray.php:72