MediaWiki  1.34.0
MagicWordArray.php
Go to the documentation of this file.
1 <?php
2 
27 
/** @var string[] Names of the magic words held by this array; appended to by add() and addArray() */
34  public $names = [];
35 
/** @var MagicWordFactory Used to resolve a name to its magic word and to reach the content language */
37  private $factory;
38 
/** @var array|null Cache filled by getHash(); reset to null whenever names are added */
40  private $hash;
41 
/** @var string[]|null Cache filled by getBaseRegex(); reset to null whenever names are added */
42  private $baseRegex;
43 
/** @var string[]|null Cache filled by getRegex(); reset to null whenever names are added */
44  private $regex;
45 
/**
 * @param string[] $names Names of the magic words the array starts with
 * @param MagicWordFactory|null $factory Factory used to resolve the names;
 *   when omitted, the factory is fetched from MediaWikiServices
 */
public function __construct( $names = [], MagicWordFactory $factory = null ) {
    $this->names = $names;

    if ( !$factory ) {
        // No factory injected: fall back to the globally registered service.
        $factory = MediaWikiServices::getInstance()->getMagicWordFactory();
    }
    $this->factory = $factory;
}
54 
/**
 * Add a magic word by name.
 *
 * Every cached lookup structure is dropped so it is rebuilt on next use.
 *
 * @param string $name
 */
public function add( $name ) {
    $this->names[] = $name;

    // The caches were derived from the old name list; invalidate all of them.
    $this->regex = null;
    $this->baseRegex = null;
    $this->hash = null;
}
64 
/**
 * Add a number of magic words by name.
 *
 * The keys of $names are discarded; only its values are appended.
 * Every cached lookup structure is dropped so it is rebuilt on next use.
 *
 * @param string[] $names
 */
public function addArray( $names ) {
    $additional = array_values( $names );
    $this->names = array_merge( $this->names, $additional );

    // The caches were derived from the old name list; invalidate all of them.
    $this->regex = null;
    $this->baseRegex = null;
    $this->hash = null;
}
74 
/**
 * Get a 2-d hashtable for this array.
 *
 * Index 0 maps lower-cased synonyms of case-insensitive magic words to the
 * magic word name; index 1 maps the unmodified synonyms of case-sensitive
 * magic words to the magic word name. The result is cached until names are
 * added.
 *
 * @return array
 */
public function getHash() {
    if ( $this->hash !== null ) {
        return $this->hash;
    }

    $this->hash = [ 0 => [], 1 => [] ];
    foreach ( $this->names as $wordName ) {
        $word = $this->factory->get( $wordName );
        $bucket = (int)$word->isCaseSensitive();
        foreach ( $word->getSynonyms() as $synonym ) {
            // Case-insensitive words are stored lower-cased so lookups can fold the input.
            $key = $bucket
                ? $synonym
                : $this->factory->getContentLanguage()->lc( $synonym );
            $this->hash[$bucket][$key] = $wordName;
        }
    }

    return $this->hash;
}
95 
/**
 * Get the base regex.
 *
 * Builds one alternation of named groups per case mode: index 0 for
 * case-insensitive magic words, index 1 for case-sensitive ones. Each synonym
 * becomes a group named "<index letters>_<magic word name>" whose body is the
 * preg_quote()d synonym. An entry is '' when no word falls in that mode.
 *
 * The result is cached only once it has been built completely, so a failed
 * build cannot leave a truncated regex behind for later calls.
 *
 * @return string[]
 * @throws MWException If two synonyms would produce the same group name
 */
public function getBaseRegex() {
    if ( $this->baseRegex !== null ) {
        return $this->baseRegex;
    }

    $groupsByCase = [ 0 => [], 1 => [] ];
    $seenGroupNames = [];
    foreach ( $this->names as $name ) {
        $magic = $this->factory->get( $name );
        $case = intval( $magic->isCaseSensitive() );
        foreach ( $magic->getSynonyms() as $i => $syn ) {
            // Group name must start with a non-digit in PCRE 8.34+
            $it = strtr( (string)$i, '0123456789', 'abcdefghij' );
            $groupName = $it . '_' . $name;
            // look for same group names to avoid same named subpatterns in the regex
            if ( isset( $seenGroupNames[$groupName] ) ) {
                throw new MWException(
                    __METHOD__ . ': duplicate internal name in magic word array: ' . $name
                );
            }
            $seenGroupNames[$groupName] = true;
            $groupsByCase[$case][] = '(?P<' . $groupName . '>' . preg_quote( $syn, '/' ) . ')';
        }
    }

    // Publish to the cache only after every group was accepted.
    $this->baseRegex = [
        0 => implode( '|', $groupsByCase[0] ),
        1 => implode( '|', $groupsByCase[1] ),
    ];
    return $this->baseRegex;
}
129 
/**
 * Get an unanchored regex that does not match parameters.
 *
 * Index 0 holds the case-insensitive pattern (modifiers "iuS"), index 1 the
 * case-sensitive one (modifier "S"). An entry is '' when the matching base
 * regex is empty. The result is cached until names are added.
 *
 * @return string[]
 */
public function getRegex() {
    if ( $this->regex === null ) {
        list( $insensitive, $sensitive ) = $this->getBaseRegex();
        $this->regex = [
            $insensitive !== '' ? "/{$insensitive}/iuS" : '',
            $sensitive !== '' ? "/{$sensitive}/S" : '',
        ];
    }
    return $this->regex;
}
147 
/**
 * Get a regex for matching variables with parameters.
 *
 * Takes the patterns from getRegex() and turns every quoted "$1" placeholder
 * into a lazy capture group.
 *
 * @return string[]
 */
public function getVariableRegex() {
    $patterns = $this->getRegex();
    return str_replace( "\\$1", "(.*?)", $patterns );
}
156 
/**
 * Get a regex anchored to the start of the string that does not match parameters.
 *
 * Index 0 holds the case-insensitive pattern, index 1 the case-sensitive
 * one. An entry is '' when the matching base regex is empty.
 *
 * @return string[]
 */
public function getRegexStart() {
    $base = $this->getBaseRegex();
    // Pattern modifiers per case mode: 0 = case-insensitive, 1 = case-sensitive.
    $modifiers = [ 0 => 'iuS', 1 => 'S' ];

    $anchored = [ '', '' ];
    foreach ( $modifiers as $case => $flags ) {
        if ( $base[$case] === '' ) {
            continue;
        }
        $anchored[$case] = '/^(?:' . $base[$case] . ')/' . $flags;
    }
    return $anchored;
}
173 
/**
 * Get an anchored regex for matching variables with parameters.
 *
 * The patterns are anchored at both ends, and every quoted "$1" placeholder
 * is turned into a lazy capture group. Index 0 holds the case-insensitive
 * pattern, index 1 the case-sensitive one; an entry is '' when the matching
 * base regex is empty.
 *
 * @return string[]
 */
public function getVariableStartToEndRegex() {
    $base = $this->getBaseRegex();
    // Pattern modifiers per case mode: 0 = case-insensitive, 1 = case-sensitive.
    $modifiers = [ 0 => 'iuS', 1 => 'S' ];

    $anchored = [ '', '' ];
    foreach ( $modifiers as $case => $flags ) {
        if ( $base[$case] === '' ) {
            continue;
        }
        $pattern = '/^(?:' . $base[$case] . ')$/' . $flags;
        $anchored[$case] = str_replace( "\\$1", "(.*?)", $pattern );
    }
    return $anchored;
}
190 
/**
 * Get the names of the magic words currently held by this array.
 *
 * @return string[]
 */
public function getNames() {
    $currentNames = $this->names;
    return $currentNames;
}
198 
/**
 * Parse a match array from preg_match.
 *
 * Finds the first entry, other than the whole-match entry at key 0, whose
 * value is not the empty string. Its key is split at the first underscore
 * and the part after it is the magic word name. The parameter value is the
 * entry two positions further on, or false when the array ends before that.
 *
 * @param array $m
 * @return array [ magic word name, parameter value ]
 * @throws MWException If the key of the first non-empty entry contains no
 *   underscore, or if no non-empty entry exists
 */
public function parseMatch( $m ) {
    $keys = array_keys( $m );
    $values = array_values( $m );

    foreach ( $keys as $pos => $key ) {
        if ( $key === 0 || $values[$pos] === '' ) {
            continue;
        }

        $parts = explode( '_', $key, 2 );
        if ( count( $parts ) !== 2 ) {
            // This shouldn't happen
            throw new MWException( __METHOD__ . ': bad parameter name' );
        }
        $magicName = $parts[1];

        // Skip the entry right after the matched one and take the one behind it.
        $paramPos = $pos + 2;
        $paramValue = array_key_exists( $paramPos, $values ) ? $values[$paramPos] : false;

        return [ $magicName, $paramValue ];
    }

    // This shouldn't happen either
    throw new MWException( __METHOD__ . ': parameter not found' );
}
230 
/**
 * Match some text, with parameter capture.
 *
 * Tries each non-empty pattern from getVariableStartToEndRegex() in turn and
 * hands the first successful match to parseMatch().
 *
 * @param string $text
 * @return array [ magic word name, parameter value ], or [ false, false ]
 *   when no pattern matches
 */
public function matchVariableStartToEnd( $text ) {
    foreach ( $this->getVariableStartToEndRegex() as $pattern ) {
        if ( $pattern === '' ) {
            continue;
        }
        $captures = [];
        if ( preg_match( $pattern, $text, $captures ) ) {
            return $this->parseMatch( $captures );
        }
    }
    return [ false, false ];
}
253 
/**
 * Match some text, without parameter capture.
 *
 * The text is first looked up unchanged among the case-sensitive synonyms,
 * then lower-cased with the content language among the case-insensitive ones.
 *
 * @param string $text
 * @return string|bool The magic word name, or false if nothing matched
 */
public function matchStartToEnd( $text ) {
    $hash = $this->getHash();

    if ( !isset( $hash[1][$text] ) ) {
        $folded = $this->factory->getContentLanguage()->lc( $text );
        return isset( $hash[0][$folded] ) ? $hash[0][$folded] : false;
    }

    return $hash[1][$text];
}
270 
/**
 * Returns an associative array, ID => param value, for all items that match.
 * Removes the matched items from the input string (passed by reference).
 *
 * Each non-empty pattern from getRegex() is applied in turn. A PCRE failure
 * in either step is logged to the 'parser' channel. If the removal step
 * fails, $text is left exactly as it was instead of being replaced by null.
 *
 * @param string &$text
 * @return array
 */
public function matchAndRemove( &$text ) {
    $found = [];
    foreach ( $this->getRegex() as $regex ) {
        if ( $regex === '' ) {
            continue;
        }

        $matches = [];
        $matchCount = preg_match_all( $regex, $text, $matches, PREG_SET_ORDER );
        if ( $matchCount === false ) {
            LoggerFactory::getInstance( 'parser' )->warning( 'preg_match_all returned false', [
                'code' => preg_last_error(),
                'regex' => $regex,
                'text' => $text,
            ] );
        } elseif ( $matchCount ) {
            foreach ( $matches as $m ) {
                list( $name, $param ) = $this->parseMatch( $m );
                $found[$name] = $param;
            }
        }

        $stripped = preg_replace( $regex, '', $text );
        if ( $stripped === null ) {
            LoggerFactory::getInstance( 'parser' )->warning( 'preg_replace returned null', [
                'code' => preg_last_error(),
                'regex' => $regex,
                'text' => $text,
            ] );
            // preg_replace() yields null on error; assigning that would wipe
            // out the caller's text, so keep the previous value.
            continue;
        }
        $text = $stripped;
    }
    return $found;
}
312 
/**
 * Return the ID of the magic word at the start of $text, and remove
 * the prefix from $text.
 *
 * Tries each non-empty pattern from getRegexStart() in turn and stops at the
 * first one that matches.
 *
 * @param string &$text
 * @return string|bool The magic word ID, or false when nothing matches, in
 *   which case $text is not modified
 */
public function matchStartAndRemove( &$text ) {
    foreach ( $this->getRegexStart() as $pattern ) {
        if ( $pattern === '' || !preg_match( $pattern, $text, $m ) ) {
            continue;
        }

        $id = $this->parseMatch( $m )[0];

        // When the match covers the whole text, set '' explicitly rather
        // than relying on substr() at the very end of the string.
        $prefixLength = strlen( $m[0] );
        $text = $prefixLength >= strlen( $text )
            ? ''
            : substr( $text, $prefixLength );

        return $id;
    }
    return false;
}
341 }
MagicWordArray\__construct
__construct( $names=[], MagicWordFactory $factory=null)
Definition: MagicWordArray.php:50
MagicWordArray
Class for handling an array of magic words.
Definition: MagicWordArray.php:32
MagicWordArray\getVariableStartToEndRegex
getVariableStartToEndRegex()
Get an anchored regex for matching variables with parameters.
Definition: MagicWordArray.php:179
MagicWordArray\getNames
getNames()
Definition: MagicWordArray.php:195
MagicWordArray\matchStartAndRemove
matchStartAndRemove(&$text)
Return the ID of the magic word at the start of $text, and remove the prefix from $text.
Definition: MagicWordArray.php:323
MagicWordFactory
A factory that stores information about MagicWords, and creates them on demand with caching.
Definition: MagicWordFactory.php:34
MagicWordArray\getHash
getHash()
Get a 2-d hashtable for this array.
Definition: MagicWordArray.php:79
MagicWordArray\getRegexStart
getRegexStart()
Get a regex anchored to the start of the string that does not match parameters.
Definition: MagicWordArray.php:162
MediaWiki\MediaWikiServices
MediaWikiServices is the service locator for the application scope of MediaWiki.
Definition: MediaWikiServices.php:117
MagicWordArray\$factory
MagicWordFactory $factory
Definition: MagicWordArray.php:37
MagicWordArray\$baseRegex
$baseRegex
Definition: MagicWordArray.php:42
$res
$res
Definition: testCompression.php:52
$base
$base
Definition: generateLocalAutoload.php:11
MagicWordArray\matchStartToEnd
matchStartToEnd( $text)
Match some text, without parameter capture. Returns the magic word name, or false if there was no match.
Definition: MagicWordArray.php:262
MWException
MediaWiki exception.
Definition: MWException.php:26
MediaWiki\Logger\LoggerFactory
PSR-3 logger instance factory.
Definition: LoggerFactory.php:45
$matches
$matches
Definition: NoLocalSettings.php:24
MagicWordArray\$names
string[] $names
Definition: MagicWordArray.php:34
MagicWordArray\$hash
array $hash
Definition: MagicWordArray.php:40
MagicWordArray\add
add( $name)
Add a magic word by name.
Definition: MagicWordArray.php:60
MagicWordArray\getBaseRegex
getBaseRegex()
Get the base regex.
Definition: MagicWordArray.php:100
MagicWordArray\getRegex
getRegex()
Get an unanchored regex that does not match parameters.
Definition: MagicWordArray.php:134
MagicWordArray\matchAndRemove
matchAndRemove(&$text)
Returns an associative array, ID => param value, for all items that match. Removes the matched items from the input string (passed by reference).
Definition: MagicWordArray.php:279
MagicWordArray\getVariableRegex
getVariableRegex()
Get a regex for matching variables with parameters.
Definition: MagicWordArray.php:153
MagicWordArray\parseMatch
parseMatch( $m)
Parse a match array from preg_match. Returns array(magic word ID, parameter value). If there is no parameter value, that element will be false.
Definition: MagicWordArray.php:209
MagicWordArray\$regex
$regex
Definition: MagicWordArray.php:44
MagicWordArray\matchVariableStartToEnd
matchVariableStartToEnd( $text)
Match some text, with parameter capture. Returns an array with the magic word name in the first element and the parameter in the second element. Both elements are false if there was no match.
Definition: MagicWordArray.php:241
MagicWordArray\addArray
addArray( $names)
Add a number of magic words by name.
Definition: MagicWordArray.php:70