MediaWiki  master
MagicWordArray.php
Go to the documentation of this file.
1 <?php
2 
27 
/**
 * Names of the magic words held by this array, in insertion order.
 * @var string[]
 */
public $names = [];

/**
 * Factory used to turn a magic word name into a magic word object.
 * @var MagicWordFactory
 */
private $factory;

/**
 * Lazily built lookup table returned by getHash(); null while not built
 * (initially, and again after add()/addArray() change the name list).
 * @var array|null
 */
private $hash = null;

/**
 * Lazily built regex fragments returned by getBaseRegex(); null while not built.
 * @var string[]|null
 */
private $baseRegex = null;

/**
 * Lazily built complete regexes returned by getRegex(); null while not built.
 * @var string[]|null
 */
private $regex = null;
46 
/**
 * Create the array from an initial list of magic word names.
 *
 * @param string[] $names Magic word names to start with
 * @param MagicWordFactory|null $factory Factory used to resolve names; when
 *   omitted, the factory provided by MediaWikiServices is used
 */
public function __construct( $names = [], MagicWordFactory $factory = null ) {
    if ( !$factory ) {
        $factory = MediaWikiServices::getInstance()->getMagicWordFactory();
    }
    $this->factory = $factory;
    $this->names = $names;
}
55 
/**
 * Append a single magic word, identified by name, to this array.
 *
 * @param string $name
 */
public function add( $name ) {
    $this->names[] = $name;

    // The lazily built lookup structures no longer reflect the name list.
    $this->regex = null;
    $this->baseRegex = null;
    $this->hash = null;
}
65 
/**
 * Append several magic words, identified by name, to this array.
 *
 * @param string[] $names Names to add; the keys of this array are ignored
 */
public function addArray( $names ) {
    $additions = array_values( $names );
    $this->names = array_merge( $this->names, $additions );

    // The lazily built lookup structures no longer reflect the name list.
    $this->regex = null;
    $this->baseRegex = null;
    $this->hash = null;
}
75 
/**
 * Get a two-level lookup table for this array.
 *
 * The first level is indexed by case sensitivity (0 = case-insensitive,
 * 1 = case-sensitive); the second level maps a synonym to the magic word
 * name. Synonyms of case-insensitive words are stored lower-cased using the
 * factory's content language. The table is built on first use and cached.
 *
 * @return array
 */
public function getHash() {
    if ( $this->hash !== null ) {
        return $this->hash;
    }

    $this->hash = [ 0 => [], 1 => [] ];
    foreach ( $this->names as $name ) {
        $word = $this->factory->get( $name );
        $bucket = intval( $word->isCaseSensitive() );
        foreach ( $word->getSynonyms() as $synonym ) {
            $lookupKey = $bucket
                ? $synonym
                : $this->factory->getContentLanguage()->lc( $synonym );
            $this->hash[$bucket][$lookupKey] = $name;
        }
    }

    return $this->hash;
}
96 
/**
 * Get the base regex fragments: alternations of named groups, one group per
 * synonym, without delimiters or modifiers.
 *
 * Index 0 holds the fragment for case-insensitive magic words, index 1 the
 * fragment for case-sensitive ones; a fragment is '' when there are no words
 * of that kind. Each group is named "<synonym index>_<magic word name>", with
 * the digits of the index mapped to letters.
 *
 * The result is cached only after it has been built completely, so a failed
 * build (duplicate name) is reported again on the next call instead of
 * leaving a truncated regex in the cache.
 *
 * @return string[]
 * @throws MWException If two synonyms would produce the same group name
 */
public function getBaseRegex() : array {
    if ( $this->baseRegex !== null ) {
        return $this->baseRegex;
    }

    $fragments = [ 0 => '', 1 => '' ];
    $allGroups = [];
    foreach ( $this->names as $name ) {
        $magic = $this->factory->get( $name );
        $case = intval( $magic->isCaseSensitive() );
        foreach ( $magic->getSynonyms() as $i => $syn ) {
            // Group name must start with a non-digit in PCRE 8.34+
            $it = strtr( (string)$i, '0123456789', 'abcdefghij' );
            $groupName = $it . '_' . $name;
            // look for same group names to avoid same named subpatterns in the regex
            if ( isset( $allGroups[$groupName] ) ) {
                throw new MWException(
                    __METHOD__ . ': duplicate internal name in magic word array: ' . $name
                );
            }
            $allGroups[$groupName] = true;

            $group = '(?P<' . $groupName . '>' . preg_quote( $syn, '/' ) . ')';
            if ( $fragments[$case] === '' ) {
                $fragments[$case] = $group;
            } else {
                $fragments[$case] .= '|' . $group;
            }
        }
    }

    // Publish to the cache only once every name has been processed.
    $this->baseRegex = $fragments;
    return $this->baseRegex;
}
130 
/**
 * Get unanchored regexes that do not match parameters.
 *
 * Index 0 is the case-insensitive regex (modifiers "iuS"), index 1 the
 * case-sensitive one (modifier "S"). An entry is '' when the corresponding
 * base regex fragment is empty. The result is built on first use and cached.
 *
 * @return string[]
 */
public function getRegex() {
    if ( $this->regex !== null ) {
        return $this->regex;
    }

    // Work from the value returned by getBaseRegex(), as getRegexStart() and
    // getVariableStartToEndRegex() do, rather than reading the cache property.
    $base = $this->getBaseRegex();
    $regex = [ '', '' ];
    if ( $base[0] !== '' ) {
        $regex[0] = "/{$base[0]}/iuS";
    }
    if ( $base[1] !== '' ) {
        $regex[1] = "/{$base[1]}/S";
    }

    $this->regex = $regex;
    return $this->regex;
}
149 
/**
 * Get regexes for matching variables with parameters.
 *
 * These are the regexes from getRegex() with every quoted "$1" placeholder
 * replaced by a non-greedy capture group.
 *
 * @return string[]
 */
public function getVariableRegex() {
    $plain = $this->getRegex();
    return str_replace( "\\$1", "(.*?)", $plain );
}
158 
/**
 * Get regexes anchored to the start of the string that do not match parameters.
 *
 * Index 0 is the case-insensitive regex, index 1 the case-sensitive one; an
 * entry is '' when the corresponding base regex fragment is empty.
 *
 * @return string[]
 */
public function getRegexStart() {
    $base = $this->getBaseRegex();
    return [
        $base[0] === '' ? '' : "/^(?:{$base[0]})/iuS",
        $base[1] === '' ? '' : "/^(?:{$base[1]})/S",
    ];
}
175 
/**
 * Get regexes anchored at both ends for matching variables with parameters.
 *
 * Index 0 is the case-insensitive regex, index 1 the case-sensitive one; an
 * entry is '' when the corresponding base regex fragment is empty. Every
 * quoted "$1" placeholder is replaced by a non-greedy capture group.
 *
 * @return string[]
 */
public function getVariableStartToEndRegex() {
    $base = $this->getBaseRegex();

    $anchored = [ '', '' ];
    if ( $base[0] !== '' ) {
        $anchored[0] = "/^(?:{$base[0]})$/iuS";
    }
    if ( $base[1] !== '' ) {
        $anchored[1] = "/^(?:{$base[1]})$/S";
    }

    // str_replace() processes each element; empty entries stay empty.
    return str_replace( "\\$1", "(.*?)", $anchored );
}
192 
/**
 * Get the magic word names currently held by this array.
 *
 * @return string[]
 */
public function getNames() {
    return $this->names;
}
200 
/**
 * Parse a match array produced by one of this class's regexes.
 *
 * Scans the entries in order, ignoring key 0 (the whole match) and entries
 * whose value is the empty string. The first remaining entry is expected to
 * be a named group of the form "<synonym index>_<magic word name>". The
 * parameter value is taken from the entry two positions further on (past the
 * entry that immediately follows the named one); it is false when there is
 * no such entry.
 *
 * @param array $m Match array
 * @return array [ magic word name, parameter value or false ]
 * @throws MWException If the first usable key has no "_" separator, or no
 *   usable entry exists
 */
public function parseMatch( $m ) {
    $keys = array_keys( $m );
    $values = array_values( $m );
    $total = count( $keys );

    for ( $pos = 0; $pos < $total; $pos++ ) {
        $key = $keys[$pos];
        if ( $key === 0 || $values[$pos] === '' ) {
            continue;
        }

        $parts = explode( '_', $key, 2 );
        if ( count( $parts ) != 2 ) {
            // This shouldn't happen
            throw new MWException( __METHOD__ . ': bad parameter name' );
        }

        // $parts[0] is the synonym index, which is not needed here.
        $magicName = $parts[1];
        $paramPos = $pos + 2;
        $paramValue = array_key_exists( $paramPos, $values ) ? $values[$paramPos] : false;
        return [ $magicName, $paramValue ];
    }

    // This shouldn't happen either
    throw new MWException( __METHOD__ . ': parameter not found' );
}
232 
/**
 * Match the whole of $text against the magic words, capturing a parameter.
 *
 * The regexes from getVariableStartToEndRegex() are tried in order and the
 * first one that matches decides the result.
 *
 * @param string $text
 * @return array The result of parseMatch() for the first matching regex, or
 *   [ false, false ] when none matches
 */
public function matchVariableStartToEnd( $text ) {
    foreach ( $this->getVariableStartToEndRegex() as $pattern ) {
        if ( $pattern === '' ) {
            continue;
        }
        $captures = [];
        if ( preg_match( $pattern, $text, $captures ) ) {
            return $this->parseMatch( $captures );
        }
    }
    return [ false, false ];
}
255 
/**
 * Match the whole of $text against the magic words, without parameter capture.
 *
 * An exact hit among the case-sensitive synonyms wins; otherwise $text is
 * lower-cased with the factory's content language and looked up among the
 * case-insensitive synonyms.
 *
 * @param string $text
 * @return string|bool The magic word name, or false when nothing matches
 */
public function matchStartToEnd( $text ) {
    $hash = $this->getHash();
    if ( isset( $hash[1][$text] ) ) {
        return $hash[1][$text];
    }

    $folded = $this->factory->getContentLanguage()->lc( $text );
    if ( isset( $hash[0][$folded] ) ) {
        return $hash[0][$folded];
    }
    return false;
}
272 
/**
 * Find every magic word occurring in $text and strip the matches from it.
 *
 * Each non-empty regex from getRegex() is applied in turn. PCRE failures are
 * logged to the 'parser' channel; when the removal step fails, $text is left
 * as it was before that step instead of being overwritten with null.
 *
 * @param string &$text Text to search; matched items are removed from it
 * @return array Associative array, magic word name => parameter value, for
 *   all items that matched
 */
public function matchAndRemove( &$text ) {
    $found = [];
    foreach ( $this->getRegex() as $regex ) {
        if ( $regex === '' ) {
            continue;
        }

        $matches = [];
        $res = preg_match_all( $regex, $text, $matches, PREG_SET_ORDER );
        if ( $res === false ) {
            LoggerFactory::getInstance( 'parser' )->warning( 'preg_match_all returned false', [
                'code' => preg_last_error(),
                'regex' => $regex,
                'text' => $text,
            ] );
        } elseif ( $res ) {
            foreach ( $matches as $m ) {
                list( $name, $param ) = $this->parseMatch( $m );
                $found[$name] = $param;
            }
        }

        $res = preg_replace( $regex, '', $text );
        if ( $res === null ) {
            LoggerFactory::getInstance( 'parser' )->warning( 'preg_replace returned null', [
                'code' => preg_last_error(),
                'regex' => $regex,
                'text' => $text,
            ] );
            // preg_replace() signals failure with null; keep the caller's
            // text rather than replacing it with null.
            continue;
        }
        $text = $res;
    }
    return $found;
}
314 
/**
 * Identify the magic word at the start of $text and strip it from $text.
 *
 * The regexes from getRegexStart() are tried in order; the first one that
 * matches decides the result. $text is not modified when nothing matches.
 *
 * @param string &$text Text to inspect; the matched prefix is removed from it
 * @return string|bool The magic word name, or false when nothing matches
 */
public function matchStartAndRemove( &$text ) {
    foreach ( $this->getRegexStart() as $pattern ) {
        if ( $pattern === '' ) {
            continue;
        }
        if ( !preg_match( $pattern, $text, $m ) ) {
            continue;
        }

        list( $id, ) = $this->parseMatch( $m );
        $prefixLength = strlen( $m[0] );
        // When the match covers the whole text, set it to '' directly
        // instead of calling substr() at the end of the string.
        $text = $prefixLength >= strlen( $text ) ? '' : substr( $text, $prefixLength );
        return $id;
    }
    return false;
}
343 }
getVariableRegex()
Get a regex for matching variables with parameters.
matchAndRemove(&$text)
Returns an associative array, ID => param value, for all items that match. Removes the matched items from the input string, which is passed by reference.
getRegexStart()
Get a regex anchored to the start of the string that does not match parameters.
string [] null $baseRegex
matchStartAndRemove(&$text)
Returns the ID of the magic word at the start of $text and removes that prefix from $text. Returns false, leaving $text unmodified, if no magic word matches.
getBaseRegex()
Get the base regex.
getHash()
Get a 2-d hashtable for this array.
string [] $names
add( $name)
Add a magic word by name.
__construct( $names=[], MagicWordFactory $factory=null)
Class for handling an array of magic words.
matchStartToEnd( $text)
Match some text, without parameter capture. Returns the magic word name, or false if there was no match.
matchVariableStartToEnd( $text)
Match some text, with parameter capture. Returns an array with the magic word name in the first element and the parameter in the second element; both elements are false if there was no match.
addArray( $names)
Add a number of magic words by name.
MagicWordFactory $factory
getRegex()
Get an unanchored regex that does not match parameters.
getVariableStartToEndRegex()
Get an anchored regex for matching variables with parameters.
parseMatch( $m)
Parse a match array from preg_match. Returns array(magic word ID, parameter value). If there is no parameter value, that element will be false.
$matches