MediaWiki  master
MagicWord.php
Go to the documentation of this file.
1 <?php
24 namespace MediaWiki\Parser;
25 
26 use Language;
28 use MWException;
29 use StringUtils;
30 
/**
 * This class encapsulates "magic words" such as "#redirect", NOTOC, etc.
 *
 * A magic word has an ID and a list of synonyms. The regular expressions used
 * for matching are derived from the synonyms by initRegex(), which the regex
 * getters call lazily the first time a regex is requested.
 */
class MagicWord {
	/** @var string|null ID of this magic word */
	public $mId;

	/** @var string[] Synonyms that are matched for this magic word */
	public $mSynonyms;

	/**
	 * Declared explicitly: the constructor assigns this property, and an
	 * undeclared (dynamic) property is deprecated as of PHP 8.2.
	 *
	 * @var bool Whether the synonyms are matched case-sensitively
	 */
	public $mCaseSensitive;

	/** @var string Regex matching any synonym anywhere in the text */
	private $mRegex = '';

	/** @var string Regex matching any synonym at the start of the text */
	private $mRegexStart = '';

	/** @var string Regex matching any synonym against the whole text */
	private $mRegexStartToEnd = '';

	/** @var string The synonym alternation without delimiters or modifiers */
	private $mBaseRegex = '';

	/** @var string Like $mRegex, with "$1" in a synonym turned into a capture group */
	private $mVariableRegex = '';

	/** @var string Like $mRegexStartToEnd, with "$1" turned into a capture group */
	private $mVariableStartToEndRegex = '';

	/** @var bool Whether the last replace()/substituteCallback() changed its input */
	private $mModified = false;

	/** @var bool Whether the last matchAndRemove()/matchStartAndRemove() found a match */
	private $mFound = false;

	/** @var Language Content language, used by load() and addToArray() */
	private $contLang;

	/**
	 * @param string|null $id ID of the magic word
	 * @param string[]|string $syn Synonyms for the magic word (cast to an array)
	 * @param bool $cs Whether the magic word is case-sensitive
	 * @param Language|null $contLang Content language; when omitted, the one
	 *  provided by MediaWikiServices is used
	 */
	public function __construct( $id = null, $syn = [], $cs = false, Language $contLang = null ) {
		$this->mId = $id;
		$this->mSynonyms = (array)$syn;
		$this->mCaseSensitive = $cs;
		// Fully qualified on purpose: this file lives in MediaWiki\Parser, so a
		// bare "MediaWikiServices" would only resolve with a matching "use" line.
		$this->contLang = $contLang
			?: \MediaWiki\MediaWikiServices::getInstance()->getContentLanguage();
	}

	/**
	 * Initialises this object with an ID.
	 *
	 * The content language is asked to fill in the synonyms for the ID.
	 *
	 * @param string $id
	 * @throws MWException If no synonyms were found for the ID
	 */
	public function load( $id ) {
		$this->mId = $id;
		$this->contLang->getMagic( $this );
		if ( !$this->mSynonyms ) {
			// Leave a placeholder synonym behind before reporting the failure
			$this->mSynonyms = [ 'brionmademeputthishere' ];
			throw new MWException( "Error: invalid magic word '$id'" );
		}
	}

	/**
	 * Preliminary initialisation: builds every regex from the current synonyms.
	 */
	public function initRegex() {
		// Sort the synonyms by length, descending, so that the longest synonym
		// matches in precedence to the shortest
		$synonyms = $this->mSynonyms;
		usort( $synonyms, [ $this, 'compareStringLength' ] );

		$escSyn = [];
		foreach ( $synonyms as $synonym ) {
			// Escape regex metacharacters, including the "/" delimiter
			$escSyn[] = preg_quote( $synonym, '/' );
		}
		$this->mBaseRegex = implode( '|', $escSyn );

		$case = $this->mCaseSensitive ? '' : 'iu';
		$this->mRegex = "/{$this->mBaseRegex}/{$case}";
		$this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
		$this->mRegexStartToEnd = "/^(?:{$this->mBaseRegex})$/{$case}";
		// preg_quote() turned "$1" into "\$1"; replace that with a lazy capture group
		$this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
		$this->mVariableStartToEndRegex = str_replace(
			"\\$1",
			"(.*?)",
			$this->mRegexStartToEnd
		);
	}

	/**
	 * Builds the regexes on first use.
	 *
	 * initRegex() always leaves $mRegex non-empty (it contains at least the
	 * delimiters), so an empty $mRegex means the regexes were never built.
	 */
	private function ensureRegexInitialized() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
	}

	/**
	 * A comparison function that orders strings by byte length, descending.
	 *
	 * @param string $s1
	 * @param string $s2
	 * @return int -1 if $s1 is longer, 1 if $s2 is longer, 0 if both have the same length
	 */
	public function compareStringLength( $s1, $s2 ) {
		return strlen( $s2 ) <=> strlen( $s1 );
	}

	/**
	 * Gets a regex representing matching the word.
	 *
	 * @return string
	 */
	public function getRegex() {
		$this->ensureRegexInitialized();
		return $this->mRegex;
	}

	/**
	 * Gets the regexp case modifier to use, i.e. the modifier string that
	 * initRegex() appends to the patterns.
	 *
	 * @return string '' when case-sensitive, 'iu' otherwise
	 */
	public function getRegexCase() {
		$this->ensureRegexInitialized();

		return $this->mCaseSensitive ? '' : 'iu';
	}

	/**
	 * Gets a regex matching the word, if it is at the string start.
	 *
	 * @return string
	 */
	public function getRegexStart() {
		$this->ensureRegexInitialized();
		return $this->mRegexStart;
	}

	/**
	 * Gets a regex matching the word from start to end of a string.
	 *
	 * @return string
	 */
	public function getRegexStartToEnd() {
		$this->ensureRegexInitialized();
		return $this->mRegexStartToEnd;
	}

	/**
	 * Gets the regex without the delimiting slashes and modifiers.
	 *
	 * @return string
	 */
	public function getBaseRegex() {
		$this->ensureRegexInitialized();
		return $this->mBaseRegex;
	}

	/**
	 * Returns true if the text contains the word.
	 *
	 * @param string $text
	 * @return bool
	 */
	public function match( $text ) {
		return (bool)preg_match( $this->getRegex(), $text );
	}

	/**
	 * Returns true if the text starts with the word.
	 *
	 * @param string $text
	 * @return bool
	 */
	public function matchStart( $text ) {
		return (bool)preg_match( $this->getRegexStart(), $text );
	}

	/**
	 * Returns true if the whole text matches the word.
	 *
	 * @param string $text
	 * @return bool
	 */
	public function matchStartToEnd( $text ) {
		return (bool)preg_match( $this->getRegexStartToEnd(), $text );
	}

	/**
	 * Returns null if there's no match, the value of $1 otherwise.
	 *
	 * The return value is the whole matched string if no non-empty variable
	 * part was captured, and the captured variable part ($1) if there is one.
	 *
	 * @param string $text
	 * @return string|null
	 */
	public function matchVariableStartToEnd( $text ) {
		$matches = [];
		// preg_match() yields 0 for "no match" and false on error; both mean null here
		if ( !preg_match( $this->getVariableStartToEndRegex(), $text, $matches ) ) {
			return null;
		}

		# multiple matched parts (variable match); some will be empty because of
		# synonyms. The variable will be the second non-empty one so remove any
		# blank elements and re-sort the indices.
		# See also T8526
		#
		# Compare against '' explicitly: a bare array_filter() would also drop
		# the string '0', which is a legitimate value (e.g. "0px").
		$parts = array_values( array_filter( $matches, static function ( $part ) {
			return $part !== '';
		} ) );

		if ( count( $parts ) > 1 ) {
			return $parts[1];
		}
		// Only the whole match is left; it is missing too when the match was empty
		return $parts[0] ?? '';
	}

	/**
	 * Returns true if the text matches the word, and alters the input string,
	 * removing all instances of the word.
	 *
	 * @param string &$text
	 * @return bool
	 */
	public function matchAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback(
			$this->getRegex(),
			[ $this, 'pregRemoveAndRecord' ],
			$text
		);

		return $this->mFound;
	}

	/**
	 * Like matchAndRemove(), but only removes the word at the start of the text.
	 *
	 * @param string &$text
	 * @return bool
	 */
	public function matchStartAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback(
			$this->getRegexStart(),
			[ $this, 'pregRemoveAndRecord' ],
			$text
		);

		return $this->mFound;
	}

	/**
	 * Used in matchAndRemove(): records that a match happened and removes it.
	 *
	 * @return string Always the empty string
	 */
	public function pregRemoveAndRecord() {
		$this->mFound = true;
		return '';
	}

	/**
	 * Replaces the word with something else.
	 *
	 * @param string $replacement Literal replacement text (escaped before use)
	 * @param string $subject
	 * @param int $limit Maximum number of replacements, -1 for no limit
	 * @return string
	 */
	public function replace( $replacement, $subject, $limit = -1 ) {
		$res = preg_replace(
			$this->getRegex(),
			StringUtils::escapeRegexReplacement( $replacement ),
			$subject,
			$limit
		);
		$this->mModified = $res !== $subject;
		return $res;
	}

	/**
	 * Variable handling: {{SUBST:xxx}} style words.
	 * Calls back a function to determine what to replace each match with.
	 *
	 * @param string $text
	 * @param callable $callback Passed to preg_replace_callback()
	 * @return string
	 */
	public function substituteCallback( $text, $callback ) {
		$res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
		$this->mModified = $res !== $text;
		return $res;
	}

	/**
	 * Matches the word, where $1 is a wildcard.
	 *
	 * @return string
	 */
	public function getVariableRegex() {
		$this->ensureRegexInitialized();
		return $this->mVariableRegex;
	}

	/**
	 * Matches the entire string, where $1 is a wildcard.
	 *
	 * @return string
	 */
	public function getVariableStartToEndRegex() {
		$this->ensureRegexInitialized();
		return $this->mVariableStartToEndRegex;
	}

	/**
	 * Accesses the synonym list directly.
	 *
	 * @param int $i Index into the synonym list; not bounds-checked
	 * @return string
	 */
	public function getSynonym( $i ) {
		return $this->mSynonyms[$i];
	}

	/**
	 * @return string[] All synonyms of this magic word
	 */
	public function getSynonyms() {
		return $this->mSynonyms;
	}

	/**
	 * Returns true if the last call to replace() or substituteCallback()
	 * returned a modified text, otherwise false.
	 *
	 * @return bool
	 */
	public function getWasModified() {
		return $this->mModified;
	}

	/**
	 * Adds all the synonyms of this MagicWord to an array, to allow quick
	 * lookup in a list of magic words.
	 *
	 * @param array &$array Receives one entry per synonym, keyed by the
	 *  synonym lower-cased by the content language
	 * @param mixed $value Value stored under every key
	 */
	public function addToArray( &$array, $value ) {
		foreach ( $this->mSynonyms as $syn ) {
			$array[$this->contLang->lc( $syn )] = $value;
		}
	}

	/**
	 * @return bool Whether matching is case-sensitive
	 */
	public function isCaseSensitive() {
		return $this->mCaseSensitive;
	}

	/**
	 * @return string|null The ID of this magic word
	 */
	public function getId() {
		return $this->mId;
	}
}
471 
// Also expose this class under the un-namespaced name 'MagicWord'
// (presumably kept so callers using the old global class name keep working; confirm).
475 class_alias( MagicWord::class, 'MagicWord' );
$matches
Base class for language-specific code.
Definition: Language.php:61
MediaWiki exception.
Definition: MWException.php:33
Service locator for MediaWiki core services.
static getInstance()
Returns the global default instance of the top level service locator.
This class encapsulates "magic words" such as "#redirect", NOTOC, etc.
Definition: MagicWord.php:65
bool $mCaseSensitive
Definition: MagicWord.php:75
getRegexStart()
Gets a regex matching the word, if it is at the string start.
Definition: MagicWord.php:215
initRegex()
Preliminary initialisation.
Definition: MagicWord.php:142
addToArray(&$array, $value)
Adds all the synonyms of this MagicWord to an array, to allow quick lookup in a list of magic words.
Definition: MagicWord.php:451
__construct( $id=null, $syn=[], $cs=false, Language $contLang=null)
#-
Definition: MagicWord.php:116
matchStart( $text)
Returns true if the text starts with the word.
Definition: MagicWord.php:265
string null $mId
#-
Definition: MagicWord.php:69
getVariableStartToEndRegex()
Matches the entire string, where $1 is a wildcard.
Definition: MagicWord.php:409
compareStringLength( $s1, $s2)
A comparison function that returns -1, 0 or 1 depending on whether the first string is longer,...
Definition: MagicWord.php:177
getVariableRegex()
Matches the word, where $1 is a wildcard.
Definition: MagicWord.php:397
getRegexStartToEnd()
Gets a regex matching the word from start to end of a string.
Definition: MagicWord.php:228
load( $id)
Initialises this object with an ID.
Definition: MagicWord.php:129
matchStartAndRemove(&$text)
Definition: MagicWord.php:335
getWasModified()
Returns true if the last call to replace() or substituteCallback() returned a modified text,...
Definition: MagicWord.php:440
match( $text)
Returns true if the text contains the word.
Definition: MagicWord.php:254
matchStartToEnd( $text)
Returns true if the text matched the word.
Definition: MagicWord.php:277
matchAndRemove(&$text)
Returns true if the text matches the word, and alters the input string, removing all instances of the...
Definition: MagicWord.php:320
getRegex()
Gets a regex representing matching the word.
Definition: MagicWord.php:188
string[] $mSynonyms
Definition: MagicWord.php:72
matchVariableStartToEnd( $text)
Returns NULL if there's no match, the value of $1 otherwise. The return code is the matched string,...
Definition: MagicWord.php:291
getRegexCase()
Gets the regexp case modifier to use, i.e.
Definition: MagicWord.php:202
substituteCallback( $text, $callback)
Variable handling: {{SUBST:xxx}} style words Calls back a function to determine what to replace xxx w...
Definition: MagicWord.php:386
pregRemoveAndRecord()
Used in matchAndRemove()
Definition: MagicWord.php:351
getSynonym( $i)
Accesses the synonym list directly.
Definition: MagicWord.php:423
replace( $replacement, $subject, $limit=-1)
Replaces the word with something else.
Definition: MagicWord.php:365
getBaseRegex()
regex without the slashes and what not
Definition: MagicWord.php:240
A collection of static methods to play with strings.
Definition: StringUtils.php:29
static escapeRegexReplacement( $string)
Escape a string to make it suitable for inclusion in a preg_replace() replacement parameter.