MediaWiki REL1_39
MagicWordArray.php
Go to the documentation of this file.
1<?php
2
26
/** @var string[] Magic word names held by this array (presumably IDs; each one is passed to the factory's get()) */
33 public $names = [];
34
/** @var MagicWordFactory Factory used to resolve a name into a magic word object */
36 private $factory;
37
/** @var array|null Cached result of getHash(); reset to null by add() and addArray() */
39 private $hash;
40
/** @var array|null Cached result of getBaseRegex() for its default arguments; null until built */
42 private $baseRegex;
43
/** @var array|null Cached result of getRegex(); null until built */
45 private $regex;
46
/**
 * Create an array of magic words.
 *
 * @param string[] $names Initial magic word names, stored as given. Each entry is later
 *   handed to the factory's get(), so they are presumably magic word IDs.
 * @param MagicWordFactory|null $factory Factory used to look the words up. When null,
 *   the factory is taken from the global MediaWikiServices instance.
 */
public function __construct( $names = [], ?MagicWordFactory $factory = null ) {
	$this->names = $names;
	// The parameter is either an object or null, so test for null explicitly
	// rather than for truthiness.
	$this->factory = $factory ?? MediaWikiServices::getInstance()->getMagicWordFactory();
}
55
/**
 * Add a magic word by name.
 *
 * @param string $name Name of the magic word to append
 */
public function add( $name ) {
	$this->names[] = $name;

	// The derived lookup structures no longer match the name list;
	// clear them so the getters rebuild them on next use.
	$this->regex = null;
	$this->baseRegex = null;
	$this->hash = null;
}
65
/**
 * Add a number of magic words by name.
 *
 * @param array $names Names to append; the keys of this array are discarded
 */
public function addArray( $names ) {
	$appended = array_values( $names );
	$this->names = array_merge( $this->names, $appended );

	// The derived lookup structures no longer match the name list;
	// clear them so the getters rebuild them on next use.
	$this->regex = null;
	$this->baseRegex = null;
	$this->hash = null;
}
75
/**
 * Get a 2-d hashtable for this array.
 *
 * The result has two sub-arrays, each mapping a synonym to the magic word name that
 * owns it: index 0 holds the synonyms of case-insensitive words (lower-cased with the
 * content language), index 1 holds the synonyms of case-sensitive words verbatim.
 * The table is built once and cached until add() or addArray() is called.
 *
 * @return array[] [ 0 => [ synonym => name ], 1 => [ synonym => name ] ]
 */
public function getHash() {
	if ( $this->hash !== null ) {
		return $this->hash;
	}

	// Build into a local and publish only when complete. If a lookup throws part-way
	// through, the cache stays unset instead of keeping a partial table that every
	// later call would return as if it were complete.
	$hash = [ 0 => [], 1 => [] ];
	foreach ( $this->names as $name ) {
		$magic = $this->factory->get( $name );
		// Same 0/1 mapping as getBaseRegex(), so both structures share their indexes.
		$case = $magic->isCaseSensitive() ? 1 : 0;
		foreach ( $magic->getSynonyms() as $syn ) {
			if ( !$case ) {
				$syn = $this->factory->getContentLanguage()->lc( $syn );
			}
			$hash[$case][$syn] = $name;
		}
	}

	$this->hash = $hash;
	return $hash;
}
96
/**
 * Get the base regex.
 *
 * Produces two regex fragments (no delimiters, no modifiers): index 0 is an alternation
 * of every synonym of the case-insensitive words wrapped in "(?i:...)", index 1 is the
 * alternation for the case-sensitive words. A side with no synonyms becomes "(?!)",
 * which never matches.
 *
 * The result is cached only for the default arguments (capturing, "/" delimiter).
 *
 * @param bool $capture When true, every synonym is wrapped in a named group
 *   "<index>_<name>", where the synonym index has its digits mapped to the letters a-j.
 * @param string $delimiter Delimiter passed to preg_quote() for each synonym
 * @return string[] [ 0 => case-insensitive fragment, 1 => case-sensitive fragment ]
 * @throws MWException If two synonyms would produce the same group name
 */
public function getBaseRegex( bool $capture = true, string $delimiter = '/' ): array {
	$cacheable = $capture && $delimiter === '/';
	if ( $cacheable && $this->baseRegex !== null ) {
		return $this->baseRegex;
	}

	// Lists of alternation branches, indexed by case sensitivity (0 = insensitive).
	$branches = [ 0 => [], 1 => [] ];
	$allGroups = [];
	foreach ( $this->names as $name ) {
		$magic = $this->factory->get( $name );
		$case = $magic->isCaseSensitive() ? 1 : 0;
		foreach ( $magic->getSynonyms() as $i => $syn ) {
			$quoted = preg_quote( $syn, $delimiter );
			if ( !$capture ) {
				$branches[$case][] = $quoted;
				continue;
			}
			// Group name must start with a non-digit in PCRE 8.34+
			$groupName = strtr( (string)$i, '0123456789', 'abcdefghij' ) . '_' . $name;
			// look for same group names to avoid same named subpatterns in the regex
			if ( isset( $allGroups[$groupName] ) ) {
				throw new MWException(
					__METHOD__ . ': duplicate internal name in magic word array: ' . $name
				);
			}
			$allGroups[$groupName] = true;
			$branches[$case][] = '(?P<' . $groupName . '>' . $quoted . ')';
		}
	}

	// Join each list into its final fragment. This builds a fresh array by value, so no
	// by-reference loop variable is left pointing into the array that is cached and returned.
	$regex = [];
	foreach ( $branches as $case => $list ) {
		$re = count( $list ) ? implode( '|', $list ) : '(?!)';
		$regex[$case] = $case ? $re : "(?i:{$re})";
	}

	if ( $cacheable ) {
		$this->baseRegex = $regex;
	}
	return $regex;
}
149
/**
 * Get an unanchored regex that does not match parameters.
 *
 * Wraps each base fragment in "/" delimiters with the S modifier. The result is cached
 * until add() or addArray() is called.
 *
 * @return string[] [ 0 => case-insensitive regex, 1 => case-sensitive regex ]
 */
public function getRegex() {
	if ( $this->regex === null ) {
		// Build into a local and publish only when complete. If getBaseRegex() throws,
		// the cache stays unset rather than holding an empty pattern list that later
		// calls would silently use.
		$regex = [];
		foreach ( $this->getBaseRegex( true, '/' ) as $case => $re ) {
			$regex[$case] = "/{$re}/S";
		}
		// As a performance optimization, turn on unicode mode only for
		// case-insensitive matching.
		$regex[0] .= 'u';
		$this->regex = $regex;
	}
	return $this->regex;
}
168
/**
 * Get a regex for matching variables with parameters.
 *
 * Takes the patterns from getRegex() and swaps every quoted "$1" placeholder for a
 * lazy capture group.
 *
 * @return string[] Same layout as getRegex()
 */
public function getVariableRegex() {
	$patterns = $this->getRegex();
	return str_replace( "\\$1", "(.*?)", $patterns );
}
179
/**
 * Get a regex anchored to the start of the string that does not match parameters.
 *
 * @return string[] [ 0 => case-insensitive regex, 1 => case-sensitive regex ]
 */
public function getRegexStart() {
	$anchored = [];
	foreach ( $this->getBaseRegex( true, '/' ) as $case => $fragment ) {
		$anchored[$case] = '/^(?:' . $fragment . ')/S';
	}
	// As a performance optimization, turn on unicode mode only for
	// case-insensitive matching.
	$anchored[0] .= 'u';
	return $anchored;
}
197
/**
 * Get an anchored regex for matching variables with parameters.
 *
 * Each pattern must match the whole string, and every quoted "$1" placeholder in a
 * synonym is replaced by a lazy capture group.
 *
 * @return string[] [ 0 => case-insensitive regex, 1 => case-sensitive regex ]
 */
public function getVariableStartToEndRegex() {
	$anchored = [];
	foreach ( $this->getBaseRegex( true, '/' ) as $case => $fragment ) {
		$whole = '/^(?:' . $fragment . ')$/S';
		$anchored[$case] = str_replace( "\\$1", "(.*?)", $whole );
	}
	// As a performance optimization, turn on unicode mode only for
	// case-insensitive matching.
	$anchored[0] .= 'u';
	return $anchored;
}
215
/**
 * Get the magic word names currently held by this array.
 *
 * @return array The name list as stored, in insertion order
 */
public function getNames() {
	return $this->names;
}
223
/**
 * Extract the magic word name, the matched synonym and the parameter from a
 * preg match array produced by one of this class's capturing regexes.
 *
 * The array is scanned for the first non-empty entry other than the full match at
 * key 0. Its key is split at the first underscore and the part after it is taken as
 * the magic word name. The entry that follows supplies the matched synonym, and the
 * entry after that (if any) the parameter value.
 *
 * @param array $matches Match array from preg_match() or a preg_replace_callback() callback
 * @return array [ magic word name, matched synonym, parameter value or false ]
 * @throws MWException If the group key has no underscore, or nothing follows the named group
 */
private function parseMatch( array $matches ): array {
	$wordName = null;
	foreach ( $matches as $groupKey => $value ) {
		if ( $wordName === null ) {
			// Skip the initial full match and any non-matching group
			if ( $groupKey === 0 || $value === '' ) {
				continue;
			}
			$pieces = explode( '_', $groupKey, 2 );
			if ( count( $pieces ) < 2 ) {
				// This shouldn't happen
				throw new MWException( __METHOD__ . ': Unexpected group name' );
			}
			$wordName = $pieces[1];
			continue;
		}

		// The named group was the previous entry, so the layout here is [ …,
		//   'a_magicWordName' => 'matchedSynonym',
		//   n => 'matchedSynonym (again)',
		//   n + 1 => 'parameterValue',
		// … ]
		return [ $wordName, $value, $matches[$groupKey + 1] ?? false ];
	}

	// This shouldn't happen either
	throw new MWException( __METHOD__ . ': parameter not found' );
}
257
/**
 * Match some text, with parameter capture.
 *
 * The text has to match one of the anchored variable regexes in full.
 *
 * @param string $text Text to match
 * @return array [ magic word name, parameter value ], or [ false, false ] when
 *   nothing matched. The parameter is false when the match has no parameter group.
 */
public function matchVariableStartToEnd( $text ) {
	foreach ( $this->getVariableStartToEndRegex() as $pattern ) {
		$groups = [];
		if ( !preg_match( $pattern, $text, $groups ) ) {
			continue;
		}
		// The matched synonym (second element) is not needed here.
		[ $id, , $param ] = $this->parseMatch( $groups );
		return [ $id, $param ];
	}
	return [ false, false ];
}
279
/**
 * Match some text, without parameter capture.
 *
 * Looks the whole text up in the synonym hashtable: first verbatim among the
 * case-sensitive synonyms, then lower-cased (content language) among the
 * case-insensitive ones.
 *
 * @param string $text Text to look up
 * @return string|false The magic word name, or false if the text is not a known synonym
 */
public function matchStartToEnd( $text ) {
	$table = $this->getHash();

	$exact = $table[1][$text] ?? null;
	if ( $exact !== null ) {
		return $exact;
	}

	$folded = $this->factory->getContentLanguage()->lc( $text );
	return $table[0][$folded] ?? false;
}
296
/**
 * Find every magic word of this array in the text and remove the matches from it.
 *
 * @param string &$text Text to search; matched items are cut out of it in place
 * @param bool $returnAlias When true, the values of the result are the matched
 *   synonyms instead of the parameter values
 * @return array Map of magic word name => parameter value (or matched synonym)
 */
public function matchAndRemove( &$text, bool $returnAlias = false ): array {
	$found = [];

	// Records each hit and replaces it with nothing.
	$collect = function ( $m ) use ( &$found, $returnAlias ) {
		[ $name, $alias, $param ] = $this->parseMatch( $m );
		if ( $returnAlias ) {
			$found[$name] = $alias;
		} else {
			$found[$name] = $param;
		}
		return '';
	};

	$stripped = preg_replace_callback( $this->getRegex(), $collect, $text );
	// T321234: Don't try to fix old revisions with broken UTF-8, just return $text as is
	if ( $stripped !== null ) {
		$text = $stripped;
	}
	return $found;
}
321
/**
 * Return the ID of the magic word at the start of $text, and remove the prefix
 * from $text.
 *
 * @param string &$text Text to inspect; the matched prefix is cut off in place
 * @return string|false The magic word name, or false if the text does not start
 *   with one (in which case $text is left unchanged)
 */
public function matchStartAndRemove( &$text ) {
	foreach ( $this->getRegexStart() as $pattern ) {
		if ( !preg_match( $pattern, $text, $m ) ) {
			continue;
		}
		[ $id ] = $this->parseMatch( $m );
		$consumed = strlen( $m[0] );
		// When the match covers the whole text, set the empty string explicitly
		// instead of calling substr() at the end of the string.
		$text = $consumed < strlen( $text ) ? substr( $text, $consumed ) : '';
		return $id;
	}
	return false;
}
347}
if(!defined('MW_SETUP_CALLBACK'))
The persistent session ID (if any) loaded at startup.
Definition WebStart.php:82
MediaWiki exception.
Class for handling an array of magic words.
getBaseRegex(bool $capture=true, string $delimiter='/')
Get the base regex.
matchVariableStartToEnd( $text)
Match some text, with parameter capture. Returns an array with the magic word name in the first element and the parameter in the second element; both elements are false if there was no match.
add( $name)
Add a magic word by name.
getVariableRegex()
Get a regex for matching variables with parameters.
__construct( $names=[], MagicWordFactory $factory=null)
matchStartToEnd( $text)
Match some text, without parameter capture. Returns the magic word name, or false if there was no match.
matchAndRemove(&$text, bool $returnAlias=false)
Returns an associative array, ID => param value, for all items that match. Removes the matched items from the input string (passed by reference).
getRegexStart()
Get a regex anchored to the start of the string that does not match parameters.
matchStartAndRemove(&$text)
Return the ID of the magic word at the start of $text, and remove the prefix from $text.
getRegex()
Get an unanchored regex that does not match parameters.
getVariableStartToEndRegex()
Get an anchored regex for matching variables with parameters.
getHash()
Get a 2-d hashtable for this array.
addArray( $names)
Add a number of magic words by name.
A factory that stores information about MagicWords, and creates them on demand with caching.
Service locator for MediaWiki core services.