MediaWiki master
MagicWordArray.php
Go to the documentation of this file.
1<?php
7namespace MediaWiki\Parser;
8
9use LogicException;
11
21
/** @var array Names of the magic words in this array; each one is passed to MagicWordFactory::get() */
23 public $names = [];
/** @var MagicWordFactory Used to look up magic words and to obtain the content language */
24 private MagicWordFactory $factory;
25
/** @var array[]|null Table built lazily by getHash(); reset to null by add() */
27 private $hash;
28
/** @var string[]|null Result of getBaseRegex() for the default arguments; reset to null by add() */
30 private $baseRegex;
31
/** @var string[]|null Complete regexes built lazily by getRegex(); reset to null by add() */
33 private $regex;
34
/**
 * Set up the array with an initial list of magic word names.
 *
 * @param array $names Names of the magic words to start with
 * @param MagicWordFactory|null $factory Factory to use; when null, the one
 *   provided by the global MediaWikiServices instance is used instead
 */
public function __construct( $names = [], ?MagicWordFactory $factory = null ) {
	if ( $factory === null ) {
		$factory = MediaWikiServices::getInstance()->getMagicWordFactory();
	}
	$this->factory = $factory;
	$this->names = $names;
}
43
/**
 * Add a magic word by name.
 *
 * @param string $name
 */
public function add( $name ): void {
	$this->names[] = $name;

	// Everything derived from the name list is now stale; drop it so that
	// it gets rebuilt the next time it is requested.
	$this->hash = null;
	$this->baseRegex = null;
	$this->regex = null;
}
53
/**
 * Get a 2-d hashtable for this array.
 *
 * The result has two entries: index 0 maps the synonyms of case-insensitive
 * magic words (lowercased with the content language) to the magic word name,
 * index 1 maps the synonyms of case-sensitive magic words, unchanged, to the
 * magic word name. The table is built on first use and then cached.
 *
 * @return array[]
 */
public function getHash(): array {
	if ( $this->hash === null ) {
		// Build into a local variable and publish it only once complete, so
		// that an exception thrown while looking up a magic word cannot leave
		// a half-filled table behind in the cache.
		$hash = [ 0 => [], 1 => [] ];
		foreach ( $this->names as $name ) {
			$magic = $this->factory->get( $name );
			$case = intval( $magic->isCaseSensitive() );
			foreach ( $magic->getSynonyms() as $syn ) {
				if ( !$case ) {
					$syn = $this->factory->getContentLanguage()->lc( $syn );
				}
				// A synonym shared by several magic words resolves to the
				// one that was added last.
				$hash[$case][$syn] = $name;
			}
		}
		$this->hash = $hash;
	}
	return $this->hash;
}
75
/**
 * Get the base regex.
 *
 * Returns two regex fragments without delimiters or modifiers: index 0 is an
 * alternation of all synonyms of the case-insensitive magic words wrapped in
 * "(?i:…)", index 1 is an alternation of all synonyms of the case-sensitive
 * ones. The result for the default arguments is cached.
 *
 * @param bool $capture If true, every synonym is wrapped in a named group
 *   whose name is the synonym index (digits mapped to letters), an
 *   underscore, and the magic word name
 * @param string $delimiter Delimiter passed to preg_quote()
 * @return string[]
 */
public function getBaseRegex( bool $capture = true, string $delimiter = '/' ): array {
	// Only the default variant is cached
	$cacheable = $capture && $delimiter === '/';
	if ( $cacheable && $this->baseRegex !== null ) {
		return $this->baseRegex;
	}

	$alternatives = [ 0 => [], 1 => [] ];
	foreach ( $this->names as $name ) {
		$magic = $this->factory->get( $name );
		$case = $magic->isCaseSensitive() ? 1 : 0;
		foreach ( $magic->getSynonyms() as $i => $syn ) {
			$pattern = preg_quote( $syn, $delimiter );
			if ( $capture ) {
				// Group name must start with a non-digit in PCRE 8.34+
				$it = strtr( (string)$i, '0123456789', 'abcdefghij' );
				$groupName = $it . '_' . $name;
				$pattern = '(?P<' . $groupName . '>' . $pattern . ')';
			}
			$alternatives[$case][] = $pattern;
		}
	}

	// Assemble the result in a fresh array instead of rewriting the lists in
	// place through a by-reference foreach: no reference is left dangling and
	// each variable keeps a single element type.
	$regex = [];
	foreach ( $alternatives as $case => $patterns ) {
		// "(?!)" never matches; it stands in for an empty alternation
		$re = count( $patterns ) ? implode( '|', $patterns ) : '(?!)';
		$regex[$case] = $case ? $re : "(?i:{$re})";
	}

	if ( $cacheable ) {
		$this->baseRegex = $regex;
	}
	return $regex;
}
121
/**
 * Get the complete, unanchored regexes for matching anywhere in a text.
 *
 * Index 0 is the case-insensitive regex, index 1 the case-sensitive one,
 * both built from getBaseRegex() with capturing enabled. The result is
 * cached until add() is called.
 *
 * @return string[]
 */
private function getRegex(): array {
	if ( $this->regex === null ) {
		// Build into a local variable first: if getBaseRegex() throws, the
		// cache must stay unset rather than hold an empty pattern list.
		$regex = [];
		foreach ( $this->getBaseRegex( true, '/' ) as $case => $re ) {
			$regex[$case] = "/$re/JS";
		}
		// As a performance optimization, turn on unicode mode only for
		// case-insensitive matching.
		$regex[0] .= 'u';
		$this->regex = $regex;
	}
	return $this->regex;
}
140
/**
 * Get regexes that match a magic word only at the very start of a text.
 *
 * Index 0 is the case-insensitive regex, index 1 the case-sensitive one.
 *
 * @return string[]
 */
private function getRegexStart(): array {
	// array_map() keeps the keys when it is given a single array
	$anchored = array_map(
		static fn ( $re ) => "/^(?:{$re})/JS",
		$this->getBaseRegex( true, '/' )
	);
	// Unicode mode is only needed for the case-insensitive regex; leaving it
	// off for the other one is a performance optimization.
	$anchored[0] .= 'u';
	return $anchored;
}
157
/**
 * Get regexes that must match a whole text and capture a parameter.
 *
 * Each quoted "$1" placeholder inside a synonym is turned into a lazy
 * capturing group. Index 0 is the case-insensitive regex, index 1 the
 * case-sensitive one.
 *
 * @return string[]
 */
private function getVariableStartToEndRegex(): array {
	$result = [];
	foreach ( $this->getBaseRegex( true, '/' ) as $case => $re ) {
		$anchored = "/^(?:{$re})$/JS";
		// preg_quote() has escaped the placeholder, hence the backslash
		$result[$case] = str_replace( '\$1', '(.*?)', $anchored );
	}
	// Unicode mode is only needed for the case-insensitive regex; leaving it
	// off for the other one is a performance optimization.
	$result[0] .= 'u';
	return $result;
}
174
/**
 * Get the names of the magic words in this array.
 *
 * @return array The current value of $this->names, including names appended with add()
 */
179 public function getNames() {
180 return $this->names;
181 }
182
/**
 * Extract the magic word name, matched synonym and parameter from a match.
 *
 * @param array $matches Match array as filled in by preg_match() or passed
 *   to a preg_replace_callback() callback
 * @return array [ magic word name, matched synonym, parameter value or false ]
 * @throws LogicException If no usable named group is found
 */
private function parseMatch( array $matches ): array {
	$foundName = null;
	foreach ( $matches as $groupKey => $value ) {
		if ( $foundName === null ) {
			// Still searching: pass over the full match at index 0 and over
			// every group that did not take part in the match.
			if ( $groupKey === 0 || $value === '' ) {
				continue;
			}
			$pieces = explode( '_', $groupKey, 2 );
			if ( !isset( $pieces[1] ) ) {
				throw new LogicException( 'Unexpected group name' );
			}
			$foundName = $pieces[1];
			continue;
		}
		// The previous entry was the named group 'a_magicWordName'. This one
		// is the same capture under its numeric index n, and the parameter,
		// if there is one, sits at n + 1.
		return [ $foundName, $value, $matches[$groupKey + 1] ?? false ];
	}
	throw new LogicException( 'Unexpected $m array with no match' );
}
212
/**
 * Match some text, with parameter capture.
 *
 * @param string $text
 * @return array [ magic word name, parameter value ] for the first regex
 *   that matches the whole text, or [ false, false ] if none does
 */
public function matchVariableStartToEnd( $text ): array {
	foreach ( $this->getVariableStartToEndRegex() as $pattern ) {
		$groups = [];
		if ( !preg_match( $pattern, $text, $groups ) ) {
			continue;
		}
		// The matched alias is of no interest here, only name and parameter
		[ $id, , $param ] = $this->parseMatch( $groups );
		return [ $id, $param ];
	}
	return [ false, false ];
}
231
/**
 * Match some text, without parameter capture.
 *
 * @param string $text
 * @return string|false The magic word name, or false if the text is not a
 *   known synonym
 */
public function matchStartToEnd( $text ) {
	[ 0 => $insensitive, 1 => $sensitive ] = $this->getHash();

	// An exact, case-sensitive hit takes precedence
	if ( isset( $sensitive[$text] ) ) {
		return $sensitive[$text];
	}

	$folded = $this->factory->getContentLanguage()->lc( $text );
	return $insensitive[$folded] ?? false;
}
247
/**
 * Return an associative array for all items that match.
 *
 * Every occurrence of a magic word is cut out of $text.
 *
 * @param string &$text Text to search; updated in place unless the
 *   replacement fails
 * @param bool $returnAlias If true the values of the result are the matched
 *   synonyms, otherwise they are the captured parameter values (or false)
 * @return array Found values keyed by magic word name
 */
public function matchAndRemove( &$text, bool $returnAlias = false ): array {
	$found = [];
	$collect = function ( $m ) use ( &$found, $returnAlias ) {
		[ $name, $alias, $param ] = $this->parseMatch( $m );
		if ( $returnAlias ) {
			$found[$name] = $alias;
		} else {
			$found[$name] = $param;
		}
		// Replace the occurrence with nothing
		return '';
	};

	$stripped = preg_replace_callback( $this->getRegex(), $collect, $text );
	// T321234: Don't try to fix old revisions with broken UTF-8, just return $text as is
	if ( $stripped === null ) {
		return $found;
	}
	$text = $stripped;
	return $found;
}
275
/**
 * Return the ID of the magic word at the start of $text, and remove the
 * prefix from $text.
 *
 * @param string &$text Text to examine; the matched prefix is cut off
 * @return string|false The magic word name, or false if $text does not
 *   start with a magic word (in which case $text is left untouched)
 */
public function matchStartAndRemove( &$text ) {
	foreach ( $this->getRegexStart() as $pattern ) {
		if ( !preg_match( $pattern, $text, $m ) ) {
			continue;
		}
		[ $id ] = $this->parseMatch( $m );
		$consumed = strlen( $m[0] );
		// Nothing remains when the match covers the whole text
		$text = $consumed < strlen( $text ) ? substr( $text, $consumed ) : '';
		return $id;
	}
	return false;
}
301}
if(!defined('MW_SETUP_CALLBACK'))
Definition WebStart.php:68
Service locator for MediaWiki core services.
static getInstance()
Returns the global default instance of the top level service locator.
Class for handling an array of magic words.
matchVariableStartToEnd( $text)
Match some text, with parameter capture.
__construct( $names=[], ?MagicWordFactory $factory=null)
matchStartAndRemove(&$text)
Return the ID of the magic word at the start of $text, and remove the prefix from $text.
getBaseRegex(bool $capture=true, string $delimiter='/')
Get the base regex.
matchStartToEnd( $text)
Match some text, without parameter capture.
add( $name)
Add a magic word by name.
matchAndRemove(&$text, bool $returnAlias=false)
Return an associative array for all items that match.
getHash()
Get a 2-d hashtable for this array.
Store information about magic words, and create/cache MagicWord objects.