MediaWiki master
StripState.php
Go to the documentation of this file.
1<?php
10namespace MediaWiki\Parser;
11
12use Closure;
13use InvalidArgumentException;
14use Wikimedia\Parsoid\Fragments\PFragment;
15
23 protected $data;
25 protected $extra;
27 protected $regex;
28
29 protected ?Parser $parser;
30
34 protected $depth = 0;
36 protected $highestDepth = 0;
38 protected $expandSize = 0;
39
41 protected $depthLimit = 20;
43 protected $sizeLimit = 5_000_000;
44
/**
 * Set up empty strip storage, the marker-matching regex and the limits.
 *
 * @param Parser|null $parser Parser that limitation warnings are reported to, if any
 * @param array $options Optional overrides; recognised keys:
 *  - 'depthLimit': replaces the default recursion depth limit
 *  - 'sizeLimit': replaces the default expansion size limit
 */
public function __construct( ?Parser $parser = null, $options = [] ) {
	$this->parser = $parser;
	$this->data = [ 'nowiki' => [], 'general' => [] ];
	$this->extra = [];
	$this->circularRefGuard = [];

	// A marker is the prefix, a run of characters that excludes DEL and
	// < > & ' ", then the suffix. Capture group 1 is the storage key.
	$this->regex = '/' . Parser::MARKER_PREFIX . "([^\x7f<>&'\"]+)" . Parser::MARKER_SUFFIX . '/';

	// Unset (or null) options leave the declared defaults in place.
	$this->depthLimit = $options['depthLimit'] ?? $this->depthLimit;
	$this->sizeLimit = $options['sizeLimit'] ?? $this->sizeLimit;
}
68
/**
 * Add a nowiki strip item.
 *
 * @param string $marker Full strip marker; rejected with an
 *  InvalidArgumentException by addItem() if it does not match the marker regex
 * @param string|Closure $value Replacement content, or a closure returning it
 * @param string|null $extra Optional extra data stored alongside the value
 */
public function addNoWiki( $marker, $value, ?string $extra = null ) {
	$type = 'nowiki';
	$this->addItem( $type, $marker, $value, $extra );
}
79
/**
 * Add a strip item of type 'general'.
 *
 * @param string $marker Full strip marker; rejected with an
 *  InvalidArgumentException by addItem() if it does not match the marker regex
 * @param string|Closure $value Replacement content, or a closure returning it
 */
public function addGeneral( $marker, $value ) {
	$type = 'general';
	$this->addItem( $type, $marker, $value );
}
87
/**
 * Add a strip item of type 'exttag'. Items of this type are expanded
 * recursively by split().
 *
 * @param string $marker Full strip marker; rejected with an
 *  InvalidArgumentException by addItem() if it does not match the marker regex
 * @param string|Closure $value Replacement content, or a closure returning it
 */
public function addExtTag( $marker, $value ) {
	$type = 'exttag';
	$this->addItem( $type, $marker, $value );
}
97
/**
 * Add a strip item of type 'parsoid'. The stored value is the fixed
 * placeholder string '<parsoid opaque>'; the fragment itself is kept as
 * the item's extra data.
 *
 * @param string $marker Full strip marker; rejected with an
 *  InvalidArgumentException by addItem() if it does not match the marker regex
 * @param PFragment $extra Fragment stored alongside the placeholder
 */
public function addParsoidOpaque( $marker, PFragment $extra ) {
	$placeholder = '<parsoid opaque>';
	$this->addItem( 'parsoid', $marker, $placeholder, $extra );
}
107
/**
 * Store a strip item under the key captured from its marker.
 *
 * @param string $type Storage bucket, e.g. 'nowiki', 'general', 'exttag', 'parsoid'
 * @param string $marker Full strip marker
 * @param string|Closure $value Replacement content, or a closure returning it
 * @param mixed $extra Optional extra data; only stored when not null
 * @throws InvalidArgumentException If $marker does not match the marker regex
 */
protected function addItem( $type, $marker, $value, $extra = null ) {
	$matches = [];
	// preg_match() yields 1 on a match, 0 on no match and false on error;
	// anything but a match is rejected.
	if ( preg_match( $this->regex, $marker, $matches ) !== 1 ) {
		throw new InvalidArgumentException( "Invalid marker: $marker" );
	}

	$key = $matches[1];
	$this->data[$type][$key] = $value;
	if ( $extra === null ) {
		return;
	}
	$this->extra[$type][$key] = $extra;
}
127
/**
 * Replace every 'general' strip marker in the text with its stored content.
 *
 * @param string $text
 * @return string
 */
public function unstripGeneral( $text ) {
	$unstripped = $this->unstripType( 'general', $text );
	return $unstripped;
}
135
/**
 * Replace every 'nowiki' strip marker in the text with its stored content.
 *
 * @param string $text
 * @return string
 */
public function unstripNoWiki( $text ) {
	$unstripped = $this->unstripType( 'nowiki', $text );
	return $unstripped;
}
143
/**
 * Replace each nowiki strip marker in the text with the result of
 * passing its stored content through a callback.
 *
 * Markers that are not registered as 'nowiki' are left in place. The
 * length of each stored value counts towards the expansion size limit;
 * once the limit is exceeded, a limitation warning is substituted
 * instead of the callback result.
 *
 * @param string $text
 * @param callable $callback Receives the stored content, returns the replacement
 * @return string
 */
public function replaceNoWikis( string $text, callable $callback ): string {
	// Nothing registered, so there is nothing to replace.
	if ( count( $this->data['nowiki'] ) === 0 ) {
		return $text;
	}

	$substitute = function ( $match ) use ( $callback ) {
		$key = $match[1];
		if ( !isset( $this->data['nowiki'][$key] ) ) {
			// Not a nowiki marker: keep the matched text untouched.
			return $match[0];
		}

		$content = $this->data['nowiki'][$key];
		if ( $content instanceof Closure ) {
			// Lazily produced content is generated on demand.
			$content = $content();
		}

		$this->expandSize += strlen( $content );

		return $this->expandSize > $this->sizeLimit
			? $this->getLimitationWarning( 'unstrip-size', $this->sizeLimit )
			: $callback( $content );
	};

	return preg_replace_callback( $this->regex, $substitute, $text );
}
177
/**
 * Split the given text by strip markers, returning an array of
 * [ 'type' => ..., 'content' => ... ] items.
 *
 * - Text between markers becomes a 'type' => 'string' item.
 * - A marker registered as 'exttag' is expanded recursively in place. If
 *   it is already being expanded, or the depth or size limit is hit, a
 *   single 'string' item holding the warning is emitted instead.
 * - A marker of any other registered type becomes an item with 'type',
 *   'content', 'extra' and 'marker' keys.
 * - An unregistered marker becomes a 'type' => 'unknown' item with null
 *   content and the full 'marker'.
 *
 * @param string $text
 * @return array[]
 */
public function split( string $text ): array {
	$result = [];
	// With PREG_SPLIT_DELIM_CAPTURE the pieces alternate: even offsets are
	// literal text, odd offsets are the captured marker keys.
	$pieces = preg_split( $this->regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$pieceCount = count( $pieces );
	for ( $i = 0; $i < $pieceCount; $i++ ) {
		if ( $i % 2 === 0 ) {
			$result[] = [
				'type' => 'string',
				'content' => $pieces[$i],
			];
			continue;
		}
		$marker = $pieces[$i];
		foreach ( $this->data as $type => $items ) {
			if ( !isset( $items[$marker] ) ) {
				continue;
			}
			$value = $items[$marker];
			$extra = $this->extra[$type][$marker] ?? null;
			if ( $value instanceof Closure ) {
				$value = $value();
			}

			if ( $type === 'exttag' ) {
				// Catch circular refs / enforce depth limits
				// similar to code in unstripType().
				//
				// Each warning branch must leave BOTH loops ("continue 2").
				// A bare "continue" would only advance to the next type,
				// fall out of the foreach, and append a bogus 'unknown'
				// item after the warning.
				if ( isset( $this->circularRefGuard[$marker] ) ) {
					$result[] = [
						'type' => 'string',
						'content' => $this->getWarning( 'parser-unstrip-loop-warning' )
					];
					continue 2;
				}

				if ( $this->depth > $this->highestDepth ) {
					$this->highestDepth = $this->depth;
				}
				if ( $this->depth >= $this->depthLimit ) {
					$result[] = [
						'type' => 'string',
						'content' => $this->getLimitationWarning( 'unstrip-depth', $this->depthLimit )
					];
					continue 2;
				}

				// For exttag types, the output size should include the output of
				// the extension, but don't think unstripType is doing that, and so
				// we aren't doing that either here. But this is kinda broken.
				// See T380758#10355050.
				$this->expandSize += strlen( $value );
				if ( $this->expandSize > $this->sizeLimit ) {
					$result[] = [
						'type' => 'string',
						'content' => $this->getLimitationWarning( 'unstrip-size', $this->sizeLimit )
					];
					continue 2;
				}

				// Guard the marker and track depth while its content is
				// split recursively, then restore both.
				$this->circularRefGuard[$marker] = true;
				$this->depth++;
				$result = array_merge( $result, $this->split( $value ) );
				$this->depth--;
				unset( $this->circularRefGuard[$marker] );
			} else {
				$result[] = [
					'type' => $type,
					'content' => $value,
					'extra' => $extra,
					'marker' => Parser::MARKER_PREFIX . $marker . Parser::MARKER_SUFFIX,
				];
			}
			// Marker handled; move on to the next piece.
			continue 2;
		}
		// No type knows this marker.
		$result[] = [
			'type' => 'unknown',
			'content' => null,
			'marker' => Parser::MARKER_PREFIX . $marker . Parser::MARKER_SUFFIX,
		];
	}
	return $result;
}
265
/**
 * Replace 'general' strip markers, then 'nowiki' strip markers, with
 * their stored content.
 *
 * @param string $text
 * @return string
 */
public function unstripBoth( $text ) {
	// The order matters: the nowiki pass runs on the output of the general pass.
	foreach ( [ 'general', 'nowiki' ] as $type ) {
		$text = $this->unstripType( $type, $text );
	}
	return $text;
}
275
/**
 * Replace every strip marker of one type in the text with its stored
 * content, recursing into that content for nested markers of the same type.
 *
 * Markers not registered under $type are left in place. A marker that
 * is already being expanded is replaced by a loop warning; exceeding
 * the depth or size limit yields the matching limitation warning.
 *
 * @param string $type Storage bucket to expand, e.g. 'general' or 'nowiki'
 * @param string $text
 * @return string
 */
protected function unstripType( $type, $text ) {
	// Nothing registered for this type, so the text cannot change.
	if ( count( $this->data[$type] ) === 0 ) {
		return $text;
	}

	$expand = function ( $match ) use ( $type ) {
		$key = $match[1];
		if ( !isset( $this->data[$type][$key] ) ) {
			// Unknown to this type: keep the matched text untouched.
			return $match[0];
		}

		if ( isset( $this->circularRefGuard[$key] ) ) {
			return $this->getWarning( 'parser-unstrip-loop-warning' );
		}

		// Record the deepest level reached before enforcing the limit.
		$this->highestDepth = max( $this->highestDepth, $this->depth );
		if ( $this->depth >= $this->depthLimit ) {
			return $this->getLimitationWarning( 'unstrip-depth', $this->depthLimit );
		}

		$content = $this->data[$type][$key];
		if ( $content instanceof Closure ) {
			$content = $content();
		}

		$this->expandSize += strlen( $content );
		if ( $this->expandSize > $this->sizeLimit ) {
			return $this->getLimitationWarning( 'unstrip-size', $this->sizeLimit );
		}

		// Guard the marker and track depth while its content is expanded,
		// then restore both.
		$this->circularRefGuard[$key] = true;
		$this->depth++;
		$expanded = $this->unstripType( $type, $content );
		$this->depth--;
		unset( $this->circularRefGuard[$key] );

		return $expanded;
	};

	return preg_replace_callback( $this->regex, $expand, $text );
}
326
/**
 * Report a limitation to the parser (when one is attached) and build
 * the matching warning markup.
 *
 * @param string $type Limitation type, e.g. 'unstrip-depth' or 'unstrip-size';
 *  the message key used is "$type-warning"
 * @param int|string $max Limit value passed to the parser and to the message
 * @return string Warning markup from getWarning()
 */
private function getLimitationWarning( $type, $max = '' ) {
	if ( $this->parser !== null ) {
		$this->parser->limitationWarn( $type, $max );
	}

	$messageKey = "$type-warning";
	return $this->getWarning( $messageKey, $max );
}
340
/**
 * Build warning markup: the message text, in the content language,
 * wrapped in a span with class "error".
 *
 * @param string $message Message key
 * @param int|string $max Value passed to the message as a numeric parameter
 * @return string
 */
private function getWarning( $message, $max = '' ) {
	$text = wfMessage( $message )
		->numParams( $max )
		->inContentLanguage()
		->text();

	return '<span class="error">' . $text . '</span>';
}
354
/**
 * Get an array of parameters to pass to ParserOutput::setLimitReportData()
 *
 * Each entry is a [ key, [ value reached, limit ] ] pair: one for the
 * highest unstrip depth and one for the accumulated expansion size.
 *
 * @return array
 */
public function getLimitReport() {
	$depthEntry = [ 'limitreport-unstrip-depth', [ $this->highestDepth, $this->depthLimit ] ];
	$sizeEntry = [ 'limitreport-unstrip-size', [ $this->expandSize, $this->sizeLimit ] ];

	return [ $depthEntry, $sizeEntry ];
}
377
/**
 * Remove any strip markers found in the given text.
 *
 * @param string $text
 * @return string The text with every match of the marker regex deleted
 */
public function killMarkers( $text ) {
	$withoutMarkers = preg_replace( $this->regex, '', $text );
	return $withoutMarkers;
}
387}
388
// Also register this namespaced class under the global name 'StripState'.
class_alias( StripState::class, 'StripState' );
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
if(!defined('MW_SETUP_CALLBACK'))
Definition WebStart.php:68
PHP Parser - Processes wiki markup (which uses a more user-friendly syntax, such as "[[link]]" for ma...
Definition Parser.php:135
__construct(?Parser $parser=null, $options=[])
addItem( $type, $marker, $value, $extra=null)
replaceNoWikis(string $text, callable $callback)
addParsoidOpaque( $marker, PFragment $extra)
addNoWiki( $marker, $value, ?string $extra=null)
Add a nowiki strip item.
killMarkers( $text)
Remove any strip markers found in the given text.
getLimitReport()
Get an array of parameters to pass to ParserOutput::setLimitReportData()
addGeneral( $marker, $value)
addExtTag( $marker, $value)
split(string $text)
Split the given text by strip markers, returning an array of [ 'type' => ..., 'content' => ....