MediaWiki REL1_34
StringUtils.php
Go to the documentation of this file.
1<?php
2
3use Wikimedia\AtEase\AtEase;
4
/**
 * Test whether a value, once cast to string, is valid UTF-8.
 *
 * @param mixed $value Value to check; it is cast to string first
 * @return bool Result of mb_check_encoding() for the 'UTF-8' encoding
 */
static function isUtf8( $value ) {
	$asString = (string)$value;

	return mb_check_encoding( $asString, 'UTF-8' );
}
47
/**
 * Split a string on a separator, skipping separators that appear while a
 * start/end delimiter pair is open.
 *
 * The subject is scanned for the three tokens with a single regex. A
 * separator only splits while the depth counter is exactly zero. A start
 * delimiter raises the depth (only from zero unless $nested is true), and
 * every end delimiter lowers it unconditionally.
 *
 * @param string $startDelim Opening delimiter
 * @param string $endDelim Closing delimiter
 * @param string $separator Separator to split on
 * @param string $subject String to split
 * @param bool $nested Whether start delimiters seen inside an open pair
 *   also increase the depth
 * @return ArrayIterator Iterator over the resulting pieces
 */
static function delimiterExplode( $startDelim, $endDelim, $separator,
	$subject, $nested = false
) {
	$cursor = 0;
	$pieceStart = 0;
	$level = 0;
	$pattern = '!' . preg_quote( $startDelim, '!' )
		. '|' . preg_quote( $endDelim, '!' )
		. '|' . preg_quote( $separator, '!' )
		. '!S';
	$subjectLength = strlen( $subject );
	$found = [];
	$pieces = [];

	while ( $cursor < $subjectLength ) {
		if ( !preg_match( $pattern, $subject, $found, PREG_OFFSET_CAPTURE, $cursor ) ) {
			// No further tokens in the rest of the subject
			break;
		}

		list( $token, $tokenPos ) = $found[0];
		// Resume scanning right after this token
		$cursor = $tokenPos + strlen( $token );

		// The separator is tested first, so it wins if it is identical to a delimiter
		if ( $token === $separator ) {
			if ( $level === 0 ) {
				$pieces[] = substr( $subject, $pieceStart, $tokenPos - $pieceStart );
				$pieceStart = $cursor;
			}
		} elseif ( $token === $startDelim ) {
			if ( $nested || $level === 0 ) {
				$level++;
			}
		} else {
			// Anything else the pattern can match is the end delimiter
			$level--;
		}
	}

	// Whatever follows the last split point forms the final piece
	$pieces[] = substr( $subject, $pieceStart );

	// The pieces are collected in an array first and wrapped afterwards;
	// callers only ever see an iterator.
	return new ArrayIterator( $pieces );
}
100
/**
 * Replace every span running from a start delimiter to the first end
 * delimiter that follows it.
 *
 * The subject is cut at each start delimiter. In every resulting piece,
 * the text up to and including the first end delimiter is swapped for
 * $replace. A piece with no end delimiter is written back unchanged,
 * start delimiter included.
 *
 * @param string $startDelim Start delimiter
 * @param string $endDelim End delimiter
 * @param string $replace Literal replacement text
 * @param string $subject String to process
 * @return string
 */
static function hungryDelimiterReplace( $startDelim, $endDelim, $replace, $subject ) {
	$pieces = explode( $startDelim, $subject );
	// Text before the first start delimiter is copied through untouched
	$result = array_shift( $pieces );
	$endLength = strlen( $endDelim );

	foreach ( $pieces as $piece ) {
		$endAt = strpos( $piece, $endDelim );
		$result .= $endAt !== false
			? $replace . substr( $piece, $endAt + $endLength )
			: $startDelim . $piece;
	}

	return $result;
}
132
/**
 * Replace each start-delimiter ... end-delimiter span with the value
 * returned by a callback.
 *
 * The callback is called with a two-element array: index 0 is the whole
 * span including both delimiters, index 1 is the text between them. For
 * START START END the span begins at the first START. An end delimiter
 * with no pending start is copied to the output unchanged.
 *
 * @param string $startDelim Start delimiter
 * @param string $endDelim End delimiter
 * @param callable $callback Receives the two-element match array and
 *   returns the replacement text
 * @param string $subject String to process
 * @param string $flags Regex modifiers appended to the search pattern; if
 *   it contains 'i', the extra end-delimiter check is case-insensitive too
 * @throws InvalidArgumentException If the pattern matches but neither
 *   capture group holds a non-empty string
 * @return string
 */
static function delimiterReplaceCallback( $startDelim, $endDelim, $callback,
	$subject, $flags = ''
) {
	$cursor = 0;
	$flushedTo = 0;
	$innerStart = 0;
	$result = '';
	$open = false;
	$quotedStart = preg_quote( $startDelim, '!' );
	$quotedEnd = preg_quote( $endDelim, '!' );
	$compare = strpos( $flags, 'i' ) === false ? 'strcmp' : 'strcasecmp';
	$endDelimLength = strlen( $endDelim );
	$pattern = '!(' . $quotedStart . ')|(' . $quotedEnd . ')!S' . $flags;
	$subjectLength = strlen( $subject );
	$hit = [];

	while ( $cursor < $subjectLength ) {
		if ( !preg_match( $pattern, $subject, $hit, PREG_OFFSET_CAPTURE, $cursor ) ) {
			// No more delimiters of either kind
			break;
		}

		$tokenAt = $hit[0][1];
		if ( $hit[1][0] != '' ) {
			# The start alternative matched. While a span is open, an end
			# delimiter found at the very same offset takes precedence.
			$isEnd = $open &&
				$compare( $endDelim, substr( $subject, $tokenAt, $endDelimLength ) ) == 0;
			$tokenSize = $isEnd ? $endDelimLength : strlen( $hit[0][0] );
		} elseif ( $hit[2][0] != '' ) {
			$isEnd = true;
			$tokenSize = strlen( $hit[0][0] );
		} else {
			throw new InvalidArgumentException( 'Invalid delimiter given to ' . __METHOD__ );
		}
		$tokenEnd = $tokenAt + $tokenSize;

		if ( !$isEnd ) {
			if ( $open ) {
				# A span is already open, so START START END keeps the outer
				# pair. Step past only the first character of this START so
				# that an END overlapping it can still be found.
				$cursor = $tokenAt + 1;
				continue;
			}

			# First start delimiter: flush the plain text preceding it and
			# remember where the span and its inner content begin.
			$result .= substr( $subject, $flushedTo, $tokenAt - $flushedTo );
			$flushedTo = $tokenAt;
			$cursor = $tokenEnd;
			$innerStart = $tokenEnd;
			$open = true;
			continue;
		}

		if ( $open ) {
			# Complete span: hand the whole match and its content to the callback
			$result .= $callback( [
				substr( $subject, $flushedTo, $tokenEnd - $flushedTo ),
				substr( $subject, $innerStart, $tokenAt - $innerStart )
			] );
			$open = false;
		} else {
			# Stray end delimiter: copy everything up to and including it
			$result .= substr( $subject, $cursor, $tokenEnd - $flushedTo );
		}
		$cursor = $tokenEnd;
		$flushedTo = $tokenEnd;
	}

	# Append whatever has not been written out yet
	if ( $flushedTo < $subjectLength ) {
		$result .= substr( $subject, $flushedTo );
	}

	return $result;
}
232
/**
 * Replace each start-delimiter ... end-delimiter span with a replacement
 * template.
 *
 * Inside $replace, '$0' is substituted with the whole matched span
 * (delimiters included) and '$1' with the text between the delimiters.
 * The scanning itself is delegated to delimiterReplaceCallback().
 *
 * @param string $startDelim Start delimiter
 * @param string $endDelim End delimiter
 * @param string $replace Replacement template; may contain $0 and $1
 * @param string $subject String to process
 * @param string $flags Passed through to delimiterReplaceCallback()
 * @return string
 */
static function delimiterReplace( $startDelim, $endDelim, $replace, $subject, $flags = '' ) {
	$expandTemplate = function ( array $matches ) use ( $replace ) {
		$substitutions = [
			'$0' => $matches[0],
			'$1' => $matches[1],
		];

		return strtr( $replace, $substitutions );
	};

	return self::delimiterReplaceCallback(
		$startDelim,
		$endDelim,
		$expandTemplate,
		$subject,
		$flags
	);
}
257
/**
 * Split text on a separator, ignoring separators that sit inside
 * <...> spans.
 *
 * A NUL byte serves as a temporary stand-in for protected separators,
 * so any NUL bytes already present in $text are removed first.
 *
 * @param string $separator Separator to split on
 * @param string $text Text to split
 * @return array Pieces of $text, with protected separators restored
 */
static function explodeMarkup( $separator, $text ) {
	$marker = "\x00";

	// Drop pre-existing markers so they cannot be mistaken for ours
	$text = str_replace( $marker, '', $text );

	// Within each <...> span, hide the separator behind the marker
	$hideSeparator = function ( array $matches ) use ( $separator, $marker ) {
		return str_replace( $separator, $marker, $matches[0] );
	};
	$masked = self::delimiterReplaceCallback( '<', '>', $hideSeparator, $text );

	// Split on the separators left visible, then turn markers back into separators
	$parts = explode( $separator, $masked );
	foreach ( $parts as $index => $part ) {
		$parts[$index] = str_replace( $marker, $separator, $part );
	}

	return $parts;
}
288
/**
 * Replace a search string with a replacement, leaving occurrences inside
 * <...> spans untouched.
 *
 * A NUL byte serves as a temporary stand-in for protected occurrences,
 * so any NUL bytes already present in $text are removed first.
 *
 * @param string $search String to look for
 * @param string $replace String to substitute outside <...> spans
 * @param string $text Text to process
 * @return string
 */
static function replaceMarkup( $search, $replace, $text ) {
	$marker = "\x00";

	// Drop pre-existing markers so they cannot be mistaken for ours
	$text = str_replace( $marker, '', $text );

	// Within each <...> span, hide the search string behind the marker
	$hideSearch = function ( array $matches ) use ( $search, $marker ) {
		return str_replace( $search, $marker, $matches[0] );
	};
	$masked = self::delimiterReplaceCallback( '<', '>', $hideSearch, $text );

	// Replace the occurrences left visible, then turn markers back into the search string
	return str_replace(
		$marker,
		$search,
		str_replace( $search, $replace, $masked )
	);
}
318
/**
 * Check whether a string is usable as a PCRE pattern.
 *
 * The pattern is run against an empty subject with warnings suppressed.
 * It is reported as invalid only when preg_match() returns false.
 *
 * @param string $string Candidate pattern, delimiters included
 * @return bool
 */
public static function isValidPCRERegex( $string ) {
	AtEase::suppressWarnings();
	// @phan-suppress-next-line PhanParamSuspiciousOrder False positive
	$matchResult = preg_match( $string, '' );
	AtEase::restoreWarnings();

	if ( $matchResult === false ) {
		return false;
	}

	return true;
}
335
/**
 * Escape backslashes and dollar signs in a string by prefixing each with
 * a backslash.
 *
 * @param string $string String to escape
 * @return string
 */
static function escapeRegexReplacement( $string ) {
	// The pairs are applied in order: backslashes are doubled first, so the
	// backslash added in front of '$' is not doubled again.
	return str_replace(
		[ '\\', '$' ],
		[ '\\\\', '\\$' ],
		$string
	);
}
348
/**
 * Split a string on a separator and return the pieces as an iterator.
 *
 * If the separator occurs more than 1000 times in the subject, an
 * ExplodeIterator is returned. Otherwise the result of PHP's explode()
 * is wrapped in an ArrayIterator.
 *
 * @param string $separator Separator to split on
 * @param string $subject String to split
 * @return ArrayIterator|ExplodeIterator
 */
static function explode( $separator, $subject ) {
	$separatorCount = substr_count( $subject, $separator );

	if ( $separatorCount > 1000 ) {
		return new ExplodeIterator( $separator, $subject );
	}

	return new ArrayIterator( explode( $separator, $subject ) );
}
363}
An iterator which works exactly like:
A collection of static methods to play with strings.
static hungryDelimiterReplace( $startDelim, $endDelim, $replace, $subject)
Perform an operation equivalent to preg_replace()
static delimiterReplace( $startDelim, $endDelim, $replace, $subject, $flags='')
Perform an operation equivalent to preg_replace() with flags.
static explodeMarkup( $separator, $text)
More or less "markup-safe" explode(). Ignores any instances of the separator inside <...>.
static isValidPCRERegex( $string)
Utility function to check if the given string is a valid PCRE regex.
static delimiterReplaceCallback( $startDelim, $endDelim, $callback, $subject, $flags='')
Perform an operation equivalent to preg_replace_callback()
static escapeRegexReplacement( $string)
Escape a string to make it suitable for inclusion in a preg_replace() replacement parameter.
static isUtf8( $value)
Test whether a string is valid UTF-8.
static replaceMarkup( $search, $replace, $text)
More or less "markup-safe" str_replace(). Ignores any instances of the search string inside <...>.
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
static delimiterExplode( $startDelim, $endDelim, $separator, $subject, $nested=false)
Explode a string, but ignore any instances of the separator inside the given start and end delimiters...