MediaWiki master
StringUtils.php
Go to the documentation of this file.
1<?php
2
4
5use ArrayIterator;
6use InvalidArgumentException;
7use Wikimedia\Assert\Assert;
9
/**
 * Test whether a string is valid UTF-8.
 *
 * The value is cast to string before it is checked.
 *
 * @param mixed $value Value to test
 * @return bool Result of mb_check_encoding() for the 'UTF-8' encoding
 */
public static function isUtf8( $value ) {
	$asString = (string)$value;

	return mb_check_encoding( $asString, 'UTF-8' );
}
38
/**
 * Explode a string, but ignore any instances of the separator that occur
 * between the given start and end delimiters.
 *
 * @param string $startDelim Token that opens a protected region
 * @param string $endDelim Token that closes a protected region
 * @param string $separator Token to split on while outside protected regions
 * @param string $subject String to split
 * @param bool $nested If true, every start delimiter increases the depth;
 *   if false, only a start delimiter seen at depth zero does
 * @return ArrayIterator Iterator over the resulting pieces
 * @throws InvalidArgumentException If a delimiter or the separator matches
 *   the empty string, since the scan could then never advance
 */
public static function delimiterExplode( $startDelim, $endDelim, $separator,
	$subject, $nested = false
) {
	$encStart = preg_quote( $startDelim, '!' );
	$encEnd = preg_quote( $endDelim, '!' );
	$encSep = preg_quote( $separator, '!' );
	// Built once: none of its parts change while scanning.
	$pattern = "!$encStart|$encEnd|$encSep!S";
	$len = strlen( $subject );

	$inputPos = 0;
	$lastPos = 0;
	$depth = 0;
	$m = [];
	$exploded = [];
	while (
		$inputPos < $len &&
		preg_match( $pattern, $subject, $m, PREG_OFFSET_CAPTURE, $inputPos )
	) {
		[ $match, $matchPos ] = $m[0];
		if ( $match === '' ) {
			// An empty token matched at the current position. $inputPos would
			// stay where it is and the same match would repeat forever, so
			// fail loudly instead of looping until memory runs out.
			throw new InvalidArgumentException( 'Invalid delimiter given to ' . __METHOD__ );
		}
		$inputPos = $matchPos + strlen( $match );

		if ( $match === $separator ) {
			// Separators only split while outside any delimited region.
			if ( $depth === 0 ) {
				$exploded[] = substr( $subject, $lastPos, $matchPos - $lastPos );
				$lastPos = $inputPos;
			}
		} elseif ( $match === $startDelim ) {
			if ( $depth === 0 || $nested ) {
				$depth++;
			}
		} else {
			// Every end delimiter lowers the depth, including an unmatched one,
			// in which case the depth becomes negative.
			$depth--;
		}
	}
	// Whatever follows the last split point is the final piece.
	$exploded[] = substr( $subject, $lastPos );

	// This method could be rewritten in the future to avoid creating an
	// intermediate array, since the return type is just an iterator.
	return new ArrayIterator( $exploded );
}
91
/**
 * Perform an operation equivalent to preg_replace() on delimited regions,
 * implemented with plain string functions.
 *
 * The subject is split on every start delimiter. For each piece after the
 * first, the text up to and including the first end delimiter is swapped for
 * $replace; a piece without an end delimiter is copied back with its start
 * delimiter restored.
 *
 * @param string $startDelim Start delimiter
 * @param string $endDelim End delimiter
 * @param string $replace Replacement text, inserted literally
 * @param string $subject String to search
 * @return string The subject with replacements applied
 */
public static function hungryDelimiterReplace( $startDelim, $endDelim, $replace, $subject ) {
	$pieces = explode( $startDelim, $subject );
	// Text before the first start delimiter is never touched.
	$result = array_shift( $pieces );

	foreach ( $pieces as $piece ) {
		$closeAt = strpos( $piece, $endDelim );
		$result .= $closeAt === false
			// No closing delimiter in this piece: restore it unchanged.
			? $startDelim . $piece
			// Drop everything through the closing delimiter, keep the rest.
			: $replace . substr( $piece, $closeAt + strlen( $endDelim ) );
	}

	return $result;
}
123
/**
 * Replace each START ... END region of a string with the result of a callback.
 *
 * The callback receives a two-element array: element 0 is the whole region
 * including both delimiters, element 1 is the content between them. Its return
 * value is appended to the output in place of the region. When several start
 * delimiters precede an end delimiter, the outermost one is paired with it.
 * Unmatched delimiters are copied to the output unchanged.
 *
 * @param string $startDelim Start delimiter
 * @param string $endDelim End delimiter
 * @param callable $callback Called as $callback( [ $whole, $content ] )
 * @param string $subject String to search
 * @param string $flags Regular expression modifiers appended to the search
 *   pattern; if it contains 'i', delimiters are compared case-insensitively
 * @return string The subject with replacements applied
 * @throws InvalidArgumentException If a delimiter matches the empty string
 */
private static function delimiterReplaceCallback( $startDelim, $endDelim, $callback,
	$subject, $flags = ''
) {
	$inputPos = 0;
	$outputPos = 0;
	$contentPos = 0;
	$output = '';
	$foundStart = false;
	$encStart = preg_quote( $startDelim, '!' );
	$encEnd = preg_quote( $endDelim, '!' );
	$strcmp = str_contains( $flags, 'i' ) ? 'strcasecmp' : 'strcmp';
	$endLength = strlen( $endDelim );
	// Loop invariants, computed once.
	$subjectLength = strlen( $subject );
	$pattern = "!($encStart)|($encEnd)!S$flags";
	$m = [];

	while ( $inputPos < $subjectLength &&
		preg_match( $pattern, $subject, $m, PREG_OFFSET_CAPTURE, $inputPos )
	) {
		$tokenOffset = $m[0][1];
		$tokenLength = strlen( $m[0][0] );

		if ( $m[1][0] !== '' ) {
			$isStart = true;
			if ( $foundStart &&
				$strcmp( $endDelim, substr( $subject, $tokenOffset, $endLength ) ) === 0
			) {
				# An end match is present at the same location; the start
				# alternative only won because it is listed first in the pattern
				$isStart = false;
				$tokenLength = $endLength;
			}
		} elseif ( ( $m[2][0] ?? '' ) !== '' ) {
			$isStart = false;
		} else {
			# Neither group captured anything: a delimiter matched the empty
			# string. Group 2 is absent from $m when it did not take part in
			# the match, hence the null-coalescing read above.
			throw new InvalidArgumentException( 'Invalid delimiter given to ' . __METHOD__ );
		}

		if ( $isStart ) {
			if ( $foundStart ) {
				# Already inside a region, so START START END matches the outer
				# pair. Move only past the *first character* of this START, to
				# protect against missing an END that overlaps with it
				$inputPos = $tokenOffset + 1;
			} else {
				# First START of a region: write out the non-matching section
				# before it and remember where the region and its content begin
				$inputPos = $tokenOffset + $tokenLength;
				$output .= substr( $subject, $outputPos, $tokenOffset - $outputPos );
				$outputPos = $tokenOffset;
				$contentPos = $inputPos;
				$foundStart = true;
			}
			continue;
		}

		$tokenEnd = $tokenOffset + $tokenLength;
		if ( $foundStart ) {
			# Found a complete region, hand it to the callback
			$output .= $callback( [
				substr( $subject, $outputPos, $tokenEnd - $outputPos ),
				substr( $subject, $contentPos, $tokenOffset - $contentPos )
			] );
			$foundStart = false;
		} else {
			# Non-matching end, write it out together with the text before it.
			# Outside a region $inputPos equals $outputPos, so the copy is
			# measured from $outputPos throughout.
			$output .= substr( $subject, $outputPos, $tokenEnd - $outputPos );
		}
		$inputPos = $outputPos = $tokenEnd;
	}

	# Copy the tail, including any region that was opened but never closed
	if ( $outputPos < $subjectLength ) {
		$output .= substr( $subject, $outputPos );
	}

	return $output;
}
222
/**
 * Perform an operation equivalent to preg_replace() with flags, replacing
 * each region between the start and end delimiters.
 *
 * Within $replace, the literal text '$0' is substituted with the whole matched
 * region including delimiters, and '$1' with the content between them.
 *
 * @param string $startDelim Start delimiter
 * @param string $endDelim End delimiter
 * @param string $replace Replacement template; may contain $0 and $1
 * @param string $subject String to search
 * @param string $flags Regular expression modifiers passed through to the search
 * @return string The subject with replacements applied
 */
public static function delimiterReplace(
	$startDelim, $endDelim, $replace, $subject, $flags = ''
) {
	// Expand the two placeholders of the template for one matched region.
	$expandTemplate = static fn ( array $found ) => strtr(
		$replace,
		[ '$0' => $found[0], '$1' => $found[1] ]
	);

	return self::delimiterReplaceCallback(
		$startDelim,
		$endDelim,
		$expandTemplate,
		$subject,
		$flags
	);
}
249
/**
 * More or less "markup-safe" str_replace(): occurrences of $search that lie
 * between a '<' and the following '>' are left alone.
 *
 * Any NUL bytes in $text are removed, because NUL is used internally as a
 * temporary stand-in for the protected occurrences.
 *
 * @param string $search Text to look for
 * @param string $replace Text to substitute outside of tags
 * @param string $text Text to operate on
 * @return string The text with replacements applied
 */
public static function replaceMarkup( $search, $replace, $text ) {
	$marker = "\x00";

	// Hide occurrences of the search text inside one <...> region.
	$shield = static fn ( array $tag ) => str_replace( $search, $marker, $tag[0] );

	// The marker must not already occur in the input.
	$withoutMarker = str_replace( $marker, '', $text );

	// Swap the search text for the marker wherever it sits inside a tag.
	$masked = self::delimiterReplaceCallback( '<', '>', $shield, $withoutMarker );

	// Only unprotected occurrences remain visible to the real replacement.
	$replaced = str_replace( $search, $replace, $masked );

	// Restore the protected occurrences.
	return str_replace( $marker, $search, $replaced );
}
279
/**
 * Utility function to check if the given string is a valid PCRE regex.
 *
 * The pattern is tried against an empty subject with warnings silenced; it
 * counts as valid unless preg_match() reports failure by returning false.
 *
 * @param string $string The pattern to check, including its delimiters
 * @return bool
 */
public static function isValidPCRERegex( $string ) {
	// @phan-suppress-next-line PhanParamSuspiciousOrder False positive
	$outcome = @preg_match( $string, '' ); // phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged

	return $outcome !== false;
}
293
/**
 * Escape a string to make it suitable for inclusion in a preg_replace()
 * replacement parameter.
 *
 * @param string $string Text to escape
 * @return string The text with every backslash doubled and every dollar
 *   sign preceded by a backslash
 */
public static function escapeRegexReplacement( $string ) {
	// The pairs are applied in order, so backslashes are doubled before the
	// backslash that protects each dollar sign is introduced.
	return str_replace(
		[ '\\', '$' ],
		[ '\\\\', '\\$' ],
		$string
	);
}
305
/**
 * Workalike for explode() with limited memory usage.
 *
 * @param string $separator Separator to split on
 * @param string $subject String to split
 * @return ArrayIterator|ExplodeIterator An ExplodeIterator when the separator
 *   occurs more than 1000 times in the subject, otherwise an ArrayIterator
 *   over the result of explode()
 */
public static function explode( $separator, $subject ) {
	$separatorCount = substr_count( $subject, $separator );

	if ( $separatorCount <= 1000 ) {
		return new ArrayIterator( explode( $separator, $subject ) );
	}

	return new ExplodeIterator( $separator, $subject );
}
320
/**
 * Wrapper around PHP's unpack() that reports failure with an exception
 * instead of a false return value.
 *
 * @param string $format Format string passed to unpack()
 * @param string $data Binary data to unpack
 * @param int|false $length Minimum number of bytes $data must contain, or
 *   false to skip the length check
 * @return array The result of unpack()
 * @throws UnpackFailedException If $data is shorter than $length, or if
 *   unpack() returns false
 */
public static function unpack( string $format, string $data, $length = false ): array {
	Assert::parameterType( [ 'integer', 'false' ], $length, '$length' );

	$realLen = strlen( $data );
	if ( $length !== false && $realLen < $length ) {
		throw new UnpackFailedException( "Tried to unpack a "
			. "string of length $realLen, but needed one "
			. "of at least length $length."
		);
	}

	// phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
	$result = @unpack( $format, $data );
	if ( $result !== false ) {
		return $result;
	}

	// unpack() could not extract the packed data.
	throw new UnpackFailedException( "unpack could not unpack binary data" );
}
357}
358
// Make this class reachable under the global name 'StringUtils' as well.
// NOTE(review): presumably kept for callers that predate a namespaced class name; confirm.
class_alias( StringUtils::class, 'StringUtils' );
An iterator which works exactly like:
A collection of static methods to play with strings.
static replaceMarkup( $search, $replace, $text)
More or less "markup-safe" str_replace(). Ignores any instances of the search string inside <...> tags.
static delimiterReplace( $startDelim, $endDelim, $replace, $subject, $flags='')
Perform an operation equivalent to preg_replace() with flags.
static delimiterExplode( $startDelim, $endDelim, $separator, $subject, $nested=false)
Explode a string, but ignore any instances of the separator inside the given start and end delimiters...
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
static hungryDelimiterReplace( $startDelim, $endDelim, $replace, $subject)
Perform an operation equivalent to preg_replace()
static escapeRegexReplacement( $string)
Escape a string to make it suitable for inclusion in a preg_replace() replacement parameter.
static isValidPCRERegex( $string)
Utility function to check if the given string is a valid PCRE regex.
static isUtf8( $value)
Test whether a string is valid UTF-8.
static unpack(string $format, string $data, $length=false)
Wrapper around php's unpack.