MediaWiki  1.34.0
StringUtils.php
Go to the documentation of this file.
1 <?php
2 
3 use Wikimedia\AtEase\AtEase;
4 
29 class StringUtils {
/**
 * Test whether a value, once cast to string, is valid UTF-8.
 *
 * @param mixed $value Value to check; it is cast to string before checking
 * @return bool Result of mb_check_encoding() for the 'UTF-8' encoding
 */
static function isUtf8( $value ) {
	$asString = (string)$value;

	return mb_check_encoding( $asString, 'UTF-8' );
}
47 
/**
 * Explode a string on a separator, ignoring any separator that sits between
 * a start delimiter and its matching end delimiter.
 *
 * Example: delimiterExplode( '[', ']', '|', 'a|[b|c]|d' ) yields
 * 'a', '[b|c]' and 'd'.
 *
 * An end delimiter that has no open start delimiter is treated as plain
 * text: it does not affect how later separators are handled.
 *
 * @param string $startDelim Literal text that opens a protected region
 * @param string $endDelim Literal text that closes a protected region
 * @param string $separator Literal text to split on
 * @param string $subject String to split
 * @param bool $nested When true, a start delimiter inside a protected region
 *   increases the nesting depth; when false only the first one counts
 * @return ArrayIterator Iterator over the resulting pieces
 * @throws InvalidArgumentException If the scanner matches a zero-length
 *   token, which happens when a delimiter or the separator is empty
 */
static function delimiterExplode( $startDelim, $endDelim, $separator,
	$subject, $nested = false
) {
	$inputPos = 0;
	$lastPos = 0;
	$depth = 0;
	$encStart = preg_quote( $startDelim, '!' );
	$encEnd = preg_quote( $endDelim, '!' );
	$encSep = preg_quote( $separator, '!' );
	$pattern = "!$encStart|$encEnd|$encSep!S";
	$len = strlen( $subject );
	$m = [];
	$exploded = [];
	while (
		$inputPos < $len &&
		preg_match( $pattern, $subject, $m, PREG_OFFSET_CAPTURE, $inputPos )
	) {
		$match = $m[0][0];
		$matchPos = $m[0][1];
		if ( $match === '' ) {
			// A zero-length token would never advance $inputPos and the
			// loop would not terminate
			throw new InvalidArgumentException( 'Invalid delimiter given to ' . __METHOD__ );
		}
		$inputPos = $matchPos + strlen( $match );
		if ( $match === $separator ) {
			// Only separators outside every protected region split the subject
			if ( $depth === 0 ) {
				$exploded[] = substr(
					$subject, $lastPos, $matchPos - $lastPos
				);
				$lastPos = $inputPos;
			}
		} elseif ( $match === $startDelim ) {
			if ( $depth === 0 || $nested ) {
				$depth++;
			}
		} elseif ( $depth > 0 ) {
			// End delimiter: close a region, but never let the depth go
			// negative on an unmatched end delimiter
			$depth--;
		}
	}
	// Whatever follows the last top-level separator is the final piece
	$exploded[] = substr( $subject, $lastPos );
	// This method could be rewritten in the future to avoid creating an
	// intermediate array, since the return type is just an iterator.
	return new ArrayIterator( $exploded );
}
100 
/**
 * Replace each section that starts with $startDelim and runs up to the
 * first following $endDelim with $replace.
 *
 * The subject is split on the start delimiter, so the end delimiter is only
 * looked for between one start delimiter and the next. A start delimiter
 * with no end delimiter in that stretch is written back unchanged.
 *
 * @param string $startDelim Literal start delimiter
 * @param string $endDelim Literal end delimiter
 * @param string $replace Text written in place of each delimited section
 * @param string $subject String to process
 * @return string
 */
static function hungryDelimiterReplace( $startDelim, $endDelim, $replace, $subject ) {
	$pieces = explode( $startDelim, $subject );
	// Everything before the first start delimiter is copied verbatim
	$result = array_shift( $pieces );
	$endLen = strlen( $endDelim );

	foreach ( $pieces as $piece ) {
		$closeAt = strpos( $piece, $endDelim );
		$result .= $closeAt === false
			? $startDelim . $piece
			: $replace . substr( $piece, $closeAt + $endLen );
	}

	return $result;
}
132 
/**
 * Replace every START ... END delimited section of a string with the
 * result of a callback, in the manner of preg_replace_callback().
 *
 * The subject is scanned one delimiter token at a time. With
 * START START END the outer pair is matched. Text outside matched sections,
 * including an END with no preceding START and a START that is never
 * closed, is copied to the output unchanged.
 *
 * @param string $startDelim Literal start delimiter
 * @param string $endDelim Literal end delimiter
 * @param callable $callback Called with [ 0 => whole section including both
 *   delimiters, 1 => text between the delimiters ]; its return value is
 *   appended to the output in place of the section
 * @param string $subject String to search
 * @param string $flags Modifiers appended to the regular expression; if it
 *   contains 'i' the delimiters are compared case-insensitively
 * @return string
 * @throws InvalidArgumentException If a token is matched for which neither
 *   delimiter group captured any text (e.g. an empty delimiter)
 */
static function delimiterReplaceCallback( $startDelim, $endDelim, $callback,
	$subject, $flags = ''
) {
	$inputPos = 0;
	$outputPos = 0;
	$contentPos = 0;
	$output = '';
	$foundStart = false;
	$encStart = preg_quote( $startDelim, '!' );
	$encEnd = preg_quote( $endDelim, '!' );
	$strcmp = strpos( $flags, 'i' ) === false ? 'strcmp' : 'strcasecmp';
	$endLength = strlen( $endDelim );
	// Neither the subject nor the pattern changes while scanning
	$subjectLength = strlen( $subject );
	$pattern = "!($encStart)|($encEnd)!S$flags";
	$m = [];

	while ( $inputPos < $subjectLength &&
		preg_match( $pattern, $subject, $m, PREG_OFFSET_CAPTURE, $inputPos )
	) {
		$tokenOffset = $m[0][1];
		if ( $m[1][0] !== '' ) {
			if ( $foundStart &&
				$strcmp( $endDelim, substr( $subject, $tokenOffset, $endLength ) ) === 0
			) {
				# An end match is present at the same location
				$tokenType = 'end';
				$tokenLength = $endLength;
			} else {
				$tokenType = 'start';
				$tokenLength = strlen( $m[0][0] );
			}
		} elseif ( isset( $m[2] ) && $m[2][0] !== '' ) {
			$tokenType = 'end';
			$tokenLength = strlen( $m[0][0] );
		} else {
			throw new InvalidArgumentException( 'Invalid delimiter given to ' . __METHOD__ );
		}

		if ( $tokenType === 'start' ) {
			# Only move the start position if we haven't already found a start
			# This means that START START END matches outer pair
			if ( !$foundStart ) {
				# Found start
				$inputPos = $tokenOffset + $tokenLength;
				# Write out the non-matching section
				$output .= substr( $subject, $outputPos, $tokenOffset - $outputPos );
				$outputPos = $tokenOffset;
				$contentPos = $inputPos;
				$foundStart = true;
			} else {
				# Move the input position past the *first character* of START,
				# to protect against missing END when it overlaps with START
				$inputPos = $tokenOffset + 1;
			}
		} else {
			if ( $foundStart ) {
				# Found match
				$output .= $callback( [
					substr( $subject, $outputPos, $tokenOffset + $tokenLength - $outputPos ),
					substr( $subject, $contentPos, $tokenOffset - $contentPos )
				] );
				$foundStart = false;
			} else {
				# Non-matching end, write it out together with the text before it.
				# $inputPos and $outputPos are equal whenever no start is open,
				# so the slice is taken from $outputPos to match its length.
				$output .= substr( $subject, $outputPos, $tokenOffset + $tokenLength - $outputPos );
			}
			$inputPos = $outputPos = $tokenOffset + $tokenLength;
		}
	}
	# Copy whatever is left, including an unterminated START section
	if ( $outputPos < $subjectLength ) {
		$output .= substr( $subject, $outputPos );
	}

	return $output;
}
232 
/**
 * Replace every START ... END delimited section of a string with a
 * replacement template, in the manner of preg_replace().
 *
 * Within $replace, '$0' is substituted with the whole matched section
 * (delimiters included) and '$1' with the text between the delimiters.
 * The scanning itself is delegated to delimiterReplaceCallback().
 *
 * @param string $startDelim Literal start delimiter
 * @param string $endDelim Literal end delimiter
 * @param string $replace Replacement template, may contain '$0' and '$1'
 * @param string $subject String to search
 * @param string $flags Passed through to delimiterReplaceCallback()
 * @return string
 */
static function delimiterReplace( $startDelim, $endDelim, $replace, $subject, $flags = '' ) {
	return self::delimiterReplaceCallback(
		$startDelim, $endDelim,
		function ( array $matches ) use ( $replace ) {
			return strtr( $replace, [ '$0' => $matches[0], '$1' => $matches[1] ] );
		},
		$subject, $flags
	);
}
257 
/**
 * More or less "markup-safe" explode(): split $text on $separator while
 * leaving separators that occur between '<' and '>' in place.
 *
 * NUL bytes ("\x00") are used internally as a placeholder, so any NUL byte
 * present in $text is removed before splitting.
 *
 * @param string $separator Text to split on
 * @param string $text Text to split
 * @return array Pieces of $text
 */
static function explodeMarkup( $separator, $text ) {
	$placeholder = "\x00";

	// Remove placeholder instances
	$text = str_replace( $placeholder, '', $text );

	// Replace instances of the separator inside HTML-like tags with the placeholder
	$cleaned = self::delimiterReplaceCallback(
		'<', '>',
		function ( array $matches ) use ( $separator, $placeholder ) {
			return str_replace( $separator, $placeholder, $matches[0] );
		},
		$text
	);

	// Explode, then put the replaced separators back in
	$items = explode( $separator, $cleaned );
	foreach ( $items as $i => $str ) {
		$items[$i] = str_replace( $placeholder, $separator, $str );
	}

	return $items;
}
288 
/**
 * More or less "markup-safe" str_replace(): replace $search with $replace
 * in $text, except where $search occurs between '<' and '>'.
 *
 * NUL bytes ("\x00") are used internally as a placeholder, so any NUL byte
 * present in $text is removed before replacing.
 *
 * @param string $search Text to look for
 * @param string $replace Text to put in its place
 * @param string $text Text to process
 * @return string
 */
static function replaceMarkup( $search, $replace, $text ) {
	$placeholder = "\x00";

	// Remove placeholder instances
	$text = str_replace( $placeholder, '', $text );

	// Replace instances of the search string inside HTML-like tags with the placeholder
	$cleaned = self::delimiterReplaceCallback(
		'<', '>',
		function ( array $matches ) use ( $search, $placeholder ) {
			return str_replace( $search, $placeholder, $matches[0] );
		},
		$text
	);

	// Do the replacement outside tags, then restore the search string
	// wherever it was hidden behind the placeholder
	$cleaned = str_replace( $search, $replace, $cleaned );
	$text = str_replace( $placeholder, $search, $cleaned );

	return $text;
}
318 
/**
 * Check whether a string is usable as a PCRE pattern.
 *
 * The pattern is tried against an empty subject with warnings suppressed;
 * it is reported as valid unless preg_match() returns false.
 *
 * @param string $string Candidate pattern, including its delimiters
 * @return bool
 */
public static function isValidPCRERegex( $string ) {
	AtEase::suppressWarnings();
	// @phan-suppress-next-line PhanParamSuspiciousOrder False positive
	$matchResult = preg_match( $string, '' );
	AtEase::restoreWarnings();

	return !( $matchResult === false );
}
335 
/**
 * Escape a string for use as a preg_replace() replacement parameter.
 *
 * Backslashes are doubled first, then every '$' is prefixed with a
 * backslash.
 *
 * @param string $string Text to escape
 * @return string
 */
static function escapeRegexReplacement( $string ) {
	// str_replace() applies the pairs in array order, so backslashes are
	// doubled before the '$' escapes are added
	return str_replace( [ '\\', '$' ], [ '\\\\', '\\$' ], $string );
}
348 
/**
 * Workalike for explode() that returns an iterator.
 *
 * When the subject contains more than 1000 separators an ExplodeIterator
 * is returned; otherwise the result of explode() wrapped in an
 * ArrayIterator.
 *
 * @param string $separator Text to split on
 * @param string $subject Text to split
 * @return ArrayIterator|ExplodeIterator
 */
static function explode( $separator, $subject ) {
	$manySeparators = substr_count( $subject, $separator ) > 1000;

	return $manySeparators
		? new ExplodeIterator( $separator, $subject )
		: new ArrayIterator( explode( $separator, $subject ) );
}
363 }
StringUtils\isUtf8
static isUtf8( $value)
Test whether a string is valid UTF-8.
Definition: StringUtils.php:44
StringUtils\hungryDelimiterReplace
static hungryDelimiterReplace( $startDelim, $endDelim, $replace, $subject)
Perform an operation equivalent to preg_replace()
Definition: StringUtils.php:118
StringUtils
A collection of static methods to play with strings.
Definition: StringUtils.php:29
StringUtils\escapeRegexReplacement
static escapeRegexReplacement( $string)
Escape a string to make it suitable for inclusion in a preg_replace() replacement parameter.
Definition: StringUtils.php:343
$s
$s
Definition: mergeMessageFileList.php:185
StringUtils\isValidPCRERegex
static isValidPCRERegex( $string)
Utility function to check if the given string is a valid PCRE regex.
Definition: StringUtils.php:328
StringUtils\replaceMarkup
static replaceMarkup( $search, $replace, $text)
More or less "markup-safe" str_replace(). Ignores any instances of the search string inside <...>.
Definition: StringUtils.php:297
ExplodeIterator
An iterator which works exactly like:
Definition: ExplodeIterator.php:30
StringUtils\explodeMarkup
static explodeMarkup( $separator, $text)
More or less "markup-safe" explode(). Ignores any instances of the separator inside <...>.
Definition: StringUtils.php:265
$matches
$matches
Definition: NoLocalSettings.php:24
StringUtils\explode
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
Definition: StringUtils.php:356
$output
$output
Definition: SyntaxHighlight.php:335
StringUtils\delimiterReplaceCallback
static delimiterReplaceCallback( $startDelim, $endDelim, $callback, $subject, $flags='')
Perform an operation equivalent to preg_replace_callback()
Definition: StringUtils.php:157
StringUtils\delimiterExplode
static delimiterExplode( $startDelim, $endDelim, $separator, $subject, $nested=false)
Explode a string, but ignore any instances of the separator inside the given start and end delimiters...
Definition: StringUtils.php:59
StringUtils\delimiterReplace
static delimiterReplace( $startDelim, $endDelim, $replace, $subject, $flags='')
Perform an operation equivalent to preg_replace() with flags.
Definition: StringUtils.php:248