MediaWiki  1.23.0
StringUtils.php
Go to the documentation of this file.
1 <?php
/**
 * A collection of static methods to play with strings.
 */
class StringUtils {
	/**
	 * Test whether a string is valid UTF-8.
	 *
	 * mb_check_encoding() is used when it exists and $disableMbstring is false.
	 * Otherwise the string is scanned with a set of regexes, each of which
	 * matches one kind of invalid byte sequence.
	 *
	 * @param string $value String to check; it is cast to string first
	 * @param bool $disableMbstring Pass true to skip mbstring and force the PCRE path
	 * @return bool True if $value is valid UTF-8, false otherwise
	 */
	public static function isUtf8( $value, $disableMbstring = false ) {
		$value = (string)$value;

		// If the mbstring extension is loaded, use it. However, before PHP 5.4, values above
		// U+10FFFF are incorrectly allowed, so we have to check for them separately.
		if ( !$disableMbstring && function_exists( 'mb_check_encoding' ) ) {
			// Cached per request: true when mbstring itself rejects a code point above U+10FFFF
			static $newPHP;
			if ( $newPHP === null ) {
				$newPHP = !mb_check_encoding( "\xf4\x90\x80\x80", 'UTF-8' );
			}

			return mb_check_encoding( $value, 'UTF-8' ) &&
				( $newPHP || preg_match( "/\xf4[\x90-\xbf]|[\xf5-\xff]/S", $value ) === 0 );
		}

		if ( preg_match( "/[\x80-\xff]/S", $value ) === 0 ) {
			// String contains only ASCII characters, has to be valid
			return true;
		}

		// PCRE implements repetition using recursion; to avoid a stack overflow (and segfault)
		// for large input, we check for invalid sequences (<= 5 bytes) rather than valid
		// sequences, which can be as long as the input string is. Multiple short regexes are
		// used rather than a single long regex for performance.
		static $regexes;
		if ( $regexes === null ) {
			$cont = "[\x80-\xbf]";
			$after = "(?!$cont)"; // "(?:[^\x80-\xbf]|$)" would work here
			$regexes = array(
				// Continuation byte at the start
				"/^$cont/",

				// ASCII byte followed by a continuation byte
				"/[\\x00-\x7f]$cont/S",

				// Illegal byte
				"/[\xc0\xc1\xf5-\xff]/S",

				// Invalid 2-byte sequence, or valid one then an extra continuation byte
				"/[\xc2-\xdf](?!$cont$after)/S",

				// Invalid 3-byte sequence, or valid one then an extra continuation byte
				"/\xe0(?![\xa0-\xbf]$cont$after)/",
				"/[\xe1-\xec\xee\xef](?!$cont{2}$after)/S",
				"/\xed(?![\x80-\x9f]$cont$after)/",

				// Invalid 4-byte sequence, or valid one then an extra continuation byte
				"/\xf0(?![\x90-\xbf]$cont{2}$after)/",
				"/[\xf1-\xf3](?!$cont{3}$after)/S",
				"/\xf4(?![\x80-\x8f]$cont{2}$after)/",
			);
		}

		foreach ( $regexes as $regex ) {
			// Anything other than "no match" (including a PCRE error) counts as invalid
			if ( preg_match( $regex, $value ) !== 0 ) {
				return false;
			}
		}

		return true;
	}

	/**
	 * Replace every span running from $startDelim to the first following
	 * $endDelim with $replace.
	 *
	 * Both delimiters are literal strings and $replace is inserted verbatim.
	 * A start delimiter with no end delimiter after it (before the next start
	 * delimiter) is copied to the output unchanged. The subject is split with
	 * explode(), so a full array of segments is held in memory.
	 *
	 * @param string $startDelim Literal start delimiter
	 * @param string $endDelim Literal end delimiter
	 * @param string $replace Literal replacement text
	 * @param string $subject Text to search
	 * @return string
	 */
	public static function hungryDelimiterReplace( $startDelim, $endDelim, $replace, $subject ) {
		$segments = explode( $startDelim, $subject );
		// Text before the first start delimiter is never part of a match
		$output = array_shift( $segments );
		foreach ( $segments as $s ) {
			$endDelimPos = strpos( $s, $endDelim );
			if ( $endDelimPos === false ) {
				// Unterminated start delimiter: put it back as it was
				$output .= $startDelim . $s;
			} else {
				$output .= $replace . substr( $s, $endDelimPos + strlen( $endDelim ) );
			}
		}

		return $output;
	}

	/**
	 * Replace each $startDelim ... $endDelim span in $subject with the value
	 * returned by $callback.
	 *
	 * The delimiters are literal strings (they are passed through preg_quote()).
	 * The callback receives a two-element array: element 0 is the whole span
	 * including both delimiters, element 1 is the text between them. When
	 * several start delimiters precede an end delimiter, the span begins at the
	 * first one. End delimiters with no open start, and a start with no end,
	 * are copied to the output unchanged.
	 *
	 * @param string $startDelim Literal start delimiter
	 * @param string $endDelim Literal end delimiter
	 * @param callable $callback Called through call_user_func() for each span
	 * @param string $subject Text to search
	 * @param string $flags Regex modifiers appended to the search pattern; an
	 *   'i' also makes the end-delimiter comparison case-insensitive
	 * @throws MWException If a match is reported in which neither delimiter matched
	 * @return string
	 */
	public static function delimiterReplaceCallback( $startDelim, $endDelim, $callback,
		$subject, $flags = ''
	) {
		$inputPos = 0;
		$outputPos = 0;
		// Only read while $foundStart is true, but initialised so it is never undefined
		$contentPos = 0;
		$output = '';
		$foundStart = false;
		$encStart = preg_quote( $startDelim, '!' );
		$encEnd = preg_quote( $endDelim, '!' );
		$strcmp = strpos( $flags, 'i' ) === false ? 'strcmp' : 'strcasecmp';
		$endLength = strlen( $endDelim );
		// Loop invariants: the subject and the pattern never change inside the loop
		$subjectLength = strlen( $subject );
		$regex = "!($encStart)|($encEnd)!S$flags";
		$m = array();

		while ( $inputPos < $subjectLength &&
			preg_match( $regex, $subject, $m, PREG_OFFSET_CAPTURE, $inputPos )
		) {
			$tokenOffset = $m[0][1];
			if ( $m[1][0] != '' ) {
				if ( $foundStart &&
					$strcmp( $endDelim, substr( $subject, $tokenOffset, $endLength ) ) == 0
				) {
					# An end match is present at the same location
					$tokenType = 'end';
					$tokenLength = $endLength;
				} else {
					$tokenType = 'start';
					$tokenLength = strlen( $m[0][0] );
				}
			} elseif ( isset( $m[2] ) && $m[2][0] != '' ) {
				$tokenType = 'end';
				$tokenLength = strlen( $m[0][0] );
			} else {
				# Neither group captured anything, e.g. an empty delimiter
				throw new MWException( 'Invalid delimiter given to ' . __METHOD__ );
			}

			if ( $tokenType == 'start' ) {
				# Only move the start position if we haven't already found a start
				# This means that START START END matches outer pair
				if ( !$foundStart ) {
					# Found start
					$inputPos = $tokenOffset + $tokenLength;
					# Write out the non-matching section
					$output .= substr( $subject, $outputPos, $tokenOffset - $outputPos );
					$outputPos = $tokenOffset;
					$contentPos = $inputPos;
					$foundStart = true;
				} else {
					# Move the input position past the *first character* of START,
					# to protect against missing END when it overlaps with START
					$inputPos = $tokenOffset + 1;
				}
			} else {
				# $tokenType is 'end' here
				if ( $foundStart ) {
					# Found match
					$output .= call_user_func( $callback, array(
						substr( $subject, $outputPos, $tokenOffset + $tokenLength - $outputPos ),
						substr( $subject, $contentPos, $tokenOffset - $contentPos )
					) );
					$foundStart = false;
				} else {
					# Non-matching end, write it out. With no start open,
					# $inputPos and $outputPos hold the same value, so slice
					# from $outputPos to match the length calculation.
					$output .= substr( $subject, $outputPos, $tokenOffset + $tokenLength - $outputPos );
				}
				$inputPos = $outputPos = $tokenOffset + $tokenLength;
			}
		}
		# Copy whatever is left, including an unterminated START and its content
		if ( $outputPos < $subjectLength ) {
			$output .= substr( $subject, $outputPos );
		}

		return $output;
	}

	/**
	 * Replace each $startDelim ... $endDelim span in $subject with $replace.
	 *
	 * This wraps delimiterReplaceCallback() with a RegexlikeReplacer, so "$0"
	 * in $replace stands for the whole span and "$1" for the text between the
	 * delimiters.
	 *
	 * @param string $startDelim Literal start delimiter
	 * @param string $endDelim Literal end delimiter
	 * @param string $replace Replacement text, may contain $0 and $1
	 * @param string $subject Text to search
	 * @param string $flags Regex modifiers, see delimiterReplaceCallback()
	 * @return string
	 */
	public static function delimiterReplace( $startDelim, $endDelim, $replace, $subject, $flags = '' ) {
		$replacer = new RegexlikeReplacer( $replace );

		return self::delimiterReplaceCallback( $startDelim, $endDelim,
			$replacer->cb(), $subject, $flags );
	}

	/**
	 * More or less "markup-safe" explode(): separators inside <...> spans do
	 * not split the text.
	 *
	 * NUL bytes are used internally as a placeholder, so any NUL bytes already
	 * in $text are removed from the result.
	 *
	 * @param string $separator
	 * @param string $text
	 * @return array
	 */
	public static function explodeMarkup( $separator, $text ) {
		$placeholder = "\x00";

		// Remove placeholder instances
		$text = str_replace( $placeholder, '', $text );

		// Replace instances of the separator inside HTML-like tags with the placeholder
		$replacer = new DoubleReplacer( $separator, $placeholder );
		$cleaned = self::delimiterReplaceCallback( '<', '>', $replacer->cb(), $text );

		// Explode, then put the replaced separators back in
		$items = explode( $separator, $cleaned );
		foreach ( $items as $i => $str ) {
			$items[$i] = str_replace( $placeholder, $separator, $str );
		}

		return $items;
	}

	/**
	 * Escape a string to make it suitable for inclusion in a preg_replace()
	 * replacement parameter.
	 *
	 * @param string $string
	 * @return string $string with every backslash and dollar sign backslash-escaped
	 */
	public static function escapeRegexReplacement( $string ) {
		// Backslashes first, so the ones added for "$" are not doubled again
		$string = str_replace( '\\', '\\\\', $string );
		$string = str_replace( '$', '\\$', $string );

		return $string;
	}

	/**
	 * Workalike for explode() with limited memory usage.
	 *
	 * Returns an ExplodeIterator when the separator occurs more than 1000
	 * times, and an ArrayIterator over a plain explode() result otherwise.
	 *
	 * @param string $separator
	 * @param string $subject
	 * @return ArrayIterator|ExplodeIterator
	 */
	public static function explode( $separator, $subject ) {
		if ( substr_count( $subject, $separator ) > 1000 ) {
			return new ExplodeIterator( $separator, $subject );
		} else {
			return new ArrayIterator( explode( $separator, $subject ) );
		}
	}
}
318 
/**
 * Base class for "replacers", objects used in preg_replace_callback() and
 * StringUtils::delimiterReplaceCallback().
 *
 * cb() refers to a replace() method that this class does not define, so a
 * usable replacer is a subclass that provides replace( $matches ).
 */
class Replacer {
	/**
	 * Get a callable that invokes this object's replace() method.
	 *
	 * @return array Callable of the form array( object, 'replace' )
	 */
	public function cb() {
		// Objects are handles, so no reference to $this is needed here
		return array( $this, 'replace' );
	}
}
331 
/**
 * Class to replace regex matches with a string similar to that used in
 * preg_replace().
 *
 * StringUtils::delimiterReplace() calls cb() on instances of this class, and
 * cb() is provided by Replacer.
 */
class RegexlikeReplacer extends Replacer {
	/** @var string Replacement template, may contain $0, $1, ... placeholders */
	private $r;

	/**
	 * @param string $r Replacement template
	 */
	public function __construct( $r ) {
		$this->r = $r;
	}

	/**
	 * Fill the template with the given match groups.
	 *
	 * @param array $matches Match groups; the key N supplies the text for "$N"
	 * @return string The template with each "$N" placeholder substituted
	 */
	public function replace( $matches ) {
		$pairs = array();
		foreach ( $matches as $i => $match ) {
			// "\$$i" is a literal dollar sign followed by the group key, e.g. "$1"
			$pairs["\$$i"] = $match;
		}

		return strtr( $this->r, $pairs );
	}
}
358 
/**
 * Class to perform secondary replacement within each replacement string.
 */
class DoubleReplacer extends Replacer {
	// These were previously created dynamically by the constructor, which made
	// them publicly accessible; they are declared public to keep that unchanged.

	/** @var mixed Search value handed to str_replace() */
	public $from;

	/** @var mixed Replacement value handed to str_replace() */
	public $to;

	/** @var int|string Key of the match group that is rewritten */
	public $index;

	/**
	 * @param mixed $from Search value for str_replace()
	 * @param mixed $to Replacement value for str_replace()
	 * @param int|string $index Key into the matches array, 0 by default
	 */
	public function __construct( $from, $to, $index = 0 ) {
		$this->from = $from;
		$this->to = $to;
		$this->index = $index;
	}

	/**
	 * Replace $from with $to inside the selected match group.
	 *
	 * @param array $matches Match groups
	 * @return string The selected group after the replacement
	 */
	public function replace( $matches ) {
		return str_replace( $this->from, $this->to, $matches[$this->index] );
	}
}
382 
/**
 * Class to perform replacement based on a simple hashtable lookup.
 *
 * NOTE(review): the parent class is assumed to be Replacer, like the other
 * replacer classes in this file - confirm against the canonical source.
 */
class HashtableReplacer extends Replacer {
	/** @var array Lookup table, keyed by the matched text */
	private $table;

	/** @var int|string Key of the match group used for the lookup */
	private $index;

	/**
	 * @param array $table Lookup table
	 * @param int|string $index Key into the matches array, 0 by default
	 */
	public function __construct( $table, $index = 0 ) {
		$this->table = $table;
		$this->index = $index;
	}

	/**
	 * Look up the selected match group in the table.
	 *
	 * The lookup is not guarded, so the table is expected to contain an entry
	 * for every value that can be matched.
	 *
	 * @param array $matches Match groups
	 * @return mixed The table entry for the selected group
	 */
	public function replace( $matches ) {
		return $this->table[$matches[$this->index]];
	}
}
406 
/**
 * Replacement array for FSS with fallback to strtr().
 * Supports lazy initialisation of the FSS resource.
 */
class ReplacementArray {
	/** @var array|bool Replacement pairs, search string => replacement */
	private $data = false;

	/** @var mixed Value returned by fss_prep_replace(), or false if not built yet */
	private $fss = false;

	/**
	 * Create an object with the specified replacement array.
	 * The array is later handed to strtr(), so it has the same form as the
	 * replacement array accepted by strtr().
	 *
	 * @param array $data
	 */
	public function __construct( $data = array() ) {
		$this->data = $data;
	}

	/**
	 * Serialize only the replacement pairs, not the prepared FSS value.
	 *
	 * @return array
	 */
	public function __sleep() {
		return array( 'data' );
	}

	/**
	 * Mark the FSS value as not built after unserialization.
	 */
	public function __wakeup() {
		$this->fss = false;
	}

	/**
	 * Set the whole replacement array at once.
	 *
	 * @param array $data
	 */
	public function setArray( $data ) {
		$this->data = $data;
		$this->fss = false;
	}

	/**
	 * @return array|bool The current replacement pairs
	 */
	public function getArray() {
		return $this->data;
	}

	/**
	 * Set an element of the replacement array.
	 *
	 * @param string $from
	 * @param string $to
	 */
	public function setPair( $from, $to ) {
		$this->data[$from] = $to;
		$this->fss = false;
	}

	/**
	 * Add the given pairs, overriding existing pairs with the same key.
	 *
	 * @param array $data
	 */
	public function mergeArray( $data ) {
		// array_merge() would renumber integer-like keys such as '5', which
		// destroys the search strings; array_replace() keeps every key.
		$this->data = array_replace( $this->data, $data );
		$this->fss = false;
	}

	/**
	 * Add the pairs of another ReplacementArray, overriding existing pairs
	 * with the same key.
	 *
	 * @param ReplacementArray $other
	 */
	public function merge( $other ) {
		// See mergeArray() for why array_merge() is not used
		$this->data = array_replace( $this->data, $other->data );
		$this->fss = false;
	}

	/**
	 * Remove the pair with the given search string, if present.
	 *
	 * @param string $from
	 */
	public function removePair( $from ) {
		unset( $this->data[$from] );
		$this->fss = false;
	}

	/**
	 * Remove every pair whose search string is a key of $data.
	 * The values of $data are ignored.
	 *
	 * @param array $data
	 */
	public function removeArray( $data ) {
		foreach ( array_keys( $data ) as $from ) {
			$this->removePair( $from );
		}
		// Still needed when $data is empty and removePair() never ran
		$this->fss = false;
	}

	/**
	 * Apply all replacement pairs to a string.
	 *
	 * Uses fss_prep_replace() / fss_exec_replace() when those functions exist,
	 * preparing the FSS value on first use, and strtr() otherwise.
	 *
	 * @param string $subject
	 * @return string
	 */
	public function replace( $subject ) {
		if ( function_exists( 'fss_prep_replace' ) ) {
			wfProfileIn( __METHOD__ . '-fss' );
			if ( $this->fss === false ) {
				$this->fss = fss_prep_replace( $this->data );
			}
			$result = fss_exec_replace( $this->fss, $subject );
			wfProfileOut( __METHOD__ . '-fss' );
		} else {
			wfProfileIn( __METHOD__ . '-strtr' );
			$result = strtr( $subject, $this->data );
			wfProfileOut( __METHOD__ . '-strtr' );
		}

		return $result;
	}
}
516 
/**
 * An iterator which yields the same tokens as iterating over
 * explode( $delim, $subject ), but extracts them one at a time instead of
 * building the whole array first.
 *
 * Unlike an exploded array, the key of each token is its byte offset in the
 * subject, not a sequential index.
 *
 * The #[\ReturnTypeWillChange] markers below are attributes on PHP 8 and plain
 * comments on older versions.
 */
class ExplodeIterator implements Iterator {
	// The subject string
	private $subject;

	// The length of the subject string
	private $subjectLength;

	// The delimiter
	private $delim;

	// The length of the delimiter
	private $delimLength;

	// The position of the start of the line
	private $curPos;

	// The position after the end of the next delimiter
	private $endPos;

	// The current token
	private $current;

	/**
	 * Construct an ExplodeIterator.
	 *
	 * @param string $delim Delimiter; expected to be non-empty, an empty one
	 *   is not checked for here
	 * @param string $subject String to split
	 */
	public function __construct( $delim, $subject ) {
		$this->subject = $subject;
		$this->delim = $delim;

		// Micro-optimisation (theoretical)
		$this->subjectLength = strlen( $subject );
		$this->delimLength = strlen( $delim );

		$this->rewind();
	}

	/**
	 * Go back to the first token.
	 */
	#[\ReturnTypeWillChange]
	public function rewind() {
		$this->curPos = 0;
		$this->endPos = strpos( $this->subject, $this->delim );
		$this->refreshCurrent();
	}

	/**
	 * Recompute the current token from $curPos and $endPos.
	 */
	public function refreshCurrent() {
		if ( $this->curPos === false ) {
			// Iteration has finished
			$this->current = false;
		} elseif ( $this->curPos >= $this->subjectLength ) {
			// Empty subject, or the subject ended with a delimiter
			$this->current = '';
		} elseif ( $this->endPos === false ) {
			// Last token: everything up to the end of the subject
			$this->current = substr( $this->subject, $this->curPos );
		} else {
			$this->current = substr( $this->subject, $this->curPos, $this->endPos - $this->curPos );
		}
	}

	/**
	 * @return string|bool The current token, or false once iteration has finished
	 */
	#[\ReturnTypeWillChange]
	public function current() {
		return $this->current;
	}

	/**
	 * @return int|bool Byte offset of the current token, or false once
	 *   iteration has finished
	 */
	#[\ReturnTypeWillChange]
	public function key() {
		return $this->curPos;
	}

	/**
	 * Advance to the next token.
	 *
	 * @return string|bool The new current token, or false if there is none
	 */
	#[\ReturnTypeWillChange]
	public function next() {
		if ( $this->endPos === false ) {
			// No delimiter followed the current token, so it was the last one
			$this->curPos = false;
		} else {
			$this->curPos = $this->endPos + $this->delimLength;
			if ( $this->curPos >= $this->subjectLength ) {
				$this->endPos = false;
			} else {
				$this->endPos = strpos( $this->subject, $this->delim, $this->curPos );
			}
		}
		$this->refreshCurrent();

		return $this->current;
	}

	/**
	 * @return bool True while there is a current token
	 */
	#[\ReturnTypeWillChange]
	public function valid() {
		return $this->curPos !== false;
	}
}
ExplodeIterator\$subjectLength
$subjectLength
Definition: StringUtils.php:528
$result
The index of the header message $result[1]=The index of the body text message $result[2 through n]=Parameters passed to body text message. Please note the header message cannot receive/use parameters. 'ImportHandleLogItemXMLTag':When parsing a XML tag in a log item. $reader:XMLReader object $logInfo:Array of information Return false to stop further processing of the tag 'ImportHandlePageXMLTag':When parsing a XML tag in a page. $reader:XMLReader object $pageInfo:Array of information Return false to stop further processing of the tag 'ImportHandleRevisionXMLTag':When parsing a XML tag in a page revision. $reader:XMLReader object $pageInfo:Array of page information $revisionInfo:Array of revision information Return false to stop further processing of the tag 'ImportHandleToplevelXMLTag':When parsing a top level XML tag. $reader:XMLReader object Return false to stop further processing of the tag 'ImportHandleUploadXMLTag':When parsing a XML tag in a file upload. $reader:XMLReader object $revisionInfo:Array of information Return false to stop further processing of the tag 'InfoAction':When building information to display on the action=info page. $context:IContextSource object & $pageInfo:Array of information 'InitializeArticleMaybeRedirect':MediaWiki check to see if title is a redirect. $title:Title object for the current page $request:WebRequest $ignoreRedirect:boolean to skip redirect check $target:Title/string of redirect target $article:Article object 'InterwikiLoadPrefix':When resolving if a given prefix is an interwiki or not. Return true without providing an interwiki to continue interwiki search. $prefix:interwiki prefix we are looking for. & $iwData:output array describing the interwiki with keys iw_url, iw_local, iw_trans and optionally iw_api and iw_wikiid. 'InternalParseBeforeSanitize':during Parser 's internalParse method just before the parser removes unwanted/dangerous HTML tags and after nowiki/noinclude/includeonly/onlyinclude and other processings. 
Ideal for syntax-extensions after template/parser function execution which respect nowiki and HTML-comments. & $parser:Parser object & $text:string containing partially parsed text & $stripState:Parser 's internal StripState object 'InternalParseBeforeLinks':during Parser 's internalParse method before links but after nowiki/noinclude/includeonly/onlyinclude and other processings. & $parser:Parser object & $text:string containing partially parsed text & $stripState:Parser 's internal StripState object 'InvalidateEmailComplete':Called after a user 's email has been invalidated successfully. $user:user(object) whose email is being invalidated 'IRCLineURL':When constructing the URL to use in an IRC notification. Callee may modify $url and $query, URL will be constructed as $url . $query & $url:URL to index.php & $query:Query string $rc:RecentChange object that triggered url generation 'IsFileCacheable':Override the result of Article::isFileCacheable()(if true) $article:article(object) being checked 'IsTrustedProxy':Override the result of wfIsTrustedProxy() $ip:IP being check $result:Change this value to override the result of wfIsTrustedProxy() 'IsUploadAllowedFromUrl':Override the result of UploadFromUrl::isAllowedUrl() $url:URL used to upload from & $allowed:Boolean indicating if uploading is allowed for given URL 'isValidEmailAddr':Override the result of User::isValidEmailAddr(), for instance to return false if the domain name doesn 't match your organization. 
$addr:The e-mail address entered by the user & $result:Set this and return false to override the internal checks 'isValidPassword':Override the result of User::isValidPassword() $password:The password entered by the user & $result:Set this and return false to override the internal checks $user:User the password is being validated for 'Language::getMessagesFileName':$code:The language code or the language we 're looking for a messages file for & $file:The messages file path, you can override this to change the location. 'LanguageGetNamespaces':Provide custom ordering for namespaces or remove namespaces. Do not use this hook to add namespaces. Use CanonicalNamespaces for that. & $namespaces:Array of namespaces indexed by their numbers 'LanguageGetMagic':DEPRECATED, use $magicWords in a file listed in $wgExtensionMessagesFiles instead. Use this to define synonyms of magic words depending of the language $magicExtensions:associative array of magic words synonyms $lang:language code(string) 'LanguageGetSpecialPageAliases':DEPRECATED, use $specialPageAliases in a file listed in $wgExtensionMessagesFiles instead. Use to define aliases of special pages names depending of the language $specialPageAliases:associative array of magic words synonyms $lang:language code(string) 'LanguageGetTranslatedLanguageNames':Provide translated language names. & $names:array of language code=> language name $code language of the preferred translations 'LanguageLinks':Manipulate a page 's language links. This is called in various places to allow extensions to define the effective language links for a page. $title:The page 's Title. & $links:Associative array mapping language codes to prefixed links of the form "language:title". & $linkFlags:Associative array mapping prefixed links to arrays of flags. Currently unused, but planned to provide support for marking individual language links in the UI, e.g. for featured articles. 
'LinkBegin':Used when generating internal and interwiki links in Linker::link(), before processing starts. Return false to skip default processing and return $ret. See documentation for Linker::link() for details on the expected meanings of parameters. $skin:the Skin object $target:the Title that the link is pointing to & $html:the contents that the< a > tag should have(raw HTML) $result
Definition: hooks.txt:1528
data
and how to run hooks for an and one after Each event has a preferably in CamelCase For ArticleDelete hook A clump of code and data that should be run when an event happens This can be either a function and a chunk of data
Definition: hooks.txt:6
php
skin txt MediaWiki includes four core it has been set as the default in MediaWiki since the replacing Monobook it had been been the default skin since before being replaced by Vector largely rewritten in while keeping its appearance Several legacy skins were removed in the as the burden of supporting them became too heavy to bear Those in etc for skin dependent CSS etc for skin dependent JavaScript These can also be customised on a per user by etc This feature has led to a wide variety of user styles becoming that gallery is a good place to ending in php
Definition: skin.txt:62
StringUtils\hungryDelimiterReplace
static hungryDelimiterReplace( $startDelim, $endDelim, $replace, $subject)
Replace every span running from $startDelim to the first following $endDelim with the literal string $replace.
Definition: StringUtils.php:131
ExplodeIterator\$delim
$delim
Definition: StringUtils.php:531
ExplodeIterator\key
key()
Definition: StringUtils.php:583
StringUtils
A collection of static methods to play with strings.
Definition: StringUtils.php:26
ExplodeIterator\rewind
rewind()
Definition: StringUtils.php:558
wfProfileIn
wfProfileIn( $functionname)
Begin profiling of a function.
Definition: Profiler.php:33
$from
$from
Definition: importImages.php:90
StringUtils\escapeRegexReplacement
static escapeRegexReplacement( $string)
Escape a string to make it suitable for inclusion in a preg_replace() replacement parameter.
Definition: StringUtils.php:296
$s
$s
Definition: mergeMessageFileList.php:156
ReplacementArray\removeArray
removeArray( $data)
Definition: StringUtils.php:488
$flags
it s the revision text itself In either if gzip is the revision text is gzipped $flags
Definition: hooks.txt:2113
Replacer\cb
cb()
Definition: StringUtils.php:327
ReplacementArray\getArray
getArray()
Definition: StringUtils.php:447
ReplacementArray\mergeArray
mergeArray( $data)
Definition: StringUtils.php:464
ReplacementArray\__sleep
__sleep()
Definition: StringUtils.php:427
ExplodeIterator
An iterator which works exactly like:
Definition: StringUtils.php:526
StringUtils\explodeMarkup
static explodeMarkup( $separator, $text)
More or less "markup-safe" explode() Ignores any instances of the separator inside <....
Definition: StringUtils.php:270
RegexlikeReplacer\$r
$r
Definition: StringUtils.php:336
MWException
MediaWiki exception.
Definition: MWException.php:26
HashtableReplacer\replace
replace( $matches)
Definition: StringUtils.php:402
table
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global then executing the whole list after the page is displayed We don t do anything smart like collating updates to the same table or such because the list is almost always going to have just one item on if so it s not worth the trouble Since there is a job queue in the jobs table
Definition: deferred.txt:11
StringUtils\explode
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
Definition: StringUtils.php:310
HashtableReplacer\__construct
__construct( $table, $index=0)
Definition: StringUtils.php:393
wfProfileOut
wfProfileOut( $functionname='missing')
Stop profiling of a function.
Definition: Profiler.php:46
ReplacementArray\replace
replace( $subject)
Definition: StringUtils.php:499
Replacer
Base class for "replacers", objects used in preg_replace_callback() and StringUtils::delimiterReplace...
Definition: StringUtils.php:323
ExplodeIterator\$endPos
$endPos
Definition: StringUtils.php:537
array
the array() calling protocol came about after MediaWiki 1.4rc1.
List of Api Query prop modules.
ReplacementArray\__wakeup
__wakeup()
Definition: StringUtils.php:431
StringUtils\delimiterReplaceCallback
static delimiterReplaceCallback( $startDelim, $endDelim, $callback, $subject, $flags='')
Replace each $startDelim ... $endDelim span in $subject with the value returned by $callback; the delimiters are literal strings.
Definition: StringUtils.php:168
DoubleReplacer\replace
replace( $matches)
Definition: StringUtils.php:378
ReplacementArray\setArray
setArray( $data)
Set the whole replacement array at once.
Definition: StringUtils.php:439
HashtableReplacer\$index
$index
Definition: StringUtils.php:387
ExplodeIterator\valid
valid()
Definition: StringUtils.php:609
ReplacementArray
Replacement array for FSS with fallback to strtr() Supports lazy initialisation of FSS resource.
Definition: StringUtils.php:411
$matches
if(!defined( 'MEDIAWIKI')) if(!isset( $wgVersion)) $matches
Definition: NoLocalSettings.php:33
$value
$value
Definition: styleTest.css.php:45
RegexlikeReplacer\__construct
__construct( $r)
Definition: StringUtils.php:341
ExplodeIterator\refreshCurrent
refreshCurrent()
Definition: StringUtils.php:564
ExplodeIterator\next
next()
Definition: StringUtils.php:590
HashtableReplacer\$table
$table
Definition: StringUtils.php:387
ReplacementArray\$fss
$fss
Definition: StringUtils.php:413
ExplodeIterator\$subject
$subject
Definition: StringUtils.php:528
RegexlikeReplacer
Class to replace regex matches with a string similar to that used in preg_replace()
Definition: StringUtils.php:335
DoubleReplacer\__construct
__construct( $from, $to, $index=0)
Definition: StringUtils.php:368
HashtableReplacer
Class to perform replacement based on a simple hashtable lookup.
Definition: StringUtils.php:386
ExplodeIterator\__construct
__construct( $delim, $subject)
Construct an ExplodeIterator.
Definition: StringUtils.php:547
DoubleReplacer
Class to perform secondary replacement within each replacement string.
Definition: StringUtils.php:362
ReplacementArray\setPair
setPair( $from, $to)
Set an element of the replacement array.
Definition: StringUtils.php:456
$output
& $output
Definition: hooks.txt:375
ReplacementArray\$data
$data
Definition: StringUtils.php:412
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
ReplacementArray\merge
merge( $other)
Definition: StringUtils.php:472
StringUtils\delimiterReplace
static delimiterReplace( $startDelim, $endDelim, $replace, $subject, $flags='')
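Replace each $startDelim ... $endDelim span in $subject with $replace, in which $0 stands for the whole span and $1 for the text between the delimiters.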
Definition: StringUtils.php:256
ReplacementArray\removePair
removePair( $from)
Definition: StringUtils.php:480
StringUtils\isUtf8
static isUtf8( $value, $disableMbstring=false)
Test whether a string is valid UTF-8.
Definition: StringUtils.php:51
from
Please log in again after you receive it</td >< td > s a saved copy from
Definition: All_system_messages.txt:3297
ReplacementArray\__construct
__construct( $data=array())
Create an object with the specified replacement array The array should have the same form as the repl...
Definition: StringUtils.php:420
ExplodeIterator\$current
$current
Definition: StringUtils.php:540
ExplodeIterator\$delimLength
$delimLength
Definition: StringUtils.php:531
ExplodeIterator\current
current()
Definition: StringUtils.php:576
RegexlikeReplacer\replace
replace( $matches)
Definition: StringUtils.php:349
ExplodeIterator\$curPos
$curPos
Definition: StringUtils.php:534