28 use RemexHtml\HTMLData;
42 '/&([A-Za-z0-9\x80-\xff]+;)
44 |&\#[xX]([0-9A-Fa-f]+);
# Build the attribute-matching regex only while self::$attribsRegex is still
# null; the fragments below are the building blocks interpolated into it.
101 if ( self::$attribsRegex ===
null ) {
# Tab, LF, FF, CR and space. The string is single-quoted, so PHP leaves the
# \xNN escapes untouched and PCRE resolves them inside the character class.
102 $spaceChars =
'\x09\x0a\x0c\x0d\x20';
# Character class matching exactly one of the whitespace characters above.
103 $space =
"[{$spaceChars}]";
# One attribute-name character: anything except the whitespace characters,
# "/", ">" and "=".
104 $attrib =
"[^{$spaceChars}\/>=]";
# The first character of a name may additionally be "=".
105 $attribFirst =
"(?:{$attrib}|=)";
106 self::$attribsRegex =
107 "/({$attribFirst}{$attrib}*)
110 # The attribute value: quoted or alone
# Build the attribute-name validation regex only while
# self::$attribNameRegex is still null.
130 if ( self::$attribNameRegex ===
null ) {
# First character: colon, underscore, or any Unicode letter or number.
131 $attribFirst =
"[:_\p{L}\p{N}]";
# Following characters: the same set plus "." and "-".
132 $attrib =
"[:_\.\-\p{L}\p{N}]";
# Anchored with ^...$ so the whole name must match; the name is capture
# group 1. Flags: s (dot matches newline), x (extended syntax), u (UTF-8,
# needed for the \p{...} Unicode property classes).
133 self::$attribNameRegex =
"/^({$attribFirst}{$attrib}*)$/sxu";
147 static $htmlpairsStatic, $htmlsingle, $htmlsingleonly, $htmlnest, $tabletags,
148 $htmllist, $listtags, $htmlsingleallowed, $htmlelementsStatic, $staticInitialised;
153 if ( !$staticInitialised || $staticInitialised != $globalContext ) {
154 $htmlpairsStatic = [ # Tags that must be closed
155 'b',
'bdi',
'del',
'i',
'ins',
'u',
'font',
'big',
'small',
'sub',
'sup',
'h1',
156 'h2',
'h3',
'h4',
'h5',
'h6',
'cite',
'code',
'em',
's',
157 'strike',
'strong',
'tt',
'var',
'div',
'center',
158 'blockquote',
'ol',
'ul',
'dl',
'table',
'caption',
'pre',
159 'ruby',
'rb',
'rp',
'rt',
'rtc',
'p',
'span',
'abbr',
'dfn',
160 'kbd',
'samp',
'data',
'time',
'mark'
163 'br',
'wbr',
'hr',
'li',
'dt',
'dd',
'meta',
'link'
166 # Elements that cannot have close tags. This is (not coincidentally)
167 # also the list of tags for which the HTML 5 parsing algorithm
168 # requires you to "acknowledge the token's self-closing flag", i.e.
169 # a self-closing tag like <br/> is not an HTML 5 parse error only
172 'br',
'wbr',
'hr',
'meta',
'link'
175 $htmlnest = [ # Tags that can be nested--??
176 'table',
'tr',
'td',
'th',
'div',
'blockquote',
'ol',
'ul',
177 'li',
'dl',
'dt',
'dd',
'font',
'big',
'small',
'sub',
'sup',
'span',
178 'var',
'kbd',
'samp',
'em',
'strong',
'q',
'ruby',
'bdo'
180 $tabletags = [ # Can only appear inside table, we will close them
183 $htmllist = [ # Tags used by list
186 $listtags = [ # Tags that can appear in a list
192 'is deprecated since MediaWiki 1.35',
'1.35',
false,
false );
193 $htmlsingle[] =
'img';
194 $htmlsingleonly[] =
'img';
# Union of $htmlsingle and $tabletags, de-duplicated.
197 $htmlsingleallowed = array_unique( array_merge( $htmlsingle, $tabletags ) );
# Union of $htmlsingle, $htmlpairsStatic and $htmlnest, de-duplicated.
198 $htmlelementsStatic = array_unique( array_merge( $htmlsingle, $htmlpairsStatic, $htmlnest ) );
200 # Convert them all to hashtables for faster lookup
# $vars holds variable *names* (strings), not the arrays themselves; the
# foreach that follows resolves each name through a variable variable
# ($$var) and replaces the list with its array_flip()ped form.
201 $vars = [
'htmlpairsStatic',
'htmlsingle',
'htmlsingleonly',
'htmlnest',
'tabletags',
202 'htmllist',
'listtags',
'htmlsingleallowed',
'htmlelementsStatic' ];
203 foreach ( $vars as $var ) {
204 $$var = array_flip( $$var );
206 $staticInitialised = $globalContext;
209 # Populate $htmlpairs and $htmlelements with the $extratags and $removetags arrays
# array_flip() turns each caller-supplied list of tag names into a map keyed
# by tag name, so the key-based merge and diff below operate on names.
210 $extratags = array_flip( $extratags );
211 $removetags = array_flip( $removetags );
# Paired tags: the extra tags plus the static paired set. Note that
# $removetags is not applied to this table.
212 $htmlpairs = array_merge( $extratags, $htmlpairsStatic );
# Recognised elements: the extra tags plus the static element set, minus
# every key that appears in $removetags.
213 $htmlelements = array_diff_key( array_merge( $extratags, $htmlelementsStatic ), $removetags );
216 'htmlpairs' => $htmlpairs,
217 'htmlsingle' => $htmlsingle,
218 'htmlsingleonly' => $htmlsingleonly,
219 'htmlnest' => $htmlnest,
220 'tabletags' => $tabletags,
221 'htmllist' => $htmllist,
222 'listtags' => $listtags,
223 'htmlsingleallowed' => $htmlsingleallowed,
224 'htmlelements' => $htmlelements,
240 $args = [], $extratags = [], $removetags = []
243 $htmlpairs = $tagData[
'htmlpairs'];
244 $htmlsingle = $tagData[
'htmlsingle'];
245 $htmlsingleonly = $tagData[
'htmlsingleonly'];
246 $htmlnest = $tagData[
'htmlnest'];
247 $tabletags = $tagData[
'tabletags'];
248 $htmllist = $tagData[
'htmllist'];
249 $listtags = $tagData[
'listtags'];
250 $htmlsingleallowed = $tagData[
'htmlsingleallowed'];
251 $htmlelements = $tagData[
'htmlelements'];
253 # Remove HTML comments
255 $bits = explode(
'<', $text );
256 $text = str_replace(
'>',
'>', array_shift( $bits ) );
258 # this might be possible using remex tidy itself
259 foreach ( $bits as $x ) {
260 if ( preg_match( self::ELEMENT_BITS_REGEX, $x, $regs ) ) {
261 list( , $slash,
$t, $params, $brace, $rest ) = $regs;
264 $t = strtolower(
$t );
265 if ( isset( $htmlelements[
$t] ) ) {
266 if ( is_callable( $processCallback ) ) {
267 call_user_func_array( $processCallback, [ &$params,
$args ] );
270 if ( $brace ==
'/>' && !( isset( $htmlsingle[
$t] ) || isset( $htmlsingleonly[
$t] ) ) ) {
275 if ( !self::validateTag( $params,
$t ) ) {
281 if ( $brace ===
'/>' && !isset( $htmlsingleonly[
$t] ) ) {
282 # Interpret self-closing tags as empty tags even when
283 # HTML 5 would interpret them as start tags. Such input
284 # is commonly seen on Wikimedia wikis with this intention.
288 $rest = str_replace(
'>',
'>', $rest );
289 $text .=
"<$slash$t$newparams$brace$rest";
294 $text .=
'<' . str_replace(
'>',
'>', $x );
309 while ( ( $start = strpos( $text,
'<!--' ) ) !==
false ) {
310 $end = strpos( $text,
'-->', $start + 4 );
311 if ( $end ===
false ) {
312 # Unterminated comment; bail out
318 # Trim space and newline if the comment is both
319 # preceded and followed by a newline
320 $spaceStart = max( $start - 1, 0 );
321 $spaceLen = $end - $spaceStart;
322 while ( substr( $text, $spaceStart, 1 ) ===
' ' && $spaceStart > 0 ) {
326 while ( substr( $text, $spaceStart + $spaceLen, 1 ) ===
' ' ) {
329 if ( substr( $text, $spaceStart, 1 ) ===
"\n"
330 && substr( $text, $spaceStart + $spaceLen, 1 ) ===
"\n" ) {
331 # Remove the comment, leading and trailing
332 # spaces, and leave only one newline.
333 $text = substr_replace( $text,
"\n", $spaceStart, $spaceLen + 1 );
335 # Remove just the comment.
336 $text = substr_replace( $text,
'', $start, $end - $start );
357 if ( $element ==
'meta' || $element ==
'link' ) {
358 if ( !isset( $params[
'itemprop'] ) ) {
362 if ( $element ==
'meta' && !isset( $params[
'content'] ) ) {
366 if ( $element ==
'link' && !isset( $params[
'href'] ) ) {
392 self::attributesAllowedInternal( $element ) );
414 if ( isset( $allowed[0] ) ) {
417 wfDeprecated( __METHOD__ .
' with sequential array',
'1.35' );
418 $allowed = array_flip( $allowed );
423 foreach ( $attribs as $attribute => $value ) {
424 # Allow XML namespace declaration to allow RDFa
425 if ( preg_match( self::XMLNS_ATTRIBUTE_PATTERN, $attribute ) ) {
426 if ( !preg_match( self::EVIL_URI_PATTERN, $value ) ) {
427 $out[$attribute] = $value;
433 # Allow any attribute beginning with "data-"
435 # * Disallow data attributes used by MediaWiki code
436 # * Ensure that the attribute is not namespaced by banning
439 !preg_match(
'/^data-[^:]*$/i', $attribute ) &&
440 !array_key_exists( $attribute, $allowed )
441 ) || self::isReservedDataAttribute( $attribute ) ) {
445 # Strip javascript "expression" from stylesheets.
446 # https://msdn.microsoft.com/en-us/library/ms537634.aspx
447 if ( $attribute ==
'style' ) {
451 # Escape HTML id attributes
452 if ( $attribute ===
'id' ) {
456 # Escape HTML id reference lists
457 if ( $attribute ===
'aria-describedby'
458 || $attribute ===
'aria-flowto'
459 || $attribute ===
'aria-labelledby'
460 || $attribute ===
'aria-owns'
467 if ( $attribute ===
'rel' || $attribute ===
'rev'
469 || $attribute ===
'about' || $attribute ===
'property'
470 || $attribute ===
'resource' || $attribute ===
'datatype'
471 || $attribute ===
'typeof'
473 || $attribute ===
'itemid' || $attribute ===
'itemprop'
474 || $attribute ===
'itemref' || $attribute ===
'itemscope'
475 || $attribute ===
'itemtype'
478 if ( preg_match( self::EVIL_URI_PATTERN, $value ) ) {
483 # NOTE: even though elements using href/src are not allowed directly, supply
484 # validation code that can be used by tag hook handlers, etc
485 if ( $attribute ===
'href' || $attribute ===
'src' || $attribute ===
'poster' ) {
486 if ( !preg_match( $hrefExp, $value ) ) {
492 if ( $attribute ===
'tabindex' && $value !==
'0' ) {
499 $out[$attribute] = $value;
502 # itemtype, itemid, itemref don't make sense without itemscope
503 if ( !array_key_exists(
'itemscope', $out ) ) {
504 unset( $out[
'itemtype'] );
505 unset( $out[
'itemid'] );
506 unset( $out[
'itemref'] );
508 # TODO: Strip itemprop if we aren't descendants of an itemscope or pointed to by an itemref.
# Case-insensitive prefix test: true when $attr begins with "data-ooui",
# "data-mw" or "data-parsoid". The pattern is anchored only at the start, so
# any longer name sharing one of those prefixes (e.g. "data-mw-foo") also
# matches. preg_match()'s int/false result is cast to a bool.
528 return (
bool)preg_match(
'/^data-(ooui|mw|parsoid)/i', $attr );
542 $out = array_merge( $a, $b );
543 if ( isset( $a[
'class'] ) && isset( $b[
'class'] )
544 && is_string( $a[
'class'] ) && is_string( $b[
'class'] )
545 && $a[
'class'] !== $b[
'class']
547 $classes = preg_split(
'/\s+/',
"{$a['class']} {$b['class']}",
548 -1, PREG_SPLIT_NO_EMPTY );
549 $out[
'class'] = implode(
' ', array_unique( $classes ) );
576 if ( !$decodeRegex ) {
577 $space =
'[\\x20\\t\\r\\n\\f]';
578 $nl =
'(?:\\n|\\r\\n|\\r|\\f)';
580 $decodeRegex =
"/ $backslash
582 ($nl) | # 1. Line continuation
583 ([0-9A-Fa-f]{1,6})$space? | # 2. character number
584 (.) | # 3. backslash cancelling special meaning
585 () | # 4. backslash at end of string
588 $value = preg_replace_callback( $decodeRegex,
589 [ __CLASS__,
'cssDecodeCallback' ], $value );
594 if ( !preg_match(
'! ^ \s* /\* [^*\\/]* \*/ \s* $ !x', $value ) ) {
605 $commentPos = strpos( $value,
'/*' );
606 if ( $commentPos !==
false ) {
607 $value = substr( $value, 0, $commentPos );
636 if ( preg_match(
'/[\000-\010\013\016-\037\177]/', $value ) ||
637 strpos( $value, UtfNormal\Constants::UTF8_REPLACEMENT ) !==
false ) {
638 return '/* invalid control char */';
639 } elseif ( preg_match(
649 | attr\s*\([^)]+[\s,]+url
652 return '/* insecure input */';
666 $char = UtfNormal\Utils::codepointToUtf8( hexdec(
$matches[2] ) );
672 if ( $char ==
"\n" || $char ==
'"' || $char ==
"'" || $char ==
'\\' ) {
675 return '\\' . dechex( ord( $char ) ) .
' ';
704 if ( trim( $text ) ==
'' ) {
724 $encValue = htmlspecialchars( $text, ENT_QUOTES );
729 $encValue = strtr( $encValue, [
750 # French spaces, last one Guillemet-left
751 # only if there is something before the space
752 # and a non-word character after the punctuation.
753 '/(?:(?<=\S)|^) (?=[?:;!%»›](?!\w))/u' =>
"$space",
754 # French spaces, Guillemet-right
755 '/([«‹]) /u' =>
"\\1$space",
757 return preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );
769 # Templates and links may be expanded in later parsing,
770 # creating invalid or dangerous output. Suppress this.
771 $encValue = strtr( $encValue, [
779 "''" =>
'''',
780 'ISBN' =>
'ISBN',
782 'PMID' =>
'PMID',
788 $encValue = preg_replace_callback(
791 return str_replace(
':',
':',
$matches[1] );
816 if ( $mode === self::ID_PRIMARY ) {
817 throw new UnexpectedValueException(
'$wgFragmentMode is configured with no primary mode' );
843 throw new UnexpectedValueException(
'$wgFragmentMode is configured with no primary mode' );
881 if ( $mode ===
'html5' ) {
882 $id = preg_replace(
'/%([a-fA-F0-9]{2})/',
'%25$1', $id );
897 $id = mb_substr( $id, 0, 1024 );
905 $id = str_replace( [
"\t",
"\n",
"\f",
"\r",
" " ],
'_', $id );
914 $id = urlencode( str_replace(
' ',
'_', $id ) );
915 $id = strtr( $id, $replace );
918 throw new InvalidArgumentException(
"Invalid mode '$mode' passed to '" . __METHOD__ );
947 # Explode the space delimited list string into an array of tokens
948 $references = preg_split(
'/\s+/',
"{$referenceString}", -1, PREG_SPLIT_NO_EMPTY );
950 # Escape each token as an id
951 foreach ( $references as &$ref ) {
955 # Merge the array back to a space delimited list string
956 # If the array is empty, the result will be an empty string ('')
957 $referenceString = implode(
' ', $references );
959 return $referenceString;
975 return rtrim( preg_replace(
976 [
'/(^[0-9\\-])|[\\x00-\\x20!"#$%&\'()*+,.\\/:;<=>?@[\\]^`{|}~]|\\xC2\\xA0/',
'/_+/' ],
990 # It seems wise to escape ' as well as ", as a matter of course. Can't
991 # hurt. Use ENT_SUBSTITUTE so that incorrectly truncated multibyte characters
992 # don't cause the entire string to disappear.
993 $html = htmlspecialchars( $html, ENT_QUOTES | ENT_SUBSTITUTE );
1006 if ( trim( $text ) ==
'' ) {
1011 if ( !preg_match_all(
1012 self::getAttribsRegex(),
1015 PREG_SET_ORDER ) ) {
1020 foreach ( $pairs as $set ) {
1021 $attribute = strtolower( $set[1] );
1024 if ( !preg_match( self::getAttribNameRegex(), $attribute ) ) {
1031 $value = preg_replace(
'/[\t\r\n ]+/',
' ', $value );
1032 $value = trim( $value );
1049 foreach ( $assoc_array as $attribute => $value ) {
1050 $encAttribute = htmlspecialchars( $attribute );
1053 $attribs[] =
"$encAttribute=\"$encValue\"";
1055 return count( $attribs ) ?
' ' . implode(
' ', $attribs ) :
'';
1067 if ( isset( $set[5] ) ) {
1070 } elseif ( isset( $set[4] ) ) {
1073 } elseif ( isset( $set[3] ) ) {
1076 } elseif ( !isset( $set[2] ) ) {
1077 # In XHTML, attributes must have a value so return an empty string.
1078 # See "Empty attribute syntax",
1079 # https://www.w3.org/TR/html5/syntax.html#syntax-attribute-name
1082 throw new MWException(
"Tag conditions not met. This should never happen and is a bug." );
1091 return trim( preg_replace(
1092 '/(?:\r\n|[\x20\x0d\x0a\x09])+/',
1106 return trim( preg_replace(
'/[ _]+/',
' ', $section ) );
1125 return preg_replace_callback(
1126 self::CHAR_REFS_REGEX,
1127 [ self::class,
'normalizeCharReferencesCallback' ],
1144 if ( $ret ===
null ) {
1145 return htmlspecialchars(
$matches[0] );
1162 if ( isset( self::MW_ENTITY_ALIASES[$name] ) ) {
1164 return '&' . self::MW_ENTITY_ALIASES[$name];
1165 } elseif ( in_array( $name, [
'lt;',
'gt;',
'amp;',
'quot;' ],
true ) ) {
1168 } elseif ( isset( HTMLData::$namedEntityTranslations[$name] ) ) {
1170 return preg_replace_callback(
'/./Ssu',
static function ( $m ) {
1171 return '&#' . UtfNormal\Utils::utf8ToCodepoint( $m[0] ) .
';';
1172 }, HTMLData::$namedEntityTranslations[$name] );
1174 return "&$name";
1183 $point = intval( $codepoint );
1184 if ( self::validateCodepoint( $point ) ) {
1185 return sprintf(
'&#%d;', $point );
1196 $point = hexdec( $codepoint );
1197 if ( self::validateCodepoint( $point ) ) {
1198 return sprintf(
'&#x%x;', $point );
1211 # U+000C is valid in HTML5 but not allowed in XML.
1212 # U+000D is valid in XML but not allowed in HTML5.
1213 # U+007F - U+009F are disallowed in HTML5 (control characters).
# Accepted ranges, in order: tab, line feed, U+0020-U+007E,
# U+00A0-U+D7FF, U+E000-U+FFFD, and U+10000-U+10FFFF.
# Everything else is rejected, which includes U+000C and U+000D, the
# remaining code points below U+0020, U+007F-U+009F, the surrogate range
# U+D800-U+DFFF, U+FFFE/U+FFFF, and anything above U+10FFFF.
1214 return $codepoint == 0x09
1215 || $codepoint == 0x0a
1216 || ( $codepoint >= 0x20 && $codepoint <= 0x7e )
1217 || ( $codepoint >= 0xa0 && $codepoint <= 0xd7ff )
1218 || ( $codepoint >= 0xe000 && $codepoint <= 0xfffd )
1219 || ( $codepoint >= 0x10000 && $codepoint <= 0x10ffff );
1230 return preg_replace_callback(
1231 self::CHAR_REFS_REGEX,
1232 [ self::class,
'decodeCharReferencesCallback' ],
1247 $text = preg_replace_callback(
1248 self::CHAR_REFS_REGEX,
1249 [ self::class,
'decodeCharReferencesCallback' ],
1256 return MediaWikiServices::getInstance()->getContentLanguage()->normalize( $text );
1274 # Last case should be an ampersand by itself
1286 if ( self::validateCodepoint( $codepoint ) ) {
1287 return UtfNormal\Utils::codepointToUtf8( $codepoint );
1289 return UtfNormal\Constants::UTF8_REPLACEMENT;
1303 if ( isset( self::MW_ENTITY_ALIASES[$name] ) ) {
1304 $name = self::MW_ENTITY_ALIASES[$name];
1306 $trans = HTMLData::$namedEntityTranslations[$name] ??
null;
1307 return $trans ??
"&$name";
1319 return $list[$element] ?? [];
1332 if ( $allowed !==
null ) {
1338 $merge =
static function ( $a, $b, $c = [] ) {
1339 return array_merge( $a, array_flip( $b ), array_flip( $c ) );
1341 $common = $merge( [], [
1361 # These attributes are specified in section 9 of
1369 # Microdata. These are specified by
1378 $block = $merge( $common, [
'align' ] );
1380 $tablealign = [
'align',
'valign' ];
1388 'nowrap', # deprecated
1389 'width', # deprecated
1390 'height', # deprecated
1391 'bgcolor', # deprecated
1394 # Numbers refer to sections in HTML 4.01 standard describing the element.
1395 # See: https://www.w3.org/TR/html4/
1399 'center' => $common, # deprecated
1418 'strong' => $common,
1429 'blockquote' => $merge( $common, [
'cite' ] ),
1430 'q' => $merge( $common, [
'cite' ] ),
1440 'br' => $merge( $common, [
'clear' ] ),
1442 # https://www.w3.org/TR/html5/text-level-semantics.html#the-wbr-element
1446 'pre' => $merge( $common, [
'width' ] ),
1449 'ins' => $merge( $common, [
'cite',
'datetime' ] ),
1450 'del' => $merge( $common, [
'cite',
'datetime' ] ),
1453 'ul' => $merge( $common, [
'type' ] ),
1454 'ol' => $merge( $common, [
'type',
'start',
'reversed' ] ),
1455 'li' => $merge( $common, [
'type',
'value' ] ),
1463 'table' => $merge( $common,
1464 [
'summary',
'width',
'border',
'frame',
1465 'rules',
'cellspacing',
'cellpadding',
1470 'caption' => $block,
1478 'colgroup' => $merge( $common, [
'span' ] ),
1479 'col' => $merge( $common, [
'span' ] ),
1482 'tr' => $merge( $common, [
'bgcolor' ], $tablealign ),
1485 'td' => $merge( $common, $tablecell, $tablealign ),
1486 'th' => $merge( $common, $tablecell, $tablealign ),
1489 # NOTE: <a> is not allowed directly, but this list of allowed
1490 # attributes is used from the Parser object
1491 'a' => $merge( $common, [
'href',
'rel',
'rev' ] ), # rel/rev esp.
# for RDFa
1494 # Not usually allowed, but may be used for extension-style hooks
1495 # such as <math> when it is rasterized, or if $wgAllowImageTag is
1497 'img' => $merge( $common, [
'alt',
'src',
'width',
'height',
'srcset' ] ),
1498 # Attributes for A/V tags added in T163583 / T133673
1499 'audio' => $merge( $common, [
'controls',
'preload',
'width',
'height' ] ),
1500 'video' => $merge( $common, [
'poster',
'controls',
'preload',
'width',
'height' ] ),
1501 'source' => $merge( $common, [
'type',
'src' ] ),
1502 'track' => $merge( $common, [
'type',
'src',
'srclang',
'kind',
'label' ] ),
1510 'strike' => $common,
1515 'font' => $merge( $common, [
'size',
'color',
'face' ] ),
1519 'hr' => $merge( $common, [
'width' ] ),
1521 # HTML Ruby annotation text module, simple ruby only.
1522 # https://www.w3.org/TR/html5/text-level-semantics.html#the-ruby-element
1527 'rt' => $common, # $merge( $common, [
# 'rbspan' ] ),
1530 # MathML root element, where used for extensions
1531 # 'title' may not be 100% valid here; it's XHTML
1532 # https://www.w3.org/TR/REC-MathML/
1533 'math' => $merge( [], [
'class',
'style',
'id',
'title' ] ),
1536 'figure' => $common,
1537 'figcaption' => $common,
1539 # HTML 5 section 4.6
1542 # HTML5 elements, defined by:
1543 # https://html.spec.whatwg.org/multipage/semantics.html#the-data-element
1544 'data' => $merge( $common, [
'value' ] ),
1545 'time' => $merge( $common, [
'datetime' ] ),
1553 'meta' => $merge( [], [
'itemprop',
'content' ] ),
1554 'link' => $merge( [], [
'itemprop',
'href',
'title' ] ),
1574 $tokenizer =
new RemexHtml\Tokenizer\Tokenizer( $handler, $html, [
1575 'ignoreErrors' =>
true,
1577 'ignoreNulls' =>
true,
1578 'skipPreprocess' =>
true,
1580 $tokenizer->execute();
1581 $text = $handler->getResult();
1599 $out =
"<!DOCTYPE html [\n";
1600 foreach ( HTMLData::$namedEntityTranslations as $entity => $translation ) {
1601 if ( substr( $entity, -1 ) !==
';' ) {
1606 $name = substr( $entity, 0, -1 );
1608 if ( $entity === $expansion ) {
1612 $out .=
"<!ENTITY $name \"$expansion\">";
1623 # Normalize any HTML entities in input. They will be
1624 # re-escaped by makeExternalLink().
1627 # Escape any control characters introduced by the above step
1628 $url = preg_replace_callback(
'/[\][<>"\\x00-\\x20\\x7F\|]/',
1629 [ __CLASS__,
'cleanUrlCallback' ], $url );
1631 # Validate hostname portion
1633 if ( preg_match(
'!^([^:]+:)(//[^/]+)?(.*)$!iD', $url,
$matches ) ) {
1634 list( , $protocol, $host, $rest ) =
$matches;
1640 \\s| # general whitespace
1641 \xc2\xad| # 00ad SOFT HYPHEN
1642 \xe1\xa0\x86| # 1806 MONGOLIAN TODO SOFT HYPHEN
1643 \xe2\x80\x8b| # 200b ZERO WIDTH SPACE
1644 \xe2\x81\xa0| # 2060 WORD JOINER
1645 \xef\xbb\xbf| # feff ZERO WIDTH NO-BREAK SPACE
1646 \xcd\x8f| # 034f COMBINING GRAPHEME JOINER
1647 \xe1\xa0\x8b| # 180b MONGOLIAN FREE VARIATION SELECTOR ONE
1648 \xe1\xa0\x8c| # 180c MONGOLIAN FREE VARIATION SELECTOR TWO
1649 \xe1\xa0\x8d| # 180d MONGOLIAN FREE VARIATION SELECTOR THREE
1650 \xe2\x80\x8c| # 200c ZERO WIDTH NON-JOINER
1651 \xe2\x80\x8d| # 200d ZERO WIDTH JOINER
1652 [\xef\xb8\x80-\xef\xb8\x8f] # fe00-fe0f VARIATION SELECTOR-1-16
1655 $host = preg_replace( $strip,
'', $host );
1658 if ( substr_compare(
"//%5B", $host, 0, 5 ) === 0 &&
1659 preg_match(
'!^//%5B([0-9A-Fa-f:.]+)%5D((:\d+)?)$!', $host,
$matches )
1666 return $protocol . $host . $rest;
1711 if ( !
Hooks::runner()->onIsValidEmailAddr( $addr, $result ) ) {
1718 $rfc5322_atext =
"a-z0-9!#$%&'*+\\-\/=?^_`{|}~";
1719 $rfc1034_ldh_str =
"a-z0-9\\-";
1721 $html5_email_regexp =
"/
1723 [$rfc5322_atext\\.]+ # user part which is liberal :p
1725 [$rfc1034_ldh_str]+ # First domain part
1726 (\\.[$rfc1034_ldh_str]+)* # Following part prefixed with a dot
1730 return (
bool)preg_match( $html5_email_regexp, $addr );