68 $regex = self::makeRegex( $filterEntry, $protocol );
69 return preg_match( $regex, $text );
/**
 * Build the PCRE pattern used to test a piece of text against one filter entry.
 *
 * The pattern is the quoted protocol followed by the quoted filter entry,
 * delimited by "!" and compiled with the "S" and "i" modifiers. When the
 * entry starts with "*.", that prefix is replaced by an optional subdomain
 * matcher: either a run of letters, digits, dots and hyphens that ends in a
 * dot, or nothing at all.
 *
 * @param string $filterEntry Domain pattern, optionally starting with "*."
 * @param string $protocol Protocol prefix, e.g. "http://"
 * @return string Complete regular expression, including delimiters and flags
 */
private static function makeRegex( $filterEntry, $protocol ) {
	$subdomainPart = '';
	if ( str_starts_with( $filterEntry, '*.' ) ) {
		// The wildcard stands for "zero or more subdomain labels".
		$subdomainPart = '(?:[A-Za-z0-9.-]+\.|)';
		$filterEntry = substr( $filterEntry, 2 );
	}

	// "!" is the delimiter, so it is passed to preg_quote() for escaping too.
	return '!' . preg_quote( $protocol, '!' )
		. $subdomainPart
		. preg_quote( $filterEntry, '!' )
		. '!Si';
}
/**
 * Canonicalize a URL host and convert it to the form stored in the index.
 *
 * Return shapes visible in this listing:
 *  - 'V6.' + the sanitized IPv6 groups joined by '.' + '.'
 *  - 'V4.' + the IPv4 octets (leading zeros removed) joined by '.' + '.'
 *  - otherwise the dot-separated labels in reverse order + '.'
 *
 * NOTE(review): this listing omits several lines of the method (the embedded
 * line numbers jump), so the comments below describe only the statements
 * that are actually visible.
 *
 * @param string $host Host part of a URL; may be percent-encoded
 * @return string Host in index form
 */
97 private static function indexifyHost( $host ) {
// Undo any percent-encoding before normalizing.
101 $host = rawurldecode( $host );
102 if ( $host !==
'' ) {
// idn_to_utf8() yields false on failure, hence the strict check that follows.
103 $tmp = idn_to_utf8( $host );
104 if ( $tmp !==
false ) {
// Body of a character class: the ASCII characters that are left unencoded.
108 $okChars =
'a-zA-Z0-9\\-._~!$&\'()*+,;=';
109 if ( StringUtils::isUtf8( $host ) ) {
// For a UTF-8 host, the range \x80-\xf4 is let through as well.
111 $okChars .=
'\x80-\xf4';
// Percent-encode each single character that is not in $okChars.
113 $host = preg_replace_callback(
114 '<[^' . $okChars .
']>',
115 static function ( $m ) {
116 return rawurlencode( $m[0] );
// Bracketed IPv6 literal, optionally containing '*'. The host is decoded again
// for this test; presumably because '[', ']' and ':' are not in $okChars and
// were therefore just re-encoded.
122 if ( preg_match(
'/^\[([0-9a-f:*]+)\]$/', rawurldecode( $host ), $m ) ) {
// NOTE(review): the assignment of $ip is not visible here; presumably it is
// the capture $m[1] -- confirm.
124 if ( IPUtils::isValid( $ip ) ) {
// Complete address: sanitize it, then turn the ':' separators into '.'.
125 return 'V6.' . implode(
'.', explode(
':', IPUtils::sanitizeIP( $ip ) ) ) .
'.';
// Address ending in a ':*' wildcard: strip the wildcard and test whether the
// remainder can be completed into a valid address.
127 if ( substr( $ip, -2 ) ===
':*' ) {
128 $cutIp = substr( $ip, 0, -2 );
129 if ( IPUtils::isValid(
"{$cutIp}::" ) ) {
// $ct is the number of ':' separators in the wildcard form; the array_slice()
// below keeps that many leading groups of the sanitized address.
131 $ct = count( explode(
':', $ip ) ) - 1;
133 implode(
'.', array_slice( explode(
':', IPUtils::sanitizeIP(
"{$cutIp}::" ) ), 0, $ct ) ) .
// Second completion attempt: append ':1' instead of '::'.
136 if ( IPUtils::isValid(
"{$cutIp}:1" ) ) {
// substr( ..., 0, -1 ) removes the final character of the dot-joined address;
// presumably that is the '1' appended above -- confirm against sanitizeIP().
139 substr( implode(
'.', explode(
':', IPUtils::sanitizeIP(
"{$cutIp}:1" ) ) ), 0, -1 ) .
// Drop one trailing dot from the host.
147 if ( substr( $host, -1 ) ===
'.' ) {
148 $host = substr( $host, 0, -1 );
// $b matches one decimal octet in 0-255, with any number of leading zeros.
152 $b =
'(?:0*25[0-5]|0*2[0-4][0-9]|0*1[0-9][0-9]|0*[0-9]?[0-9])';
// Either a full dotted quad, or one to three octets followed by a '*' wildcard.
153 if ( preg_match(
"/^(?:{$b}\.){3}{$b}$|^(?:{$b}\.){1,3}\*$/", $host ) ) {
// The (int) cast strips leading zeros from each octet; '*' is kept as is.
154 return 'V4.' . implode(
'.', array_map(
static function ( $v ) {
155 return $v ===
'*' ? $v : (int)$v;
156 }, explode(
'.', $host ) ) ) .
'.';
// Anything else: reverse the order of the dot-separated labels and append '.'.
160 return implode(
'.', array_reverse( explode(
'.', $host ) ) ) .
'.';
186 if ( $bits[
'scheme'] ==
'mailto' ) {
187 $mailparts = explode(
'@', $bits[
'host'], 2 );
188 if ( count( $mailparts ) === 2 ) {
189 $domainpart = self::indexifyHost( $mailparts[1] );
194 $bits[
'host'] = $domainpart .
'@' . $mailparts[0];
196 $bits[
'host'] = self::indexifyHost( $bits[
'host'] );
200 $index = $bits[
'scheme'] . $bits[
'delimiter'] . $bits[
'host'];
202 if ( isset( $bits[
'port'] ) ) {
203 $index .=
':' . $bits[
'port'];
205 $index2 = $bits[
'path'] ??
'/';
206 if ( isset( $bits[
'query'] ) ) {
207 $index2 .=
'?' . $bits[
'query'];
209 if ( isset( $bits[
'fragment'] ) ) {
210 $index2 .=
'#' . $bits[
'fragment'];
213 if ( $bits[
'scheme'] ==
'' ) {
214 return [ [
"http:$index", $index2 ], [
"https:$index", $index2 ] ];
216 return [ [ $index, $index2 ] ];
250 'protocol' =>
'http://',
251 'oneWildcard' =>
false,
257 if ( $like ===
false ) {
263 if ( $options[
'oneWildcard'] ) {
264 $like = $trimmedLike;
266 if ( $trimmedLike[count( $trimmedLike ) - 1] instanceof
LikeMatch ) {
267 array_pop( $trimmedLike );
269 $index = implode(
'', $trimmedLike );
273 $l = strlen( $index );
278 "el_index_60" => substr( $index, 0, 60 ),
279 "el_index" . $db->buildLike( $like ),
286 "el_index_60" . $db->buildLike( $index, $db->anyString() ),
287 "el_index" . $db->buildLike( $like ),
295 if ( $protocol && !in_array( $protocol, $urlProtocols ) ) {
296 foreach ( $urlProtocols as $p ) {
297 if ( str_starts_with( $p, $protocol ) ) {
313 foreach ( $urlProtocols as $p ) {
315 $protocols[] = substr( $p, 0, strpos( $p,
':' ) );
/**
 * Build the list of LIKE pattern parts ($like) for a filter entry.
 *
 * The visible code appends plain strings (scheme + delimiter + indexed host,
 * port, path, query, fragment) and $db->anyString() wildcard tokens to $like.
 *
 * NOTE(review): this listing omits several lines of the method (the embedded
 * line numbers jump). The initialization of $bits, $subdomains, $db and $like
 * and the return statement are not visible; the comments below cover only the
 * statements that are.
 *
 * @param string $filterEntry Filter entry; prefixed with $protocol to form the target
 * @param string $protocol Protocol prefix, 'http://' by default
 */
334 public static function makeLikeArray( $filterEntry, $protocol =
'http://' ) {
// The pattern to process is the protocol prefix followed by the filter entry.
338 $target = $protocol . $filterEntry;
// NOTE(review): $bits is presumably the parsed form of $target -- confirm.
// strpos() is used for its truthiness here, so an '@' at offset 0 (or no '@'
// at all) does not take this branch.
345 if ( $bits[
'scheme'] ===
'mailto' && strpos( $bits[
'host'],
'@' ) ) {
// Split at the first '@' only: [0] is the local part, [1] the domain.
347 $mailparts = explode(
'@', $bits[
'host'], 2 );
348 $domainpart = self::indexifyHost( $mailparts[1] );
349 if ( $mailparts[0] ===
'*' ) {
// Wildcard local part: keep only the indexed domain and the '@'.
351 $bits[
'host'] = $domainpart .
'@';
// Literal local part: indexed domain, '@', then the local part.
353 $bits[
'host'] = $domainpart .
'@' . $mailparts[0];
// Host converted to index form in place (the enclosing branch structure is
// not visible in this listing).
357 $bits[
'host'] = self::indexifyHost( $bits[
'host'] );
358 if ( substr( $bits[
'host'], -3 ) ===
'.*.' ) {
// Remove the final two characters ('*.'), leaving the host ending in '.'.
360 $bits[
'host'] = substr( $bits[
'host'], 0, -2 );
// First part of the pattern: scheme, delimiter and indexed host.
364 $like[] = $bits[
'scheme'] . $bits[
'delimiter'] . $bits[
'host'];
// Wildcard token; the condition guarding this line is not visible here.
367 $like[] = $db->anyString();
// Optional URL components are appended in order, each only when present.
370 if ( isset( $bits[
'port'] ) ) {
371 $like[] =
':' . $bits[
'port'];
373 if ( isset( $bits[
'path'] ) ) {
374 $like[] = $bits[
'path'];
// No path and not a subdomain pattern; the body of this branch is not visible.
375 } elseif ( !$subdomains ) {
378 if ( isset( $bits[
'query'] ) ) {
379 $like[] =
'?' . $bits[
'query'];
381 if ( isset( $bits[
'fragment'] ) ) {
382 $like[] =
'#' . $bits[
'fragment'];
// Look for a '*' remaining in any plain-string part; what happens on a hit
// is not visible in this listing.
386 foreach ( $like as $likepart ) {
387 if ( !( $likepart instanceof
LikeMatch ) && strpos( $likepart,
'*' ) !==
false ) {
// Make sure the pattern ends with a wildcard token.
392 if ( !( $like[count( $like ) - 1] instanceof
LikeMatch ) ) {
394 $like[] = $db->anyString();
409 if ( !is_array( $arr ) ) {
413 foreach ( $arr as $key => $value ) {
415 return array_slice( $arr, 0, $key + 1 );
// Register 'LinkFilter' as an alias name for the class LinkFilter::class resolves to.
// NOTE(review): presumably kept so code using an older class name keeps working -- confirm.
423class_alias( LinkFilter::class,
'LinkFilter' );
wfParseUrl( $url)
parse_url() work-alike, but non-broken.
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
A class containing constants representing the names of configuration variables.
const UrlProtocols
Name constant for the UrlProtocols setting, for use with Config::get()
A collection of static methods to play with strings.
Content object implementation for representing flat text.
Base interface for representing page content.