30 use Wikimedia\IPUtils;
68 $regex = self::makeRegex( $filterEntry, $protocol );
69 return preg_match( $regex, $text );
/**
 * Build a PCRE pattern from a protocol and a link filter entry.
 *
 * The pattern uses "!" as its delimiter and ends with the "S" and "i"
 * modifiers, so matching is case-insensitive. Both the protocol and the
 * filter entry are passed through preg_quote() and therefore match literally.
 *
 * NOTE(review): this listing elides some original lines (the closing braces
 * and the statement after line 88 are not visible). Presumably $regex is
 * returned to the caller; confirm against the full file.
 *
 * @param string $filterEntry Filter entry; a leading "*." acts as a subdomain wildcard
 * @param string $protocol Protocol prefix placed literally at the start of the pattern
 */
82 private static function makeRegex( $filterEntry, $protocol ) {
// Open the pattern with the "!" delimiter followed by the quoted protocol.
83 $regex =
'!' . preg_quote( $protocol,
'!' );
// A leading "*." becomes an optional run of host characters ending in a dot,
// i.e. the entry matches with or without any subdomain prefix.
84 if ( substr( $filterEntry, 0, 2 ) ==
'*.' ) {
85 $regex .=
'(?:[A-Za-z0-9.-]+\.|)';
// Drop the "*." so only the remainder is quoted below.
86 $filterEntry = substr( $filterEntry, 2 );
// Append the quoted remainder, the closing delimiter and the "Si" modifiers.
88 $regex .= preg_quote( $filterEntry,
'!' ) .
'!Si';
/**
 * Normalize a host (domain name, IPv4 or bracketed IPv6 literal) into the
 * canonical string form used when building link indexes.
 *
 * Steps visible in this listing:
 *  - percent-decode the host and attempt an IDN-to-UTF-8 conversion;
 *  - percent-encode every character outside an allow-list;
 *  - bracketed IPv6 literals, optionally ending in a ":*" wildcard, yield
 *    either a bracketed sanitized address or a dot-separated "V6." form;
 *  - a single trailing dot is stripped;
 *  - dotted IPv4 (four octets, or one to three octets followed by "*")
 *    yields a "V4." form with each octet cast to int;
 *  - otherwise the dot-separated labels are reversed and a trailing dot added.
 *
 * NOTE(review): many original lines are elided in this listing. The conditions
 * that choose between the bracketed and "V6."/"V4."/reversed return forms are
 * not visible; presumably they depend on $reverse. Confirm against the full file.
 *
 * @param string $host Host part of a URL
 * @param bool $reverse Defaults to true; its use is in elided lines (see note above)
 */
98 private static function indexifyHost( $host, $reverse =
true ) {
// Work on the percent-decoded host from here on.
102 $host = rawurldecode( $host );
103 if ( $host !==
'' ) {
// idn_to_utf8() returns false on failure; the handling of a successful
// result is elided here (presumably $host is replaced by $tmp; confirm).
104 $tmp = idn_to_utf8( $host );
105 if ( $tmp !==
false ) {
// Characters allowed to stay unencoded: ASCII letters and digits plus
// the punctuation listed in the class below.
109 $okChars =
'a-zA-Z0-9\\-._~!$&\'()*+,;=';
// Bytes 0x80-0xf4 are additionally allowed; the guarding condition is
// elided from this listing.
112 $okChars .=
'\x80-\xf4';
// Percent-encode every single character that is not in the allow-list.
114 $host = preg_replace_callback(
115 '<[^' . $okChars .
']>',
116 static function ( $m ) {
117 return rawurlencode( $m[0] );
// Bracketed literal made of hex digits, colons and "*": treat as IPv6.
// $ip is assigned in an elided line (presumably from $m[1]; confirm).
123 if ( preg_match(
'/^\[([0-9a-f:*]+)\]$/', rawurldecode( $host ), $m ) ) {
125 if ( IPUtils::isValid( $ip ) ) {
127 return '[' . IPUtils::sanitizeIP( $ip ) .
']';
129 return 'V6.' . implode(
'.', explode(
':', IPUtils::sanitizeIP( $ip ) ) ) .
'.';
// Wildcard form: the address ends in ":*". Validate it by completing the
// prefix first with "::" and, failing that, with ":1".
131 if ( substr( $ip, -2 ) ===
':*' ) {
132 $cutIp = substr( $ip, 0, -2 );
133 if ( IPUtils::isValid(
"{$cutIp}::" ) ) {
// Number of colon-separated groups given before the wildcard.
135 $ct = count( explode(
':', $ip ) ) - 1;
137 return '[' . IPUtils::sanitizeIP(
"{$cutIp}::" ) .
']';
140 implode(
'.', array_slice( explode(
':', IPUtils::sanitizeIP(
"{$cutIp}::" ) ), 0, $ct ) ) .
143 if ( IPUtils::isValid(
"{$cutIp}:1" ) ) {
146 return '[' . IPUtils::sanitizeIP(
"{$cutIp}:1" ) .
']';
149 substr( implode(
'.', explode(
':', IPUtils::sanitizeIP(
"{$cutIp}:1" ) ) ), 0, -1 ) .
// Strip one trailing dot from the host.
157 if ( substr( $host, -1 ) ===
'.' ) {
158 $host = substr( $host, 0, -1 );
// $b matches one decimal octet (0-255) with any number of leading zeros.
162 $b =
'(?:0*25[0-5]|0*2[0-4][0-9]|0*1[0-9][0-9]|0*[0-9]?[0-9])';
// Either four octets, or one to three octets followed by a "*" wildcard.
163 if ( preg_match(
"/^(?:{$b}\.){3}{$b}$|^(?:{$b}\.){1,3}\*$/", $host ) ) {
// The (int) cast drops leading zeros; "*" is passed through unchanged.
167 return 'V4.' . implode(
'.', array_map(
static function ( $v ) {
168 return $v ===
'*' ? $v : (int)$v;
169 }, explode(
'.', $host ) ) ) .
'.';
// Domain name: reverse the order of the dot-separated labels and append a dot.
174 return implode(
'.', array_reverse( explode(
'.', $host ) ) ) .
'.';
/**
 * Build the index strings for a URL.
 *
 * Both visible return statements return a list holding a single pair:
 * element 0 is scheme + delimiter + indexified host (+ ":port" when set),
 * element 1 is the path (defaulting to "/") plus "?query" and "#fragment"
 * when those are set.
 *
 * NOTE(review): lines 189-202 of the original are elided in this listing, so
 * the origin of $bits is not visible here; presumably it is the parsed form
 * of $url. Confirm against the full file.
 *
 * @param string $url URL to index
 * @param bool $reverseDomain Forwarded to indexifyHost(); for mailto links it also
 *  decides whether the domain is placed before or after the local part
 * @return array[] List with one [ domain index, path index ] pair (per the visible returns)
 */
188 public static function makeIndexes( $url, $reverseDomain =
true ) {
// mailto: split the "host" on the first "@" only; index 1 is the domain.
203 if ( $bits[
'scheme'] ==
'mailto' ) {
204 $mailparts = explode(
'@', $bits[
'host'], 2 );
205 if ( count( $mailparts ) === 2 ) {
206 $domainpart = self::indexifyHost( $mailparts[1], $reverseDomain );
// Reversed form puts the domain first, otherwise the original order is kept.
211 if ( $reverseDomain ) {
212 $bits[
'host'] = $domainpart .
'@' . $mailparts[0];
214 $bits[
'host'] = $mailparts[0] .
'@' . $domainpart;
217 $bits[
'host'] = self::indexifyHost( $bits[
'host'], $reverseDomain );
// First index: scheme, delimiter and host, plus the port if one was given.
221 $index = $bits[
'scheme'] . $bits[
'delimiter'] . $bits[
'host'];
223 if ( isset( $bits[
'port'] ) ) {
224 $index .=
':' . $bits[
'port'];
// Second index: path (or "/" when absent), then query and fragment.
226 $index2 = $bits[
'path'] ??
'/';
227 if ( isset( $bits[
'query'] ) ) {
228 $index2 .=
'?' . $bits[
'query'];
230 if ( isset( $bits[
'fragment'] ) ) {
231 $index2 .=
'#' . $bits[
'fragment'];
// An empty scheme is indexed with an "https:" prefix.
234 if ( $bits[
'scheme'] ==
'' ) {
235 return [ [
"https:$index", $index2 ] ];
237 return [ [ $index, $index2 ] ];
255 foreach ( $urls as $url ) {
260 foreach ( $indexes as $index ) {
261 $newLinks[] = $index[0] . $index[1];
275 if ( $bits[
'scheme'] ==
'mailto' ) {
276 $mailparts = explode(
'@', $bits[
'host'], 2 );
277 if ( count( $mailparts ) === 2 ) {
278 $domainpart = rtrim( self::reverseDomain( $mailparts[0] ),
'.' );
283 $bits[
'host'] = $mailparts[1] .
'@' . $domainpart;
285 $bits[
'host'] = rtrim( self::reverseDomain( $bits[
'host'] ),
'.' );
288 return $bits[
'scheme'] . $bits[
'delimiter'] . $bits[
'host'];
/**
 * Convert an indexed host back towards its display form.
 *
 * A "V6."-prefixed value has its dots turned back into colons and, when the
 * result is a valid IP, is returned in square brackets. A "V4."-prefixed
 * value has the prefix and surrounding dots removed and is validated as an
 * IP. Anything not returned by those branches is passed to indexifyHost().
 *
 * NOTE(review): the statement executed when the V4 address is valid (original
 * line 300) is elided in this listing; presumably it returns $ipv4. Confirm
 * against the full file.
 *
 * @param string $domain Host in indexed form
 * @return string
 */
291 private static function reverseDomain( $domain ) {
292 if ( substr( $domain, 0, 3 ) ===
'V6.' ) {
// Strip the "V6." prefix and surrounding dots, then restore ":" separators.
293 $ipv6 = str_replace(
'.',
':', trim( substr( $domain, 3 ),
'.' ) );
294 if ( IPUtils::isValid( $ipv6 ) ) {
295 return '[' . $ipv6 .
']';
297 } elseif ( substr( $domain, 0, 3 ) ===
'V4.' ) {
// Strip the "V4." prefix and surrounding dots; the octets are already dotted.
298 $ipv4 = trim( substr( $domain, 3 ),
'.' );
299 if ( IPUtils::isValid( $ipv4 ) ) {
// Fallback: run the value through indexifyHost() with its default arguments.
303 return self::indexifyHost( $domain );
339 return self::getQueryConditionsOld( $filterEntry, $options );
342 'protocol' =>
'http://',
343 'oneWildcard' =>
false,
349 if ( $like ===
false ) {
352 [ $likeDomain, $likePath ] = $like;
357 if ( $trimmedlikeDomain[count( $trimmedlikeDomain ) - 1] instanceof
LikeMatch ) {
358 array_pop( $trimmedlikeDomain );
360 if ( $trimmedlikePath[count( $trimmedlikePath ) - 1] instanceof
LikeMatch ) {
361 array_pop( $trimmedlikePath );
364 $index1 = implode(
'', $trimmedlikeDomain );
365 $index2 = implode(
'', $trimmedlikePath );
368 "el_to_domain_index" . $db->buildLike( $index1, $db->anyString() ),
369 "el_to_path" . $db->buildLike( $index2, $db->anyString() ),
/**
 * Build query conditions on the "el_index_60" and "el_index" fields for a
 * filter entry.
 *
 * Visible option defaults are 'protocol' => 'http://' and
 * 'oneWildcard' => false. Two condition shapes are visible: an equality on
 * el_index_60 using the first 60 bytes of the fixed prefix, and a LIKE on
 * el_index_60 using the prefix followed by a wildcard; both are combined with
 * a LIKE on el_index.
 *
 * NOTE(review): many original lines are elided in this listing, including
 * where $like, $trimmedLike and $db are set and the branch that chooses
 * between the two condition shapes (presumably based on $l). $like is
 * presumably the two-part result of makeLikeArray(); confirm against the
 * full file.
 *
 * @param string $filterEntry Filter entry to search for
 * @param array $options Recognized keys visible here: 'protocol', 'oneWildcard'
 */
373 private static function getQueryConditionsOld( $filterEntry, array $options = [] ) {
375 'protocol' =>
'http://',
376 'oneWildcard' =>
false,
// A strict false here signals that no LIKE pattern could be built.
382 if ( $like ===
false ) {
// Flatten the two parts of the pattern into one list.
386 $like = array_merge( $like[0], $like[1] );
390 $like[count( $like ) - 1] instanceof LikeMatch &&
391 $like[count( $like ) - 3] instanceof LikeMatch &&
392 $like[count( $like ) - 2] ==
'/'
// With 'oneWildcard' set, the trimmed pattern replaces the full one.
401 if ( $options[
'oneWildcard'] ) {
402 $like = $trimmedLike;
// Remove a trailing wildcard token so only literal parts remain to be joined.
404 if ( $trimmedLike[count( $trimmedLike ) - 1] instanceof LikeMatch ) {
405 array_pop( $trimmedLike );
407 $index = implode(
'', $trimmedLike );
// Byte length of the literal prefix.
411 $l = strlen( $index );
416 "el_index_60" => substr( $index, 0, 60 ),
417 "el_index" . $db->buildLike( $like ),
424 "el_index_60" . $db->buildLike( $index, $db->anyString() ),
425 "el_index" . $db->buildLike( $like ),
433 if ( $protocol && !in_array( $protocol, $urlProtocols ) ) {
434 foreach ( $urlProtocols as $p ) {
435 if ( str_starts_with( $p, $protocol ) ) {
451 foreach ( $urlProtocols as $p ) {
453 $protocols[] = substr( $p, 0, strpos( $p,
':' ) );
/**
 * Build the parts of a LIKE pattern for a filter entry, split into a domain
 * half and a path half.
 *
 * The visible return value is [ $likeDomain, $likePath ]. The domain half
 * holds scheme + delimiter + indexified host, optionally a wildcard token and
 * ":port"; the path half holds path, "?query" and "#fragment" when set,
 * followed by a wildcard token.
 *
 * NOTE(review): many original lines are elided in this listing, including
 * where $bits, $db, $likeDomain and $likePath are initialized, the conditions
 * around the wildcard appended at line 506, and the body of the "*" check in
 * the final loop. Confirm those parts against the full file.
 *
 * @param string $filterEntry Filter entry; appended to $protocol to form the target
 * @param string $protocol Protocol prefix, defaults to 'http://'
 * @return array [ $likeDomain, $likePath ] on the visible return path
 */
472 public static function makeLikeArray( $filterEntry, $protocol =
'http://' ) {
// The pattern is derived from protocol and entry concatenated.
477 $target = $protocol . $filterEntry;
// NOTE(review): strpos() yields 0 (falsy) when "@" is the first character of
// the host, so this condition is false for such input; confirm this is intended.
484 if ( $bits[
'scheme'] ===
'mailto' && strpos( $bits[
'host'],
'@' ) ) {
// Split on the first "@" only; index 1 is the domain, index 0 the local part.
486 $mailparts = explode(
'@', $bits[
'host'], 2 );
487 $domainpart = self::indexifyHost( $mailparts[1] );
// A "*" local part contributes nothing after the "@".
488 if ( $mailparts[0] ===
'*' ) {
490 $bits[
'host'] = $domainpart .
'@';
492 $bits[
'host'] = $domainpart .
'@' . $mailparts[0];
496 $bits[
'host'] = self::indexifyHost( $bits[
'host'] );
// An indexified host ending in ".*." loses its last two characters ("*.").
497 if ( substr( $bits[
'host'], -3 ) ===
'.*.' ) {
499 $bits[
'host'] = substr( $bits[
'host'], 0, -2 );
// Domain half: scheme, delimiter and host as one literal part.
503 $likeDomain[] = $bits[
'scheme'] . $bits[
'delimiter'] . $bits[
'host'];
506 $likeDomain[] = $db->anyString();
509 if ( isset( $bits[
'port'] ) ) {
510 $likeDomain[] =
':' . $bits[
'port'];
// Path half: path, query and fragment, each only when present.
512 if ( isset( $bits[
'path'] ) ) {
513 $likePath[] = $bits[
'path'];
517 if ( isset( $bits[
'query'] ) ) {
518 $likePath[] =
'?' . $bits[
'query'];
520 if ( isset( $bits[
'fragment'] ) ) {
521 $likePath[] =
'#' . $bits[
'fragment'];
// The path half always ends with a wildcard token.
523 $likePath[] = $db->anyString();
// Look for a literal "*" left in any plain-string part; the handling of a
// hit is elided from this listing.
526 foreach ( array_merge( $likeDomain, $likePath ) as $likepart ) {
527 if ( !( $likepart instanceof
LikeMatch ) && strpos( $likepart,
'*' ) !==
false ) {
532 return [ $likeDomain, $likePath ];
544 if ( !is_array( $arr ) ) {
548 foreach ( $arr as $key => $value ) {
550 return array_slice( $arr, 0, $key + 1 );
// Register the name 'LinkFilter' as an alias of the LinkFilter class
// referenced by LinkFilter::class, so that name resolves to the same class.
558 class_alias( LinkFilter::class,
'LinkFilter' );
const SCHEMA_COMPAT_READ_OLD
wfParseUrl( $url)
parse_url() work-alike, but non-broken.
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
A class containing constants representing the names of configuration variables.
const UrlProtocols
Name constant for the UrlProtocols setting, for use with Config::get()
const ExternalLinksSchemaMigrationStage
Name constant for the ExternalLinksSchemaMigrationStage setting, for use with Config::get()
A collection of static methods to play with strings.
static isUtf8( $value)
Test whether a string is valid UTF-8.
Content object implementation for representing flat text.
Base interface for representing page content.