/**
 * Run the regular expression built from $filterEntry and $protocol against
 * the text of $content.
 *
 * NOTE(review): only part of this method is visible here (there are gaps
 * between the statements and no closing brace) - confirm against the full file.
 *
 * @param Content $content Object whose getText() result is searched
 * @param string $filterEntry Filter entry, forwarded to makeRegex()
 * @param string $protocol Protocol prefix, forwarded to makeRegex()
 * @return int|false preg_match() result: 1 on a match, 0 on none, false on error
 */
40 public static function matchEntry(
Content $content, $filterEntry, $protocol =
'http://' ) {
48 $text = $content->getText();
49 $regex = self::makeRegex( $filterEntry, $protocol );
50 return preg_match( $regex, $text );
/**
 * Assemble a '!'-delimited regular expression from a protocol and a filter entry.
 *
 * The pattern is the quoted protocol, an optional subdomain group when the
 * entry starts with "*.", and the quoted remainder of the entry, with the
 * modifiers "S" and "i" (case-insensitive).
 *
 * NOTE(review): the return statement is not visible here; matchEntry() uses
 * the result as a pattern, so this presumably returns $regex - confirm.
 *
 * @param string $filterEntry Filter entry, optionally starting with "*."
 * @param string $protocol Protocol prefix placed at the start of the pattern
 */
62 private static function makeRegex( $filterEntry, $protocol ) {
// '!' is the pattern delimiter, so preg_quote() is told to escape it as well
63 $regex =
'!' . preg_quote( $protocol,
'!' );
// Leading "*." wildcard: allow either nothing or any run of host characters ending in a dot
64 if ( str_starts_with( $filterEntry,
'*.' ) ) {
65 $regex .=
'(?:[A-Za-z0-9.-]+\.|)';
66 $filterEntry = substr( $filterEntry, 2 );
// The rest of the entry is matched literally; close the pattern and add the modifiers
68 $regex .= preg_quote( $filterEntry,
'!' ) .
'!Si';
/**
 * Convert a host name or IP literal into the form used for index strings.
 *
 * Visible behaviour:
 *  - the host is percent-decoded, then characters outside an allow-list are
 *    percent-encoded again;
 *  - a bracketed IPv6 literal is returned either as '[' . sanitized IP . ']'
 *    or as 'V6.' followed by its groups joined with dots and a trailing dot;
 *  - an IPv4 address (optionally ending in a '*' wildcard) is returned as
 *    'V4.' followed by its octets as integers and a trailing dot;
 *  - any other host is returned with its dot-separated labels in reverse
 *    order and a trailing dot.
 *
 * NOTE(review): several lines of this method are not visible here, including
 * the conditions that choose between the bracketed and the 'V6.' returns
 * (presumably $reverse) and the assignment of $ip - confirm against the full file.
 *
 * @param string $host Host to convert; may be percent-encoded
 * @param bool $reverse Defaults to true; its use is not visible in this excerpt
 * @return string
 */
79 private static function indexifyHost( $host, $reverse =
true ) {
// Undo percent-encoding before any further processing
81 $host = rawurldecode( $host );
// idn_to_utf8() returns false on failure; what happens with $tmp on success is not visible here
83 $tmp = idn_to_utf8( $host );
84 if ( $tmp !==
false ) {
// Characters that stay unencoded; the list is used inside a negated character class below
88 $okChars =
'a-zA-Z0-9\\-._~!$&\'()*+,;=';
// For hosts that are valid UTF-8, bytes 0x80-0xf4 are left unencoded too
89 if ( StringUtils::isUtf8( $host ) ) {
91 $okChars .=
'\x80-\xf4';
// Percent-encode every run of characters that is not in $okChars
93 $host = preg_replace_callback(
94 '<[^' . $okChars .
']+>',
95 static fn ( $m ) => rawurlencode( $m[0] ),
// Bracketed literal made only of [0-9a-f], ':' and '*': handled as IPv6, possibly with a wildcard
100 if ( preg_match(
'/^\[([0-9a-f:*]+)\]$/', rawurldecode( $host ), $m ) ) {
// $ip is assigned on a line not visible here - presumably the bracket contents in $m[1]
102 if ( IPUtils::isValid( $ip ) ) {
104 return '[' . IPUtils::sanitizeIP( $ip ) .
']';
// Index form: groups of the sanitized address joined with dots instead of colons
106 return 'V6.' . implode(
'.', explode(
':', IPUtils::sanitizeIP( $ip ) ) ) .
'.';
// Wildcard address ending in ':*': strip the wildcard and try to complete the prefix
108 if ( substr( $ip, -2 ) ===
':*' ) {
109 $cutIp = substr( $ip, 0, -2 );
// First attempt: complete the prefix with '::'
110 if ( IPUtils::isValid(
"{$cutIp}::" ) ) {
// Number of colons in the wildcard address; only that many leading groups are kept below
112 $ct = count( explode(
':', $ip ) ) - 1;
114 return '[' . IPUtils::sanitizeIP(
"{$cutIp}::" ) .
']';
117 implode(
'.', array_slice( explode(
':', IPUtils::sanitizeIP(
"{$cutIp}::" ) ), 0, $ct ) ) .
// Second attempt: complete the prefix with ':1'
120 if ( IPUtils::isValid(
"{$cutIp}:1" ) ) {
123 return '[' . IPUtils::sanitizeIP(
"{$cutIp}:1" ) .
']';
126 substr( implode(
'.', explode(
':', IPUtils::sanitizeIP(
"{$cutIp}:1" ) ) ), 0, -1 ) .
// Drop a single trailing dot from the host
134 if ( substr( $host, -1 ) ===
'.' ) {
135 $host = substr( $host, 0, -1 );
// $b matches one IPv4 octet (0-255), tolerating leading zeros
139 $b =
'(?:0*25[0-5]|0*2[0-4][0-9]|0*1[0-9][0-9]|0*[0-9]?[0-9])';
// Either a full dotted quad, or one to three octets followed by a '*' wildcard
140 if ( preg_match(
"/^(?:{$b}\.){3}{$b}$|^(?:{$b}\.){1,3}\*$/", $host ) ) {
// Octets are cast to int (which drops leading zeros); the '*' wildcard is kept as is
144 return 'V4.' . implode(
'.', array_map(
static function ( $v ) {
145 return $v ===
'*' ? $v : (int)$v;
146 }, explode(
'.', $host ) ) ) .
'.';
// Any other host: reverse the order of the dot-separated labels and append a dot
151 return implode(
'.', array_reverse( explode(
'.', $host ) ) ) .
'.';
// NOTE(review): the header of the enclosing function and the code that fills
// $bits are not visible here. The visible part turns the parsed URL parts in
// $bits into a pair [ domain index, path index ].
// For mailto and news the 'path' entry is copied into 'host' when it exists
180 if ( in_array( $bits[
'scheme'], [
'mailto',
'news' ] ) ) {
182 if ( array_key_exists(
'path', $bits ) ) {
183 $bits[
'host'] = $bits[
'path'];
// mailto: split at the first '@' into local part and domain
190 if ( $bits[
'scheme'] ==
'mailto' ) {
191 $mailparts = explode(
'@', $bits[
'host'], 2 );
// Exactly two parts means an '@' was present; only the domain part is indexified
192 if ( count( $mailparts ) === 2 ) {
193 $domainpart = self::indexifyHost( $mailparts[1], $reverseDomain );
// With $reverseDomain the domain comes first ("domain@local"); the other
// assignment keeps "local@domain" (presumably the else branch - confirm)
198 if ( $reverseDomain ) {
199 $bits[
'host'] = $domainpart .
'@' . $mailparts[0];
201 $bits[
'host'] = $mailparts[0] .
'@' . $domainpart;
// Other case (the branch structure is not visible): indexify the whole host
204 $bits[
'host'] = self::indexifyHost( $bits[
'host'], $reverseDomain );
// Domain index: scheme, delimiter and host, plus ":port" when a port is set
208 $index = $bits[
'scheme'] . $bits[
'delimiter'] . $bits[
'host'];
210 if ( isset( $bits[
'port'] ) ) {
211 $index .=
':' . $bits[
'port'];
// Path index: the path (default '/'), then "?query" and "#fragment" when set
213 $index2 = $bits[
'path'] ??
'/';
214 if ( isset( $bits[
'query'] ) ) {
215 $index2 .=
'?' . $bits[
'query'];
217 if ( isset( $bits[
'fragment'] ) ) {
218 $index2 .=
'#' . $bits[
'fragment'];
// An empty scheme gets "https:" put in front of the domain index
221 if ( $bits[
'scheme'] ==
'' ) {
222 return [ [
"https:$index", $index2 ] ];
// Otherwise return the single [ domain index, path index ] pair unchanged
224 return [ [ $index, $index2 ] ];
// NOTE(review): the header of the enclosing function and the line that fills
// $indexes for each $url are not visible here.
236 foreach ( $urls as
$url ) {
// Each entry of $indexes is a two-element array; both halves are concatenated into one string
241 foreach ( $indexes as $index ) {
242 $newLinks[] = $index[0] . $index[1];
// NOTE(review): the header of the enclosing function and the code that fills
// $bits are not visible here. The visible part rebuilds "scheme + delimiter +
// host" after passing the host through reverseDomain().
// mailto: 'path' is split at the first '@'
256 if ( $bits[
'scheme'] ==
'mailto' ) {
257 $mailparts = explode(
'@', $bits[
'path'], 2 );
// Two parts: the first one is passed to reverseDomain() and the result is
// placed after the '@', i.e. the input is treated as "domain@local"
258 if ( count( $mailparts ) === 2 ) {
259 $domainpart = rtrim( self::reverseDomain( $mailparts[0] ),
'.' );
260 $bits[
'host'] = $mailparts[1] .
'@' . $domainpart;
// No '@' found (presumably the else branch - confirm): use the single part as is
263 $bits[
'host'] = $mailparts[0];
// Other schemes (branch structure not visible): reverse the host and strip trailing dots
266 $bits[
'host'] = rtrim( self::reverseDomain( $bits[
'host'] ),
'.' );
269 $index = $bits[
'scheme'] . $bits[
'delimiter'] . $bits[
'host'];
// The port is appended only when it is set and truthy
270 if ( isset( $bits[
'port'] ) && $bits[
'port'] ) {
271 $index .=
':' . $bits[
'port'];
/**
 * Turn an indexed host back into a host name or IP literal.
 *
 * Inputs starting with 'V6.' or 'V4.' are handled as indexed IP addresses;
 * everything else is passed to indexifyHost().
 *
 * NOTE(review): the return statement of the 'V4.' branch and the closing
 * braces are not visible here - confirm against the full file.
 *
 * @param string $domain Indexed host
 * @return string
 */
276 private static function reverseDomain(
string $domain ): string {
// 'V6.' form: drop the prefix, trim surrounding dots and turn the remaining dots into colons
277 if ( str_starts_with( $domain,
'V6.' ) ) {
278 $ipv6 = str_replace(
'.',
':', trim( substr( $domain, 3 ),
'.' ) );
// A valid address is returned in brackets
279 if ( IPUtils::isValid( $ipv6 ) ) {
280 return '[' . $ipv6 .
']';
282 } elseif ( str_starts_with( $domain,
'V4.' ) ) {
// 'V4.' form: drop the prefix and trim surrounding dots
283 $ipv4 = trim( substr( $domain, 3 ),
'.' );
284 if ( IPUtils::isValid( $ipv4 ) ) {
// Fallback: indexifyHost(), whose last return reverses the dot-separated labels
288 return self::indexifyHost( $domain );
// NOTE(review): the header of the enclosing function and the start of this
// array literal are not visible here. The two entries below look like option
// defaults (both HTTP protocols, 'oneWildcard' off) - confirm.
320 'protocol' => [
'http://',
'https://' ],
321 'oneWildcard' =>
false,
// Configuration value ExternalLinksDomainGaps; used below as
// [ domain index => [ from => to ] ] to exclude el_id ranges
324 $domainGaps = MediaWikiServices::getInstance()->getMainConfig()->get(
325 MainConfigNames::ExternalLinksDomainGaps
// Normalise 'protocol' to a list: wrap a single string, expand null to both HTTP protocols
328 if ( is_string( $options[
'protocol'] ) ) {
329 $options[
'protocol'] = [ $options[
'protocol'] ];
330 } elseif ( $options[
'protocol'] ===
null ) {
331 $options[
'protocol'] = [
'http://',
'https://' ];
334 $domainConditions = [];
// Use the connection from $options['db'] when it is truthy, otherwise a replica connection
335 $db = $options[
'db'] ?: MediaWikiServices::getInstance()->getConnectionProvider()->getReplicaDatabase();
// Build one domain condition per protocol
336 foreach ( $options[
'protocol'] as $protocol ) {
337 $like = self::makeLikeArray( $filterEntry, $protocol );
// false means no pattern could be built; how that case is handled is not visible here
338 if ( $like ===
false ) {
341 [ $likeDomain, $likePath ] = $like;
// Trim the domain pattern with keepOneWildcard() and drop a trailing wildcard token,
// so that the remaining parts can be joined into a plain string
342 $trimmedlikeDomain = self::keepOneWildcard( $likeDomain );
343 if ( $trimmedlikeDomain[count( $trimmedlikeDomain ) - 1] instanceof
LikeMatch ) {
344 array_pop( $trimmedlikeDomain );
346 $index1 = implode(
'', $trimmedlikeDomain );
// With 'oneWildcard' set and a path pattern whose first part is not '/', the
// domain index must match exactly; the LIKE prefix match below is presumably
// the else branch - confirm
347 if ( $options[
'oneWildcard'] && $likePath[0] !=
'/' ) {
348 $thisDomainExpr = $db->expr(
'el_to_domain_index',
'=', $index1 );
350 $thisDomainExpr = $db->expr(
351 'el_to_domain_index',
353 new LikeValue( $index1, $db->anyString() )
// For every configured gap of this domain index, require el_id to lie outside [$from, $to]
356 foreach ( $domainGaps[$index1] ?? [] as $from => $to ) {
357 $thisDomainExpr = $thisDomainExpr->andExpr( $db->expr(
'el_id',
'<', $from )->or(
'el_id',
'>', $to ) );
359 $domainConditions[] = $thisDomainExpr;
// No protocol produced a condition; the handling of this case is not visible here
361 if ( !$domainConditions ) {
// Same trimming for the path pattern.
// NOTE(review): $likePath is assigned inside the loop above, so this presumably
// uses the value left by its last iteration - confirm
365 $trimmedlikePath = self::keepOneWildcard( $likePath );
366 if ( $trimmedlikePath[count( $trimmedlikePath ) - 1] instanceof
LikeMatch ) {
367 array_pop( $trimmedlikePath );
369 $index2 = implode(
'', $trimmedlikePath );
// Result parts: any one of the domain conditions, plus a prefix LIKE on el_to_path
372 $db->orExpr( $domainConditions ),
373 $db->expr(
'el_to_path', IExpression::LIKE,
new LikeValue( $index2, $db->anyString() ) ),
// NOTE(review): the header of the enclosing function, the origin of
// $urlProtocols and the body of the innermost branch are not visible here.
// A non-empty $protocol that is not itself in $urlProtocols is compared
// against each entry as a prefix.
381 if ( $protocol && !in_array( $protocol, $urlProtocols ) ) {
382 foreach ( $urlProtocols as $p ) {
383 if ( str_starts_with( $p, $protocol ) ) {
// NOTE(review): the header of the enclosing function and any condition
// between these lines are not visible here.
// Collects the part of each protocol entry that precedes its first ':'.
399 foreach ( $urlProtocols as $p ) {
401 $protocols[] = substr( $p, 0, strpos( $p,
':' ) );
/**
 * Build the parts of a LIKE pattern for $protocol . $filterEntry, split into
 * a domain pattern and a path pattern.
 *
 * Each pattern is a list of literal strings and wildcard tokens obtained from
 * $db->anyString().
 *
 * NOTE(review): several lines of this method are not visible here, including
 * the conditions guarding some of the wildcard tokens and the handling of a
 * literal '*' left in the pattern. A caller in this file compares the result
 * with false, so a false return presumably exists on a hidden line - confirm.
 *
 * @param string $filterEntry Filter entry; '*' is used as a wildcard
 * @param string $protocol Protocol prefix put in front of the entry
 * @return array [ $likeDomain, $likePath ] on the visible return path
 */
420 public static function makeLikeArray( $filterEntry, $protocol =
'http://' ) {
421 $services = MediaWikiServices::getInstance();
// The connection is used below only to create anyString() wildcard tokens
422 $db = $services->getConnectionProvider()->getReplicaDatabase();
// Parse the complete target URL into its parts
426 $target = $protocol . $filterEntry;
427 $bits = $services->getUrlUtils()->parse( $target );
// For mailto and news the 'path' entry is copied into 'host' when it exists
435 if ( in_array( $bits[
'scheme'], [
'mailto',
'news' ] ) ) {
437 if ( array_key_exists(
'path', $bits ) ) {
438 $bits[
'host'] = $bits[
'path'];
// mailto address containing '@'. strpos() is used as a boolean here, so an '@'
// at position 0 does not count
444 if ( $bits[
'scheme'] ===
'mailto' && strpos( $bits[
'host'],
'@' ) ) {
// Split into local part and domain; only the domain is indexified
446 $mailparts = explode(
'@', $bits[
'host'], 2 );
447 $domainpart = self::indexifyHost( $mailparts[1] );
// A local part of '*' is dropped so the host ends right after the '@'; the
// other assignment keeps the local part (presumably the else branch - confirm)
448 if ( $mailparts[0] ===
'*' ) {
450 $bits[
'host'] = $domainpart .
'@';
452 $bits[
'host'] = $domainpart .
'@' . $mailparts[0];
// Other case (branch structure not visible): indexify the whole host
456 $bits[
'host'] = self::indexifyHost( $bits[
'host'] );
// An indexed host ending in '.*.' has its last two characters ('*.') removed
457 if ( substr( $bits[
'host'], -3 ) ===
'.*.' ) {
459 $bits[
'host'] = substr( $bits[
'host'], 0, -2 );
// Domain pattern: scheme, delimiter and host as one literal part
463 $likeDomain[] = $bits[
'scheme'] . $bits[
'delimiter'] . $bits[
'host'];
// Wildcard token after the host; the condition guarding it is not visible here
466 $likeDomain[] = $db->anyString();
469 if ( isset( $bits[
'port'] ) ) {
470 $likeDomain[] =
':' . $bits[
'port'];
// Path pattern: path, "?query" and "#fragment" when set, then a wildcard token
472 if ( isset( $bits[
'path'] ) ) {
473 $likePath[] = $bits[
'path'];
477 if ( isset( $bits[
'query'] ) ) {
478 $likePath[] =
'?' . $bits[
'query'];
480 if ( isset( $bits[
'fragment'] ) ) {
481 $likePath[] =
'#' . $bits[
'fragment'];
483 $likePath[] = $db->anyString();
// Look for a literal '*' left in any string part; the action taken is not visible here
486 foreach ( array_merge( $likeDomain, $likePath ) as $likepart ) {
487 if ( !( $likepart instanceof
LikeMatch ) && str_contains( $likepart,
'*' ) ) {
492 return [ $likeDomain, $likePath ];
// NOTE(review): the header of the enclosing function and the branch bodies
// are not visible here - presumably this is keepOneWildcard(), which other
// code in this file calls on LIKE pattern arrays; confirm.
// Non-array input is special-cased (the handling is not visible here)
504 if ( !is_array( $arr ) ) {
508 foreach ( $arr as $key => $value ) {
// Returns the first $key + 1 elements of $arr; the condition on $value that
// triggers this return is not visible here
510 return array_slice( $arr, 0, $key + 1 );