/**
 * Check whether the text of a Content object matches a filter entry.
 *
 * Builds a regular expression from $filterEntry and $protocol with
 * self::makeRegex() and runs it against $content->getText().
 *
 * NOTE(review): the original lines between the signature and the first
 * statement are not visible in this excerpt; any guard performed there is
 * not described here.
 *
 * @param Content $content Content whose text is searched
 * @param string $filterEntry Filter entry handed to self::makeRegex()
 *  (presumably a domain pattern such as "*.example.com" - confirm)
 * @param string $protocol Protocol prefix, 'http://' by default
 * @return int|false The preg_match() result: 1 on match, 0 on no match,
 *  false on a regex error
 */
54 public static function matchEntry(
Content $content, $filterEntry, $protocol =
'http://' ) {
62 $text = $content->getText();
// Turn the filter entry into a delimited, case-insensitive pattern.
63 $regex = self::makeRegex( $filterEntry, $protocol );
64 return preg_match( $regex, $text );
/**
 * Build a regular expression that finds a filter entry inside arbitrary text.
 *
 * The pattern is delimited with "!" and consists of the quoted protocol
 * followed by the quoted filter entry. A leading "*." on the filter entry is
 * a subdomain wildcard: it is replaced by an optional group matching any run
 * of host characters ending in a dot, so "*.example.com" matches both
 * "example.com" and "www.example.com".
 *
 * The pattern carries the "S" (study) and "i" (case-insensitive) modifiers.
 *
 * @param string $filterEntry Filter entry, optionally starting with "*."
 * @param string $protocol Protocol prefix such as "http://"
 * @return string A PCRE pattern ready to be passed to preg_match()
 */
private static function makeRegex( $filterEntry, $protocol ) {
	$regex = '!' . preg_quote( $protocol, '!' );
	// str_starts_with() replaces the loose "substr( ... ) == '*.'" prefix test.
	if ( str_starts_with( $filterEntry, '*.' ) ) {
		// Optional subdomain part: one or more host characters and a dot, or nothing.
		$regex .= '(?:[A-Za-z0-9.-]+\.|)';
		$filterEntry = substr( $filterEntry, 2 );
	}
	$regex .= preg_quote( $filterEntry, '!' ) . '!Si';
	return $regex;
}
/**
 * Normalize a host name (or IP address / IP wildcard) into an index string.
 *
 * From the visible code: the host is percent-decoded, checked with
 * idn_to_utf8(), and re-encoded so that only an allowed character set stays
 * literal. Bracketed IPv6 literals yield either "[address]" or a
 * "V6."-prefixed dotted form; IPv4 addresses yield a "V4."-prefixed dotted
 * form; any other host has its dot-separated labels reversed and a trailing
 * dot appended.
 *
 * NOTE(review): several original lines are not visible in this excerpt,
 * including every use of $reverse. Presumably $reverse selects between the
 * bracketed/plain and the "V6."/"V4."/reversed forms - confirm against the
 * full file.
 *
 * @param string $host Host name, IPv4 address, bracketed IPv6 address, or a
 *  wildcard form of either
 * @param bool $reverse Defaults to true; its effect is not visible here
 * @return string The normalized host (all visible return paths build strings)
 */
93 private static function indexifyHost( $host, $reverse =
true ) {
// Undo any percent-encoding before normalizing the host.
95 $host = rawurldecode( $host );
// idn_to_utf8() returns false on failure; what is done with a successful
// result is on a line not visible in this excerpt.
97 $tmp = idn_to_utf8( $host );
98 if ( $tmp !==
false ) {
// Character-class body listing the characters that stay unencoded below.
102 $okChars =
'a-zA-Z0-9\\-._~!$&\'()*+,;=';
// For hosts that are valid UTF-8, bytes 0x80-0xf4 also stay unencoded.
103 if ( StringUtils::isUtf8( $host ) ) {
105 $okChars .=
'\x80-\xf4';
// Percent-encode every run of characters outside the allowed set.
107 $host = preg_replace_callback(
108 '<[^' . $okChars .
']+>',
109 static fn ( $m ) => rawurlencode( $m[0] ),
// Bracketed IPv6 literal made of hex digits, ':' and '*' (wildcard).
114 if ( preg_match(
'/^\[([0-9a-f:*]+)\]$/', rawurldecode( $host ), $m ) ) {
// $ip is assigned on a line not visible here (presumably from $m[1] - confirm).
116 if ( IPUtils::isValid( $ip ) ) {
// Two forms for a fully valid address: the sanitized address in brackets,
// or "V6." followed by its ':'-separated groups joined with '.' and a
// trailing '.'. The condition choosing between them is not visible.
118 return '[' . IPUtils::sanitizeIP( $ip ) .
']';
120 return 'V6.' . implode(
'.', explode(
':', IPUtils::sanitizeIP( $ip ) ) ) .
'.';
// Wildcard address: a prefix followed by ":*".
122 if ( substr( $ip, -2 ) ===
':*' ) {
123 $cutIp = substr( $ip, 0, -2 );
// First attempt: the prefix is a valid address once "::" is appended.
124 if ( IPUtils::isValid(
"{$cutIp}::" ) ) {
// Number of ':' separators in the wildcard address; used below to keep only
// that many leading groups of the sanitized address.
126 $ct = count( explode(
':', $ip ) ) - 1;
128 return '[' . IPUtils::sanitizeIP(
"{$cutIp}::" ) .
']';
131 implode(
'.', array_slice( explode(
':', IPUtils::sanitizeIP(
"{$cutIp}::" ) ), 0, $ct ) ) .
// Second attempt: the prefix is a valid address once ":1" is appended.
134 if ( IPUtils::isValid(
"{$cutIp}:1" ) ) {
137 return '[' . IPUtils::sanitizeIP(
"{$cutIp}:1" ) .
']';
// substr( ..., 0, -1 ) drops the last character of the dotted form, i.e. the
// "1" that was appended only to make the prefix validate.
140 substr( implode(
'.', explode(
':', IPUtils::sanitizeIP(
"{$cutIp}:1" ) ) ), 0, -1 ) .
// Drop a single trailing dot from the host.
148 if ( substr( $host, -1 ) ===
'.' ) {
149 $host = substr( $host, 0, -1 );
// $b matches one decimal octet in 0-255, allowing leading zeros.
153 $b =
'(?:0*25[0-5]|0*2[0-4][0-9]|0*1[0-9][0-9]|0*[0-9]?[0-9])';
// Either a full dotted-quad IPv4 address, or one to three octets followed by
// a "*" wildcard.
154 if ( preg_match(
"/^(?:{$b}\.){3}{$b}$|^(?:{$b}\.){1,3}\*$/", $host ) ) {
// "V4." form: each octet is cast to int (which strips leading zeros), "*" is
// kept as-is, and a trailing '.' is appended. Any condition guarding this
// return is not visible in this excerpt.
158 return 'V4.' . implode(
'.', array_map(
static function ( $v ) {
159 return $v ===
'*' ? $v : (int)$v;
160 }, explode(
'.', $host ) ) ) .
'.';
// Ordinary host name: reverse the order of the dot-separated labels and
// append a trailing '.'.
165 return implode(
'.', array_reverse( explode(
'.', $host ) ) ) .
'.';
194 if ( in_array( $bits[
'scheme'], [
'mailto',
'news' ] ) ) {
196 if ( array_key_exists(
'path', $bits ) ) {
197 $bits[
'host'] = $bits[
'path'];
204 if ( $bits[
'scheme'] ==
'mailto' ) {
205 $mailparts = explode(
'@', $bits[
'host'], 2 );
206 if ( count( $mailparts ) === 2 ) {
207 $domainpart = self::indexifyHost( $mailparts[1], $reverseDomain );
212 if ( $reverseDomain ) {
213 $bits[
'host'] = $domainpart .
'@' . $mailparts[0];
215 $bits[
'host'] = $mailparts[0] .
'@' . $domainpart;
218 $bits[
'host'] = self::indexifyHost( $bits[
'host'], $reverseDomain );
222 $index = $bits[
'scheme'] . $bits[
'delimiter'] . $bits[
'host'];
224 if ( isset( $bits[
'port'] ) ) {
225 $index .=
':' . $bits[
'port'];
227 $index2 = $bits[
'path'] ??
'/';
228 if ( isset( $bits[
'query'] ) ) {
229 $index2 .=
'?' . $bits[
'query'];
231 if ( isset( $bits[
'fragment'] ) ) {
232 $index2 .=
'#' . $bits[
'fragment'];
235 if ( $bits[
'scheme'] ==
'' ) {
236 return [ [
"https:$index", $index2 ] ];
238 return [ [ $index, $index2 ] ];
250 foreach ( $urls as
$url ) {
255 foreach ( $indexes as $index ) {
256 $newLinks[] = $index[0] . $index[1];
270 if ( $bits[
'scheme'] ==
'mailto' ) {
271 $mailparts = explode(
'@', $bits[
'path'], 2 );
272 if ( count( $mailparts ) === 2 ) {
273 $domainpart = rtrim( self::reverseDomain( $mailparts[0] ),
'.' );
278 $bits[
'host'] = $mailparts[1] .
'@' . $domainpart;
280 $bits[
'host'] = rtrim( self::reverseDomain( $bits[
'host'] ),
'.' );
283 $index = $bits[
'scheme'] . $bits[
'delimiter'] . $bits[
'host'];
284 if ( isset( $bits[
'port'] ) && $bits[
'port'] ) {
285 $index .=
':' . $bits[
'port'];
/**
 * Convert an indexed domain string back towards its host form.
 *
 * Strings starting with "V6." are turned back into a bracketed IPv6 literal
 * when the reconstructed address is valid. Strings starting with "V4." are
 * reduced to the dotted address and validated. Everything else is passed to
 * self::indexifyHost().
 *
 * NOTE(review): the body of the IPv4 branch is not visible in this excerpt
 * (presumably it returns $ipv4 - confirm).
 *
 * @param string $domain Indexed domain string
 * @return string
 */
290 private static function reverseDomain( $domain ) {
291 if ( substr( $domain, 0, 3 ) ===
'V6.' ) {
// Strip the "V6." prefix and surrounding dots, then turn the '.' separators
// back into ':'.
292 $ipv6 = str_replace(
'.',
':', trim( substr( $domain, 3 ),
'.' ) );
293 if ( IPUtils::isValid( $ipv6 ) ) {
294 return '[' . $ipv6 .
']';
296 } elseif ( substr( $domain, 0, 3 ) ===
'V4.' ) {
// Strip the "V4." prefix and surrounding dots to recover the dotted address.
297 $ipv4 = trim( substr( $domain, 3 ),
'.' );
298 if ( IPUtils::isValid( $ipv4 ) ) {
// Fallback for anything not handled above.
302 return self::indexifyHost( $domain );
334 'protocol' => [
'http://',
'https://' ],
335 'oneWildcard' =>
false,
342 if ( is_string( $options[
'protocol'] ) ) {
343 $options[
'protocol'] = [ $options[
'protocol'] ];
344 } elseif ( $options[
'protocol'] ===
null ) {
345 $options[
'protocol'] = [
'http://',
'https://' ];
348 $domainConditions = [];
350 foreach ( $options[
'protocol'] as $protocol ) {
352 if ( $like ===
false ) {
355 [ $likeDomain, $likePath ] = $like;
357 if ( $trimmedlikeDomain[count( $trimmedlikeDomain ) - 1] instanceof
LikeMatch ) {
358 array_pop( $trimmedlikeDomain );
360 $index1 = implode(
'', $trimmedlikeDomain );
361 if ( $options[
'oneWildcard'] && $likePath[0] !=
'/' ) {
362 $thisDomainExpr = $db->expr(
'el_to_domain_index',
'=', $index1 );
364 $thisDomainExpr = $db->expr(
365 'el_to_domain_index',
367 new LikeValue( $index1, $db->anyString() )
370 foreach ( $domainGaps[$index1] ?? [] as $from => $to ) {
371 $thisDomainExpr = $thisDomainExpr->andExpr( $db->expr(
'el_id',
'<', $from )->or(
'el_id',
'>', $to ) );
373 $domainConditions[] = $thisDomainExpr;
375 if ( !$domainConditions ) {
380 if ( $trimmedlikePath[count( $trimmedlikePath ) - 1] instanceof
LikeMatch ) {
381 array_pop( $trimmedlikePath );
383 $index2 = implode(
'', $trimmedlikePath );
386 $db->orExpr( $domainConditions ),
387 $db->expr(
'el_to_path', IExpression::LIKE,
new LikeValue( $index2, $db->anyString() ) ),
395 if ( $protocol && !in_array( $protocol, $urlProtocols ) ) {
396 foreach ( $urlProtocols as $p ) {
397 if ( str_starts_with( $p, $protocol ) ) {
413 foreach ( $urlProtocols as $p ) {
415 $protocols[] = substr( $p, 0, strpos( $p,
':' ) );
/**
 * Build the LIKE pattern parts for a filter entry, split into a domain part
 * and a path part.
 *
 * The protocol and filter entry are concatenated and parsed with the
 * UrlUtils service. The host is normalized with self::indexifyHost(), and two
 * arrays are assembled: $likeDomain (scheme, delimiter, host, optional port)
 * and $likePath (path, optional query and fragment, then the value of
 * $db->anyString()).
 *
 * NOTE(review): several original lines are not visible in this excerpt,
 * including the assignment of $services, the else-branches, and the action
 * taken when a stray "*" is found. Presumably that case returns false -
 * confirm against the full file.
 *
 * @param string $filterEntry Filter entry, e.g. a domain pattern
 * @param string $protocol Protocol prefix, 'http://' by default
 * @return array Pair [ $likeDomain, $likePath ] on the visible return path;
 *  other return values may exist on lines not shown here
 */
434 public static function makeLikeArray( $filterEntry, $protocol =
'http://' ) {
// $services is assigned on a line not visible in this excerpt.
436 $db = $services->getConnectionProvider()->getReplicaDatabase();
440 $target = $protocol . $filterEntry;
441 $bits = $services->getUrlUtils()->parse( $target );
// mailto: and news: URLs: when a path is present it is copied into 'host'.
449 if ( in_array( $bits[
'scheme'], [
'mailto',
'news' ] ) ) {
451 if ( array_key_exists(
'path', $bits ) ) {
452 $bits[
'host'] = $bits[
'path'];
// mailto with an "@": normalize the domain part and place it before the
// local part. strpos() is used for truthiness here, so an "@" at offset 0
// does not take this branch.
458 if ( $bits[
'scheme'] ===
'mailto' && strpos( $bits[
'host'],
'@' ) ) {
460 $mailparts = explode(
'@', $bits[
'host'], 2 );
461 $domainpart = self::indexifyHost( $mailparts[1] );
// A "*" local part is dropped, leaving "domain@".
462 if ( $mailparts[0] ===
'*' ) {
464 $bits[
'host'] = $domainpart .
'@';
// Otherwise the local part follows the "@" (presumably the else-branch; the
// line introducing it is not visible).
466 $bits[
'host'] = $domainpart .
'@' . $mailparts[0];
// Plain host normalization; the condition enclosing it is not visible here.
470 $bits[
'host'] = self::indexifyHost( $bits[
'host'] );
// A normalized host ending in ".*." loses its last two characters ("*."),
// leaving the trailing dot of the remaining prefix.
471 if ( substr( $bits[
'host'], -3 ) ===
'.*.' ) {
473 $bits[
'host'] = substr( $bits[
'host'], 0, -2 );
// The domain pattern starts with scheme + delimiter + host.
477 $likeDomain[] = $bits[
'scheme'] . $bits[
'delimiter'] . $bits[
'host'];
// Append the value of $db->anyString() (presumably a LikeMatch wildcard token
// - confirm); any condition guarding this line is not visible here.
480 $likeDomain[] = $db->anyString();
// An explicit port is appended to the domain pattern as ":port".
483 if ( isset( $bits[
'port'] ) ) {
484 $likeDomain[] =
':' . $bits[
'port'];
// The path pattern is built from path, "?query" and "#fragment" when present.
486 if ( isset( $bits[
'path'] ) ) {
487 $likePath[] = $bits[
'path'];
491 if ( isset( $bits[
'query'] ) ) {
492 $likePath[] =
'?' . $bits[
'query'];
494 if ( isset( $bits[
'fragment'] ) ) {
495 $likePath[] =
'#' . $bits[
'fragment'];
// The path pattern always ends with the value of $db->anyString().
497 $likePath[] = $db->anyString();
// Look for a literal "*" left in any plain-string part (LikeMatch objects are
// skipped). What happens when one is found is on lines not visible here.
500 foreach ( array_merge( $likeDomain, $likePath ) as $likepart ) {
501 if ( !( $likepart instanceof
LikeMatch ) && strpos( $likepart,
'*' ) !==
false ) {
506 return [ $likeDomain, $likePath ];
518 if ( !is_array( $arr ) ) {
522 foreach ( $arr as $key => $value ) {
524 return array_slice( $arr, 0, $key + 1 );