65 $regex = self::makeRegex( $filterEntry, $protocol );
66 return preg_match( $regex, $text );
/**
 * Build a PCRE pattern from a protocol prefix and a filter entry.
 *
 * NOTE(review): this listing omits some original lines (the embedded
 * numbering skips 83 and everything after 84), so the end of the `if`
 * block and the return statement are not visible here. The caller in
 * this file passes the result straight to preg_match(), so the method
 * presumably returns $regex - confirm against the full file.
 *
 * @param string $filterEntry Filter entry; a leading "*." is treated as a wildcard prefix
 * @param string $protocol Protocol prefix, matched literally at the start of the pattern
 */
78 private static function makeRegex( $filterEntry, $protocol ) {
// "!" is the pattern delimiter; the protocol is quoted so it matches literally.
79 $regex =
'!' . preg_quote( $protocol,
'!' );
// A leading "*." is replaced by an optional group: either a run of letters,
// digits, dots and hyphens that ends in a dot, or nothing at all.
80 if ( substr( $filterEntry, 0, 2 ) ==
'*.' ) {
81 $regex .=
'(?:[A-Za-z0-9.-]+\.|)';
// Drop the "*." so only the remainder is quoted below.
82 $filterEntry = substr( $filterEntry, 2 );
// The rest of the entry is matched literally. The "i" modifier makes the
// match case-insensitive; no start or end anchors are added in the
// visible lines.
84 $regex .= preg_quote( $filterEntry,
'!' ) .
'!Si';
/**
 * Convert a host string into the form used for index entries.
 *
 * From the visible return statements, the result is one of:
 *  - a bracketed, sanitized IPv6 address, e.g. "[...]";
 *  - a "V6."-prefixed string where the IPv6 groups are joined with dots;
 *  - a "V4."-prefixed string of the IPv4 parts followed by a trailing dot;
 *  - the dot-separated labels of the host in reverse order, followed by
 *    a trailing dot.
 *
 * NOTE(review): many original lines are missing from this listing. The
 * conditions that choose between the bracketed and the "V6."/"V4." forms
 * are not visible; presumably they test $reverse - confirm against the
 * full file.
 *
 * @param string $host Host name, IPv4 address or bracketed IPv6 address; may contain a wildcard
 * @param bool $reverse Defaults to true; its use is not visible in this listing
 */
95 private static function indexifyHost( $host, $reverse =
true ) {
// Undo any percent-encoding in the host first.
97 $host = rawurldecode( $host );
// idn_to_utf8() returns false on failure, hence the strict comparison.
// The statement inside the `if` is not visible; presumably it stores $tmp
// back into $host - TODO confirm.
99 $tmp = idn_to_utf8( $host );
100 if ( $tmp !==
false ) {
// Character-class body listing the characters that are left unencoded.
104 $okChars =
'a-zA-Z0-9\\-._~!$&\'()*+,;=';
// When the host is valid UTF-8, bytes 0x80-0xf4 are also left unencoded.
105 if ( StringUtils::isUtf8( $host ) ) {
107 $okChars .=
'\x80-\xf4';
// Percent-encode every run of characters outside the allowed set.
// The subject argument of this call is on a line not shown here.
109 $host = preg_replace_callback(
110 '<[^' . $okChars .
']+>',
111 static fn ( $m ) => rawurlencode( $m[0] ),
// Bracketed IPv6 literal: lowercase hex digits, colons and "*" inside
// square brackets. The pattern has no case-insensitive modifier.
// NOTE(review): $ip is assigned on a line not shown; presumably it is the
// captured group $m[1] - confirm.
116 if ( preg_match(
'/^\[([0-9a-f:*]+)\]$/', rawurldecode( $host ), $m ) ) {
// Complete, valid address: two alternative return forms are visible.
118 if ( IPUtils::isValid( $ip ) ) {
120 return '[' . IPUtils::sanitizeIP( $ip ) .
']';
// "V6." form: split the sanitized address at the colons, join the groups
// with dots and append a trailing dot.
122 return 'V6.' . implode(
'.', explode(
':', IPUtils::sanitizeIP( $ip ) ) ) .
'.';
// Address ending in ":*": strip that suffix and try to complete the
// remainder into a valid address.
124 if ( substr( $ip, -2 ) ===
':*' ) {
125 $cutIp = substr( $ip, 0, -2 );
// First attempt: complete the prefix by appending "::".
126 if ( IPUtils::isValid(
"{$cutIp}::" ) ) {
// Number of colon-separated pieces in $ip minus one, which is the number
// of groups written before the "*".
128 $ct = count( explode(
':', $ip ) ) - 1;
130 return '[' . IPUtils::sanitizeIP(
"{$cutIp}::" ) .
']';
// Keep only the first $ct groups of the sanitized address, joined by dots.
// The start and end of this statement are on lines not shown.
133 implode(
'.', array_slice( explode(
':', IPUtils::sanitizeIP(
"{$cutIp}::" ) ), 0, $ct ) ) .
// Second attempt: complete the prefix by appending ":1".
136 if ( IPUtils::isValid(
"{$cutIp}:1" ) ) {
139 return '[' . IPUtils::sanitizeIP(
"{$cutIp}:1" ) .
']';
// Join the groups with dots, then drop the last character of the joined
// string (presumably the "1" that was appended only for validation).
142 substr( implode(
'.', explode(
':', IPUtils::sanitizeIP(
"{$cutIp}:1" ) ) ), 0, -1 ) .
// Remove a single trailing dot from the host.
150 if ( substr( $host, -1 ) ===
'.' ) {
151 $host = substr( $host, 0, -1 );
// $b matches one decimal number from 0 to 255, allowing leading zeros.
155 $b =
'(?:0*25[0-5]|0*2[0-4][0-9]|0*1[0-9][0-9]|0*[0-9]?[0-9])';
// Either a full dotted quad, or one to three such numbers followed by a
// final "*" part.
156 if ( preg_match(
"/^(?:{$b}\.){3}{$b}$|^(?:{$b}\.){1,3}\*$/", $host ) ) {
// "V4." form: each numeric part is cast to int, which removes leading
// zeros; a "*" part is kept as is. A trailing dot is appended.
160 return 'V4.' . implode(
'.', array_map(
static function ( $v ) {
161 return $v ===
'*' ? $v : (int)$v;
162 }, explode(
'.', $host ) ) ) .
'.';
// Any other host: reverse the order of the dot-separated labels and append
// a trailing dot.
167 return implode(
'.', array_reverse( explode(
'.', $host ) ) ) .
'.';
/**
 * Split a URL into a domain index string and a path string.
 *
 * The visible return statements produce an array holding a single pair
 * [ domain index, path index ]. The domain index is scheme, delimiter and
 * host, plus the port when present. The path index is the path (or "/"),
 * plus the query and fragment when present.
 *
 * NOTE(review): the statement that fills $bits is on a line not shown in
 * this listing; presumably $bits is the parsed form of $url with keys such
 * as scheme, delimiter, host, port, path, query and fragment - confirm.
 * Any early return for an unparsable URL is also not visible.
 *
 * @param string $url URL to index
 * @param bool $reverseDomain Passed on to indexifyHost(); also selects the order of the two mailto parts
 * @return array[] Visible returns: a list containing one [ index, path ] pair
 */
182 public static function makeIndexes( $url, $reverseDomain =
true ) {
// For mailto and news URLs the path, when present, is copied into the
// host slot.
196 if ( in_array( $bits[
'scheme'], [
'mailto',
'news' ] ) ) {
198 if ( array_key_exists(
'path', $bits ) ) {
199 $bits[
'host'] = $bits[
'path'];
// mailto: split at the first "@" only (limit 2), giving the part before
// it and the part after it.
206 if ( $bits[
'scheme'] ==
'mailto' ) {
207 $mailparts = explode(
'@', $bits[
'host'], 2 );
// Only when an "@" was found is the part after it indexed as a host.
208 if ( count( $mailparts ) === 2 ) {
209 $domainpart = self::indexifyHost( $mailparts[1], $reverseDomain );
// With $reverseDomain the indexed domain comes first, then "@", then the
// part before the "@"; the other visible assignment keeps the original order.
214 if ( $reverseDomain ) {
215 $bits[
'host'] = $domainpart .
'@' . $mailparts[0];
217 $bits[
'host'] = $mailparts[0] .
'@' . $domainpart;
// NOTE(review): the branch this assignment belongs to is not visible;
// presumably it handles schemes other than mailto - confirm.
220 $bits[
'host'] = self::indexifyHost( $bits[
'host'], $reverseDomain );
// Domain index: scheme, delimiter, host.
224 $index = $bits[
'scheme'] . $bits[
'delimiter'] . $bits[
'host'];
// Append the port when one is set.
226 if ( isset( $bits[
'port'] ) ) {
227 $index .=
':' . $bits[
'port'];
// Path index: the path, or "/" when there is none.
229 $index2 = $bits[
'path'] ??
'/';
230 if ( isset( $bits[
'query'] ) ) {
231 $index2 .=
'?' . $bits[
'query'];
233 if ( isset( $bits[
'fragment'] ) ) {
234 $index2 .=
'#' . $bits[
'fragment'];
// An empty scheme gets "https:" put in front of the domain index.
237 if ( $bits[
'scheme'] ==
'' ) {
238 return [ [
"https:$index", $index2 ] ];
240 return [ [ $index, $index2 ] ];
252 foreach ( $urls as $url ) {
257 foreach ( $indexes as $index ) {
258 $newLinks[] = $index[0] . $index[1];
272 if ( $bits[
'scheme'] ==
'mailto' ) {
273 $mailparts = explode(
'@', $bits[
'path'], 2 );
274 if ( count( $mailparts ) === 2 ) {
275 $domainpart = rtrim( self::reverseDomain( $mailparts[0] ),
'.' );
280 $bits[
'host'] = $mailparts[1] .
'@' . $domainpart;
282 $bits[
'host'] = rtrim( self::reverseDomain( $bits[
'host'] ),
'.' );
285 $index = $bits[
'scheme'] . $bits[
'delimiter'] . $bits[
'host'];
286 if ( isset( $bits[
'port'] ) && $bits[
'port'] ) {
287 $index .=
':' . $bits[
'port'];
/**
 * Convert an indexed domain string back towards a regular host string.
 *
 * Handles the "V6." and "V4." prefixed forms specially and otherwise
 * delegates to indexifyHost() with its default arguments.
 *
 * NOTE(review): some original lines are missing from this listing; in
 * particular the statement executed when the IPv4 string is valid is not
 * visible (presumably it returns $ipv4 - confirm).
 *
 * @param string $domain Indexed domain string
 */
292 private static function reverseDomain( $domain ) {
// "V6." form: drop the prefix, trim the surrounding dots and turn the
// remaining dots back into colons.
293 if ( substr( $domain, 0, 3 ) ===
'V6.' ) {
294 $ipv6 = str_replace(
'.',
':', trim( substr( $domain, 3 ),
'.' ) );
// A valid address is returned in square brackets.
295 if ( IPUtils::isValid( $ipv6 ) ) {
296 return '[' . $ipv6 .
']';
// "V4." form: drop the prefix and trim the surrounding dots.
298 } elseif ( substr( $domain, 0, 3 ) ===
'V4.' ) {
299 $ipv4 = trim( substr( $domain, 3 ),
'.' );
300 if ( IPUtils::isValid( $ipv4 ) ) {
// Fallback: run the string through indexifyHost() with its defaults.
304 return self::indexifyHost( $domain );
336 'protocol' => [
'http://',
'https://' ],
337 'oneWildcard' =>
false,
344 if ( is_string( $options[
'protocol'] ) ) {
345 $options[
'protocol'] = [ $options[
'protocol'] ];
346 } elseif ( $options[
'protocol'] ===
null ) {
347 $options[
'protocol'] = [
'http://',
'https://' ];
350 $domainConditions = [];
352 foreach ( $options[
'protocol'] as $protocol ) {
354 if ( $like ===
false ) {
357 [ $likeDomain, $likePath ] = $like;
359 if ( $trimmedlikeDomain[count( $trimmedlikeDomain ) - 1] instanceof
LikeMatch ) {
360 array_pop( $trimmedlikeDomain );
362 $index1 = implode(
'', $trimmedlikeDomain );
363 $thisDomainConditions = [];
364 if ( $options[
'oneWildcard'] && $likePath[0] !=
'/' ) {
365 $thisDomainConditions[] = $db->expr(
'el_to_domain_index',
'=', $index1 );
367 $thisDomainConditions[] = $db->expr(
368 'el_to_domain_index',
370 new LikeValue( $index1, $db->anyString() )
373 foreach ( $domainGaps[$index1] ?? [] as $from => $to ) {
374 $thisDomainConditions[] = $db->expr(
'el_id',
'<', $from )->or(
'el_id',
'>', $to );
379 if ( !$domainConditions ) {
384 if ( $trimmedlikePath[count( $trimmedlikePath ) - 1] instanceof
LikeMatch ) {
385 array_pop( $trimmedlikePath );
387 $index2 = implode(
'', $trimmedlikePath );
391 $db->expr(
'el_to_path', IExpression::LIKE,
new LikeValue( $index2, $db->anyString() ) ),
399 if ( $protocol && !in_array( $protocol, $urlProtocols ) ) {
400 foreach ( $urlProtocols as $p ) {
401 if ( str_starts_with( $p, $protocol ) ) {
417 foreach ( $urlProtocols as $p ) {
419 $protocols[] = substr( $p, 0, strpos( $p,
':' ) );
/**
 * Build the parts of two LIKE patterns, one for the domain index and one
 * for the path, from a filter entry and a protocol.
 *
 * The visible return statement yields [ $likeDomain, $likePath ], two
 * lists whose elements are plain strings and wildcard tokens obtained
 * from $db->anyString().
 *
 * NOTE(review): many original lines are missing from this listing. The
 * setup of $db, $bits, $likeDomain and $likePath is not visible, nor is
 * the body of the final loop or any failure return. Presumably $bits is
 * the parsed form of $target - confirm against the full file.
 *
 * @param string $filterEntry Filter entry, appended to the protocol
 * @param string $protocol Protocol prefix; defaults to "http://"
 * @return array Visible return: [ $likeDomain, $likePath ]
 */
438 public static function makeLikeArray( $filterEntry, $protocol =
'http://' ) {
// The full target is the protocol directly followed by the filter entry.
443 $target = $protocol . $filterEntry;
// For mailto and news the path is copied into the host slot.
452 if ( in_array( $bits[
'scheme'], [
'mailto',
'news' ] ) ) {
453 $bits[
'host'] = $bits[
'path'];
// mailto with an "@" in the host.
// NOTE(review): strpos() is used for its truthiness here, so an "@" at
// offset 0 is treated the same as no "@" at all.
458 if ( $bits[
'scheme'] ===
'mailto' && strpos( $bits[
'host'],
'@' ) ) {
// Split at the first "@" only and index the part after it as a host.
460 $mailparts = explode(
'@', $bits[
'host'], 2 );
461 $domainpart = self::indexifyHost( $mailparts[1] );
// When the part before the "@" is exactly "*", only the indexed domain and
// "@" are kept; otherwise that part is appended after the "@".
462 if ( $mailparts[0] ===
'*' ) {
464 $bits[
'host'] = $domainpart .
'@';
466 $bits[
'host'] = $domainpart .
'@' . $mailparts[0];
// NOTE(review): the branch this belongs to is not visible; presumably it
// handles everything that is not a mailto address with "@" - confirm.
470 $bits[
'host'] = self::indexifyHost( $bits[
'host'] );
// An indexed host ending in ".*." loses its last two characters, leaving
// the prefix with a trailing dot.
471 if ( substr( $bits[
'host'], -3 ) ===
'.*.' ) {
473 $bits[
'host'] = substr( $bits[
'host'], 0, -2 );
// First domain part: scheme, delimiter, host.
477 $likeDomain[] = $bits[
'scheme'] . $bits[
'delimiter'] . $bits[
'host'];
// A match-anything token for the domain. The condition guarding this line
// is not visible (presumably the wildcard case detected above - confirm).
480 $likeDomain[] = $db->anyString();
// The port, when set, is appended to the domain parts.
483 if ( isset( $bits[
'port'] ) ) {
484 $likeDomain[] =
':' . $bits[
'port'];
// Path, query and fragment, when set, become the path parts.
486 if ( isset( $bits[
'path'] ) ) {
487 $likePath[] = $bits[
'path'];
491 if ( isset( $bits[
'query'] ) ) {
492 $likePath[] =
'?' . $bits[
'query'];
494 if ( isset( $bits[
'fragment'] ) ) {
495 $likePath[] =
'#' . $bits[
'fragment'];
// The path pattern ends with a match-anything token.
497 $likePath[] = $db->anyString();
// Look for a literal "*" left in any plain-string part. The action taken
// when one is found is on a line not shown here.
500 foreach ( array_merge( $likeDomain, $likePath ) as $likepart ) {
501 if ( !( $likepart instanceof
LikeMatch ) && strpos( $likepart,
'*' ) !==
false ) {
506 return [ $likeDomain, $likePath ];
518 if ( !is_array( $arr ) ) {
522 foreach ( $arr as $key => $value ) {
524 return array_slice( $arr, 0, $key + 1 );
Content object implementation for representing flat text.
Base interface for representing page content.