/**
 * Check whether the text of a content object matches a link filter entry.
 *
 * The entry is turned into a regular expression by self::makeRegex() and
 * applied to the content text with preg_match().
 *
 * @param Content $content Content whose text (via getText()) is searched
 * @param string $filterEntry Filter entry; a leading '*.' is handled as a
 *  subdomain wildcard by self::makeRegex()
 * @param string $protocol Protocol prefix the match must start with
 * @return int|false Raw preg_match() result: 1 on match, 0 on no match,
 *  false on regex error
 */
55 public static function matchEntry(
Content $content, $filterEntry, $protocol =
'http://' ) {
63 $text = $content->getText();
64 $regex = self::makeRegex( $filterEntry, $protocol );
65 return preg_match( $regex, $text );
/**
 * Build the regular expression used by matchEntry() for a filter entry.
 *
 * The pattern uses '!' as delimiter and the 'S' and 'i' modifiers
 * (case-insensitive match).
 *
 * NOTE(review): the return statement is not visible in this excerpt;
 * matchEntry() passes the result straight to preg_match().
 *
 * @param string $filterEntry Filter entry, optionally starting with '*.'
 * @param string $protocol Protocol prefix, quoted literally into the pattern
 */
77 private static function makeRegex( $filterEntry, $protocol ) {
// Start with the delimiter and the literal (quoted) protocol.
78 $regex =
'!' . preg_quote( $protocol,
'!' );
79 if ( substr( $filterEntry, 0, 2 ) ==
'*.' ) {
// Wildcard entry: allow any number of subdomain labels, or none at all.
80 $regex .=
'(?:[A-Za-z0-9.-]+\.|)';
// Drop the '*.' so only the literal domain remains.
81 $filterEntry = substr( $filterEntry, 2 );
// Append the literal (quoted) entry, the closing delimiter and modifiers.
83 $regex .= preg_quote( $filterEntry,
'!' ) .
'!Si';
/**
 * Canonicalize a host (domain name or IP literal, possibly ending in a
 * wildcard) into the form used for index strings.
 *
 * NOTE(review): several lines of this method are missing from the excerpt
 * (see the gaps in the embedded line numbers), including the assignment of
 * $ip and the conditions that test $reverse. The comments below describe
 * only the statements that are visible.
 *
 * @param string $host Host, possibly percent-encoded
 * @param bool $reverse Presumably selects between the reversed forms
 *  ('V6.…', 'V4.…', reversed labels) and the plain forms — TODO confirm
 *  against the full source
 * @return string
 */
94 private static function indexifyHost( $host, $reverse =
true ) {
// Undo percent-encoding before any further processing.
96 $host = rawurldecode( $host );
// Attempt IDN (punycode) to UTF-8 conversion; false signals failure.
98 $tmp = idn_to_utf8( $host );
99 if ( $tmp !==
false ) {
// Character-class body listing the characters left unencoded in the host.
103 $okChars =
'a-zA-Z0-9\\-._~!$&\'()*+,;=';
104 if ( StringUtils::isUtf8( $host ) ) {
// For valid UTF-8 input, bytes 0x80-0xf4 are also left unencoded.
106 $okChars .=
'\x80-\xf4';
// Percent-encode every run of characters outside $okChars.
108 $host = preg_replace_callback(
109 '<[^' . $okChars .
']+>',
110 static fn ( $m ) => rawurlencode( $m[0] ),
// Bracketed IPv6 literal; '*' is accepted so wildcard forms reach this branch.
115 if ( preg_match(
'/^\[([0-9a-f:*]+)\]$/', rawurldecode( $host ), $m ) ) {
// $ip is assigned on a line not shown here (presumably from $m[1]).
117 if ( IPUtils::isValid( $ip ) ) {
// Fully valid address, plain form: sanitized IP back in brackets.
119 return '[' . IPUtils::sanitizeIP( $ip ) .
']';
// Fully valid address, 'V6.' form: groups joined by '.', with trailing '.'.
121 return 'V6.' . implode(
'.', explode(
':', IPUtils::sanitizeIP( $ip ) ) ) .
'.';
// Wildcard form: address ending in ':*'.
123 if ( substr( $ip, -2 ) ===
':*' ) {
124 $cutIp = substr( $ip, 0, -2 );
// First try completing the prefix with '::' to obtain a valid address.
125 if ( IPUtils::isValid(
"{$cutIp}::" ) ) {
// Number of ':'-separated parts before the '*'; used below to slice the
// sanitized address back down to the given prefix groups.
127 $ct = count( explode(
':', $ip ) ) - 1;
129 return '[' . IPUtils::sanitizeIP(
"{$cutIp}::" ) .
']';
// Tail of an expression whose start is not visible: the first $ct groups
// of the sanitized address, joined by '.'.
132 implode(
'.', array_slice( explode(
':', IPUtils::sanitizeIP(
"{$cutIp}::" ) ), 0, $ct ) ) .
// Otherwise try completing the prefix with ':1' as the final group.
135 if ( IPUtils::isValid(
"{$cutIp}:1" ) ) {
138 return '[' . IPUtils::sanitizeIP(
"{$cutIp}:1" ) .
']';
// Tail of an expression whose start is not visible: the dotted sanitized
// address with its last character (the placeholder '1') removed.
141 substr( implode(
'.', explode(
':', IPUtils::sanitizeIP(
"{$cutIp}:1" ) ) ), 0, -1 ) .
// Strip a single trailing dot from the host.
149 if ( substr( $host, -1 ) ===
'.' ) {
150 $host = substr( $host, 0, -1 );
// $b matches one IPv4 octet, 0-255, with any number of leading zeros.
154 $b =
'(?:0*25[0-5]|0*2[0-4][0-9]|0*1[0-9][0-9]|0*[0-9]?[0-9])';
// Either a full dotted quad, or one to three octets followed by '*'.
155 if ( preg_match(
"/^(?:{$b}\.){3}{$b}$|^(?:{$b}\.){1,3}\*$/", $host ) ) {
// 'V4.' form: each octet cast to int (dropping leading zeros), '*' kept
// as-is, with a trailing '.'.
159 return 'V4.' . implode(
'.', array_map(
static function ( $v ) {
160 return $v ===
'*' ? $v : (int)$v;
161 }, explode(
'.', $host ) ) ) .
'.';
// Ordinary domain: reverse the order of the labels and append a trailing '.'.
166 return implode(
'.', array_reverse( explode(
'.', $host ) ) ) .
'.';
/**
 * Build the index strings for a URL as [ domain index, path index ] pairs.
 *
 * NOTE(review): $bits is populated on lines missing from this excerpt
 * (presumably by parsing $url — TODO confirm), and the 'else' keywords
 * separating some branches below are not visible either.
 *
 * @param string $url URL to index
 * @param bool $reverseDomain Forwarded to self::indexifyHost() as $reverse;
 *  also decides the order of domain and local part for mailto: addresses
 * @return array The visible returns yield a list containing a single
 *  [ $index, $index2 ] pair
 */
181 public static function makeIndexes( $url, $reverseDomain =
true ) {
// For mailto: and news: the parsed path is used in place of the host.
195 if ( in_array( $bits[
'scheme'], [
'mailto',
'news' ] ) ) {
197 if ( array_key_exists(
'path', $bits ) ) {
198 $bits[
'host'] = $bits[
'path'];
// mailto: split into local part and domain at the first '@'.
205 if ( $bits[
'scheme'] ==
'mailto' ) {
206 $mailparts = explode(
'@', $bits[
'host'], 2 );
207 if ( count( $mailparts ) === 2 ) {
// Only the domain part goes through host canonicalization.
208 $domainpart = self::indexifyHost( $mailparts[1], $reverseDomain );
213 if ( $reverseDomain ) {
// Reversed form puts the domain first: domain@local.
214 $bits[
'host'] = $domainpart .
'@' . $mailparts[0];
// Non-reversed form keeps the usual order: local@domain.
216 $bits[
'host'] = $mailparts[0] .
'@' . $domainpart;
// Every other scheme: canonicalize the whole host.
219 $bits[
'host'] = self::indexifyHost( $bits[
'host'], $reverseDomain );
// Domain index: scheme + delimiter + host, plus ':port' when a port is set.
223 $index = $bits[
'scheme'] . $bits[
'delimiter'] . $bits[
'host'];
225 if ( isset( $bits[
'port'] ) ) {
226 $index .=
':' . $bits[
'port'];
// Path index: path (defaulting to '/'), then optional query and fragment.
228 $index2 = $bits[
'path'] ??
'/';
229 if ( isset( $bits[
'query'] ) ) {
230 $index2 .=
'?' . $bits[
'query'];
232 if ( isset( $bits[
'fragment'] ) ) {
233 $index2 .=
'#' . $bits[
'fragment'];
// An empty scheme gets 'https:' prepended to the domain index.
236 if ( $bits[
'scheme'] ==
'' ) {
237 return [ [
"https:$index", $index2 ] ];
239 return [ [ $index, $index2 ] ];
251 foreach ( $urls as $url ) {
256 foreach ( $indexes as $index ) {
257 $newLinks[] = $index[0] . $index[1];
271 if ( $bits[
'scheme'] ==
'mailto' ) {
272 $mailparts = explode(
'@', $bits[
'path'], 2 );
273 if ( count( $mailparts ) === 2 ) {
274 $domainpart = rtrim( self::reverseDomain( $mailparts[0] ),
'.' );
279 $bits[
'host'] = $mailparts[1] .
'@' . $domainpart;
281 $bits[
'host'] = rtrim( self::reverseDomain( $bits[
'host'] ),
'.' );
284 $index = $bits[
'scheme'] . $bits[
'delimiter'] . $bits[
'host'];
285 if ( isset( $bits[
'port'] ) && $bits[
'port'] ) {
286 $index .=
':' . $bits[
'port'];
/**
 * Turn an indexed domain ('V6.…', 'V4.…' or a reversed-label host) back
 * into host form.
 *
 * NOTE(review): the statement executed for a valid 'V4.' address is on a
 * line missing from this excerpt.
 *
 * @param string $domain Indexed domain string
 * @return string Bracketed IPv6 literal for a valid 'V6.' value; otherwise
 *  the visible fallback is self::indexifyHost( $domain )
 */
291 private static function reverseDomain( $domain ) {
292 if ( substr( $domain, 0, 3 ) ===
'V6.' ) {
// Drop the 'V6.' prefix and surrounding dots, then restore ':' separators.
293 $ipv6 = str_replace(
'.',
':', trim( substr( $domain, 3 ),
'.' ) );
294 if ( IPUtils::isValid( $ipv6 ) ) {
295 return '[' . $ipv6 .
']';
297 } elseif ( substr( $domain, 0, 3 ) ===
'V4.' ) {
// Drop the 'V4.' prefix and surrounding dots to get the dotted quad.
298 $ipv4 = trim( substr( $domain, 3 ),
'.' );
299 if ( IPUtils::isValid( $ipv4 ) ) {
// Fallback: indexifyHost() with its default $reverse reverses the label
// order again for ordinary domains.
303 return self::indexifyHost( $domain );
335 'protocol' => [
'http://',
'https://' ],
336 'oneWildcard' =>
false,
343 if ( is_string( $options[
'protocol'] ) ) {
344 $options[
'protocol'] = [ $options[
'protocol'] ];
345 } elseif ( $options[
'protocol'] ===
null ) {
346 $options[
'protocol'] = [
'http://',
'https://' ];
349 $domainConditions = [];
351 foreach ( $options[
'protocol'] as $protocol ) {
353 if ( $like ===
false ) {
356 [ $likeDomain, $likePath ] = $like;
358 if ( $trimmedlikeDomain[count( $trimmedlikeDomain ) - 1] instanceof
LikeMatch ) {
359 array_pop( $trimmedlikeDomain );
361 $index1 = implode(
'', $trimmedlikeDomain );
362 if ( $options[
'oneWildcard'] && $likePath[0] !=
'/' ) {
363 $thisDomainExpr = $db->expr(
'el_to_domain_index',
'=', $index1 );
365 $thisDomainExpr = $db->expr(
366 'el_to_domain_index',
368 new LikeValue( $index1, $db->anyString() )
371 foreach ( $domainGaps[$index1] ?? [] as $from => $to ) {
372 $thisDomainExpr = $thisDomainExpr->andExpr( $db->expr(
'el_id',
'<', $from )->or(
'el_id',
'>', $to ) );
374 $domainConditions[] = $thisDomainExpr;
376 if ( !$domainConditions ) {
381 if ( $trimmedlikePath[count( $trimmedlikePath ) - 1] instanceof
LikeMatch ) {
382 array_pop( $trimmedlikePath );
384 $index2 = implode(
'', $trimmedlikePath );
388 $db->expr(
'el_to_path', IExpression::LIKE,
new LikeValue( $index2, $db->anyString() ) ),
396 if ( $protocol && !in_array( $protocol, $urlProtocols ) ) {
397 foreach ( $urlProtocols as $p ) {
398 if ( str_starts_with( $p, $protocol ) ) {
414 foreach ( $urlProtocols as $p ) {
416 $protocols[] = substr( $p, 0, strpos( $p,
':' ) );
/**
 * Build the LIKE pattern parts for a filter entry, split into a domain
 * part and a path part.
 *
 * NOTE(review): several lines are missing from this excerpt, including the
 * definition of $services, the initialization of $likeDomain/$likePath,
 * the conditions around the anyString() wildcards and the body of the
 * final '*' check.
 *
 * @param string $filterEntry Filter entry; appended to $protocol to form the
 *  target URL that is parsed
 * @param string $protocol Protocol prefix
 * @return array The visible return is [ $likeDomain, $likePath ], each a
 *  list of literal strings and database wildcard tokens. Presumably another
 *  value is returned when a stray '*' remains — TODO confirm.
 */
435 public static function makeLikeArray( $filterEntry, $protocol =
'http://' ) {
437 $db = $services->getConnectionProvider()->getReplicaDatabase();
// Parse protocol + entry as one URL.
441 $target = $protocol . $filterEntry;
442 $bits = $services->getUrlUtils()->parse( $target );
// For mailto: and news: the parsed path is used in place of the host.
450 if ( in_array( $bits[
'scheme'], [
'mailto',
'news' ] ) ) {
451 $bits[
'host'] = $bits[
'path'];
// strpos() is tested for truthiness, so an '@' at offset 0 (empty local
// part) does not enter this branch.
456 if ( $bits[
'scheme'] ===
'mailto' && strpos( $bits[
'host'],
'@' ) ) {
458 $mailparts = explode(
'@', $bits[
'host'], 2 );
459 $domainpart = self::indexifyHost( $mailparts[1] );
460 if ( $mailparts[0] ===
'*' ) {
// Wildcard local part: keep only 'domain@' as the literal prefix.
462 $bits[
'host'] = $domainpart .
'@';
// Specific local part: 'domain@local'.
464 $bits[
'host'] = $domainpart .
'@' . $mailparts[0];
// Non-mail host: canonicalize the whole host.
468 $bits[
'host'] = self::indexifyHost( $bits[
'host'] );
469 if ( substr( $bits[
'host'], -3 ) ===
'.*.' ) {
// Remove the trailing '*.' and keep the final '.' before it.
471 $bits[
'host'] = substr( $bits[
'host'], 0, -2 );
// Literal domain prefix: scheme + delimiter + host.
475 $likeDomain[] = $bits[
'scheme'] . $bits[
'delimiter'] . $bits[
'host'];
// Database "match anything" token; the condition guarding this line is
// not visible in the excerpt.
478 $likeDomain[] = $db->anyString();
481 if ( isset( $bits[
'port'] ) ) {
482 $likeDomain[] =
':' . $bits[
'port'];
// Path pattern: literal path, then optional query and fragment.
484 if ( isset( $bits[
'path'] ) ) {
485 $likePath[] = $bits[
'path'];
489 if ( isset( $bits[
'query'] ) ) {
490 $likePath[] =
'?' . $bits[
'query'];
492 if ( isset( $bits[
'fragment'] ) ) {
493 $likePath[] =
'#' . $bits[
'fragment'];
// The path pattern always ends with a "match anything" token.
495 $likePath[] = $db->anyString();
// Scan all literal (non-LikeMatch) parts for a leftover '*'; the action
// taken when one is found is on a line not shown.
498 foreach ( array_merge( $likeDomain, $likePath ) as $likepart ) {
499 if ( !( $likepart instanceof
LikeMatch ) && strpos( $likepart,
'*' ) !==
false ) {
504 return [ $likeDomain, $likePath ];
516 if ( !is_array( $arr ) ) {
520 foreach ( $arr as $key => $value ) {
522 return array_slice( $arr, 0, $key + 1 );
Content object implementation for representing flat text.
Base interface for representing page content.