61 $regex = self::makeRegex( $filterEntry, $protocol );
62 return preg_match( $regex, $text );
75 private static function makeRegex( $filterEntry, $protocol ) {
76 $regex =
'!' . preg_quote( $protocol,
'!' );
77 if ( substr( $filterEntry, 0, 2 ) ==
'*.' ) {
78 $regex .=
'(?:[A-Za-z0-9.-]+\.|)';
79 $filterEntry = substr( $filterEntry, 2 );
81 $regex .= preg_quote( $filterEntry,
'!' ) .
'!Si';
91 return is_callable(
'idn_to_utf8' ) && defined(
'INTL_IDNA_VARIANT_UTS46' );
99 private static function indexifyHost( $host ) {
103 $host = rawurldecode( $host );
104 if ( $host !==
'' && self::supportsIDN() ) {
106 $tmp = idn_to_utf8( $host, IDNA_DEFAULT, INTL_IDNA_VARIANT_UTS46 );
107 if ( $tmp !==
false ) {
111 $okChars =
'a-zA-Z0-9\\-._~!$&\'()*+,;=';
114 $okChars .=
'\x80-\xf4';
116 $host = preg_replace_callback(
117 '<[^' . $okChars .
']>',
118 static function ( $m ) {
119 return rawurlencode( $m[0] );
125 if ( preg_match(
'/^\[([0-9a-f:*]+)\]$/', rawurldecode( $host ), $m ) ) {
127 if ( IPUtils::isValid( $ip ) ) {
128 return 'V6.' . implode(
'.', explode(
':', IPUtils::sanitizeIP( $ip ) ) ) .
'.';
130 if ( substr( $ip, -2 ) ===
':*' ) {
131 $cutIp = substr( $ip, 0, -2 );
132 if ( IPUtils::isValid(
"{$cutIp}::" ) ) {
134 $ct = count( explode(
':', $ip ) ) - 1;
136 implode(
'.', array_slice( explode(
':', IPUtils::sanitizeIP(
"{$cutIp}::" ) ), 0, $ct ) ) .
139 if ( IPUtils::isValid(
"{$cutIp}:1" ) ) {
142 substr( implode(
'.', explode(
':', IPUtils::sanitizeIP(
"{$cutIp}:1" ) ) ), 0, -1 ) .
150 if ( substr( $host, -1 ) ===
'.' ) {
151 $host = substr( $host, 0, -1 );
155 $b =
'(?:0*25[0-5]|0*2[0-4][0-9]|0*1[0-9][0-9]|0*[0-9]?[0-9])';
156 if ( preg_match(
"/^(?:{$b}\.){3}{$b}$|^(?:{$b}\.){1,3}\*$/", $host ) ) {
157 return 'V4.' . implode(
'.', array_map(
static function ( $v ) {
158 return $v ===
'*' ? $v : (int)$v;
159 }, explode(
'.', $host ) ) ) .
'.';
163 return implode(
'.', array_reverse( explode(
'.', $host ) ) ) .
'.';
188 if ( $bits[
'scheme'] ==
'mailto' ) {
189 $mailparts = explode(
'@', $bits[
'host'], 2 );
190 if ( count( $mailparts ) === 2 ) {
191 $domainpart = self::indexifyHost( $mailparts[1] );
196 $bits[
'host'] = $domainpart .
'@' . $mailparts[0];
198 $bits[
'host'] = self::indexifyHost( $bits[
'host'] );
202 $index = $bits[
'scheme'] . $bits[
'delimiter'] . $bits[
'host'];
204 if ( isset( $bits[
'port'] ) ) {
205 $index .=
':' . $bits[
'port'];
207 $index .= $bits[
'path'] ??
'/';
208 if ( isset( $bits[
'query'] ) ) {
209 $index .=
'?' . $bits[
'query'];
211 if ( isset( $bits[
'fragment'] ) ) {
212 $index .=
'#' . $bits[
'fragment'];
215 if ( $bits[
'scheme'] ==
'' ) {
216 return [
"http:$index",
"https:$index" ];
254 'protocol' =>
'http://',
255 'oneWildcard' =>
false,
261 $like = self::makeLikeArray( $filterEntry, $options[
'protocol'] );
262 if ( $like ===
false ) {
267 $trimmedLike = self::keepOneWildcard( $like );
268 if ( $options[
'oneWildcard'] ) {
269 $like = $trimmedLike;
271 if ( $trimmedLike[count( $trimmedLike ) - 1] instanceof
LikeMatch ) {
272 array_pop( $trimmedLike );
274 $index = implode(
'', $trimmedLike );
276 $p = $options[
'prefix'];
280 $l = strlen( $index );
285 "{$p}_index_60" => substr( $index, 0, 60 ),
286 "{$p}_index" . $db->buildLike( $like ),
293 "{$p}_index_60" . $db->buildLike( $index, $db->anyString() ),
294 "{$p}_index" . $db->buildLike( $like ),
310 public static function makeLikeArray( $filterEntry, $protocol =
'http://' ) {
314 $target = $protocol . $filterEntry;
321 if ( $bits[
'scheme'] ===
'mailto' && strpos( $bits[
'host'],
'@' ) ) {
323 $mailparts = explode(
'@', $bits[
'host'], 2 );
324 $domainpart = self::indexifyHost( $mailparts[1] );
325 if ( $mailparts[0] ===
'*' ) {
327 $bits[
'host'] = $domainpart .
'@';
329 $bits[
'host'] = $domainpart .
'@' . $mailparts[0];
333 $bits[
'host'] = self::indexifyHost( $bits[
'host'] );
334 if ( substr( $bits[
'host'], -3 ) ===
'.*.' ) {
336 $bits[
'host'] = substr( $bits[
'host'], 0, -2 );
340 $like[] = $bits[
'scheme'] . $bits[
'delimiter'] . $bits[
'host'];
343 $like[] = $db->anyString();
346 if ( isset( $bits[
'port'] ) ) {
347 $like[] =
':' . $bits[
'port'];
349 if ( isset( $bits[
'path'] ) ) {
350 $like[] = $bits[
'path'];
351 } elseif ( !$subdomains ) {
354 if ( isset( $bits[
'query'] ) ) {
355 $like[] =
'?' . $bits[
'query'];
357 if ( isset( $bits[
'fragment'] ) ) {
358 $like[] =
'#' . $bits[
'fragment'];
362 foreach ( $like as $likepart ) {
363 if ( !( $likepart instanceof
LikeMatch ) && strpos( $likepart,
'*' ) !==
false ) {
368 if ( !( $like[count( $like ) - 1] instanceof
LikeMatch ) ) {
370 $like[] = $db->anyString();
385 if ( !is_array( $arr ) ) {
389 foreach ( $arr as $key => $value ) {
391 return array_slice( $arr, 0, $key + 1 );
wfParseUrl( $url)
A parse_url() work-alike that avoids the built-in function's broken behavior.
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
Some functions to help implement an external link filter for spam control.
static makeIndexes( $url)
Converts a URL into a format for el_index.
static makeLikeArray( $filterEntry, $protocol='http://')
Make an array to be used for calls to Database::buildLike(), which will match the specified string.
static getQueryConditions( $filterEntry, array $options=[])
Return query conditions which will match the specified string.
static supportsIDN()
Indicate whether LinkFilter IDN support is available.
const VERSION
Increment this when makeIndexes output changes.
static matchEntry(Content $content, $filterEntry, $protocol='http://')
Check whether $content contains a link to $filterEntry.
static keepOneWildcard( $arr)
Filters an array returned by makeLikeArray(), removing everything after the first pattern placeholder.
static isUtf8( $value)
Test whether a string is valid UTF-8.
Content object implementation for representing flat text.
Base interface for content objects.