3 use \MediaWiki\MediaWikiServices;
// Replace every occurrence of the search string in $text with an ASCII '.'.
// NOTE(review): as transcribed here the search string and the replacement are
// the same character, which makes this call a no-op. The search string was
// presumably a look-alike dot (e.g. U+FF0E FULLWIDTH FULL STOP) that was lost
// in transcoding -- confirm against the upstream file.
28 $text = str_replace(
'.',
'.', $text );
/**
 * Check a list of links against the blacklist regexes.
 *
 * NOTE(review): only part of this method is visible in this excerpt (lines
 * are missing between the statements below and the closing brace is not
 * shown). The comments describe the visible statements only.
 *
 * @param array $links Links to check
 * @param Title|null $title Defaults to null; its use is not visible here
 * @param bool $preventLog When truthy, the block guarded by `!$preventLog`
 *   below is skipped (presumably the hit logging -- confirm)
 * @param string $mode Compared against 'check' (the default) and 'stash'
 * @return mixed NOTE(review): the return statement is not visible; $retVal is
 *   compared with false and otherwise handled as an array of matches, so it
 *   is presumably what gets returned -- confirm
 */
45 public function filter( array $links,
Title $title =
null, $preventLog =
false, $mode =
'check' ) {
// Stats client used for the check-stash hit / miss / store counters below.
46 $statsd = MediaWikiServices::getInstance()->getStatsdDataFactory();
// SHA-1 digest of the newline-joined link list. The enclosing call is not
// shown; presumably this is one component of the cache key $key -- confirm.
58 sha1( implode(
"\n", $links ) ),
// Value previously stored under $key, if any. The set() call near the end of
// this method stores time() there when nothing matched.
62 $knownNonMatchAsOf =
$cache->get( $key );
// 'check' mode: count whether a cached no-match result was found (hit) or
// not (miss). The statements between the two counters are not shown.
63 if ( $mode ===
'check' ) {
64 if ( $knownNonMatchAsOf ) {
65 $statsd->increment(
'spamblacklist.check-stash.hit' );
69 $statsd->increment(
'spamblacklist.check-stash.miss' );
// 'stash' mode: test whether the cached no-match timestamp is less than
// STASH_AGE_DYING seconds old (the action taken in that case is not shown).
71 } elseif ( $mode ===
'stash' ) {
72 if ( $knownNonMatchAsOf && ( time() - $knownNonMatchAsOf ) < self::STASH_AGE_DYING ) {
// Only do the matching work when at least one blacklist regex is present.
80 if ( count( $blacklists ) ) {
// Pass every link through $this->antiSpoof() before comparing.
82 $newLinks = array_map( [ $this,
'antiSpoof' ], $links );
// Keep only the links that are in $newLinks but not in $oldLinks
// ($oldLinks is assigned on a line not shown).
87 $addedLinks = array_diff( $newLinks, $oldLinks );
// Alternative assignment (its guarding condition is not shown): treat every
// link as newly added.
90 $addedLinks = $newLinks;
// Debug trace of the old, new and added URL sets.
93 wfDebugLog(
'SpamBlacklist',
"Old URLs: " . implode(
', ', $oldLinks ) );
94 wfDebugLog(
'SpamBlacklist',
"New URLs: " . implode(
', ', $newLinks ) );
95 wfDebugLog(
'SpamBlacklist',
"Added URLs: " . implode(
', ', $addedLinks ) );
// From here on $links is a single string: the added URLs, one per line.
97 $links = implode(
"\n", $addedLinks );
99 # Strip whitelisted URLs from the match
100 if ( is_array( $whitelists ) ) {
// Debug trace listing the whitelist regexes about to be applied.
101 wfDebugLog(
'SpamBlacklist',
"Excluding whitelisted URLs from " . count( $whitelists ) .
102 " regexes: " . implode(
', ', $whitelists ) .
"\n" );
103 foreach ( $whitelists as $regex ) {
// Warnings are suppressed around preg_replace(), presumably so that a
// malformed regex does not emit PHP warnings -- confirm.
104 Wikimedia\suppressWarnings();
// Remove whatever this whitelist regex matches from the link text.
105 $newLinks = preg_replace( $regex,
'', $links );
106 Wikimedia\restoreWarnings();
// preg_replace() returns null on error, so only a string result is accepted
// (the body of this branch is not shown).
107 if ( is_string( $newLinks ) ) {
// Debug trace listing the blacklist regexes about to be applied.
115 wfDebugLog(
'SpamBlacklist',
"Checking text against " . count( $blacklists ) .
116 " regexes: " . implode(
', ', $blacklists ) .
"\n" );
118 foreach ( $blacklists as $regex ) {
119 Wikimedia\suppressWarnings();
// $check is true when the regex matched at least once; $matches receives
// the captures.
121 $check = ( preg_match_all( $regex, $links,
$matches ) > 0 );
122 Wikimedia\restoreWarnings();
// Build a variant of the regex that also consumes the rest of the line:
// keep everything before the last '/' in $regex, then append '.*' and the
// closing delimiter with the S, i and m modifiers.
128 $fullLineRegex = substr( $regex, 0, strrpos( $regex,
'/' ) ) .
'.*/Sim';
// $fullUrls[0] holds the full-line matches, $fullUrls[1] the first capture
// group of each match.
129 preg_match_all( $fullLineRegex, $links, $fullUrls );
130 $imploded = implode(
' ', $fullUrls[0] );
// $ip is assigned on a line not shown in this excerpt.
131 wfDebugLog(
'SpamBlacklistHit',
"$ip caught submitting spam: $imploded\n" );
// Entered only when the caller did not ask to prevent logging (body not
// shown).
132 if ( !$preventLog ) {
// First hit: $retVal is still false at this point. Presumably it is turned
// into an array on a line not shown, since it is merged as one right after.
135 if ( $retVal ===
false ) {
// Accumulate the first-capture-group matches of this regex.
138 $retVal = array_merge( $retVal, $fullUrls[1] );
// Drop duplicate entries from the collected matches.
141 if ( is_array( $retVal ) ) {
142 $retVal = array_unique( $retVal );
// Nothing matched: remember that by caching the current Unix time under
// $key with a STASH_TTL lifetime, and count the store in 'stash' mode.
148 if ( $retVal ===
false ) {
150 $cache->set( $key, time(), self::STASH_TTL );
151 if ( $mode ===
'stash' ) {
152 $statsd->increment(
'spamblacklist.check-stash.store' );
// NOTE(review): the enclosing method header is not visible in this excerpt.
// Use the main WAN object cache for this lookup.
168 $cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
// Return the value for the key below, using the callback to produce it.
170 return $cache->getWithSetCallback(
// The key includes the title's latest revision ID, so a different revision
// maps to a different key.
172 $cache->makeKey(
'external-link-list',
$title->getLatestRevID() ),
// Callback; $title and $fname are captured from the enclosing scope.
174 function ( $oldValue, &$ttl, array &$setOpts ) use (
$title, $fname ) {
// Add the cache-set options derived from the DB handle $dbr ($dbr is
// obtained on a line not shown).
176 $setOpts += Database::getCacheSetOptions(
$dbr );
// Select field values for rows whose el_from equals the title's article ID
// (the table and field arguments are not shown).
178 return $dbr->selectFieldValues(
181 [
'el_from' =>
$title->getArticleID() ],
// Opening part of a regex: an optional "http:" or "https:" scheme, two or
// more slashes, any run of the characters a-z, 0-9, "_", "-" and ".", and
// then an opening parenthesis that starts a capture group left unclosed here.
// NOTE(review): the enclosing method header is not visible in this excerpt.
198 return '/(?:https?:)?\/\/+[a-z0-9_\-.]*(';
// Prefix the parent class's regex ending for this batch size with a closing
// parenthesis (presumably closing the capture group opened by the regex
// start -- confirm).
// NOTE(review): the enclosing method header is not visible in this excerpt.
208 return ')' . parent::getRegexEnd( $batchSize );
// NOTE(review): the enclosing method header is not visible in this excerpt,
// and several lines between the statements below are missing.
// The performer comes from the global $wgUser; $wgLogSpamBlacklistHits
// switches the logging on or off.
219 global $wgUser, $wgLogSpamBlacklistHits;
// Nothing below runs unless $wgLogSpamBlacklistHits is truthy.
220 if ( $wgLogSpamBlacklistHits ) {
// Fill in the performer, the target title and the parameters of the log
// entry ($logEntry is created on a line not shown; the parameter array's
// contents are not shown either).
222 $logEntry->setPerformer( $wgUser );
223 $logEntry->setTarget(
$title );
224 $logEntry->setParameters( [
// Store the entry and keep the ID returned by insert().
227 $logid = $logEntry->insert();
// Check whether the 'spamblacklist' log type is restricted.
228 $log =
new LogPage(
'spamblacklist' );
229 if ( $log->isRestricted() ) {
// Continuation of a condition whose first part is not shown: the
// CheckUserHooks class must exist before it is called below.
233 && class_exists( CheckUserHooks::class )
// Fetch the recent-change object for the stored entry and pass it to
// CheckUserHooks::updateCheckUserData().
235 $rc = $logEntry->getRecentChange( $logid );
236 CheckUserHooks::updateCheckUserData( $rc );
// Publish the stored entry, passing "rc" as the publish target
// (which branch this belongs to is not visible here).
241 $logEntry->publish( $logid,
"rc" );