47 'spam' =>
'SpamBlacklist',
48 'email' =>
'EmailBlacklist',
64 foreach ( $settings as $name => $value ) {
65 $this->$name = $value;
84 self::$blacklistTypes[
$type] = $class;
119 if ( !isset( self::$blacklistTypes[
$type] ) ) {
120 throw new Exception(
"Invalid blacklist type '$type' passed to " . __METHOD__ );
123 if ( !isset( self::$instances[
$type] ) ) {
124 global $wgBlacklistSettings;
127 if ( !isset( $wgBlacklistSettings[
$type] ) ) {
128 $wgBlacklistSettings[
$type] = [];
131 $class = self::$blacklistTypes[
$type];
132 self::$instances[
$type] =
new $class( $wgBlacklistSettings[
$type] );
135 return self::$instances[
$type];
156 foreach ( self::$blacklistTypes as
$type => $class ) {
164 if ( in_array(
$title->getDBkey(), $sources ) ) {
170 $thisHttpRegex =
'/^' . preg_quote( $thisHttp,
'/' ) .
'(?:&.*)?$/';
173 foreach ( self::$blacklistTypes as
$type => $class ) {
174 if ( isset( $wgBlacklistSettings[
$type][
'files'] ) ) {
180 foreach (
$files as $fileName ) {
182 if ( preg_match(
'/^DB: (\w*) (.*)$/', $fileName,
$matches ) ) {
189 } elseif ( preg_match( $thisHttpRegex, $fileName ) ) {
206 $contLang = MediaWikiServices::getInstance()->getContentLanguage();
208 $types = array_map( [ $contLang,
'ucfirst' ], array_keys( self::$blacklistTypes ) );
209 $regex =
'/(' . implode(
'|', $types ) .
')-(?:blacklist|whitelist)/';
211 if ( preg_match( $regex,
$title->getDBkey(), $m ) ) {
212 return strtolower( $m[1] );
224 if ( $this->regexes ===
false ) {
225 $this->regexes = array_merge(
241 $cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
243 return $cache->getWithSetCallback(
244 $cache->makeKey(
'spamblacklist',
$type,
'blacklist-regex' ),
246 function () use ( $that,
$type ) {
260 $cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
262 return $cache->getWithSetCallback(
263 $cache->makeKey(
'spamblacklist',
$type,
'whitelist-regex' ),
265 function () use ( $that,
$type ) {
278 wfDebugLog(
'SpamBlacklist',
"Loading $listType regex..." );
280 if ( !$this->files ) {
282 wfDebugLog(
'SpamBlacklist',
"no files specified\n" );
289 $cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
293 $cache->makeKey(
'spamblacklist', $listType,
'shared-blacklist-regex' ),
295 function () use ( $that, &$miss ) {
297 return $that->buildSharedBlacklists();
302 wfDebugLog(
'SpamBlacklist',
"Got shared spam regexes from cache\n" );
314 $cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
315 $cache->delete(
$cache->makeKey(
'spamblacklist', $listType,
'shared-blacklist-regex' ) );
316 $cache->delete(
$cache->makeKey(
'spamblacklist', $listType,
'blacklist-regex' ) );
317 $cache->delete(
$cache->makeKey(
'spamblacklist', $listType,
'whitelist-regex' ) );
319 wfDebugLog(
'SpamBlacklist',
"$listType blacklist local cache cleared.\n" );
326 wfDebugLog(
'SpamBlacklist',
"Constructing $listType blacklist\n" );
327 foreach ( $this->files as $fileName ) {
329 if ( preg_match(
'/^DB: ([\w-]*) (.*)$/', $fileName,
$matches ) ) {
331 } elseif ( preg_match(
'/^(https?:)?\/\//', $fileName ) ) {
334 $text = file_get_contents( $fileName );
335 wfDebugLog(
'SpamBlacklist',
"got from file $fileName\n" );
357 # To keep requests to a minimum, we save results into $messageMemc, which is
358 # similar to $wgMemc except almost certain to exist. By default, it is stored
360 # There are two keys: when the warning key expires, a random thread will refresh
361 # the real key. This reduces the chance of multiple requests under high traffic
363 $key =
"{$listType}_blacklist_file:$fileName";
364 $warningKey =
"$wgDBname:{$listType}filewarning:$fileName";
368 if ( !is_string( $httpText ) || ( !$warning && !mt_rand( 0, $this->warningChance ) ) ) {
369 wfDebugLog(
'SpamBlacklist',
"Loading $listType blacklist from $fileName\n" );
370 $httpText = Http::get( $fileName );
371 if ( $httpText ===
false ) {
372 wfDebugLog(
'SpamBlacklist',
"Error loading $listType blacklist from $fileName\n" );
374 $messageMemc->set( $warningKey, 1, $this->warningTime );
375 $messageMemc->set( $key, $httpText, $this->expiryTime );
377 wfDebugLog(
'SpamBlacklist',
"Got $listType blacklist from HTTP cache for $fileName\n" );
391 "Fetching {$this->getBlacklistType()} blacklist from '$pagename' on '$wiki'...\n" );
393 $services = MediaWikiServices::getInstance();
396 $title = $services->getTitleParser()->parseTitle( $pagename );
397 $store = $services->getRevisionStoreFactory()->getRevisionStore( $wiki );
398 $rev = $store->getRevisionByTitle(
$title );
400 $content = $rev ? $rev->getContent( SlotRecord::MAIN ) :
null;
415 return '/[a-z0-9_\-.]*';
425 return ( $batchSize > 0 ) ?
'/Sim' :
'/im';
$wgDBname
Current wiki database name.
wfExpandUrl( $url, $defaultProto=PROTO_CURRENT)
Expand a potentially local URL to a fully-qualified URL.
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not.
Base class for different kinds of blacklists.
static array $blacklistTypes
Array containing blacklists that extend BaseBlacklist.
getLocalBlacklists()
Returns the local blacklist.
static getBlacklistTypes()
Return the array of blacklist types currently defined.
array $files
Array of blacklist sources.
static getEmailBlacklist()
__construct( $settings=[])
Constructor.
static getSpamBlacklist()
getBlacklists()
Fetch local and (possibly cached) remote blacklists.
clearCache()
Clear all primary blacklist cache keys.
filter(array $links, Title $title, $preventLog=false)
getWhitelists()
Returns the (local) whitelist.
getSharedBlacklists()
Fetch (possibly cached) remote blacklists.
getBlacklistType()
Returns the code for the blacklist implementation.
getRegexStart()
Returns the start of the regex for matches.
getArticleText( $wiki, $pagename)
Fetch an article from this or another local MediaWiki database.
static getInstance( $type)
Returns an instance of the given blacklist.
static getTypeFromTitle(Title $title)
Returns the type of blacklist from the given title.
bool|array $regexes
Array containing regexes to test against.
static isLocalSource(Title $title)
Check if the given local page title is a spam regex source.
static array $instances
Array of blacklist instances.
static addBlacklistType( $type, $class)
Adds a blacklist class to the registry.
int $warningChance
Chance of receiving a warning when the filter is hit.
getRegexEnd( $batchSize)
Returns the end of the regex for matches.
warmCachesForFilter(Title $title, array $entries)
static regexesFromMessage( $message, BaseBlacklist $blacklist)
Build a set of regular expressions from a MediaWiki message.
static regexesFromText( $source, BaseBlacklist $blacklist, $fileName=false)
Build a set of regular expressions from the given multiline input text, with empty lines and comments...
Content object implementation for representing flat text.
Represents a title within MediaWiki.