MediaWiki REL1_32
BaseBlacklist.php
Go to the documentation of this file.
1<?php
2
/**
 * Base class for different kinds of blacklists.
 *
 * Concrete blacklists are registered by type name (see addBlacklistType())
 * and obtained through getInstance(), which creates one instance per type.
 */
abstract class BaseBlacklist {
	/**
	 * Array of blacklist sources.
	 *
	 * buildSharedBlacklists() understands three forms of entry:
	 * "DB: <wiki> <page>", an HTTP(S) or protocol-relative URL, and anything
	 * else, which is read with file_get_contents().
	 *
	 * @var array
	 */
	public $files = [];

	/**
	 * Array containing regexes to test against; false until getBlacklists()
	 * has loaded them.
	 *
	 * @var bool|array
	 */
	protected $regexes = false;

	/**
	 * Upper bound handed to mt_rand( 0, ... ) in getHttpText(). Once the
	 * warning key has expired, a request re-fetches the remote list only
	 * when that random draw is 0.
	 *
	 * @var int
	 */
	public $warningChance = 100;

	/**
	 * Expiry passed to the cache when storing the warning key in
	 * getHttpText() (presumably seconds; confirm against the cache API).
	 *
	 * @var int
	 */
	public $warningTime = 600;

	/**
	 * Expiry passed to the cache when storing blacklist data
	 * (presumably seconds; confirm against the cache API).
	 *
	 * @var int
	 */
	public $expiryTime = 900;

	/**
	 * Array containing blacklists that extend BaseBlacklist,
	 * as a map of type name => class name.
	 *
	 * @var array
	 */
	private static $blacklistTypes = [
		'spam' => 'SpamBlacklist',
		'email' => 'EmailBlacklist',
	];

	/**
	 * Array of blacklist instances, keyed by type name.
	 *
	 * @var array
	 */
	private static $instances = [];

	/**
	 * Constructor.
	 *
	 * Every entry of $settings is copied onto the object as a property of
	 * the same name, overriding the defaults declared above.
	 *
	 * @param array $settings Map of property name => value
	 */
	public function __construct( $settings = [] ) {
		foreach ( $settings as $name => $value ) {
			$this->$name = $value;
		}
	}

	/**
	 * Run the blacklist against a set of entries. Implemented by subclasses;
	 * the meaning of the return value is defined there.
	 *
	 * @param array $links
	 * @param Title $title
	 * @param bool $preventLog
	 * @return mixed
	 */
	abstract public function filter( array $links, Title $title, $preventLog = false );

	/**
	 * Adds a blacklist class to the registry.
	 *
	 * @param string $type Type name used as the registry key
	 * @param string $class Name of the class implementing that type
	 */
	public static function addBlacklistType( $type, $class ) {
		self::$blacklistTypes[$type] = $class;
	}

	/**
	 * Return the array of blacklist types currently defined.
	 *
	 * @return array Map of type name => class name
	 */
	public static function getBlacklistTypes() {
		return self::$blacklistTypes;
	}

	/**
	 * Shortcut for getInstance( 'spam' ).
	 *
	 * @return BaseBlacklist
	 */
	public static function getSpamBlacklist() {
		return self::getInstance( 'spam' );
	}

	/**
	 * Shortcut for getInstance( 'email' ).
	 *
	 * @return BaseBlacklist
	 */
	public static function getEmailBlacklist() {
		return self::getInstance( 'email' );
	}

	/**
	 * Returns an instance of the given blacklist.
	 *
	 * The instance is created on first use from $wgBlacklistSettings[$type]
	 * and reused on later calls.
	 *
	 * @param string $type Registered blacklist type name
	 * @return BaseBlacklist
	 * @throws Exception If $type has not been registered
	 */
	public static function getInstance( $type ) {
		if ( !isset( self::$blacklistTypes[$type] ) ) {
			throw new Exception( "Invalid blacklist type '$type' passed to " . __METHOD__ );
		}

		if ( !isset( self::$instances[$type] ) ) {
			global $wgBlacklistSettings;

			// Prevent notices
			if ( !isset( $wgBlacklistSettings[$type] ) ) {
				$wgBlacklistSettings[$type] = [];
			}

			$class = self::$blacklistTypes[$type];
			self::$instances[$type] = new $class( $wgBlacklistSettings[$type] );
		}

		return self::$instances[$type];
	}

	/**
	 * Returns the code for the blacklist implementation.
	 *
	 * The code is used in cache keys, log messages and the names of the
	 * "<code>-blacklist" / "<code>-whitelist" messages.
	 *
	 * @return string
	 */
	abstract protected function getBlacklistType();

	/**
	 * Check if the given local page title is a spam regex source.
	 *
	 * A title counts as a source when it is one of the "<Type>-blacklist" /
	 * "<Type>-whitelist" pages in the MediaWiki namespace, or when one of
	 * the configured 'files' entries refers to it, either as a "DB:" entry
	 * for this wiki or as the action=raw URL of the page.
	 *
	 * @param Title $title
	 * @return bool
	 */
	public static function isLocalSource( Title $title ) {
		global $wgDBname, $wgBlacklistSettings;

		if ( $title->getNamespace() == NS_MEDIAWIKI ) {
			$sources = [];
			foreach ( self::$blacklistTypes as $type => $class ) {
				$type = ucfirst( $type );
				// Append instead of using "+=": the array union keeps existing
				// numeric keys, so only the first type's pages were recorded.
				$sources[] = "$type-blacklist";
				$sources[] = "$type-whitelist";
			}

			if ( in_array( $title->getDBkey(), $sources, true ) ) {
				return true;
			}
		}

		$thisHttp = wfExpandUrl( $title->getFullUrl( 'action=raw' ), PROTO_HTTP );
		$thisHttpRegex = '/^' . preg_quote( $thisHttp, '/' ) . '(?:&.*)?$/';

		// Collect the files of every type. Appending one by one avoids the
		// key collisions that "+=" causes between the per-type lists.
		$files = [];
		foreach ( self::$blacklistTypes as $type => $class ) {
			if ( isset( $wgBlacklistSettings[$type]['files'] ) ) {
				foreach ( $wgBlacklistSettings[$type]['files'] as $fileName ) {
					$files[] = $fileName;
				}
			}
		}

		foreach ( $files as $fileName ) {
			$matches = [];
			// Same "DB:" pattern as buildSharedBlacklists(), so wiki names
			// containing a hyphen are recognised here as well.
			if ( preg_match( '/^DB: ([\w-]*) (.*)$/', $fileName, $matches ) ) {
				if ( $wgDBname === $matches[1]
					&& $matches[2] === $title->getPrefixedDbKey()
				) {
					// Local DB fetch of this page...
					return true;
				}
			} elseif ( preg_match( $thisHttpRegex, $fileName ) ) {
				// Raw view of this page
				return true;
			}
		}

		return false;
	}

	/**
	 * Returns the type of blacklist from the given title.
	 *
	 * @param Title $title
	 * @return bool|string Lower-cased type name, or false when the DB key
	 *  does not contain "<Type>-blacklist" or "<Type>-whitelist"
	 */
	public static function getTypeFromTitle( Title $title ) {
		global $wgContLang;

		$types = array_map( [ $wgContLang, 'ucfirst' ], array_keys( self::$blacklistTypes ) );
		$regex = '/(' . implode( '|', $types ) . ')-(?:blacklist|whitelist)/';

		if ( preg_match( $regex, $title->getDBkey(), $m ) ) {
			return strtolower( $m[1] );
		}

		return false;
	}

	/**
	 * Fetch local and (possibly cached) remote blacklists.
	 *
	 * The merged result is kept in $this->regexes, so later calls on the
	 * same object return it directly.
	 *
	 * @return array
	 */
	public function getBlacklists() {
		if ( $this->regexes === false ) {
			$this->regexes = array_merge(
				$this->getLocalBlacklists(),
				$this->getSharedBlacklists()
			);
		}

		return $this->regexes;
	}

	/**
	 * Returns the local blacklist, built from the "<type>-blacklist"
	 * message and cached for $this->expiryTime.
	 *
	 * @return array Regular expressions
	 */
	public function getLocalBlacklists() {
		$type = $this->getBlacklistType();
		$cache = ObjectCache::getMainWANInstance();

		return $cache->getWithSetCallback(
			$cache->makeKey( 'spamblacklist', $type, 'blacklist-regex' ),
			$this->expiryTime,
			function () use ( $type ) {
				return SpamRegexBatch::regexesFromMessage( "{$type}-blacklist", $this );
			}
		);
	}

	/**
	 * Returns the (local) whitelist, built from the "<type>-whitelist"
	 * message and cached for $this->expiryTime.
	 *
	 * @return array Regular expressions
	 */
	public function getWhitelists() {
		$type = $this->getBlacklistType();
		$cache = ObjectCache::getMainWANInstance();

		return $cache->getWithSetCallback(
			$cache->makeKey( 'spamblacklist', $type, 'whitelist-regex' ),
			$this->expiryTime,
			function () use ( $type ) {
				return SpamRegexBatch::regexesFromMessage( "{$type}-whitelist", $this );
			}
		);
	}

	/**
	 * Fetch (possibly cached) remote blacklists.
	 *
	 * @return array Regular expressions; empty when no files are configured
	 */
	public function getSharedBlacklists() {
		$listType = $this->getBlacklistType();

		wfDebugLog( 'SpamBlacklist', "Loading $listType regex..." );

		if ( count( $this->files ) === 0 ) {
			# No lists
			wfDebugLog( 'SpamBlacklist', "no files specified\n" );
			return [];
		}

		// Set by the callback below, which only runs when the value has to
		// be rebuilt instead of being served from the cache.
		$miss = false;

		$cache = ObjectCache::getMainWANInstance();
		$regexes = $cache->getWithSetCallback(
			// This used to be cached per-site, but that could be bad on a shared
			// server where not all wikis have the same configuration.
			$cache->makeKey( 'spamblacklist', $listType, 'shared-blacklist-regex' ),
			$this->expiryTime,
			function () use ( &$miss ) {
				$miss = true;
				return $this->buildSharedBlacklists();
			}
		);

		if ( !$miss ) {
			wfDebugLog( 'SpamBlacklist', "Got shared spam regexes from cache\n" );
		}

		return $regexes;
	}

	/**
	 * Clear all primary blacklist cache keys.
	 *
	 * Deletes the shared blacklist, local blacklist and whitelist keys of
	 * this blacklist type.
	 */
	public function clearCache() {
		$listType = $this->getBlacklistType();

		$cache = ObjectCache::getMainWANInstance();
		$cache->delete( $cache->makeKey( 'spamblacklist', $listType, 'shared-blacklist-regex' ) );
		$cache->delete( $cache->makeKey( 'spamblacklist', $listType, 'blacklist-regex' ) );
		$cache->delete( $cache->makeKey( 'spamblacklist', $listType, 'whitelist-regex' ) );

		wfDebugLog( 'SpamBlacklist', "$listType blacklist local cache cleared.\n" );
	}

	/**
	 * Build the shared blacklist regexes from every entry of $this->files.
	 *
	 * Called from the cache callback in getSharedBlacklists().
	 *
	 * @return array Regular expressions
	 */
	public function buildSharedBlacklists() {
		$regexes = [];
		$listType = $this->getBlacklistType();
		# Load lists
		wfDebugLog( 'SpamBlacklist', "Constructing $listType blacklist\n" );
		foreach ( $this->files as $fileName ) {
			$matches = [];
			if ( preg_match( '/^DB: ([\w-]*) (.*)$/', $fileName, $matches ) ) {
				$text = $this->getArticleText( $matches[1], $matches[2] );
			} elseif ( preg_match( '/^(https?:)?\/\//', $fileName ) ) {
				$text = $this->getHttpText( $fileName );
			} else {
				$text = file_get_contents( $fileName );
				wfDebugLog( 'SpamBlacklist', "got from file $fileName\n" );
			}

			// Build a separate batch of regexes from each source.
			// While in theory we could squeeze a little efficiency
			// out of combining multiple sources in one regex, if
			// there's a bad line in one of them we'll gain more
			// from only having to break that set into smaller pieces.
			$regexes = array_merge(
				$regexes,
				SpamRegexBatch::regexesFromText( $text, $this, $fileName )
			);
		}

		return $regexes;
	}

	/**
	 * Fetch a blacklist over HTTP, going through $messageMemc first.
	 *
	 * @param string $fileName URL of the remote list
	 * @return string|bool Text of the list; whatever Http::get() returned
	 *  (false on the error path logged below) when the fetch failed
	 */
	public function getHttpText( $fileName ) {
		global $wgDBname, $messageMemc;
		$listType = $this->getBlacklistType();

		# HTTP request
		# To keep requests to a minimum, we save results into $messageMemc, which is
		# similar to $wgMemc except almost certain to exist. By default, it is stored
		# in the database
		# There are two keys, when the warning key expires, a random thread will refresh
		# the real key. This reduces the chance of multiple requests under high traffic
		# conditions.
		$key = "{$listType}_blacklist_file:$fileName";
		$warningKey = "$wgDBname:{$listType}filewarning:$fileName";
		$httpText = $messageMemc->get( $key );
		$warning = $messageMemc->get( $warningKey );

		// Fetch when nothing usable is cached, or when the warning key is
		// gone and the random draw selects this request to do the refresh.
		if ( !is_string( $httpText ) || ( !$warning && !mt_rand( 0, $this->warningChance ) ) ) {
			wfDebugLog( 'SpamBlacklist', "Loading $listType blacklist from $fileName\n" );
			$httpText = Http::get( $fileName );
			if ( $httpText === false ) {
				wfDebugLog( 'SpamBlacklist', "Error loading $listType blacklist from $fileName\n" );
			}
			$messageMemc->set( $warningKey, 1, $this->warningTime );
			$messageMemc->set( $key, $httpText, $this->expiryTime );
		} else {
			wfDebugLog( 'SpamBlacklist', "Got $listType blacklist from HTTP cache for $fileName\n" );
		}

		return $httpText;
	}

	/**
	 * Fetch an article from this or another local MediaWiki database.
	 *
	 * @param string $wiki Wiki ID passed to wfGetDB()
	 * @param string $article Title text of the page to load
	 * @return string|bool Text of the page's latest revision, or false when
	 *  the title is invalid or no matching row exists
	 */
	public function getArticleText( $wiki, $article ) {
		wfDebugLog( 'SpamBlacklist',
			"Fetching {$this->getBlacklistType()} blacklist from '$article' on '$wiki'...\n" );

		$title = Title::newFromText( $article );
		if ( !$title ) {
			// No Title object for this text; report it like a missing page
			// instead of failing on the method calls below.
			return false;
		}

		// Load all the relevant tables from the correct DB.
		// This assumes that old_text is the actual text or
		// that the external store system is at least unified.
		if ( is_callable( [ Revision::class, 'getQueryInfo' ] ) ) {
			$revQuery = Revision::getQueryInfo( [ 'page', 'text' ] );
		} else {
			// Fallback for Revision classes without getQueryInfo()
			$revQuery = [
				'tables' => [ 'revision', 'page', 'text' ],
				'fields' => array_merge(
					Revision::selectFields(),
					Revision::selectPageFields(),
					Revision::selectTextFields()
				),
				'joins' => [
					'text' => [ 'JOIN', 'old_id=rev_text_id' ]
				],
			];
		}

		$row = wfGetDB( DB_REPLICA, [], $wiki )->selectRow(
			$revQuery['tables'],
			$revQuery['fields'],
			[
				'page_namespace' => $title->getNamespace(), // assume NS IDs match
				'page_title' => $title->getDBkey(), // assume same case rules
			],
			__METHOD__,
			[],
			[ 'page' => [ 'JOIN', 'rev_id=page_latest' ] ] + $revQuery['joins']
		);

		return $row
			? ContentHandler::getContentText( Revision::newFromRow( $row )->getContent() )
			: false;
	}

	/**
	 * Returns the start of the regex for matches.
	 *
	 * @return string
	 */
	public function getRegexStart() {
		return '/[a-z0-9_\-.]*';
	}

	/**
	 * Returns the end of the regex for matches.
	 *
	 * @param int $batchSize The "S" modifier is added only when this is
	 *  greater than zero
	 * @return string
	 */
	public function getRegexEnd( $batchSize ) {
		return ( $batchSize > 0 ) ? '/Sim' : '/im';
	}

	/**
	 * Hook for subclasses to pre-load caches before filtering; the base
	 * implementation does nothing.
	 *
	 * @param Title $title
	 * @param array $entries
	 */
	public function warmCachesForFilter( Title $title, array $entries ) {
		// subclass this
	}
}
c Accompany it with the information you received as to the offer to distribute corresponding source complete source code means all the source code for all modules it plus any associated interface definition files
Definition COPYING.txt:158
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
wfExpandUrl( $url, $defaultProto=PROTO_CURRENT)
Expand a potentially local URL to a fully-qualified URL.
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not.
$messageMemc
Definition Setup.php:788
$wgContLang
Definition Setup.php:809
Base class for different kinds of blacklists.
static array $blacklistTypes
Array containing blacklists that extend BaseBlacklist.
getLocalBlacklists()
Returns the local blacklist.
static getBlacklistTypes()
Return the array of blacklist types currently defined.
array $files
Array of blacklist sources.
static getEmailBlacklist()
__construct( $settings=[])
Constructor.
static getSpamBlacklist()
getBlacklists()
Fetch local and (possibly cached) remote blacklists.
clearCache()
Clear all primary blacklist cache keys.
filter(array $links, Title $title, $preventLog=false)
getWhitelists()
Returns the (local) whitelist.
getSharedBlacklists()
Fetch (possibly cached) remote blacklists.
getBlacklistType()
Returns the code for the blacklist implementation.
getRegexStart()
Returns the start of the regex for matches.
static getInstance( $type)
Returns an instance of the given blacklist.
getArticleText( $wiki, $article)
Fetch an article from this or another local MediaWiki database.
static getTypeFromTitle(Title $title)
Returns the type of blacklist from the given title.
bool|array $regexes
Array containing regexes to test against.
static isLocalSource(Title $title)
Check if the given local page title is a spam regex source.
static array $instances
Array of blacklist instances.
getHttpText( $fileName)
static addBlacklistType( $type, $class)
Adds a blacklist class to the registry.
int $warningChance
Chance of receiving a warning when the filter is hit.
getRegexEnd( $batchSize)
Returns the end of the regex for matches.
warmCachesForFilter(Title $title, array $entries)
static get( $url, $options=[], $caller=__METHOD__)
Simple wrapper for Http::request( 'GET' )
Definition Http.php:98
static selectTextFields()
Return the list of text fields that should be selected to read the revision text.
Definition Revision.php:472
static selectPageFields()
Return the list of page fields that should be selected from page table.
Definition Revision.php:485
static selectFields()
Return the list of revision fields that should be selected to create a new revision.
Definition Revision.php:352
static newFromRow( $row)
Definition Revision.php:218
static getQueryInfo( $options=[])
Return the tables, fields, and join conditions to be selected to create a new revision object.
Definition Revision.php:521
static regexesFromMessage( $message, BaseBlacklist $blacklist)
Build a set of regular expressions from a MediaWiki message.
static regexesFromText( $source, BaseBlacklist $blacklist, $fileName=false)
Build a set of regular expressions from the given multiline input text, with empty lines and comments...
Represents a title within MediaWiki.
Definition Title.php:39
const PROTO_HTTP
Definition Defines.php:219
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback function
Definition injection.txt:30
$cache
Definition mcc.php:33
controlled by the following MediaWiki still creates a BagOStuff but calls it to it are no ops If the cache daemon can t be it should also disable itself fairly $wgDBname
The wiki should then use memcached to cache various data To use multiple just add more items to the array To increase the weight of a make its entry a array("192.168.0.1:11211", 2))
const DB_REPLICA
Definition defines.php:25