MediaWiki  1.31.0
BaseBlacklist.php
Go to the documentation of this file.
1 <?php
2 
6 abstract class BaseBlacklist {
/**
 * Array of blacklist sources.
 *
 * Each entry is interpreted by buildSharedBlacklists(): "DB: <wiki> <page>",
 * an http(s) or protocol-relative URL, or otherwise a local file path.
 *
 * @var array
 */
public $files = [];

/**
 * Array containing regexes to test against.
 *
 * Stays false until getBlacklists() has computed and memoised the list.
 *
 * @var bool|array
 */
protected $regexes = false;

/**
 * Upper bound passed to mt_rand( 0, ... ) in getHttpText(). Once the
 * warning key has expired, a request refreshes the HTTP copy only when
 * that draw comes out as 0.
 *
 * @var int
 */
public $warningChance = 100;

/**
 * Lifetime, in seconds, of the warning key written by getHttpText().
 *
 * @var int
 */
public $warningTime = 600;

/**
 * Lifetime, in seconds, of the cached HTTP text and of the cached regex
 * lists (local, whitelist and shared).
 *
 * @var int
 */
public $expiryTime = 900;

/**
 * Array containing blacklists that extend BaseBlacklist,
 * as a map of type code => class name.
 *
 * @var array
 */
private static $blacklistTypes = [
	'spam' => 'SpamBlacklist',
	'email' => 'EmailBlacklist',
];

/**
 * Array of blacklist instances, keyed by type code.
 *
 * @var array
 */
private static $instances = [];
54 
/**
 * Constructor.
 *
 * Every entry of $settings is copied onto the instance as a property of
 * the same name, which lets configuration override $files, $expiryTime, etc.
 *
 * @param array $settings Map of property name => value
 */
function __construct( $settings = [] ) {
	foreach ( $settings as $property => $setting ) {
		$this->{$property} = $setting;
	}
}
65 
/**
 * Run the blacklist check implemented by the concrete subclass.
 *
 * NOTE(review): the meaning of the arguments and of the return value is
 * defined by the subclasses, which are not part of this file - confirm there.
 *
 * @param array $links
 * @param Title $title
 * @param bool $preventLog
 * @return mixed
 */
abstract public function filter( array $links, Title $title, $preventLog = false );
73 
/**
 * Adds a blacklist class to the registry.
 *
 * An existing registration for the same type code is overwritten.
 *
 * @param string $type Type code used as the registry key
 * @param string $class Name of the class to instantiate for that type
 */
public static function addBlacklistType( $type, $class ) {
	self::$blacklistTypes[$type] = $class;
}
83 
/**
 * Return the array of blacklist types currently defined.
 *
 * @return array Map of type code => class name
 */
public static function getBlacklistTypes() {
	$registered = self::$blacklistTypes;

	return $registered;
}
92 
/**
 * Shortcut for getInstance( 'spam' ).
 *
 * @return BaseBlacklist Instance registered under the 'spam' type
 */
public static function getSpamBlacklist() {
	$spamBlacklist = self::getInstance( 'spam' );

	return $spamBlacklist;
}
99 
/**
 * Shortcut for getInstance( 'email' ).
 *
 * @return BaseBlacklist Instance registered under the 'email' type
 */
public static function getEmailBlacklist() {
	$emailBlacklist = self::getInstance( 'email' );

	return $emailBlacklist;
}
106 
/**
 * Returns an instance of the given blacklist.
 *
 * Instances are created lazily, once per type, and constructed with
 * $wgBlacklistSettings[$type] as their settings.
 *
 * @param string $type Code for the blacklist
 * @return BaseBlacklist
 * @throws Exception When $type has not been registered
 */
public static function getInstance( $type ) {
	global $wgBlacklistSettings;

	if ( !isset( self::$blacklistTypes[$type] ) ) {
		throw new Exception( "Invalid blacklist type '$type' passed to " . __METHOD__ );
	}

	// Already constructed during this request: hand the same object back.
	if ( isset( self::$instances[$type] ) ) {
		return self::$instances[$type];
	}

	// Make sure the settings slot exists so reading it below raises no notice.
	if ( !isset( $wgBlacklistSettings[$type] ) ) {
		$wgBlacklistSettings[$type] = [];
	}

	$blacklistClass = self::$blacklistTypes[$type];
	$instance = new $blacklistClass( $wgBlacklistSettings[$type] );
	self::$instances[$type] = $instance;

	return $instance;
}
134 
/**
 * Returns the code for the blacklist implementation.
 *
 * The code is interpolated into cache keys, message names
 * ("<code>-blacklist", "<code>-whitelist") and debug log lines.
 *
 * @return string
 */
abstract protected function getBlacklistType();
141 
/**
 * Check if the given local page title is a spam regex source.
 *
 * A title counts as a source when it is one of the
 * "MediaWiki:<Type>-blacklist" / "MediaWiki:<Type>-whitelist" pages of any
 * registered type, or when one of the configured 'files' entries points at
 * it, either as "DB: <this wiki> <prefixed title>" or as its action=raw URL.
 *
 * @param Title $title
 * @return bool
 */
public static function isLocalSource( Title $title ) {
	global $wgDBname, $wgBlacklistSettings;

	if ( $title->getNamespace() == NS_MEDIAWIKI ) {
		$sources = [];
		foreach ( self::$blacklistTypes as $type => $class ) {
			$type = ucfirst( $type );
			// Append explicitly: the array union operator (+=) keeps the existing
			// value for numeric keys 0 and 1, so it would drop every type after
			// the first one.
			$sources[] = "$type-blacklist";
			$sources[] = "$type-whitelist";
		}

		if ( in_array( $title->getDBkey(), $sources, true ) ) {
			return true;
		}
	}

	$thisHttp = wfExpandUrl( $title->getFullUrl( 'action=raw' ), PROTO_HTTP );
	$thisHttpRegex = '/^' . preg_quote( $thisHttp, '/' ) . '(?:&.*)?$/';

	$files = [];
	foreach ( self::$blacklistTypes as $type => $class ) {
		if ( isset( $wgBlacklistSettings[$type]['files'] ) ) {
			// array_merge() rather than +=, for the same reason as above: the
			// file lists are plain lists whose numeric keys collide.
			$files = array_merge( $files, (array)$wgBlacklistSettings[$type]['files'] );
		}
	}

	foreach ( $files as $fileName ) {
		$matches = [];
		// Same "DB: <wiki> <page>" pattern as buildSharedBlacklists(), which
		// also accepts hyphens in the wiki name.
		if ( preg_match( '/^DB: ([\w-]*) (.*)$/', $fileName, $matches ) ) {
			if ( $wgDBname == $matches[1] ) {
				if ( $matches[2] == $title->getPrefixedDbKey() ) {
					// Local DB fetch of this page...
					return true;
				}
			}
		} elseif ( preg_match( $thisHttpRegex, $fileName ) ) {
			// Raw view of this page
			return true;
		}
	}

	return false;
}
193 
/**
 * Returns the type of blacklist from the given title.
 *
 * The title's DB key is searched for "<Type>-blacklist" or
 * "<Type>-whitelist", where <Type> is a registered type code with its first
 * letter upper-cased by the content language.
 *
 * @param Title $title
 * @return bool|string Lower-cased type code, or false when nothing matches
 */
public static function getTypeFromTitle( Title $title ) {
	// The content language object is a global; without this declaration
	// $wgContLang would be an undefined local variable.
	global $wgContLang;

	$types = array_map( [ $wgContLang, 'ucfirst' ], array_keys( self::$blacklistTypes ) );
	// Type codes can be registered by extensions, so escape them before
	// splicing them into the pattern.
	$quotedTypes = array_map(
		function ( $type ) {
			return preg_quote( $type, '/' );
		},
		$types
	);
	$regex = '/(' . implode( '|', $quotedTypes ) . ')-(?:blacklist|whitelist)/';

	$m = [];
	if ( preg_match( $regex, $title->getDBkey(), $m ) ) {
		return strtolower( $m[1] );
	}

	return false;
}
213 
/**
 * Fetch local and (possibly cached) remote blacklists.
 *
 * The merged list is memoised on the instance, so the lookups run at most
 * once per object.
 *
 * @return array Local regexes followed by shared regexes
 */
function getBlacklists() {
	if ( $this->regexes !== false ) {
		return $this->regexes;
	}

	$local = $this->getLocalBlacklists();
	$shared = $this->getSharedBlacklists();
	$this->regexes = array_merge( $local, $shared );

	return $this->regexes;
}
227 
/**
 * Returns the local blacklist.
 *
 * The regexes are built from the "<type>-blacklist" message and cached in
 * the main WAN cache for $expiryTime seconds.
 *
 * @return array Regular expressions
 */
public function getLocalBlacklists() {
	$blacklist = $this;
	$type = $this->getBlacklistType();

	$cache = ObjectCache::getMainWANInstance();
	$cacheKey = wfMemcKey( 'spamblacklist', $type, 'blacklist-regex' );
	// Only runs when the cache has no value for the key.
	$regenerate = function () use ( $blacklist, $type ) {
		return SpamRegexBatch::regexesFromMessage( "{$type}-blacklist", $blacklist );
	};

	return $cache->getWithSetCallback( $cacheKey, $this->expiryTime, $regenerate );
}
245 
/**
 * Returns the (local) whitelist.
 *
 * The regexes are built from the "<type>-whitelist" message and cached in
 * the main WAN cache for $expiryTime seconds.
 *
 * @return array Regular expressions
 */
public function getWhitelists() {
	$blacklist = $this;
	$type = $this->getBlacklistType();

	$cache = ObjectCache::getMainWANInstance();
	$cacheKey = wfMemcKey( 'spamblacklist', $type, 'whitelist-regex' );
	// Only runs when the cache has no value for the key.
	$regenerate = function () use ( $blacklist, $type ) {
		return SpamRegexBatch::regexesFromMessage( "{$type}-whitelist", $blacklist );
	};

	return $cache->getWithSetCallback( $cacheKey, $this->expiryTime, $regenerate );
}
263 
/**
 * Fetch (possibly cached) remote blacklists.
 *
 * Returns an empty list when no sources are configured in $files. Otherwise
 * the regexes come from the main WAN cache, and are rebuilt through
 * buildSharedBlacklists() when the cache holds no value.
 *
 * @return array Regular expressions
 */
function getSharedBlacklists() {
	$listType = $this->getBlacklistType();

	wfDebugLog( 'SpamBlacklist', "Loading $listType regex..." );

	if ( !count( $this->files ) ) {
		// Nothing configured, so there is nothing to load.
		wfDebugLog( 'SpamBlacklist', "no files specified\n" );
		return [];
	}

	// Flipped by the callback, so we can tell afterwards whether the value
	// came out of the cache or had to be rebuilt.
	$rebuilt = false;
	$blacklist = $this;
	$rebuild = function () use ( $blacklist, &$rebuilt ) {
		$rebuilt = true;
		return $blacklist->buildSharedBlacklists();
	};

	$cache = ObjectCache::getMainWANInstance();
	// wfMemcKey() makes a key for the local wiki.
	$cacheKey = wfMemcKey( 'spamblacklist', $listType, 'shared-blacklist-regex' );
	$regexes = $cache->getWithSetCallback( $cacheKey, $this->expiryTime, $rebuild );

	if ( $rebuilt === false ) {
		wfDebugLog( 'SpamBlacklist', "Got shared spam regexes from cache\n" );
	}

	return $regexes;
}
299 
/**
 * Clear all primary blacklist cache keys.
 *
 * Deletes the three WAN cache entries written by getSharedBlacklists(),
 * getLocalBlacklists() and getWhitelists() for this blacklist type. The
 * per-instance memo in $regexes is left untouched.
 */
function clearCache() {
	$listType = $this->getBlacklistType();

	// Same cache the getters write to; without this assignment $cache would
	// be undefined and the delete() calls below would fail.
	$cache = ObjectCache::getMainWANInstance();
	$cache->delete( wfMemcKey( 'spamblacklist', $listType, 'shared-blacklist-regex' ) );
	$cache->delete( wfMemcKey( 'spamblacklist', $listType, 'blacklist-regex' ) );
	$cache->delete( wfMemcKey( 'spamblacklist', $listType, 'whitelist-regex' ) );

	wfDebugLog( 'SpamBlacklist', "$listType blacklist local cache cleared.\n" );
}
315 
/**
 * Build the shared regex list from every source configured in $files.
 *
 * Each source is one of:
 *  - "DB: <wiki> <page>": read through getArticleText()
 *  - an http://, https:// or protocol-relative URL: read through getHttpText()
 *  - anything else: treated as a local file path
 *
 * Called from the cache callback in getSharedBlacklists().
 *
 * @return array Regular expressions
 */
function buildSharedBlacklists() {
	$regexes = [];
	$listType = $this->getBlacklistType();
	# Load lists
	wfDebugLog( 'SpamBlacklist', "Constructing $listType blacklist\n" );
	foreach ( $this->files as $fileName ) {
		$matches = [];
		if ( preg_match( '/^DB: ([\w-]*) (.*)$/', $fileName, $matches ) ) {
			$text = $this->getArticleText( $matches[1], $matches[2] );
		} elseif ( preg_match( '/^(https?:)?\/\//', $fileName ) ) {
			$text = $this->getHttpText( $fileName );
		} else {
			$text = file_get_contents( $fileName );
			wfDebugLog( 'SpamBlacklist', "got from file $fileName\n" );
		}

		// All three loaders signal failure with false; such a source
		// contributes no regexes, so skip it instead of handing a
		// non-string to the regex builder.
		if ( $text === false ) {
			wfDebugLog( 'SpamBlacklist', "Skipping unreadable $listType blacklist source $fileName\n" );
			continue;
		}

		// Build a separate batch of regexes from each source.
		// While in theory we could squeeze a little efficiency
		// out of combining multiple sources in one regex, if
		// there's a bad line in one of them we'll gain more
		// from only having to break that set into smaller pieces.
		$regexes = array_merge( $regexes,
			SpamRegexBatch::regexesFromText( $text, $this, $fileName ) );
	}

	return $regexes;
}
343 
/**
 * Fetch a blacklist source over HTTP, going through $messageMemc.
 *
 * @param string $fileName URL of the blacklist source
 * @return string|bool Body of the response; false when the request failed
 */
function getHttpText( $fileName ) {
	// Both are globals; without this declaration they would be undefined
	// locals and the cache calls below would fail.
	global $wgDBname, $messageMemc;
	$listType = $this->getBlacklistType();

	# HTTP request
	# To keep requests to a minimum, we save results into $messageMemc, which is
	# similar to $wgMemc except almost certain to exist. By default, it is stored
	# in the database
	# There are two keys, when the warning key expires, a random thread will refresh
	# the real key. This reduces the chance of multiple requests under high traffic
	# conditions.
	$key = "{$listType}_blacklist_file:$fileName";
	$warningKey = "$wgDBname:{$listType}filewarning:$fileName";
	$httpText = $messageMemc->get( $key );
	$warning = $messageMemc->get( $warningKey );

	// Refetch when nothing usable is cached, or when the warning key is gone
	// and the random draw comes out as 0.
	if ( !is_string( $httpText ) || ( !$warning && !mt_rand( 0, $this->warningChance ) ) ) {
		wfDebugLog( 'SpamBlacklist', "Loading $listType blacklist from $fileName\n" );
		$httpText = Http::get( $fileName );
		if ( $httpText === false ) {
			wfDebugLog( 'SpamBlacklist', "Error loading $listType blacklist from $fileName\n" );
		}
		$messageMemc->set( $warningKey, 1, $this->warningTime );
		$messageMemc->set( $key, $httpText, $this->expiryTime );
	} else {
		wfDebugLog( 'SpamBlacklist', "Got $listType blacklist from HTTP cache for $fileName\n" );
	}
	return $httpText;
}
373 
/**
 * Fetch an article from this or another local MediaWiki database.
 *
 * The namespace ID and DB key are resolved with the current wiki's title
 * rules and then looked up in the database of $wiki.
 *
 * @param string $wiki Wiki (database) to read from
 * @param string $article Page title, as text
 * @return string|bool Page text; false when the title is invalid or no
 *   matching page row exists
 */
function getArticleText( $wiki, $article ) {
	wfDebugLog( 'SpamBlacklist',
		"Fetching {$this->getBlacklistType()} blacklist from '$article' on '$wiki'...\n" );

	$title = Title::newFromText( $article );
	if ( !$title ) {
		// Without a Title there is nothing to query for; report it as
		// "not found" instead of calling a method on null below.
		wfDebugLog( 'SpamBlacklist', "Invalid title '$article' given for wiki '$wiki'\n" );
		return false;
	}

	// Load all the relevant tables from the correct DB.
	// This assumes that old_text is the actual text or
	// that the external store system is at least unified.
	if ( is_callable( [ Revision::class, 'getQueryInfo' ] ) ) {
		$revQuery = Revision::getQueryInfo( [ 'page', 'text' ] );
	} else {
		// Fallback used when Revision::getQueryInfo() is not callable.
		$revQuery = [
			'tables' => [ 'revision', 'page', 'text' ],
			'fields' => array_merge(
				Revision::selectFields(),
				Revision::selectPageFields(),
				Revision::selectTextFields()
			),
			'joins' => [
				'text' => [ 'JOIN', 'old_id=rev_text_id' ]
			],
		];
	}
	$row = wfGetDB( DB_REPLICA, [], $wiki )->selectRow(
		$revQuery['tables'],
		$revQuery['fields'],
		[
			'page_namespace' => $title->getNamespace(), // assume NS IDs match
			'page_title' => $title->getDBkey(), // assume same case rules
		],
		__METHOD__,
		[],
		[ 'page' => [ 'JOIN', 'rev_id=page_latest' ] ] + $revQuery['joins']
	);

	return $row
		? ContentHandler::getContentText( Revision::newFromRow( $row )->getContent() )
		: false;
}
421 
/**
 * Returns the start of the regex for matches.
 *
 * The fragment opens the pattern with the "/" delimiter and allows any run
 * of the characters a-z, 0-9, "_", "-" and "." before what follows it.
 *
 * @return string
 */
public function getRegexStart() {
	$prefix = '/[a-z0-9_\-.]*';

	return $prefix;
}
430 
/**
 * Returns the end of the regex for matches.
 *
 * Closes the pattern and appends the modifiers: "Sim" when $batchSize is
 * positive, "im" otherwise.
 *
 * @param int $batchSize
 * @return string
 */
public function getRegexEnd( $batchSize ) {
	if ( $batchSize > 0 ) {
		return '/Sim';
	}

	return '/im';
}
440 
/**
 * Hook for subclasses; the base implementation does nothing.
 *
 * NOTE(review): presumably meant to pre-load whatever filter() needs for
 * the given entries - confirm against the subclasses.
 *
 * @param Title $title
 * @param array $entries
 */
public function warmCachesForFilter( Title $title, array $entries ) {
	// Intentionally empty: override in a subclass.
}
448 }
BaseBlacklist\getSpamBlacklist
static getSpamBlacklist()
Definition: BaseBlacklist.php:96
BaseBlacklist\getBlacklistTypes
static getBlacklistTypes()
Return the array of blacklist types currently defined.
Definition: BaseBlacklist.php:89
Title\newFromText
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:273
BaseBlacklist\getInstance
static getInstance( $type)
Returns an instance of the given blacklist.
Definition: BaseBlacklist.php:115
BaseBlacklist\getRegexEnd
getRegexEnd( $batchSize)
Returns the end of the regex for matches.
Definition: BaseBlacklist.php:437
BaseBlacklist\getBlacklists
getBlacklists()
Fetch local and (possibly cached) remote blacklists.
Definition: BaseBlacklist.php:219
BaseBlacklist\getHttpText
getHttpText( $fileName)
Definition: BaseBlacklist.php:344
captcha-old.count
count
Definition: captcha-old.py:249
BaseBlacklist\getBlacklistType
getBlacklistType()
Returns the code for the blacklist implementation.
BaseBlacklist\getArticleText
getArticleText( $wiki, $article)
Fetch an article from this or another local MediaWiki database.
Definition: BaseBlacklist.php:382
BaseBlacklist\$files
array $files
Array of blacklist sources.
Definition: BaseBlacklist.php:12
use
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to use
Definition: MIT-LICENSE.txt:10
BaseBlacklist\isLocalSource
static isLocalSource(Title $title)
Check if the given local page title is a spam regex source.
Definition: BaseBlacklist.php:148
$messageMemc
globals will be eliminated from MediaWiki replaced by an application object which would be passed to constructors Whether that would be an convenient solution remains to be but certainly PHP makes such object oriented programming models easier than they were in previous versions For the time being MediaWiki programmers will have to work in an environment with some global context At the time of globals were initialised on startup by MediaWiki of these were configuration which are documented in DefaultSettings php There is no comprehensive documentation for the remaining however some of the most important ones are listed below They are typically initialised either in index php or in Setup php For a description of the see design txt $wgTitle Title object created from the request URL $wgOut OutputPage object for HTTP response $wgUser User object for the user associated with the current request $wgLang Language object selected by user preferences $wgContLang Language object associated with the wiki being viewed $wgParser Parser object Parser extensions register their hooks here $wgRequest WebRequest to get request data $messageMemc
Definition: globals.txt:25
BaseBlacklist\addBlacklistType
static addBlacklistType( $type, $class)
Adds a blacklist class to the registry.
Definition: BaseBlacklist.php:80
BaseBlacklist\getTypeFromTitle
static getTypeFromTitle(Title $title)
Returns the type of blacklist from the given title.
Definition: BaseBlacklist.php:201
$name
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:302
SpamRegexBatch\regexesFromText
static regexesFromText( $source, BaseBlacklist $blacklist, $fileName=false)
Build a set of regular expressions from the given multiline input text, with empty lines and comments...
Definition: SpamRegexBatch.php:154
$revQuery
$revQuery
Definition: testCompression.php:51
wfDebugLog
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not.
Definition: GlobalFunctions.php:1075
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
Revision\selectTextFields
static selectTextFields()
Return the list of text fields that should be selected to read the revision text.
Definition: Revision.php:443
BaseBlacklist\$blacklistTypes
static array $blacklistTypes
Array containing blacklists that extend BaseBlacklist.
Definition: BaseBlacklist.php:43
Revision\getQueryInfo
static getQueryInfo( $options=[])
Return the tables, fields, and join conditions to be selected to create a new revision object.
Definition: Revision.php:492
wfMemcKey
wfMemcKey()
Make a cache key for the local wiki.
Definition: GlobalFunctions.php:2700
$title
namespace and then decline to actually register it file or subcat img or subcat $title
Definition: hooks.txt:934
$wgDBname
controlled by $wgMainCacheType controlled by $wgParserCacheType controlled by $wgMessageCacheType If you set CACHE_NONE to one of the three control default value for MediaWiki still create a but requests to it are no ops and we always fall through to the database If the cache daemon can t be it should also disable itself fairly smoothly By $wgMemc is used but when it is $parserMemc or $messageMemc this is mentioned $wgDBname
Definition: memcached.txt:96
wfGetDB
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
Definition: GlobalFunctions.php:2800
$matches
$matches
Definition: NoLocalSettings.php:24
$article
Using a hook running we can avoid having all this option specific stuff in our mainline code Using the function array $article
Definition: hooks.txt:77
files
c Accompany it with the information you received as to the offer to distribute corresponding source complete source code means all the source code for all modules it plus any associated interface definition files
Definition: COPYING.txt:157
BaseBlacklist\getLocalBlacklists
getLocalBlacklists()
Returns the local blacklist.
Definition: BaseBlacklist.php:233
BaseBlacklist
Base class for different kinds of blacklists.
Definition: BaseBlacklist.php:6
BaseBlacklist\clearCache
clearCache()
Clear all primary blacklist cache keys.
Definition: BaseBlacklist.php:305
SpamRegexBatch\regexesFromMessage
static regexesFromMessage( $message, BaseBlacklist $blacklist)
Build a set of regular expressions from a MediaWiki message.
Definition: SpamRegexBatch.php:167
global
when a variable name is used in a function, it is silently declared as a new local, masking the global
Definition: design.txt:93
DB_REPLICA
const DB_REPLICA
Definition: defines.php:25
BaseBlacklist\getSharedBlacklists
getSharedBlacklists()
Fetch (possibly cached) remote blacklists.
Definition: BaseBlacklist.php:268
Revision\selectPageFields
static selectPageFields()
Return the list of page fields that should be selected from page table.
Definition: Revision.php:456
Http\get
static get( $url, $options=[], $caller=__METHOD__)
Simple wrapper for Http::request( 'GET' )
Definition: Http.php:98
BaseBlacklist\$regexes
bool array $regexes
Array containing regexes to test against.
Definition: BaseBlacklist.php:19
$value
$value
Definition: styleTest.css.php:45
BaseBlacklist\warmCachesForFilter
warmCachesForFilter(Title $title, array $entries)
Definition: BaseBlacklist.php:445
BaseBlacklist\filter
filter(array $links, Title $title, $preventLog=false)
BaseBlacklist\$instances
static array $instances
Array of blacklist instances.
Definition: BaseBlacklist.php:53
Revision\newFromRow
static newFromRow( $row)
Definition: Revision.php:218
PROTO_HTTP
const PROTO_HTTP
Definition: Defines.php:220
Title
Represents a title within MediaWiki.
Definition: Title.php:39
BaseBlacklist\buildSharedBlacklists
buildSharedBlacklists()
Definition: BaseBlacklist.php:316
ContentHandler\getContentText
static getContentText(Content $content=null)
Convenience function for getting flat text from a Content object.
Definition: ContentHandler.php:79
$cache
$cache
Definition: mcc.php:33
ObjectCache\getMainWANInstance
static getMainWANInstance()
Get the main WAN cache object.
Definition: ObjectCache.php:380
BaseBlacklist\$warningChance
int $warningChance
Chance of receiving a warning when the filter is hit.
Definition: BaseBlacklist.php:26
BaseBlacklist\getEmailBlacklist
static getEmailBlacklist()
Definition: BaseBlacklist.php:103
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
BaseBlacklist\__construct
__construct( $settings=[])
Constructor.
Definition: BaseBlacklist.php:60
NS_MEDIAWIKI
const NS_MEDIAWIKI
Definition: Defines.php:73
class
you have access to all of the normal MediaWiki so you can get a DB use the etc For full docs on the Maintenance class
Definition: maintenance.txt:52
Revision\selectFields
static selectFields()
Return the list of revision fields that should be selected to create a new revision.
Definition: Revision.php:351
BaseBlacklist\$warningTime
int $warningTime
Definition: BaseBlacklist.php:31
BaseBlacklist\getWhitelists
getWhitelists()
Returns the (local) whitelist.
Definition: BaseBlacklist.php:251
wfExpandUrl
wfExpandUrl( $url, $defaultProto=PROTO_CURRENT)
Expand a potentially local URL to a fully-qualified URL.
Definition: GlobalFunctions.php:521
array
the array() calling protocol came about after MediaWiki 1.4rc1.
BaseBlacklist\getRegexStart
getRegexStart()
Returns the start of the regex for matches.
Definition: BaseBlacklist.php:427
$wgContLang
this class mediates it Skin Encapsulates a look and feel for the wiki All of the functions that render HTML and make choices about how to render it are here and are called from various other places when and is meant to be subclassed with other skins that may override some of its functions The User object contains a reference to a and so rather than having a global skin object we just rely on the global User and get the skin with $wgUser and also has some character encoding functions and other locale stuff The current user interface language is instantiated as and the content language as $wgContLang
Definition: design.txt:56
$type
$type
Definition: testCompression.php:48
BaseBlacklist\$expiryTime
int $expiryTime
Definition: BaseBlacklist.php:36