MediaWiki  1.30.0
BaseBlacklist.php
Go to the documentation of this file.
1 <?php
2 
/**
 * Base class for different kinds of blacklists.
 *
 * Keeps a registry of blacklist types (type code => implementing class), hands
 * out one shared instance per type, and loads the blacklist/whitelist regexes
 * from local messages and from the configured shared sources, caching the
 * results.
 */
abstract class BaseBlacklist {
	/**
	 * Array of blacklist sources.
	 *
	 * Each entry is interpreted by buildSharedBlacklists():
	 *  - "DB: <wiki> <page title>" is read through getArticleText(),
	 *  - anything starting with "http://", "https://" or "//" is fetched
	 *    through getHttpText(),
	 *  - everything else is read with file_get_contents().
	 *
	 * @var array
	 */
	public $files = [];

	/**
	 * Array containing regexes to test against, or false while not loaded yet.
	 *
	 * @var bool|array
	 */
	protected $regexes = false;

	/**
	 * Upper bound handed to mt_rand( 0, $warningChance ) in getHttpText(); once
	 * the warning key has expired, a request refreshes the HTTP copy only when
	 * that call returns 0.
	 *
	 * @var int
	 */
	public $warningChance = 100;

	/**
	 * Lifetime, in seconds, of the "warning" cache key set in getHttpText().
	 *
	 * @var int
	 */
	public $warningTime = 600;

	/**
	 * Lifetime, in seconds, of the cached regex lists and of the cached HTTP text.
	 *
	 * @var int
	 */
	public $expiryTime = 900;

	/**
	 * Array containing blacklists that extend BaseBlacklist
	 * (type code => class name).
	 *
	 * @var array
	 */
	private static $blacklistTypes = [
		'spam' => 'SpamBlacklist',
		'email' => 'EmailBlacklist',
	];

	/**
	 * Array of blacklist instances, keyed by type code.
	 *
	 * @var array
	 */
	private static $instances = [];

	/**
	 * Constructor.
	 *
	 * Every key of $settings is assigned to the property of the same name.
	 *
	 * @param array $settings Property name => value
	 */
	public function __construct( $settings = [] ) {
		foreach ( $settings as $name => $value ) {
			$this->$name = $value;
		}
	}

	/**
	 * Filter entry point, implemented by each concrete blacklist.
	 *
	 * @param array $links
	 * @param Title $title
	 * @param bool $preventLog
	 * @return mixed Defined by the implementing class
	 */
	abstract public function filter( array $links, Title $title, $preventLog = false );

	/**
	 * Adds a blacklist class to the registry.
	 *
	 * An existing registration for the same type is overwritten.
	 *
	 * @param string $type Type code
	 * @param string $class Name of the class implementing the type
	 */
	public static function addBlacklistType( $type, $class ) {
		self::$blacklistTypes[$type] = $class;
	}

	/**
	 * Return the array of blacklist types currently defined.
	 *
	 * @return array Type code => class name
	 */
	public static function getBlacklistTypes() {
		return self::$blacklistTypes;
	}

	/**
	 * Returns an instance of the given blacklist.
	 *
	 * The instance is created on first use from $wgBlacklistSettings[$type] and
	 * reused afterwards.
	 *
	 * @param string $type Type code of the blacklist
	 * @return BaseBlacklist
	 * @throws Exception When $type has not been registered
	 */
	public static function getInstance( $type ) {
		if ( !isset( self::$blacklistTypes[$type] ) ) {
			throw new Exception( "Invalid blacklist type '$type' passed to " . __METHOD__ );
		}

		if ( !isset( self::$instances[$type] ) ) {
			global $wgBlacklistSettings;

			// Prevent notices
			if ( !isset( $wgBlacklistSettings[$type] ) ) {
				$wgBlacklistSettings[$type] = [];
			}

			$class = self::$blacklistTypes[$type];
			self::$instances[$type] = new $class( $wgBlacklistSettings[$type] );
		}

		return self::$instances[$type];
	}

	/**
	 * Returns the code for the blacklist implementation.
	 *
	 * @return string
	 */
	abstract protected function getBlacklistType();

	/**
	 * Check if the given local page title is a spam regex source.
	 *
	 * A title counts as a source when it is one of the "<Type>-blacklist" /
	 * "<Type>-whitelist" pages in the MediaWiki namespace, or when one of the
	 * configured 'files' entries of any registered type points at it, either as
	 * a "DB: <this wiki> <title>" entry or as the page's action=raw URL.
	 *
	 * @param Title $title
	 * @return bool
	 */
	public static function isLocalSource( Title $title ) {
		global $wgDBname, $wgBlacklistSettings;

		if ( $title->getNamespace() == NS_MEDIAWIKI ) {
			// Append rather than use the array union operator: the union keeps
			// existing integer keys, which silently discarded every type but
			// the first one.
			$sources = [];
			foreach ( self::$blacklistTypes as $type => $class ) {
				$type = ucfirst( $type );
				$sources[] = "$type-blacklist";
				$sources[] = "$type-whitelist";
			}

			if ( in_array( $title->getDBkey(), $sources, true ) ) {
				return true;
			}
		}

		$thisHttp = wfExpandUrl( $title->getFullUrl( 'action=raw' ), PROTO_HTTP );
		$thisHttpRegex = '/^' . preg_quote( $thisHttp, '/' ) . '(?:&.*)?$/';

		// Same integer-key problem as above, so the lists are merged, not unioned.
		$files = [];
		foreach ( self::$blacklistTypes as $type => $class ) {
			if ( isset( $wgBlacklistSettings[$type]['files'] ) ) {
				$files = array_merge( $files, (array)$wgBlacklistSettings[$type]['files'] );
			}
		}

		foreach ( $files as $fileName ) {
			$matches = [];
			// Same "DB:" syntax as buildSharedBlacklists(), so that wiki IDs
			// containing a hyphen are recognised here as well.
			if ( preg_match( '/^DB: ([\w-]*) (.*)$/', $fileName, $matches ) ) {
				// Strict comparison: loose comparison treats numeric-looking
				// strings such as '1e3' and '1000' as equal.
				if ( $wgDBname === $matches[1] && $matches[2] === $title->getPrefixedDbKey() ) {
					// Local DB fetch of this page...
					return true;
				}
			} elseif ( preg_match( $thisHttpRegex, $fileName ) ) {
				// Raw view of this page
				return true;
			}
		}

		return false;
	}

	/**
	 * Returns the type of blacklist from the given title.
	 *
	 * @param Title $title
	 * @return bool|string Lower-cased type code, or false when the title's DB key
	 *  does not contain "<Type>-blacklist" or "<Type>-whitelist"
	 */
	public static function getTypeFromTitle( Title $title ) {
		global $wgContLang;

		$types = array_map( [ $wgContLang, 'ucfirst' ], array_keys( self::$blacklistTypes ) );
		$regex = '/(' . implode( '|', $types ) . ')-(?:blacklist|whitelist)/';

		$m = [];
		if ( preg_match( $regex, $title->getDBkey(), $m ) ) {
			return strtolower( $m[1] );
		}

		return false;
	}

	/**
	 * Fetch local and (possibly cached) remote blacklists.
	 *
	 * The merged list is kept on the instance, so the lookup happens once per
	 * object.
	 *
	 * @return array Set of regular expressions, potentially empty
	 */
	public function getBlacklists() {
		if ( $this->regexes === false ) {
			$this->regexes = array_merge(
				$this->getLocalBlacklists(),
				$this->getSharedBlacklists()
			);
		}

		return $this->regexes;
	}

	/**
	 * Returns the local blacklist.
	 *
	 * Built from the "<type>-blacklist" message and cached for $expiryTime
	 * seconds.
	 *
	 * @return array Regular expressions
	 */
	public function getLocalBlacklists() {
		$type = $this->getBlacklistType();

		return ObjectCache::getMainWANInstance()->getWithSetCallback(
			wfMemcKey( 'spamblacklist', $type, 'blacklist-regex' ),
			$this->expiryTime,
			function () use ( $type ) {
				return SpamRegexBatch::regexesFromMessage( "{$type}-blacklist", $this );
			}
		);
	}

	/**
	 * Returns the (local) whitelist.
	 *
	 * Built from the "<type>-whitelist" message and cached for $expiryTime
	 * seconds.
	 *
	 * @return array Regular expressions
	 */
	public function getWhitelists() {
		$type = $this->getBlacklistType();

		return ObjectCache::getMainWANInstance()->getWithSetCallback(
			wfMemcKey( 'spamblacklist', $type, 'whitelist-regex' ),
			$this->expiryTime,
			function () use ( $type ) {
				return SpamRegexBatch::regexesFromMessage( "{$type}-whitelist", $this );
			}
		);
	}

	/**
	 * Fetch (possibly cached) remote blacklists.
	 *
	 * @return array Regular expressions; empty when no sources are configured
	 */
	public function getSharedBlacklists() {
		$listType = $this->getBlacklistType();

		wfDebugLog( 'SpamBlacklist', "Loading $listType regex..." );

		if ( count( $this->files ) == 0 ) {
			# No lists
			wfDebugLog( 'SpamBlacklist', "no files specified\n" );
			return [];
		}

		// Flipped by the callback below, which only runs when the value has to
		// be regenerated.
		$miss = false;

		$regexes = ObjectCache::getMainWANInstance()->getWithSetCallback(
			// This used to be cached per-site, but that could be bad on a shared
			// server where not all wikis have the same configuration.
			wfMemcKey( 'spamblacklist', $listType, 'shared-blacklist-regex' ),
			$this->expiryTime,
			function () use ( &$miss ) {
				$miss = true;
				return $this->buildSharedBlacklists();
			}
		);

		if ( !$miss ) {
			wfDebugLog( 'SpamBlacklist', "Got shared spam regexes from cache\n" );
		}

		return $regexes;
	}

	/**
	 * Clear all primary blacklist cache keys.
	 *
	 * Removes the shared-blacklist, local-blacklist and whitelist regex entries
	 * of this blacklist type.
	 */
	public function clearCache() {
		$listType = $this->getBlacklistType();

		$cache = ObjectCache::getMainWANInstance();
		$cache->delete( wfMemcKey( 'spamblacklist', $listType, 'shared-blacklist-regex' ) );
		$cache->delete( wfMemcKey( 'spamblacklist', $listType, 'blacklist-regex' ) );
		$cache->delete( wfMemcKey( 'spamblacklist', $listType, 'whitelist-regex' ) );

		wfDebugLog( 'SpamBlacklist', "$listType blacklist local cache cleared.\n" );
	}

	/**
	 * Build the regex list from every source listed in $files.
	 *
	 * @return array Regular expressions
	 */
	public function buildSharedBlacklists() {
		$regexes = [];
		$listType = $this->getBlacklistType();
		# Load lists
		wfDebugLog( 'SpamBlacklist', "Constructing $listType blacklist\n" );
		foreach ( $this->files as $fileName ) {
			$matches = [];
			if ( preg_match( '/^DB: ([\w-]*) (.*)$/', $fileName, $matches ) ) {
				$text = $this->getArticleText( $matches[1], $matches[2] );
			} elseif ( preg_match( '/^(https?:)?\/\//', $fileName ) ) {
				$text = $this->getHttpText( $fileName );
			} else {
				$text = file_get_contents( $fileName );
				wfDebugLog( 'SpamBlacklist', "got from file $fileName\n" );
			}

			// Build a separate batch of regexes from each source.
			// While in theory we could squeeze a little efficiency
			// out of combining multiple sources in one regex, if
			// there's a bad line in one of them we'll gain more
			// from only having to break that set into smaller pieces.
			$regexes = array_merge(
				$regexes,
				SpamRegexBatch::regexesFromText( $text, $this, $fileName )
			);
		}

		return $regexes;
	}

	/**
	 * Fetch a blacklist source over HTTP, going through $messageMemc first.
	 *
	 * @param string $fileName URL of the source
	 * @return string|bool The text, or whatever Http::get() returned on failure
	 */
	public function getHttpText( $fileName ) {
		global $wgDBname, $messageMemc;
		$listType = $this->getBlacklistType();

		# HTTP request
		# To keep requests to a minimum, we save results into $messageMemc, which is
		# similar to $wgMemc except almost certain to exist. By default, it is stored
		# in the database
		# There are two keys, when the warning key expires, a random thread will refresh
		# the real key. This reduces the chance of multiple requests under high traffic
		# conditions.
		$key = "{$listType}_blacklist_file:$fileName";
		$warningKey = "$wgDBname:{$listType}filewarning:$fileName";
		$httpText = $messageMemc->get( $key );
		$warning = $messageMemc->get( $warningKey );

		if ( !is_string( $httpText ) || ( !$warning && !mt_rand( 0, $this->warningChance ) ) ) {
			wfDebugLog( 'SpamBlacklist', "Loading $listType blacklist from $fileName\n" );
			$httpText = Http::get( $fileName );
			if ( $httpText === false ) {
				wfDebugLog( 'SpamBlacklist', "Error loading $listType blacklist from $fileName\n" );
			}
			$messageMemc->set( $warningKey, 1, $this->warningTime );
			$messageMemc->set( $key, $httpText, $this->expiryTime );
		} else {
			wfDebugLog( 'SpamBlacklist', "Got $listType blacklist from HTTP cache for $fileName\n" );
		}

		return $httpText;
	}

	/**
	 * Fetch an article from this or another local MediaWiki database.
	 *
	 * @param string $wiki Wiki ID handed to wfGetDB()
	 * @param string $article Page title text
	 * @return string|bool The page text, or false when no Title could be built
	 *  from $article or no matching row was found
	 */
	public function getArticleText( $wiki, $article ) {
		wfDebugLog( 'SpamBlacklist',
			"Fetching {$this->getBlacklistType()} blacklist from '$article' on '$wiki'...\n" );

		$title = Title::newFromText( $article );
		if ( !$title ) {
			// Without a Title object the query below cannot be built; report
			// the source as unavailable, like the "no row" case.
			wfDebugLog( 'SpamBlacklist', "Could not build a title from '$article'\n" );
			return false;
		}

		// Load all the relevant tables from the correct DB.
		// This assumes that old_text is the actual text or
		// that the external store system is at least unified.
		$row = wfGetDB( DB_SLAVE, [], $wiki )->selectRow(
			[ 'page', 'revision', 'text' ],
			array_merge(
				Revision::selectFields(),
				Revision::selectPageFields(),
				Revision::selectTextFields()
			),
			[
				'page_namespace' => $title->getNamespace(), // assume NS IDs match
				'page_title' => $title->getDBkey(), // assume same case rules
				'rev_id=page_latest',
				'old_id=rev_text_id'
			],
			__METHOD__
		);

		if ( !$row ) {
			return false;
		}

		return ContentHandler::getContentText( Revision::newFromRow( $row )->getContent() );
	}

	/**
	 * Returns the start of the regex for matches.
	 *
	 * @return string
	 */
	public function getRegexStart() {
		return '/[a-z0-9_\-.]*';
	}

	/**
	 * Returns the end of the regex for matches.
	 *
	 * @param int $batchSize The "S" modifier is only added when this is above zero
	 * @return string
	 */
	public function getRegexEnd( $batchSize ) {
		return ( $batchSize > 0 ) ? '/Sim' : '/im';
	}

	/**
	 * Hook for subclasses to warm caches ahead of filter(); no-op here.
	 *
	 * @param Title $title
	 * @param string[] $entries
	 */
	public function warmCachesForFilter( Title $title, array $entries ) {
		// subclass this
	}
}
BaseBlacklist\getBlacklistTypes
static getBlacklistTypes()
Return the array of blacklist types currently defined.
Definition: BaseBlacklist.php:89
Title\newFromText
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:268
BaseBlacklist\getInstance
static getInstance( $type)
Returns an instance of the given blacklist.
Definition: BaseBlacklist.php:100
BaseBlacklist\getRegexEnd
getRegexEnd( $batchSize)
Returns the end of the regex for matches.
Definition: BaseBlacklist.php:411
BaseBlacklist\getBlacklists
getBlacklists()
Fetch local and (possibly cached) remote blacklists.
Definition: BaseBlacklist.php:204
BaseBlacklist\getHttpText
getHttpText( $fileName)
Definition: BaseBlacklist.php:329
captcha-old.count
count
Definition: captcha-old.py:249
BaseBlacklist\getBlacklistType
getBlacklistType()
Returns the code for the blacklist implementation.
BaseBlacklist\getArticleText
getArticleText( $wiki, $article)
Fetch an article from this or another local MediaWiki database.
Definition: BaseBlacklist.php:367
BaseBlacklist\$files
array $files
Array of blacklist sources.
Definition: BaseBlacklist.php:12
use
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to use
Definition: MIT-LICENSE.txt:10
BaseBlacklist\isLocalSource
static isLocalSource(Title $title)
Check if the given local page title is a spam regex source.
Definition: BaseBlacklist.php:133
$messageMemc
globals will be eliminated from MediaWiki replaced by an application object which would be passed to constructors Whether that would be an convenient solution remains to be but certainly PHP makes such object oriented programming models easier than they were in previous versions For the time being MediaWiki programmers will have to work in an environment with some global context At the time of globals were initialised on startup by MediaWiki of these were configuration which are documented in DefaultSettings php There is no comprehensive documentation for the remaining however some of the most important ones are listed below They are typically initialised either in index php or in Setup php For a description of the see design txt $wgTitle Title object created from the request URL $wgOut OutputPage object for HTTP response $wgUser User object for the user associated with the current request $wgLang Language object selected by user preferences $wgContLang Language object associated with the wiki being viewed $wgParser Parser object Parser extensions register their hooks here $wgRequest WebRequest to get request data $messageMemc
Definition: globals.txt:25
DB_SLAVE
const DB_SLAVE
Definition: Defines.php:37
BaseBlacklist\addBlacklistType
static addBlacklistType( $type, $class)
Adds a blacklist class to the registry.
Definition: BaseBlacklist.php:80
BaseBlacklist\getTypeFromTitle
static getTypeFromTitle(Title $title)
Returns the type of blacklist from the given title.
Definition: BaseBlacklist.php:186
$name
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:302
SpamRegexBatch\regexesFromText
static regexesFromText( $source, BaseBlacklist $blacklist, $fileName=false)
Build a set of regular expressions from the given multiline input text, with empty lines and comments...
Definition: SpamRegexBatch.php:154
wfDebugLog
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not.
Definition: GlobalFunctions.php:1140
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
Revision\selectTextFields
static selectTextFields()
Return the list of text fields that should be selected to read the revision text.
Definition: Revision.php:518
BaseBlacklist\$blacklistTypes
static array $blacklistTypes
Array containing blacklists that extend BaseBlacklist.
Definition: BaseBlacklist.php:43
wfMemcKey
wfMemcKey()
Make a cache key for the local wiki.
Definition: GlobalFunctions.php:2756
$title
namespace and then decline to actually register it file or subcat img or subcat $title
Definition: hooks.txt:932
$wgDBname
controlled by $wgMainCacheType controlled by $wgParserCacheType controlled by $wgMessageCacheType If you set CACHE_NONE to one of the three control default value for MediaWiki still create a but requests to it are no ops and we always fall through to the database If the cache daemon can t be it should also disable itself fairly smoothly By $wgMemc is used but when it is $parserMemc or $messageMemc this is mentioned $wgDBname
Definition: memcached.txt:96
wfGetDB
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
Definition: GlobalFunctions.php:2856
$matches
$matches
Definition: NoLocalSettings.php:24
$article
Using a hook running we can avoid having all this option specific stuff in our mainline code Using the function array $article
Definition: hooks.txt:77
files
c Accompany it with the information you received as to the offer to distribute corresponding source complete source code means all the source code for all modules it plus any associated interface definition files
Definition: COPYING.txt:157
BaseBlacklist\getLocalBlacklists
getLocalBlacklists()
Returns the local blacklist.
Definition: BaseBlacklist.php:218
BaseBlacklist
Base class for different kinds of blacklists.
Definition: BaseBlacklist.php:6
BaseBlacklist\clearCache
clearCache()
Clear all primary blacklist cache keys.
Definition: BaseBlacklist.php:290
SpamRegexBatch\regexesFromMessage
static regexesFromMessage( $message, BaseBlacklist $blacklist)
Build a set of regular expressions from a MediaWiki message.
Definition: SpamRegexBatch.php:167
global
when a variable name is used in a function, it is silently declared as a new local, masking the global
Definition: design.txt:93
BaseBlacklist\getSharedBlacklists
getSharedBlacklists()
Fetch (possibly cached) remote blacklists.
Definition: BaseBlacklist.php:253
Revision\selectPageFields
static selectPageFields()
Return the list of page fields that should be selected from page table.
Definition: Revision.php:529
Http\get
static get( $url, $options=[], $caller=__METHOD__)
Simple wrapper for Http::request( 'GET' )
Definition: Http.php:98
BaseBlacklist\$regexes
bool array $regexes
Array containing regexes to test against.
Definition: BaseBlacklist.php:19
$value
$value
Definition: styleTest.css.php:45
BaseBlacklist\warmCachesForFilter
warmCachesForFilter(Title $title, array $entries)
Definition: BaseBlacklist.php:419
BaseBlacklist\filter
filter(array $links, Title $title, $preventLog=false)
BaseBlacklist\$instances
static array $instances
Array of blacklist instances.
Definition: BaseBlacklist.php:53
Revision\newFromRow
static newFromRow( $row)
Definition: Revision.php:238
PROTO_HTTP
const PROTO_HTTP
Definition: Defines.php:220
Title
Represents a title within MediaWiki.
Definition: Title.php:39
BaseBlacklist\buildSharedBlacklists
buildSharedBlacklists()
Definition: BaseBlacklist.php:301
ContentHandler\getContentText
static getContentText(Content $content=null)
Convenience function for getting flat text from a Content object.
Definition: ContentHandler.php:79
$cache
$cache
Definition: mcc.php:33
ObjectCache\getMainWANInstance
static getMainWANInstance()
Get the main WAN cache object.
Definition: ObjectCache.php:370
BaseBlacklist\$warningChance
int $warningChance
Chance of receiving a warning when the filter is hit.
Definition: BaseBlacklist.php:26
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
BaseBlacklist\__construct
__construct( $settings=[])
Constructor.
Definition: BaseBlacklist.php:60
NS_MEDIAWIKI
const NS_MEDIAWIKI
Definition: Defines.php:73
Revision\selectFields
static selectFields()
Return the list of revision fields that should be selected to create a new revision.
Definition: Revision.php:452
BaseBlacklist\$warningTime
int $warningTime
Definition: BaseBlacklist.php:31
BaseBlacklist\getWhitelists
getWhitelists()
Returns the (local) whitelist.
Definition: BaseBlacklist.php:236
wfExpandUrl
wfExpandUrl( $url, $defaultProto=PROTO_CURRENT)
Expand a potentially local URL to a fully-qualified URL.
Definition: GlobalFunctions.php:586
array
the array() calling protocol came about after MediaWiki 1.4rc1.
BaseBlacklist\getRegexStart
getRegexStart()
Returns the start of the regex for matches.
Definition: BaseBlacklist.php:401
$wgContLang
this class mediates it Skin Encapsulates a look and feel for the wiki All of the functions that render HTML and make choices about how to render it are here and are called from various other places when and is meant to be subclassed with other skins that may override some of its functions The User object contains a reference to a and so rather than having a global skin object we just rely on the global User and get the skin with $wgUser and also has some character encoding functions and other locale stuff The current user interface language is instantiated as and the content language as $wgContLang
Definition: design.txt:56
$type
$type
Definition: testCompression.php:48
BaseBlacklist\$expiryTime
int $expiryTime
Definition: BaseBlacklist.php:36