MediaWiki  1.29.1
BaseBlacklist.php
Go to the documentation of this file.
1 <?php
2 
6 abstract class BaseBlacklist {
/**
 * Array of blacklist sources.
 *
 * Entries are interpreted by buildSharedBlacklists(): "DB: <wiki> <page>"
 * strings, http(s) or protocol-relative URLs, or local file names.
 *
 * @var array
 */
public $files = array();

/**
 * Array containing regexes to test against, or false until
 * getBlacklists() has been called for the first time.
 *
 * @var bool|array
 */
protected $regexes = false;

/**
 * Upper bound handed to mt_rand() in getHttpText(): once the warning key
 * has expired, a request refreshes the HTTP list only when mt_rand()
 * yields 0.
 *
 * @var int
 */
public $warningChance = 100;

/**
 * Expiry passed to the cache when getHttpText() stores the warning key.
 * NOTE(review): presumably seconds -- confirm against the cache API.
 *
 * @var int
 */
public $warningTime = 600;

/**
 * Expiry passed to the cache for the blacklist/whitelist regexes and for
 * the raw HTTP text.
 * NOTE(review): presumably seconds -- confirm against the cache API.
 *
 * @var int
 */
public $expiryTime = 900;

/**
 * Registered blacklist types: type code => name of the implementing class.
 *
 * @var array
 */
private static $blacklistTypes = array(
	'spam' => 'SpamBlacklist',
	'email' => 'EmailBlacklist',
);

/**
 * Blacklist instances already created by getInstance(), keyed by type code.
 *
 * @var array
 */
private static $instances = array();
54 
/**
 * Constructor.
 *
 * Every key of $settings is assigned verbatim as a property of the new
 * object, overriding the defaults declared on the class.
 *
 * @param array $settings Map of property name => value
 */
public function __construct( $settings = array() ) {
	foreach ( $settings as $property => $setting ) {
		$this->{$property} = $setting;
	}
}
65 
/**
 * Filter hook that every concrete blacklist has to implement.
 *
 * NOTE(review): the return value and the exact meaning of $preventLog are
 * defined by the subclasses and are not visible from this file.
 *
 * @param array $links
 * @param Title $title
 * @param bool $preventLog
 */
abstract public function filter(
	array $links,
	Title $title,
	$preventLog = false
);
73 
/**
 * Adds a blacklist class to the registry.
 *
 * An existing registration for the same type code is overwritten.
 *
 * @param string $type Type code used as the registry key
 * @param string $class Name of the class implementing that type
 */
public static function addBlacklistType( $type, $class ) {
	$registry =& self::$blacklistTypes;
	$registry[$type] = $class;
}
83 
/**
 * Return the array of blacklist types currently defined.
 *
 * @return array Map of type code => class name
 */
public static function getBlacklistTypes() {
	$registered = self::$blacklistTypes;

	return $registered;
}
92 
/**
 * Returns an instance of the given blacklist.
 *
 * Instances are created on first request from $wgBlacklistSettings[$type]
 * and then reused for every later call with the same type.
 *
 * @param string $type Code for the blacklist
 * @return BaseBlacklist
 * @throws Exception When $type has not been registered
 */
public static function getInstance( $type ) {
	if ( !isset( self::$blacklistTypes[$type] ) ) {
		throw new Exception( "Invalid blacklist type '$type' passed to " . __METHOD__ );
	}

	// Fast path: this type has been instantiated before.
	if ( isset( self::$instances[$type] ) ) {
		return self::$instances[$type];
	}

	global $wgBlacklistSettings;

	// Make sure the settings entry exists so reading it raises no notice.
	if ( !isset( $wgBlacklistSettings[$type] ) ) {
		$wgBlacklistSettings[$type] = array();
	}

	$className = self::$blacklistTypes[$type];
	$created = new $className( $wgBlacklistSettings[$type] );
	self::$instances[$type] = $created;

	return $created;
}
119 
/**
 * Returns the code for the blacklist implementation.
 *
 * The value is interpolated into cache keys, message names and log lines
 * by the other methods of this class.
 *
 * @return string
 */
abstract protected function getBlacklistType(
);
126 
/**
 * Check if the given local page title is a spam regex source.
 *
 * A title counts as a source when it is either
 *  - one of the "<Type>-blacklist" / "<Type>-whitelist" pages in the
 *    MediaWiki namespace, for any registered blacklist type, or
 *  - referenced from a 'files' entry in $wgBlacklistSettings, as a
 *    "DB: <wiki> <page>" entry for this wiki or as the raw-view URL of
 *    the page.
 *
 * @param Title $title
 * @return bool
 */
public static function isLocalSource( Title $title ) {
	global $wgDBname, $wgBlacklistSettings;

	if ( $title->getNamespace() == NS_MEDIAWIKI ) {
		// Append rather than use the array union operator: "+" keeps the
		// first value for every numeric key, so with union only the first
		// registered type would ever end up in the list.
		$sources = array();
		foreach ( array_keys( self::$blacklistTypes ) as $type ) {
			$prefix = ucfirst( $type );
			$sources[] = "$prefix-blacklist";
			$sources[] = "$prefix-whitelist";
		}

		if ( in_array( $title->getDBkey(), $sources, true ) ) {
			return true;
		}
	}

	$thisHttp = wfExpandUrl( $title->getFullUrl( 'action=raw' ), PROTO_HTTP );
	$thisHttpRegex = '/^' . preg_quote( $thisHttp, '/' ) . '(?:&.*)?$/';

	// Collect the configured files of every type. array_merge() is needed
	// for the same reason as above: the lists are numerically indexed.
	$files = array();
	foreach ( array_keys( self::$blacklistTypes ) as $type ) {
		if ( isset( $wgBlacklistSettings[$type]['files'] ) ) {
			$files = array_merge( $files, (array)$wgBlacklistSettings[$type]['files'] );
		}
	}

	foreach ( $files as $fileName ) {
		$matches = array();
		// Same "DB: <wiki> <page>" syntax as buildSharedBlacklists(),
		// including hyphens in the wiki name.
		if ( preg_match( '/^DB: ([\w-]*) (.*)$/', $fileName, $matches ) ) {
			if ( $wgDBname == $matches[1] && $matches[2] == $title->getPrefixedDBkey() ) {
				// Local DB fetch of this page...
				return true;
			}
		} elseif ( preg_match( $thisHttpRegex, $fileName ) ) {
			// Raw view of this page
			return true;
		}
	}

	return false;
}
178 
/**
 * Returns the type of blacklist from the given title.
 *
 * The DB key is searched for "<type>-blacklist" or "<type>-whitelist",
 * where <type> is any registered type code. Matching is case-insensitive
 * because the source pages are named with an upper-cased first letter and
 * a lower-case suffix (see isLocalSource()), while type codes are stored
 * in lower case.
 *
 * @param Title $title
 * @return bool|string Lower-cased type code, or false if nothing matched
 */
public static function getTypeFromTitle( Title $title ) {
	// Quote each code with an explicit delimiter. Passing array( '/' ) as
	// a second array to array_map() only supplies it for the first entry.
	$types = array();
	foreach ( array_keys( self::$blacklistTypes ) as $type ) {
		$types[] = preg_quote( $type, '/' );
	}

	if ( !$types ) {
		// Nothing registered; an empty alternation would match "-blacklist".
		return false;
	}

	$regex = '/(' . implode( '|', $types ) . ')-(?:blacklist|whitelist)/i';

	$m = array();
	if ( preg_match( $regex, $title->getDBkey(), $m ) ) {
		return strtolower( $m[1] );
	}

	return false;
}
195 
/**
 * Fetch local and (possibly cached) remote blacklists.
 *
 * The merged list is computed once per instance and kept in $regexes.
 *
 * @return array
 */
public function getBlacklists() {
	if ( $this->regexes !== false ) {
		return $this->regexes;
	}

	$local = $this->getLocalBlacklists();
	$shared = $this->getSharedBlacklists();
	$this->regexes = array_merge( $local, $shared );

	return $this->regexes;
}
209 
/**
 * Returns the local blacklist.
 *
 * The regexes are built from the "<type>-blacklist" message and cached in
 * the main WAN cache with $expiryTime as the expiry.
 *
 * @return array Regular expressions
 */
public function getLocalBlacklists() {
	$blacklist = $this;
	$listType = $this->getBlacklistType();

	// Runs only when the cache has no value for the key.
	$loader = function () use ( $blacklist, $listType ) {
		return SpamRegexBatch::regexesFromMessage( "{$listType}-blacklist", $blacklist );
	};

	$cache = ObjectCache::getMainWANInstance();

	return $cache->getWithSetCallback(
		wfMemcKey( 'spamblacklist', $listType, 'blacklist-regex' ),
		$this->expiryTime,
		$loader
	);
}
227 
/**
 * Returns the (local) whitelist.
 *
 * The regexes are built from the "<type>-whitelist" message and cached in
 * the main WAN cache with $expiryTime as the expiry.
 *
 * @return array Regular expressions
 */
public function getWhitelists() {
	$blacklist = $this;
	$listType = $this->getBlacklistType();

	// Runs only when the cache has no value for the key.
	$loader = function () use ( $blacklist, $listType ) {
		return SpamRegexBatch::regexesFromMessage( "{$listType}-whitelist", $blacklist );
	};

	$cache = ObjectCache::getMainWANInstance();

	return $cache->getWithSetCallback(
		wfMemcKey( 'spamblacklist', $listType, 'whitelist-regex' ),
		$this->expiryTime,
		$loader
	);
}
245 
/**
 * Fetch (possibly cached) remote blacklists.
 *
 * Returns an empty array when no sources are configured in $files.
 * Otherwise the regexes come from the main WAN cache, and are rebuilt
 * through buildSharedBlacklists() when the cache has no value.
 *
 * @return array
 */
public function getSharedBlacklists() {
	$type = $this->getBlacklistType();

	wfDebugLog( 'SpamBlacklist', "Loading $type regex..." );

	if ( !count( $this->files ) ) {
		# No lists
		wfDebugLog( 'SpamBlacklist', "no files specified\n" );
		return array();
	}

	// Set by the callback, so we can tell a rebuild from a cache hit.
	$rebuilt = false;
	$blacklist = $this;

	// This used to be cached per-site, but that could be bad on a shared
	// server where not all wikis have the same configuration.
	$cacheKey = wfMemcKey( 'spamblacklist', $type, 'shared-blacklist-regex' );

	$regexes = ObjectCache::getMainWANInstance()->getWithSetCallback(
		$cacheKey,
		$this->expiryTime,
		function () use ( $blacklist, &$rebuilt ) {
			$rebuilt = true;
			return $blacklist->buildSharedBlacklists();
		}
	);

	if ( $rebuilt === false ) {
		wfDebugLog( 'SpamBlacklist', "Got shared spam regexes from cache\n" );
	}

	return $regexes;
}
281 
/**
 * Clear all primary blacklist cache keys.
 *
 * Deletes the three keys written by getSharedBlacklists(),
 * getLocalBlacklists() and getWhitelists() for this blacklist type.
 */
public function clearCache() {
	$listType = $this->getBlacklistType();

	// Must be the same cache object the getters store into. Without this
	// assignment $cache is undefined and the delete() calls fail.
	$cache = ObjectCache::getMainWANInstance();
	$cache->delete( wfMemcKey( 'spamblacklist', $listType, 'shared-blacklist-regex' ) );
	$cache->delete( wfMemcKey( 'spamblacklist', $listType, 'blacklist-regex' ) );
	$cache->delete( wfMemcKey( 'spamblacklist', $listType, 'whitelist-regex' ) );

	wfDebugLog( 'SpamBlacklist', "$listType blacklist local cache cleared.\n" );
}
297 
/**
 * Build the shared blacklist regexes from every source listed in $files.
 *
 * Each entry is loaded according to its form:
 *  - "DB: <wiki> <page>" through getArticleText(),
 *  - http://, https:// or protocol-relative URLs through getHttpText(),
 *  - anything else as a local file name.
 *
 * Has to stay callable from outside the object: getSharedBlacklists()
 * invokes it from a closure through a captured reference to $this.
 *
 * @return array Regular expressions from all sources, in source order
 */
public function buildSharedBlacklists() {
	$regexes = array();
	$listType = $this->getBlacklistType();
	# Load lists
	wfDebugLog( 'SpamBlacklist', "Constructing $listType blacklist\n" );
	foreach ( $this->files as $fileName ) {
		$matches = array();
		if ( preg_match( '/^DB: ([\w-]*) (.*)$/', $fileName, $matches ) ) {
			$text = $this->getArticleText( $matches[1], $matches[2] );
		} elseif ( preg_match( '/^(https?:)?\/\//', $fileName ) ) {
			$text = $this->getHttpText( $fileName );
		} else {
			$text = file_get_contents( $fileName );
			wfDebugLog( 'SpamBlacklist', "got from file $fileName\n" );
		}

		// Build a separate batch of regexes from each source.
		// While in theory we could squeeze a little efficiency
		// out of combining multiple sources in one regex, if
		// there's a bad line in one of them we'll gain more
		// from only having to break that set into smaller pieces.
		$regexes = array_merge(
			$regexes,
			SpamRegexBatch::regexesFromText( $text, $this, $fileName )
		);
	}

	return $regexes;
}
325 
/**
 * Fetch a blacklist over HTTP, going through $messageMemc to limit requests.
 *
 * Two keys are used: the text itself and a "warning" key. The text is
 * fetched again when no string is cached, or when the warning key is gone
 * and mt_rand( 0, $warningChance ) returns 0.
 *
 * @param string $fileName URL of the blacklist
 * @return string|bool The text, or false when the HTTP request failed
 */
public function getHttpText( $fileName ) {
	// Both names are used below; without this declaration they would be
	// undefined local variables.
	global $wgDBname, $messageMemc;

	$listType = $this->getBlacklistType();

	# HTTP request
	# To keep requests to a minimum, we save results into $messageMemc, which is
	# similar to $wgMemc except almost certain to exist. By default, it is stored
	# in the database
	#
	# There are two keys, when the warning key expires, a random thread will refresh
	# the real key. This reduces the chance of multiple requests under high traffic
	# conditions.
	$key = "{$listType}_blacklist_file:$fileName";
	$warningKey = "$wgDBname:{$listType}filewarning:$fileName";
	$httpText = $messageMemc->get( $key );
	$warning = $messageMemc->get( $warningKey );

	if ( !is_string( $httpText ) || ( !$warning && !mt_rand( 0, $this->warningChance ) ) ) {
		wfDebugLog( 'SpamBlacklist', "Loading $listType blacklist from $fileName\n" );
		$httpText = Http::get( $fileName );
		if ( $httpText === false ) {
			wfDebugLog( 'SpamBlacklist', "Error loading $listType blacklist from $fileName\n" );
		}
		// Both keys are written even after a failed request; a cached
		// false is not a string, so the next call retries the fetch.
		$messageMemc->set( $warningKey, 1, $this->warningTime );
		$messageMemc->set( $key, $httpText, $this->expiryTime );
	} else {
		wfDebugLog( 'SpamBlacklist', "Got $listType blacklist from HTTP cache for $fileName\n" );
	}

	return $httpText;
}
356 
/**
 * Fetch an article from this or another local MediaWiki database.
 *
 * The page is looked up by namespace and DB key on the database of $wiki,
 * joining page, revision and text to reach the latest revision.
 *
 * @param string $wiki Database name of the wiki to read from
 * @param string $article Page title, parsed with Title::newFromText()
 * @return string|bool Page text, or false when the title cannot be
 *  parsed or no matching row exists
 */
public function getArticleText( $wiki, $article ) {
	wfDebugLog( 'SpamBlacklist',
		"Fetching {$this->getBlacklistType()} blacklist from '$article' on '$wiki'...\n" );

	$title = Title::newFromText( $article );
	if ( !$title ) {
		// Without a title object the query conditions below cannot be built.
		return false;
	}

	// Load all the relevant tables from the correct DB.
	// This assumes that old_text is the actual text or
	// that the external store system is at least unified.
	$row = wfGetDB( DB_SLAVE, array(), $wiki )->selectRow(
		array( 'page', 'revision', 'text' ),
		array_merge(
			Revision::selectFields(),
			Revision::selectPageFields(),
			Revision::selectTextFields()
		),
		array(
			'page_namespace' => $title->getNamespace(), // assume NS IDs match
			'page_title' => $title->getDBkey(), // assume same case rules
			'rev_id=page_latest',
			'old_id=rev_text_id'
		),
		__METHOD__
	);

	return $row
		? ContentHandler::getContentText( Revision::newFromRow( $row )->getContent() )
		: false;
}
393 
/**
 * Returns the start of the regex for matches.
 *
 * That is the opening "/" delimiter followed by a pattern accepting any
 * run of characters from [a-z0-9_\-.].
 *
 * @return string
 */
public function getRegexStart() {
	$prefix = '/[a-z0-9_\-.]*';

	return $prefix;
}
402 
/**
 * Returns the end of the regex for matches.
 *
 * That is the closing "/" delimiter plus modifiers: "Sim" when
 * $batchSize is greater than zero, "im" otherwise.
 *
 * @param int $batchSize
 * @return string
 */
public function getRegexEnd( $batchSize ) {
	if ( $batchSize > 0 ) {
		return '/Sim';
	}

	return '/im';
}
412 
/**
 * Extension point for warming caches before filter() runs.
 *
 * The base implementation does nothing.
 *
 * @param Title $title
 * @param array $entries
 */
public function warmCachesForFilter( Title $title, array $entries ) {
	// Intentionally empty: subclasses override this.
}
420 }
BaseBlacklist\getBlacklistTypes
static getBlacklistTypes()
Return the array of blacklist types currently defined.
Definition: BaseBlacklist.php:89
Title\newFromText
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:265
BaseBlacklist\getInstance
static getInstance( $type)
Returns an instance of the given blacklist.
Definition: BaseBlacklist.php:100
BaseBlacklist\getRegexEnd
getRegexEnd( $batchSize)
Returns the end of the regex for matches.
Definition: BaseBlacklist.php:409
BaseBlacklist\getBlacklists
getBlacklists()
Fetch local and (possibly cached) remote blacklists.
Definition: BaseBlacklist.php:201
BaseBlacklist\getHttpText
getHttpText( $fileName)
Fetch a blacklist over HTTP, caching the result in $messageMemc.
Definition: BaseBlacklist.php:326
BaseBlacklist\__construct
__construct( $settings=array())
Constructor.
Definition: BaseBlacklist.php:60
captcha-old.count
count
Definition: captcha-old.py:225
BaseBlacklist\getBlacklistType
getBlacklistType()
Returns the code for the blacklist implementation.
Definition: BaseBlacklist.php:125
BaseBlacklist\getArticleText
getArticleText( $wiki, $article)
Fetch an article from this or another local MediaWiki database.
Definition: BaseBlacklist.php:365
BaseBlacklist\$files
array $files
Array of blacklist sources.
Definition: BaseBlacklist.php:12
use
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to use
Definition: MIT-LICENSE.txt:10
BaseBlacklist\isLocalSource
static isLocalSource(Title $title)
Check if the given local page title is a spam regex source.
Definition: BaseBlacklist.php:133
$messageMemc
globals will be eliminated from MediaWiki replaced by an application object which would be passed to constructors Whether that would be an convenient solution remains to be but certainly PHP makes such object oriented programming models easier than they were in previous versions For the time being MediaWiki programmers will have to work in an environment with some global context At the time of globals were initialised on startup by MediaWiki of these were configuration which are documented in DefaultSettings php There is no comprehensive documentation for the remaining however some of the most important ones are listed below They are typically initialised either in index php or in Setup php For a description of the see design txt $wgTitle Title object created from the request URL $wgOut OutputPage object for HTTP response $wgUser User object for the user associated with the current request $wgLang Language object selected by user preferences $wgContLang Language object associated with the wiki being viewed $wgParser Parser object Parser extensions register their hooks here $wgRequest WebRequest to get request data $messageMemc
Definition: globals.txt:25
DB_SLAVE
const DB_SLAVE
Definition: Defines.php:34
BaseBlacklist\addBlacklistType
static addBlacklistType( $type, $class)
Adds a blacklist class to the registry.
Definition: BaseBlacklist.php:80
BaseBlacklist\getTypeFromTitle
static getTypeFromTitle(Title $title)
Returns the type of blacklist from the given title.
Definition: BaseBlacklist.php:185
$name
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:304
$type
do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values before the output is cached my talk my contributions etc etc otherwise the built in rate limiting checks are if enabled allows for interception of redirect as a string mapping parameter names to values & $type
Definition: hooks.txt:2536
SpamRegexBatch\regexesFromText
static regexesFromText( $source, BaseBlacklist $blacklist, $fileName=false)
Build a set of regular expressions from the given multiline input text, with empty lines and comments...
Definition: SpamRegexBatch.php:154
wfDebugLog
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not.
Definition: GlobalFunctions.php:1092
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
Revision\selectTextFields
static selectTextFields()
Return the list of text fields that should be selected to read the revision text.
Definition: Revision.php:510
BaseBlacklist\$blacklistTypes
static array $blacklistTypes
Array containing blacklists that extend BaseBlacklist.
Definition: BaseBlacklist.php:43
wfMemcKey
wfMemcKey()
Make a cache key for the local wiki.
Definition: GlobalFunctions.php:2961
$title
namespace and then decline to actually register it file or subcat img or subcat $title
Definition: hooks.txt:934
$wgDBname
controlled by $wgMainCacheType controlled by $wgParserCacheType controlled by $wgMessageCacheType If you set CACHE_NONE to one of the three control default value for MediaWiki still create a but requests to it are no ops and we always fall through to the database If the cache daemon can t be it should also disable itself fairly smoothly By $wgMemc is used but when it is $parserMemc or $messageMemc this is mentioned $wgDBname
Definition: memcached.txt:96
wfGetDB
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
Definition: GlobalFunctions.php:3060
$matches
$matches
Definition: NoLocalSettings.php:24
files
c Accompany it with the information you received as to the offer to distribute corresponding source complete source code means all the source code for all modules it plus any associated interface definition files
Definition: COPYING.txt:157
BaseBlacklist\getLocalBlacklists
getLocalBlacklists()
Returns the local blacklist.
Definition: BaseBlacklist.php:215
BaseBlacklist
Base class for different kinds of blacklists.
Definition: BaseBlacklist.php:6
BaseBlacklist\clearCache
clearCache()
Clear all primary blacklist cache keys.
Definition: BaseBlacklist.php:287
SpamRegexBatch\regexesFromMessage
static regexesFromMessage( $message, BaseBlacklist $blacklist)
Build a set of regular expressions from a MediaWiki message.
Definition: SpamRegexBatch.php:167
global
when a variable name is used in a it is silently declared as a new masking the global
Definition: design.txt:93
BaseBlacklist\getSharedBlacklists
getSharedBlacklists()
Fetch (possibly cached) remote blacklists.
Definition: BaseBlacklist.php:250
Revision\selectPageFields
static selectPageFields()
Return the list of page fields that should be selected from page table.
Definition: Revision.php:521
Http\get
static get( $url, $options=[], $caller=__METHOD__)
Simple wrapper for Http::request( 'GET' )
Definition: Http.php:98
BaseBlacklist\$regexes
bool array $regexes
Array containing regexes to test against.
Definition: BaseBlacklist.php:19
$value
$value
Definition: styleTest.css.php:45
BaseBlacklist\warmCachesForFilter
warmCachesForFilter(Title $title, array $entries)
Definition: BaseBlacklist.php:417
BaseBlacklist\filter
filter(array $links, Title $title, $preventLog=false)
Definition: BaseBlacklist.php:72
BaseBlacklist\$instances
static array $instances
Array of blacklist instances.
Definition: BaseBlacklist.php:53
Revision\newFromRow
static newFromRow( $row)
Definition: Revision.php:236
PROTO_HTTP
const PROTO_HTTP
Definition: Defines.php:217
Title
Represents a title within MediaWiki.
Definition: Title.php:39
BaseBlacklist\buildSharedBlacklists
buildSharedBlacklists()
Build the shared blacklist regexes from every source listed in $files.
Definition: BaseBlacklist.php:298
ContentHandler\getContentText
static getContentText(Content $content=null)
Convenience function for getting flat text from a Content object.
Definition: ContentHandler.php:79
$cache
$cache
Definition: mcc.php:33
ObjectCache\getMainWANInstance
static getMainWANInstance()
Get the main WAN cache object.
Definition: ObjectCache.php:370
BaseBlacklist\$warningChance
int $warningChance
Chance of receiving a warning when the filter is hit.
Definition: BaseBlacklist.php:26
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
$article
Using a hook running we can avoid having all this option specific stuff in our mainline code Using the function array $article
Definition: hooks.txt:78
NS_MEDIAWIKI
const NS_MEDIAWIKI
Definition: Defines.php:70
Revision\selectFields
static selectFields()
Return the list of revision fields that should be selected to create a new revision.
Definition: Revision.php:448
BaseBlacklist\$warningTime
int $warningTime
Definition: BaseBlacklist.php:31
BaseBlacklist\getWhitelists
getWhitelists()
Returns the (local) whitelist.
Definition: BaseBlacklist.php:233
wfExpandUrl
wfExpandUrl( $url, $defaultProto=PROTO_CURRENT)
Expand a potentially local URL to a fully-qualified URL.
Definition: GlobalFunctions.php:552
array
the array() calling protocol came about after MediaWiki 1.4rc1.
BaseBlacklist\getRegexStart
getRegexStart()
Returns the start of the regex for matches.
Definition: BaseBlacklist.php:399
BaseBlacklist\$expiryTime
int $expiryTime
Definition: BaseBlacklist.php:36