MediaWiki  1.32.0
BaseBlacklist.php
Go to the documentation of this file.
1 <?php
2 
/**
 * Base class for different kinds of blacklists.
 *
 * Keeps a registry of blacklist types (type code => implementing class),
 * hands out one shared instance per type, and provides the loading and
 * caching of blacklist/whitelist regexes from local messages, other wiki
 * databases, HTTP URLs and plain files.
 */
abstract class BaseBlacklist {
	/**
	 * Array of blacklist sources. Each entry is one of:
	 *  - "DB: <wiki> <page title>" (fetched through getArticleText()),
	 *  - an http://, https:// or protocol-relative URL (fetched through getHttpText()),
	 *  - anything else, which is read with file_get_contents().
	 *
	 * @var array
	 */
	public $files = [];

	/**
	 * Array containing regexes to test against; false until getBlacklists()
	 * has populated it.
	 *
	 * @var bool|array
	 */
	protected $regexes = false;

	/**
	 * Upper bound handed to mt_rand( 0, $warningChance ) in getHttpText().
	 * Once the warning key has expired, a request only refetches the remote
	 * list when that draw comes out as 0.
	 *
	 * @var int
	 */
	public $warningChance = 100;

	/**
	 * Lifetime, in seconds, of the "warning" cache key set by getHttpText().
	 *
	 * @var int
	 */
	public $warningTime = 600;

	/**
	 * Lifetime, in seconds, of cached blacklist data.
	 *
	 * @var int
	 */
	public $expiryTime = 900;

	/**
	 * Array containing blacklists that extend BaseBlacklist,
	 * as type code => class name.
	 *
	 * @var array
	 */
	private static $blacklistTypes = [
		'spam' => 'SpamBlacklist',
		'email' => 'EmailBlacklist',
	];

	/**
	 * Array of blacklist instances, keyed by type code.
	 *
	 * @var array
	 */
	private static $instances = [];

	/**
	 * Constructor. Every key of $settings is copied onto the property of the
	 * same name, so settings can override $files, $warningChance, etc.
	 *
	 * @param array $settings Property name => value
	 */
	public function __construct( $settings = [] ) {
		foreach ( $settings as $name => $value ) {
			$this->$name = $value;
		}
	}

	/**
	 * Run the blacklist against a set of entries. Implemented by subclasses;
	 * the return contract is defined there.
	 *
	 * @param array $links
	 * @param Title $title
	 * @param bool $preventLog
	 */
	abstract public function filter( array $links, Title $title, $preventLog = false );

	/**
	 * Adds a blacklist class to the registry (or replaces an existing type).
	 *
	 * @param string $type Type code, e.g. 'spam'
	 * @param string $class Name of the class implementing that type
	 */
	public static function addBlacklistType( $type, $class ) {
		self::$blacklistTypes[$type] = $class;
	}

	/**
	 * Return the array of blacklist types currently defined.
	 *
	 * @return array Type code => class name
	 */
	public static function getBlacklistTypes() {
		return self::$blacklistTypes;
	}

	/**
	 * Shortcut for getInstance( 'spam' ).
	 *
	 * @return BaseBlacklist
	 */
	public static function getSpamBlacklist() {
		return self::getInstance( 'spam' );
	}

	/**
	 * Shortcut for getInstance( 'email' ).
	 *
	 * @return BaseBlacklist
	 */
	public static function getEmailBlacklist() {
		return self::getInstance( 'email' );
	}

	/**
	 * Returns an instance of the given blacklist, creating it on first use
	 * from $wgBlacklistSettings[$type] and reusing it afterwards.
	 *
	 * @param string $type Code for the blacklist
	 * @return BaseBlacklist
	 * @throws Exception If $type is not a registered blacklist type
	 */
	public static function getInstance( $type ) {
		if ( !isset( self::$blacklistTypes[$type] ) ) {
			throw new Exception( "Invalid blacklist type '$type' passed to " . __METHOD__ );
		}

		if ( !isset( self::$instances[$type] ) ) {
			global $wgBlacklistSettings;

			// Prevent notices
			if ( !isset( $wgBlacklistSettings[$type] ) ) {
				$wgBlacklistSettings[$type] = [];
			}

			$class = self::$blacklistTypes[$type];
			self::$instances[$type] = new $class( $wgBlacklistSettings[$type] );
		}

		return self::$instances[$type];
	}

	/**
	 * Returns the code for the blacklist implementation (the key under which
	 * the subclass is registered, e.g. 'spam').
	 *
	 * @return string
	 */
	abstract protected function getBlacklistType();

	/**
	 * Check if the given local page title is a spam regex source.
	 *
	 * A title counts as a source when it is a "<Type>-blacklist" or
	 * "<Type>-whitelist" page in the MediaWiki namespace of any registered
	 * type, or when one of the configured 'files' entries of any registered
	 * type refers to it (a "DB: <this wiki> <title>" entry, or the HTTP URL of
	 * its raw view).
	 *
	 * @param Title $title
	 * @return bool
	 */
	public static function isLocalSource( Title $title ) {
		global $wgDBname, $wgBlacklistSettings;

		$types = array_keys( self::$blacklistTypes );

		if ( $title->getNamespace() == NS_MEDIAWIKI ) {
			// Append explicitly: the array union operator (+=) keeps existing
			// numeric keys, so on list arrays it silently discarded the page
			// names of every type after the first one.
			$sources = [];
			foreach ( $types as $type ) {
				$prefix = ucfirst( $type );
				$sources[] = "$prefix-blacklist";
				$sources[] = "$prefix-whitelist";
			}

			if ( in_array( $title->getDBkey(), $sources, true ) ) {
				return true;
			}
		}

		$thisHttp = wfExpandUrl( $title->getFullUrl( 'action=raw' ), PROTO_HTTP );
		$thisHttpRegex = '/^' . preg_quote( $thisHttp, '/' ) . '(?:&.*)?$/';

		// array_merge() rather than += for the same reason as above: file
		// lists of different types share the numeric indexes 0..n.
		$files = [];
		foreach ( $types as $type ) {
			if ( isset( $wgBlacklistSettings[$type]['files'] ) ) {
				$files = array_merge( $files, (array)$wgBlacklistSettings[$type]['files'] );
			}
		}

		foreach ( $files as $fileName ) {
			$matches = [];
			// Same pattern as buildSharedBlacklists(), so that database names
			// containing a hyphen are recognised here as well.
			if ( preg_match( '/^DB: ([\w-]*) (.*)$/', $fileName, $matches ) ) {
				if ( $wgDBname === $matches[1] && $matches[2] === $title->getPrefixedDbKey() ) {
					// Local DB fetch of this page...
					return true;
				}
			} elseif ( preg_match( $thisHttpRegex, $fileName ) ) {
				// Raw view of this page
				return true;
			}
		}

		return false;
	}

	/**
	 * Returns the type of blacklist from the given title.
	 *
	 * @param Title $title
	 * @return bool|string Lower-cased type code, or false when the title's DB
	 *  key does not contain "<Type>-blacklist" or "<Type>-whitelist"
	 */
	public static function getTypeFromTitle( Title $title ) {
		global $wgContLang;

		$types = array_map( [ $wgContLang, 'ucfirst' ], array_keys( self::$blacklistTypes ) );
		// Type codes are registry keys, not patterns; quote them so that a
		// code containing a regex metacharacter cannot break the expression.
		$alternatives = [];
		foreach ( $types as $type ) {
			$alternatives[] = preg_quote( $type, '/' );
		}
		$regex = '/(' . implode( '|', $alternatives ) . ')-(?:blacklist|whitelist)/';

		if ( preg_match( $regex, $title->getDBkey(), $m ) ) {
			return strtolower( $m[1] );
		}

		return false;
	}

	/**
	 * Fetch local and (possibly cached) remote blacklists. The merged result
	 * is kept in $this->regexes, so the sources are consulted once per instance.
	 *
	 * @return array Set of regular expressions
	 */
	public function getBlacklists() {
		if ( $this->regexes === false ) {
			$this->regexes = array_merge(
				$this->getLocalBlacklists(),
				$this->getSharedBlacklists() );
		}
		return $this->regexes;
	}

	/**
	 * Returns the local blacklist, built from the "<type>-blacklist" message
	 * and cached in the main WAN cache.
	 *
	 * @return array Regular expressions
	 */
	public function getLocalBlacklists() {
		$type = $this->getBlacklistType();
		$cache = ObjectCache::getMainWANInstance();

		return $cache->getWithSetCallback(
			$cache->makeKey( 'spamblacklist', $type, 'blacklist-regex' ),
			// NOTE(review): the TTL argument was missing from the listing this
			// was restored from; $expiryTime is assumed - confirm upstream.
			$this->expiryTime,
			function () use ( $type ) {
				return SpamRegexBatch::regexesFromMessage( "{$type}-blacklist", $this );
			}
		);
	}

	/**
	 * Returns the (local) whitelist, built from the "<type>-whitelist" message
	 * and cached in the main WAN cache.
	 *
	 * @return array Regular expressions
	 */
	public function getWhitelists() {
		$type = $this->getBlacklistType();
		$cache = ObjectCache::getMainWANInstance();

		return $cache->getWithSetCallback(
			$cache->makeKey( 'spamblacklist', $type, 'whitelist-regex' ),
			// NOTE(review): TTL restored as $expiryTime - confirm upstream.
			$this->expiryTime,
			function () use ( $type ) {
				return SpamRegexBatch::regexesFromMessage( "{$type}-whitelist", $this );
			}
		);
	}

	/**
	 * Fetch (possibly cached) remote blacklists, i.e. the regexes built from
	 * the sources listed in $this->files.
	 *
	 * @return array Regular expressions; empty when no files are configured
	 */
	public function getSharedBlacklists() {
		$listType = $this->getBlacklistType();

		wfDebugLog( 'SpamBlacklist', "Loading $listType regex..." );

		if ( count( $this->files ) == 0 ) {
			# No lists
			wfDebugLog( 'SpamBlacklist', "no files specified\n" );
			return [];
		}

		// Flipped by the callback, which only runs when the cache had no value.
		$miss = false;

		$cache = ObjectCache::getMainWANInstance();
		$regexes = $cache->getWithSetCallback(
			// This used to be cached per-site, but that could be bad on a shared
			// server where not all wikis have the same configuration.
			$cache->makeKey( 'spamblacklist', $listType, 'shared-blacklist-regex' ),
			// NOTE(review): TTL restored as $expiryTime - confirm upstream.
			$this->expiryTime,
			function () use ( &$miss ) {
				$miss = true;
				return $this->buildSharedBlacklists();
			}
		);

		if ( !$miss ) {
			wfDebugLog( 'SpamBlacklist', "Got shared spam regexes from cache\n" );
		}

		return $regexes;
	}

	/**
	 * Clear all primary blacklist cache keys: the shared blacklist, the local
	 * blacklist and the local whitelist of this blacklist type.
	 */
	public function clearCache() {
		$listType = $this->getBlacklistType();

		$cache = ObjectCache::getMainWANInstance();
		$cache->delete( $cache->makeKey( 'spamblacklist', $listType, 'shared-blacklist-regex' ) );
		$cache->delete( $cache->makeKey( 'spamblacklist', $listType, 'blacklist-regex' ) );
		$cache->delete( $cache->makeKey( 'spamblacklist', $listType, 'whitelist-regex' ) );

		wfDebugLog( 'SpamBlacklist', "$listType blacklist local cache cleared.\n" );
	}

	/**
	 * Load every source in $this->files and turn each one into its own batch
	 * of regexes.
	 *
	 * @return array Regular expressions from all sources, in file order
	 */
	public function buildSharedBlacklists() {
		$regexes = [];
		$listType = $this->getBlacklistType();
		# Load lists
		wfDebugLog( 'SpamBlacklist', "Constructing $listType blacklist\n" );
		foreach ( $this->files as $fileName ) {
			$matches = [];
			if ( preg_match( '/^DB: ([\w-]*) (.*)$/', $fileName, $matches ) ) {
				$text = $this->getArticleText( $matches[1], $matches[2] );
			} elseif ( preg_match( '/^(https?:)?\/\//', $fileName ) ) {
				$text = $this->getHttpText( $fileName );
			} else {
				$text = file_get_contents( $fileName );
				wfDebugLog( 'SpamBlacklist', "got from file $fileName\n" );
			}

			// Build a separate batch of regexes from each source.
			// While in theory we could squeeze a little efficiency
			// out of combining multiple sources in one regex, if
			// there's a bad line in one of them we'll gain more
			// from only having to break that set into smaller pieces.
			$regexes = array_merge( $regexes,
				SpamRegexBatch::regexesFromText( $text, $this, $fileName ) );
		}

		return $regexes;
	}

	/**
	 * Fetch a blacklist over HTTP, going through $messageMemc first.
	 *
	 * @param string $fileName URL of the list
	 * @return string|bool Whatever Http::get() produced (false is logged as a
	 *  load error), or the previously cached text
	 */
	public function getHttpText( $fileName ) {
		global $wgDBname, $messageMemc;
		$listType = $this->getBlacklistType();

		# HTTP request
		# To keep requests to a minimum, we save results into $messageMemc, which is
		# similar to $wgMemc except almost certain to exist. By default, it is stored
		# in the database
		# There are two keys, when the warning key expires, a random thread will refresh
		# the real key. This reduces the chance of multiple requests under high traffic
		# conditions.
		$key = "{$listType}_blacklist_file:$fileName";
		$warningKey = "$wgDBname:{$listType}filewarning:$fileName";
		$httpText = $messageMemc->get( $key );
		$warning = $messageMemc->get( $warningKey );

		// Refetch when nothing usable is cached, or when the warning key is
		// gone and the random draw selects this request to do the refresh.
		if ( !is_string( $httpText ) || ( !$warning && !mt_rand( 0, $this->warningChance ) ) ) {
			wfDebugLog( 'SpamBlacklist', "Loading $listType blacklist from $fileName\n" );
			$httpText = Http::get( $fileName );
			if ( $httpText === false ) {
				wfDebugLog( 'SpamBlacklist', "Error loading $listType blacklist from $fileName\n" );
			}
			$messageMemc->set( $warningKey, 1, $this->warningTime );
			$messageMemc->set( $key, $httpText, $this->expiryTime );
		} else {
			wfDebugLog( 'SpamBlacklist', "Got $listType blacklist from HTTP cache for $fileName\n" );
		}
		return $httpText;
	}

	/**
	 * Fetch an article from this or another local MediaWiki database.
	 * This is probably *very* fragile, and shouldn't be used perhaps.
	 *
	 * @param string $wiki Database name of the wiki to read from
	 * @param string $article Title text of the page holding the list
	 * @return string|bool Text of the page's latest revision, or false when the
	 *  title cannot be parsed or no matching row exists
	 */
	public function getArticleText( $wiki, $article ) {
		wfDebugLog( 'SpamBlacklist',
			"Fetching {$this->getBlacklistType()} blacklist from '$article' on '$wiki'...\n" );

		$title = Title::newFromText( $article );
		if ( !$title ) {
			// Guard against a title string that could not be turned into a
			// Title; report it the same way as a missing page.
			return false;
		}

		// Load all the relevant tables from the correct DB.
		// This assumes that old_text is the actual text or
		// that the external store system is at least unified.
		if ( is_callable( [ Revision::class, 'getQueryInfo' ] ) ) {
			$revQuery = Revision::getQueryInfo( [ 'page', 'text' ] );
		} else {
			// Fallback for a Revision class without getQueryInfo().
			$revQuery = [
				'tables' => [ 'revision', 'page', 'text' ],
				'fields' => array_merge(
					Revision::selectFields(),
					Revision::selectPageFields(),
					Revision::selectTextFields()
				),
				'joins' => [
					'text' => [ 'JOIN', 'old_id=rev_text_id' ]
				],
			];
		}
		$row = wfGetDB( DB_REPLICA, [], $wiki )->selectRow(
			$revQuery['tables'],
			$revQuery['fields'],
			[
				'page_namespace' => $title->getNamespace(), // assume NS IDs match
				'page_title' => $title->getDBkey(), // assume same case rules
			],
			__METHOD__,
			[],
			[ 'page' => [ 'JOIN', 'rev_id=page_latest' ] ] + $revQuery['joins']
		);

		return $row
			? ContentHandler::getContentText( Revision::newFromRow( $row )->getContent() )
			: false;
	}

	/**
	 * Returns the start of the regex for matches.
	 *
	 * @return string
	 */
	public function getRegexStart() {
		return '/[a-z0-9_\-.]*';
	}

	/**
	 * Returns the end of the regex for matches.
	 *
	 * @param int $batchSize A positive value adds the S modifier to the
	 *  "im" modifiers that are always applied
	 * @return string
	 */
	public function getRegexEnd( $batchSize ) {
		return ( $batchSize > 0 ) ? '/Sim' : '/im';
	}

	/**
	 * Hook for subclasses to warm caches before filter() runs; does nothing here.
	 *
	 * @param Title $title
	 * @param string[] $entries
	 */
	public function warmCachesForFilter( Title $title, array $entries ) {
		// subclass this
	}
}
BaseBlacklist\getSpamBlacklist
static getSpamBlacklist()
Definition: BaseBlacklist.php:96
BaseBlacklist\getBlacklistTypes
static getBlacklistTypes()
Return the array of blacklist types currently defined.
Definition: BaseBlacklist.php:89
$article
return true to allow those checks to and false if checking is done remove or add to the links of a group of changes in EnhancedChangesList Hook subscribers can return false to omit this line from recentchanges use this to change the tables headers change it to an object instance and return false override the list derivative used the name of the old file & $article
Definition: hooks.txt:1515
Title\newFromText
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:280
BaseBlacklist\getInstance
static getInstance( $type)
Returns an instance of the given blacklist.
Definition: BaseBlacklist.php:115
BaseBlacklist\getRegexEnd
getRegexEnd( $batchSize)
Returns the end of the regex for matches.
Definition: BaseBlacklist.php:440
$wgDBname
controlled by the following MediaWiki still creates a BagOStuff but calls it to it are no ops If the cache daemon can t be it should also disable itself fairly $wgDBname
Definition: memcached.txt:93
BaseBlacklist\getBlacklists
getBlacklists()
Fetch local and (possibly cached) remote blacklists.
Definition: BaseBlacklist.php:219
BaseBlacklist\getHttpText
getHttpText( $fileName)
Definition: BaseBlacklist.php:347
captcha-old.count
count
Definition: captcha-old.py:249
BaseBlacklist\getBlacklistType
getBlacklistType()
Returns the code for the blacklist implementation.
BaseBlacklist\getArticleText
getArticleText( $wiki, $article)
Fetch an article from this or another local MediaWiki database.
Definition: BaseBlacklist.php:385
BaseBlacklist\$files
array $files
Array of blacklist sources.
Definition: BaseBlacklist.php:12
BaseBlacklist\isLocalSource
static isLocalSource(Title $title)
Check if the given local page title is a spam regex source.
Definition: BaseBlacklist.php:148
BaseBlacklist\addBlacklistType
static addBlacklistType( $type, $class)
Adds a blacklist class to the registry.
Definition: BaseBlacklist.php:80
BaseBlacklist\getTypeFromTitle
static getTypeFromTitle(Title $title)
Returns the type of blacklist from the given title.
Definition: BaseBlacklist.php:201
SpamRegexBatch\regexesFromText
static regexesFromText( $source, BaseBlacklist $blacklist, $fileName=false)
Build a set of regular expressions from the given multiline input text, with empty lines and comments...
Definition: SpamRegexBatch.php:154
$revQuery
$revQuery
Definition: testCompression.php:51
wfDebugLog
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not.
Definition: GlobalFunctions.php:1082
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
Revision\selectTextFields
static selectTextFields()
Return the list of text fields that should be selected to read the revision text.
Definition: Revision.php:472
BaseBlacklist\$blacklistTypes
static array $blacklistTypes
Array containing blacklists that extend BaseBlacklist.
Definition: BaseBlacklist.php:43
Revision\getQueryInfo
static getQueryInfo( $options=[])
Return the tables, fields, and join conditions to be selected to create a new revision object.
Definition: Revision.php:521
$title
namespace and then decline to actually register it file or subcat img or subcat $title
Definition: hooks.txt:964
wfGetDB
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
Definition: GlobalFunctions.php:2693
$matches
$matches
Definition: NoLocalSettings.php:24
files
c Accompany it with the information you received as to the offer to distribute corresponding source complete source code means all the source code for all modules it plus any associated interface definition files
Definition: COPYING.txt:157
BaseBlacklist\getLocalBlacklists
getLocalBlacklists()
Returns the local blacklist.
Definition: BaseBlacklist.php:233
BaseBlacklist
Base class for different kinds of blacklists.
Definition: BaseBlacklist.php:6
BaseBlacklist\clearCache
clearCache()
Clear all primary blacklist cache keys.
Definition: BaseBlacklist.php:308
use
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to use
Definition: MIT-LICENSE.txt:10
SpamRegexBatch\regexesFromMessage
static regexesFromMessage( $message, BaseBlacklist $blacklist)
Build a set of regular expressions from a MediaWiki message.
Definition: SpamRegexBatch.php:167
DB_REPLICA
const DB_REPLICA
Definition: defines.php:25
array
The wiki should then use memcached to cache various data To use multiple just add more items to the array To increase the weight of a make its entry a array("192.168.0.1:11211", 2))
BaseBlacklist\getSharedBlacklists
getSharedBlacklists()
Fetch (possibly cached) remote blacklists.
Definition: BaseBlacklist.php:270
Revision\selectPageFields
static selectPageFields()
Return the list of page fields that should be selected from page table.
Definition: Revision.php:485
Http\get
static get( $url, $options=[], $caller=__METHOD__)
Simple wrapper for Http::request( 'GET' )
Definition: Http.php:98
$name
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:302
BaseBlacklist\$regexes
bool array $regexes
Array containing regexes to test against.
Definition: BaseBlacklist.php:19
$messageMemc
$messageMemc
Definition: Setup.php:788
$value
$value
Definition: styleTest.css.php:49
BaseBlacklist\warmCachesForFilter
warmCachesForFilter(Title $title, array $entries)
Definition: BaseBlacklist.php:448
BaseBlacklist\filter
filter(array $links, Title $title, $preventLog=false)
BaseBlacklist\$instances
static array $instances
Array of blacklist instances.
Definition: BaseBlacklist.php:53
Revision\newFromRow
static newFromRow( $row)
Definition: Revision.php:218
PROTO_HTTP
const PROTO_HTTP
Definition: Defines.php:219
Title
Represents a title within MediaWiki.
Definition: Title.php:39
BaseBlacklist\buildSharedBlacklists
buildSharedBlacklists()
Definition: BaseBlacklist.php:319
ContentHandler\getContentText
static getContentText(Content $content=null)
Convenience function for getting flat text from a Content object.
Definition: ContentHandler.php:83
$cache
$cache
Definition: mcc.php:33
ObjectCache\getMainWANInstance
static getMainWANInstance()
Get the main WAN cache object.
Definition: ObjectCache.php:378
BaseBlacklist\$warningChance
int $warningChance
Chance of receiving a warning when the filter is hit.
Definition: BaseBlacklist.php:26
BaseBlacklist\getEmailBlacklist
static getEmailBlacklist()
Definition: BaseBlacklist.php:103
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
BaseBlacklist\__construct
__construct( $settings=[])
Constructor.
Definition: BaseBlacklist.php:60
NS_MEDIAWIKI
const NS_MEDIAWIKI
Definition: Defines.php:72
class
you have access to all of the normal MediaWiki so you can get a DB use the etc For full docs on the Maintenance class
Definition: maintenance.txt:52
Revision\selectFields
static selectFields()
Return the list of revision fields that should be selected to create a new revision.
Definition: Revision.php:352
BaseBlacklist\$warningTime
int $warningTime
Definition: BaseBlacklist.php:31
$wgContLang
$wgContLang
Definition: Setup.php:809
BaseBlacklist\getWhitelists
getWhitelists()
Returns the (local) whitelist.
Definition: BaseBlacklist.php:252
wfExpandUrl
wfExpandUrl( $url, $defaultProto=PROTO_CURRENT)
Expand a potentially local URL to a fully-qualified URL.
Definition: GlobalFunctions.php:512
BaseBlacklist\getRegexStart
getRegexStart()
Returns the start of the regex for matches.
Definition: BaseBlacklist.php:430
$type
$type
Definition: testCompression.php:48
BaseBlacklist\$expiryTime
int $expiryTime
Definition: BaseBlacklist.php:36