MediaWiki  1.33.0
BaseBlacklist.php
Go to the documentation of this file.
1 <?php
2 
/**
 * Base class for different kinds of blacklists.
 *
 * Concrete blacklists (registered in self::$blacklistTypes) supply the
 * type code and the filtering logic; this class handles the registry of
 * blacklist types, singleton instances, and loading/caching of the regex
 * lists from local messages, other wiki databases, HTTP and plain files.
 */
abstract class BaseBlacklist {
	/**
	 * Array of blacklist sources.
	 *
	 * Each entry is one of: "DB: <wiki> <page title>", an http(s) or
	 * protocol-relative URL, or a local file path (see buildSharedBlacklists()).
	 *
	 * @var array
	 */
	public $files = [];

	/**
	 * Array containing regexes to test against, or false if not loaded yet.
	 *
	 * @var bool|array
	 */
	protected $regexes = false;

	/**
	 * Upper bound passed to mt_rand( 0, N ) in getHttpText(): once the warning
	 * key has expired, a request refreshes the HTTP copy only when the draw is 0.
	 *
	 * @var int
	 */
	public $warningChance = 100;

	/**
	 * Lifetime, in seconds, of the "warning" key written by getHttpText().
	 *
	 * @var int
	 */
	public $warningTime = 600;

	/**
	 * Lifetime, in seconds, of cached regex lists and cached HTTP text.
	 *
	 * @var int
	 */
	public $expiryTime = 900;

	/**
	 * Array containing blacklists that extend BaseBlacklist (type code => class name).
	 *
	 * @var array
	 */
	private static $blacklistTypes = [
		'spam' => 'SpamBlacklist',
		'email' => 'EmailBlacklist',
	];

	/**
	 * Array of blacklist instances, keyed by type code.
	 *
	 * @var array
	 */
	private static $instances = [];

	/**
	 * Constructor.
	 *
	 * Every key of $settings is assigned to the property of the same name.
	 *
	 * @param array $settings Property name => value
	 */
	public function __construct( $settings = [] ) {
		foreach ( $settings as $name => $value ) {
			$this->$name = $value;
		}
	}

	/**
	 * Run the blacklist against the given links; implemented by subclasses.
	 *
	 * @param array $links
	 * @param Title $title
	 * @param bool $preventLog
	 * @return mixed
	 */
	abstract public function filter( array $links, Title $title, $preventLog = false );

	/**
	 * Adds a blacklist class to the registry.
	 *
	 * @param string $type Type code
	 * @param string $class Name of the class implementing that type
	 */
	public static function addBlacklistType( $type, $class ) {
		self::$blacklistTypes[$type] = $class;
	}

	/**
	 * Return the array of blacklist types currently defined.
	 *
	 * @return array Type code => class name
	 */
	public static function getBlacklistTypes() {
		return self::$blacklistTypes;
	}

	/**
	 * Shortcut for getInstance( 'spam' ).
	 *
	 * @return BaseBlacklist
	 */
	public static function getSpamBlacklist() {
		return self::getInstance( 'spam' );
	}

	/**
	 * Shortcut for getInstance( 'email' ).
	 *
	 * @return BaseBlacklist
	 */
	public static function getEmailBlacklist() {
		return self::getInstance( 'email' );
	}

	/**
	 * Returns an instance of the given blacklist.
	 *
	 * The instance is created on first use from $wgBlacklistSettings[$type]
	 * and then reused for subsequent calls.
	 *
	 * @param string $type Code for the blacklist
	 * @return BaseBlacklist
	 * @throws Exception If $type is not a registered blacklist type
	 */
	public static function getInstance( $type ) {
		if ( !isset( self::$blacklistTypes[$type] ) ) {
			throw new Exception( "Invalid blacklist type '$type' passed to " . __METHOD__ );
		}

		if ( !isset( self::$instances[$type] ) ) {
			global $wgBlacklistSettings;

			// Prevent notices
			if ( !isset( $wgBlacklistSettings[$type] ) ) {
				$wgBlacklistSettings[$type] = [];
			}

			$class = self::$blacklistTypes[$type];
			self::$instances[$type] = new $class( $wgBlacklistSettings[$type] );
		}

		return self::$instances[$type];
	}

	/**
	 * Returns the code for the blacklist implementation.
	 *
	 * @return string
	 */
	abstract protected function getBlacklistType();

	/**
	 * Check if the given local page title is a spam regex source.
	 *
	 * A page counts as a source when it is one of the
	 * MediaWiki:<Type>-blacklist / MediaWiki:<Type>-whitelist messages of any
	 * registered type, or when it is named in any type's configured 'files'
	 * list, either as a "DB: <this wiki> <title>" entry or as the raw-view
	 * URL of the page.
	 *
	 * @param Title $title
	 * @return bool
	 */
	public static function isLocalSource( Title $title ) {
		global $wgDBname, $wgBlacklistSettings;

		if ( $title->getNamespace() == NS_MEDIAWIKI ) {
			$sources = [];
			foreach ( self::$blacklistTypes as $type => $class ) {
				$type = ucfirst( $type );
				// Append explicitly: the array union operator (+=) would keep
				// only the first type's entries because the integer keys collide.
				$sources[] = "$type-blacklist";
				$sources[] = "$type-whitelist";
			}

			if ( in_array( $title->getDBkey(), $sources, true ) ) {
				return true;
			}
		}

		$thisHttp = wfExpandUrl( $title->getFullUrl( 'action=raw' ), PROTO_HTTP );
		$thisHttpRegex = '/^' . preg_quote( $thisHttp, '/' ) . '(?:&.*)?$/';

		$files = [];
		foreach ( self::$blacklistTypes as $type => $class ) {
			if ( isset( $wgBlacklistSettings[$type]['files'] ) ) {
				// array_merge rather than +=, for the same key-collision reason as above.
				$files = array_merge( $files, (array)$wgBlacklistSettings[$type]['files'] );
			}
		}

		foreach ( $files as $fileName ) {
			$matches = [];
			// Same "DB:" syntax as buildSharedBlacklists(), including hyphenated wiki IDs.
			if ( preg_match( '/^DB: ([\w-]*) (.*)$/', $fileName, $matches ) ) {
				if ( $wgDBname == $matches[1] && $matches[2] == $title->getPrefixedDbKey() ) {
					// Local DB fetch of this page...
					return true;
				}
			} elseif ( preg_match( $thisHttpRegex, $fileName ) ) {
				// Raw view of this page
				return true;
			}
		}

		return false;
	}

	/**
	 * Returns the type of blacklist from the given title.
	 *
	 * @param Title $title
	 * @return bool|string Lower-cased type code, or false if the title's DB key
	 *  does not contain "<Type>-blacklist" or "<Type>-whitelist"
	 */
	public static function getTypeFromTitle( Title $title ) {
		global $wgContLang;

		$types = array_map( [ $wgContLang, 'ucfirst' ], array_keys( self::$blacklistTypes ) );
		// Type codes come from addBlacklistType(); quote them so they are matched literally.
		$quoted = [];
		foreach ( $types as $typeName ) {
			$quoted[] = preg_quote( $typeName, '/' );
		}
		$regex = '/(' . implode( '|', $quoted ) . ')-(?:blacklist|whitelist)/';

		if ( preg_match( $regex, $title->getDBkey(), $m ) ) {
			return strtolower( $m[1] );
		}

		return false;
	}

	/**
	 * Fetch local and (possibly cached) remote blacklists.
	 *
	 * The merged list is memoised on the instance after the first call.
	 *
	 * @return array
	 */
	public function getBlacklists() {
		if ( $this->regexes === false ) {
			$this->regexes = array_merge(
				$this->getLocalBlacklists(),
				$this->getSharedBlacklists()
			);
		}
		return $this->regexes;
	}

	/**
	 * Returns the local blacklist.
	 *
	 * Built from the "<type>-blacklist" message and cached for $expiryTime seconds.
	 *
	 * @return array Regular expressions
	 */
	public function getLocalBlacklists() {
		$type = $this->getBlacklistType();
		$cache = ObjectCache::getMainWANInstance();

		return $cache->getWithSetCallback(
			$cache->makeKey( 'spamblacklist', $type, 'blacklist-regex' ),
			$this->expiryTime,
			function () use ( $type ) {
				return SpamRegexBatch::regexesFromMessage( "{$type}-blacklist", $this );
			}
		);
	}

	/**
	 * Returns the (local) whitelist.
	 *
	 * Built from the "<type>-whitelist" message and cached for $expiryTime seconds.
	 *
	 * @return array Regular expressions
	 */
	public function getWhitelists() {
		$type = $this->getBlacklistType();
		$cache = ObjectCache::getMainWANInstance();

		return $cache->getWithSetCallback(
			$cache->makeKey( 'spamblacklist', $type, 'whitelist-regex' ),
			$this->expiryTime,
			function () use ( $type ) {
				return SpamRegexBatch::regexesFromMessage( "{$type}-whitelist", $this );
			}
		);
	}

	/**
	 * Fetch (possibly cached) remote blacklists.
	 *
	 * @return array Regular expressions; empty when no files are configured
	 */
	private function getSharedBlacklists() {
		$listType = $this->getBlacklistType();

		wfDebugLog( 'SpamBlacklist', "Loading $listType regex..." );

		if ( count( $this->files ) == 0 ) {
			# No lists
			wfDebugLog( 'SpamBlacklist', "no files specified\n" );
			return [];
		}

		// Set to true by the callback, i.e. only when the value had to be rebuilt.
		$miss = false;

		$cache = ObjectCache::getMainWANInstance();
		$regexes = $cache->getWithSetCallback(
			// This used to be cached per-site, but that could be bad on a shared
			// server where not all wikis have the same configuration.
			$cache->makeKey( 'spamblacklist', $listType, 'shared-blacklist-regex' ),
			$this->expiryTime,
			function () use ( &$miss ) {
				$miss = true;
				return $this->buildSharedBlacklists();
			}
		);

		if ( !$miss ) {
			wfDebugLog( 'SpamBlacklist', "Got shared spam regexes from cache\n" );
		}

		return $regexes;
	}

	/**
	 * Clear all primary blacklist cache keys.
	 *
	 * Deletes the shared-blacklist, local blacklist and whitelist regex keys
	 * for this blacklist type.
	 */
	public function clearCache() {
		$listType = $this->getBlacklistType();

		$cache = ObjectCache::getMainWANInstance();
		$cache->delete( $cache->makeKey( 'spamblacklist', $listType, 'shared-blacklist-regex' ) );
		$cache->delete( $cache->makeKey( 'spamblacklist', $listType, 'blacklist-regex' ) );
		$cache->delete( $cache->makeKey( 'spamblacklist', $listType, 'whitelist-regex' ) );

		wfDebugLog( 'SpamBlacklist', "$listType blacklist local cache cleared.\n" );
	}

	/**
	 * Load every configured source in $files and turn each into regex batches.
	 *
	 * @return array Regular expressions from all sources, in source order
	 */
	private function buildSharedBlacklists() {
		$regexes = [];
		$listType = $this->getBlacklistType();
		# Load lists
		wfDebugLog( 'SpamBlacklist', "Constructing $listType blacklist\n" );
		foreach ( $this->files as $fileName ) {
			$matches = [];
			if ( preg_match( '/^DB: ([\w-]*) (.*)$/', $fileName, $matches ) ) {
				$text = $this->getArticleText( $matches[1], $matches[2] );
			} elseif ( preg_match( '/^(https?:)?\/\//', $fileName ) ) {
				$text = $this->getHttpText( $fileName );
			} else {
				$text = file_get_contents( $fileName );
				wfDebugLog( 'SpamBlacklist', "got from file $fileName\n" );
			}

			// Build a separate batch of regexes from each source.
			// While in theory we could squeeze a little efficiency
			// out of combining multiple sources in one regex, if
			// there's a bad line in one of them we'll gain more
			// from only having to break that set into smaller pieces.
			$regexes = array_merge(
				$regexes,
				SpamRegexBatch::regexesFromText( $text, $this, $fileName )
			);
		}

		return $regexes;
	}

	/**
	 * Fetch a blacklist over HTTP, going through $messageMemc first.
	 *
	 * @param string $fileName URL of the list
	 * @return string|bool The list text, or whatever Http::get() returned on failure
	 */
	private function getHttpText( $fileName ) {
		global $wgDBname, $messageMemc;
		$listType = $this->getBlacklistType();

		# HTTP request
		# To keep requests to a minimum, we save results into $messageMemc, which is
		# similar to $wgMemc except almost certain to exist. By default, it is stored
		# in the database
		# There are two keys, when the warning key expires, a random thread will refresh
		# the real key. This reduces the chance of multiple requests under high traffic
		# conditions.
		$key = "{$listType}_blacklist_file:$fileName";
		$warningKey = "$wgDBname:{$listType}filewarning:$fileName";
		$httpText = $messageMemc->get( $key );
		$warning = $messageMemc->get( $warningKey );

		if ( !is_string( $httpText ) || ( !$warning && !mt_rand( 0, $this->warningChance ) ) ) {
			wfDebugLog( 'SpamBlacklist', "Loading $listType blacklist from $fileName\n" );
			$httpText = Http::get( $fileName );
			if ( $httpText === false ) {
				wfDebugLog( 'SpamBlacklist', "Error loading $listType blacklist from $fileName\n" );
			}
			$messageMemc->set( $warningKey, 1, $this->warningTime );
			$messageMemc->set( $key, $httpText, $this->expiryTime );
		} else {
			wfDebugLog( 'SpamBlacklist', "Got $listType blacklist from HTTP cache for $fileName\n" );
		}
		return $httpText;
	}

	/**
	 * Fetch an article from this or another local MediaWiki database.
	 *
	 * @param string $wiki Wiki ID passed to wfGetDB()
	 * @param string $article Page title text
	 * @return string|bool Text of the page's latest revision, or false if the
	 *  title is invalid or no matching row was found
	 */
	private function getArticleText( $wiki, $article ) {
		wfDebugLog( 'SpamBlacklist',
			"Fetching {$this->getBlacklistType()} blacklist from '$article' on '$wiki'...\n" );

		$title = Title::newFromText( $article );
		if ( !$title ) {
			// Title::newFromText() gives no Title for unparseable input;
			// treat that like a missing page.
			return false;
		}

		// Load all the relevant tables from the correct DB.
		// This assumes that old_text is the actual text or
		// that the external store system is at least unified.
		if ( is_callable( [ Revision::class, 'getQueryInfo' ] ) ) {
			$revQuery = Revision::getQueryInfo( [ 'page', 'text' ] );
		} else {
			$revQuery = [
				'tables' => [ 'revision', 'page', 'text' ],
				'fields' => array_merge(
					Revision::selectFields(),
					Revision::selectPageFields(),
					Revision::selectTextFields()
				),
				'joins' => [
					'text' => [ 'JOIN', 'old_id=rev_text_id' ]
				],
			];
		}
		$row = wfGetDB( DB_REPLICA, [], $wiki )->selectRow(
			$revQuery['tables'],
			$revQuery['fields'],
			[
				'page_namespace' => $title->getNamespace(), // assume NS IDs match
				'page_title' => $title->getDBkey(), // assume same case rules
			],
			__METHOD__,
			[],
			[ 'page' => [ 'JOIN', 'rev_id=page_latest' ] ] + $revQuery['joins']
		);

		return $row
			? ContentHandler::getContentText( Revision::newFromRow( $row )->getContent() )
			: false;
	}

	/**
	 * Returns the start of the regex for matches.
	 *
	 * @return string
	 */
	public function getRegexStart() {
		return '/[a-z0-9_\-.]*';
	}

	/**
	 * Returns the end of the regex for matches.
	 *
	 * @param int $batchSize When greater than zero, the "S" modifier is added
	 * @return string
	 */
	public function getRegexEnd( $batchSize ) {
		return ( $batchSize > 0 ) ? '/Sim' : '/im';
	}

	/**
	 * Hook for subclasses to pre-load caches before filter() runs; no-op here.
	 *
	 * @param Title $title
	 * @param string[] $entries
	 */
	public function warmCachesForFilter( Title $title, array $entries ) {
		// subclass this
	}
}
BaseBlacklist\getSpamBlacklist
static getSpamBlacklist()
Definition: BaseBlacklist.php:96
BaseBlacklist\getBlacklistTypes
static getBlacklistTypes()
Return the array of blacklist types currently defined.
Definition: BaseBlacklist.php:89
$article
return true to allow those checks to and false if checking is done remove or add to the links of a group of changes in EnhancedChangesList Hook subscribers can return false to omit this line from recentchanges use this to change the tables headers change it to an object instance and return false override the list derivative used the name of the old file & $article
Definition: hooks.txt:1476
Title\newFromText
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:306
BaseBlacklist\getInstance
static getInstance( $type)
Returns an instance of the given blacklist.
Definition: BaseBlacklist.php:115
BaseBlacklist\getRegexEnd
getRegexEnd( $batchSize)
Returns the end of the regex for matches.
Definition: BaseBlacklist.php:441
$wgDBname
controlled by the following MediaWiki still creates a BagOStuff but calls it to it are no ops If the cache daemon can t be it should also disable itself fairly $wgDBname
Definition: memcached.txt:93
BaseBlacklist\getBlacklists
getBlacklists()
Fetch local and (possibly cached) remote blacklists.
Definition: BaseBlacklist.php:220
BaseBlacklist\getHttpText
getHttpText( $fileName)
Definition: BaseBlacklist.php:348
captcha-old.count
count
Definition: captcha-old.py:249
BaseBlacklist\getBlacklistType
getBlacklistType()
Returns the code for the blacklist implementation.
BaseBlacklist\getArticleText
getArticleText( $wiki, $article)
Fetch an article from this or another local MediaWiki database.
Definition: BaseBlacklist.php:386
BaseBlacklist\$files
array $files
Array of blacklist sources.
Definition: BaseBlacklist.php:12
BaseBlacklist\isLocalSource
static isLocalSource(Title $title)
Check if the given local page title is a spam regex source.
Definition: BaseBlacklist.php:148
BaseBlacklist\addBlacklistType
static addBlacklistType( $type, $class)
Adds a blacklist class to the registry.
Definition: BaseBlacklist.php:80
BaseBlacklist\getTypeFromTitle
static getTypeFromTitle(Title $title)
Returns the type of blacklist from the given title.
Definition: BaseBlacklist.php:202
SpamRegexBatch\regexesFromText
static regexesFromText( $source, BaseBlacklist $blacklist, $fileName=false)
Build a set of regular expressions from the given multiline input text, with empty lines and comments...
Definition: SpamRegexBatch.php:155
$revQuery
$revQuery
Definition: testCompression.php:51
wfDebugLog
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not.
Definition: GlobalFunctions.php:1043
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
Revision\selectTextFields
static selectTextFields()
Return the list of text fields that should be selected to read the revision text.
Definition: Revision.php:462
BaseBlacklist\$blacklistTypes
static array $blacklistTypes
Array containing blacklists that extend BaseBlacklist.
Definition: BaseBlacklist.php:43
Revision\getQueryInfo
static getQueryInfo( $options=[])
Return the tables, fields, and join conditions to be selected to create a new revision object.
Definition: Revision.php:511
$title
namespace and then decline to actually register it file or subcat img or subcat $title
Definition: hooks.txt:925
wfGetDB
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
Definition: GlobalFunctions.php:2636
$matches
$matches
Definition: NoLocalSettings.php:24
Http\get
static get( $url, array $options=[], $caller=__METHOD__)
Simple wrapper for Http::request( 'GET' )
Definition: Http.php:98
files
c Accompany it with the information you received as to the offer to distribute corresponding source complete source code means all the source code for all modules it plus any associated interface definition files
Definition: COPYING.txt:157
BaseBlacklist\getLocalBlacklists
getLocalBlacklists()
Returns the local blacklist.
Definition: BaseBlacklist.php:234
BaseBlacklist
Base class for different kinds of blacklists.
Definition: BaseBlacklist.php:6
BaseBlacklist\clearCache
clearCache()
Clear all primary blacklist cache keys.
Definition: BaseBlacklist.php:309
use
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to use
Definition: MIT-LICENSE.txt:10
SpamRegexBatch\regexesFromMessage
static regexesFromMessage( $message, BaseBlacklist $blacklist)
Build a set of regular expressions from a MediaWiki message.
Definition: SpamRegexBatch.php:168
DB_REPLICA
const DB_REPLICA
Definition: defines.php:25
array
The wiki should then use memcached to cache various data To use multiple just add more items to the array To increase the weight of a make its entry a array("192.168.0.1:11211", 2))
BaseBlacklist\getSharedBlacklists
getSharedBlacklists()
Fetch (possibly cached) remote blacklists.
Definition: BaseBlacklist.php:271
Revision\selectPageFields
static selectPageFields()
Return the list of page fields that should be selected from page table.
Definition: Revision.php:475
$name
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:271
BaseBlacklist\$regexes
bool array $regexes
Array containing regexes to test against.
Definition: BaseBlacklist.php:19
$messageMemc
$messageMemc
Definition: Setup.php:769
$value
$value
Definition: styleTest.css.php:49
BaseBlacklist\warmCachesForFilter
warmCachesForFilter(Title $title, array $entries)
Definition: BaseBlacklist.php:449
BaseBlacklist\filter
filter(array $links, Title $title, $preventLog=false)
BaseBlacklist\$instances
static array $instances
Array of blacklist instances.
Definition: BaseBlacklist.php:53
Revision\newFromRow
static newFromRow( $row)
Definition: Revision.php:222
PROTO_HTTP
const PROTO_HTTP
Definition: Defines.php:219
Title
Represents a title within MediaWiki.
Definition: Title.php:40
BaseBlacklist\buildSharedBlacklists
buildSharedBlacklists()
Definition: BaseBlacklist.php:320
ContentHandler\getContentText
static getContentText(Content $content=null)
Convenience function for getting flat text from a Content object.
Definition: ContentHandler.php:83
$cache
$cache
Definition: mcc.php:33
ObjectCache\getMainWANInstance
static getMainWANInstance()
Get the main WAN cache object.
Definition: ObjectCache.php:369
BaseBlacklist\$warningChance
int $warningChance
Chance of receiving a warning when the filter is hit.
Definition: BaseBlacklist.php:26
BaseBlacklist\getEmailBlacklist
static getEmailBlacklist()
Definition: BaseBlacklist.php:103
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
BaseBlacklist\__construct
__construct( $settings=[])
Constructor.
Definition: BaseBlacklist.php:60
NS_MEDIAWIKI
const NS_MEDIAWIKI
Definition: Defines.php:72
class
you have access to all of the normal MediaWiki so you can get a DB use the etc For full docs on the Maintenance class
Definition: maintenance.txt:52
Revision\selectFields
static selectFields()
Return the list of revision fields that should be selected to create a new revision.
Definition: Revision.php:342
BaseBlacklist\$warningTime
int $warningTime
Definition: BaseBlacklist.php:31
$wgContLang
$wgContLang
Definition: Setup.php:790
BaseBlacklist\getWhitelists
getWhitelists()
Returns the (local) whitelist.
Definition: BaseBlacklist.php:253
wfExpandUrl
wfExpandUrl( $url, $defaultProto=PROTO_CURRENT)
Expand a potentially local URL to a fully-qualified URL.
Definition: GlobalFunctions.php:515
BaseBlacklist\getRegexStart
getRegexStart()
Returns the start of the regex for matches.
Definition: BaseBlacklist.php:431
$type
$type
Definition: testCompression.php:48
BaseBlacklist\$expiryTime
int $expiryTime
Definition: BaseBlacklist.php:36