MediaWiki REL1_33
BaseBlacklist.php
Go to the documentation of this file.
1<?php
2
6abstract class BaseBlacklist {
12 public $files = [];
13
19 protected $regexes = false;
20
26 public $warningChance = 100;
27
31 public $warningTime = 600;
32
36 public $expiryTime = 900;
37
43 private static $blacklistTypes = [
44 'spam' => 'SpamBlacklist',
45 'email' => 'EmailBlacklist',
46 ];
47
53 private static $instances = [];
54
/**
 * Constructor.
 *
 * Every entry of $settings is copied onto this object as a property
 * whose name is the entry's key, overriding the declared defaults.
 *
 * @param array $settings Map of property name => value
 */
public function __construct( $settings = [] ) {
	foreach ( $settings as $property => $setting ) {
		$this->{$property} = $setting;
	}
}
65
/**
 * Run this blacklist's filter; every concrete blacklist supplies the implementation.
 *
 * @param array $links NOTE(review): presumably the entries to check against the list - confirm in subclasses
 * @param Title $title NOTE(review): presumably the page the entries belong to - confirm in subclasses
 * @param bool $preventLog Defaults to false. NOTE(review): presumably suppresses logging of hits - confirm
 * @return mixed Not determinable from this declaration; see the concrete subclasses
 */
72 abstract public function filter( array $links, Title $title, $preventLog = false );
73
/**
 * Adds a blacklist class to the registry.
 *
 * An existing registration under the same type code is replaced.
 *
 * @param string $type Type code used as the registry key
 * @param string $class Name of the class to instantiate for that type
 */
public static function addBlacklistType( $type, $class ) {
	// Later registrations for the same code win.
	self::$blacklistTypes[$type] = $class;
}
83
/**
 * Return the array of blacklist types currently defined.
 *
 * @return array Map of type code => class name
 */
public static function getBlacklistTypes() {
	// The body was empty, so callers received null instead of the registry.
	return self::$blacklistTypes;
}
92
/**
 * Convenience accessor for the blacklist registered under the 'spam' type.
 *
 * @return BaseBlacklist Shared instance, created on first use by getInstance()
 */
public static function getSpamBlacklist() {
	$spamBlacklist = self::getInstance( 'spam' );

	return $spamBlacklist;
}
99
/**
 * Convenience accessor for the blacklist registered under the 'email' type.
 *
 * @return BaseBlacklist Shared instance, created on first use by getInstance()
 */
public static function getEmailBlacklist() {
	$emailBlacklist = self::getInstance( 'email' );

	return $emailBlacklist;
}
106
/**
 * Returns an instance of the given blacklist.
 *
 * Instances are created lazily, once per type, and reused afterwards.
 * The constructor receives the settings configured for that type in
 * $wgBlacklistSettings (an empty array when nothing is configured).
 *
 * @param string $type Code for the blacklist, as registered in the type registry
 * @return BaseBlacklist
 * @throws Exception If no class is registered for $type
 */
public static function getInstance( $type ) {
	if ( !isset( self::$blacklistTypes[$type] ) ) {
		throw new Exception( "Invalid blacklist type '$type' passed to " . __METHOD__ );
	}

	if ( !isset( self::$instances[$type] ) ) {
		// Without this declaration the name refers to an undefined local,
		// so the configured settings would never reach the constructor.
		global $wgBlacklistSettings;

		// Prevent notices
		if ( !isset( $wgBlacklistSettings[$type] ) ) {
			$wgBlacklistSettings[$type] = [];
		}

		$class = self::$blacklistTypes[$type];
		self::$instances[$type] = new $class( $wgBlacklistSettings[$type] );
	}

	return self::$instances[$type];
}
134
/**
 * Returns the code for the blacklist implementation.
 *
 * Within this class the code is embedded in cache keys, in the
 * "<code>-blacklist" / "<code>-whitelist" message names and in debug log lines.
 *
 * @return string
 */
140 abstract protected function getBlacklistType();
141
/**
 * Check if the given local page title is a spam regex source.
 *
 * A title counts as a source when it is one of the
 * "<Type>-blacklist" / "<Type>-whitelist" pages in the MediaWiki
 * namespace, or when one of the configured 'files' entries points at it,
 * either as "DB: <wiki> <title>" for this wiki or as its action=raw URL.
 *
 * @param Title $title
 * @return bool
 */
public static function isLocalSource( Title $title ) {
	// Both values are configuration globals; they must be declared here
	// or the checks below silently run against undefined locals.
	global $wgDBname, $wgBlacklistSettings;

	if ( $title->getNamespace() == NS_MEDIAWIKI ) {
		$sources = [];
		foreach ( self::$blacklistTypes as $type => $class ) {
			$type = ucfirst( $type );
			// Append rather than union with "+": both sides are lists keyed
			// 0 and 1, so "+" would discard every type after the first.
			$sources[] = "$type-blacklist";
			$sources[] = "$type-whitelist";
		}

		if ( in_array( $title->getDBkey(), $sources, true ) ) {
			return true;
		}
	}

	$thisHttp = wfExpandUrl( $title->getFullUrl( 'action=raw' ), PROTO_HTTP );
	$thisHttpRegex = '/^' . preg_quote( $thisHttp, '/' ) . '(?:&.*)?$/';

	$files = [];
	foreach ( self::$blacklistTypes as $type => $class ) {
		if ( isset( $wgBlacklistSettings[$type]['files'] ) ) {
			// array_merge() for the same reason as above: "+" would drop
			// entries whose numeric keys collide across types.
			$files = array_merge( $files, $wgBlacklistSettings[$type]['files'] );
		}
	}

	foreach ( $files as $fileName ) {
		$matches = [];
		// Accept hyphens in the wiki ID, matching what buildSharedBlacklists() loads.
		if ( preg_match( '/^DB: ([\w-]*) (.*)$/', $fileName, $matches ) ) {
			if ( $wgDBname === $matches[1] && $matches[2] === $title->getPrefixedDbKey() ) {
				// Local DB fetch of this page...
				return true;
			}
		} elseif ( preg_match( $thisHttpRegex, $fileName ) ) {
			// Raw view of this page
			return true;
		}
	}

	return false;
}
194
/**
 * Returns the type of blacklist from the given title.
 *
 * The title's DB key is searched for "<Type>-blacklist" or
 * "<Type>-whitelist", where <Type> is a registered type code passed
 * through the content language's ucfirst().
 *
 * @param Title $title
 * @return string|bool Lower-cased type code, or false when nothing matches
 */
public static function getTypeFromTitle( Title $title ) {
	global $wgContLang;

	$alternatives = [];
	foreach ( array_keys( self::$blacklistTypes ) as $typeCode ) {
		$alternatives[] = $wgContLang->ucfirst( $typeCode );
	}
	$pattern = '/(' . implode( '|', $alternatives ) . ')-(?:blacklist|whitelist)/';

	return preg_match( $pattern, $title->getDBkey(), $found )
		? strtolower( $found[1] )
		: false;
}
214
/**
 * Fetch local and (possibly cached) remote blacklists.
 *
 * The combined list is computed once per instance and kept in
 * $this->regexes for subsequent calls.
 *
 * @return array Local regexes followed by the shared ones
 */
public function getBlacklists() {
	if ( $this->regexes !== false ) {
		// Already assembled earlier in this request.
		return $this->regexes;
	}

	$local = $this->getLocalBlacklists();
	$shared = $this->getSharedBlacklists();
	$this->regexes = array_merge( $local, $shared );

	return $this->regexes;
}
228
/**
 * Returns the local blacklist.
 *
 * The regexes are built from the "<type>-blacklist" message and cached
 * in the main WAN cache for $this->expiryTime seconds.
 *
 * @return array Regular expressions
 */
public function getLocalBlacklists() {
	$blacklist = $this;
	$listType = $this->getBlacklistType();
	$wanCache = ObjectCache::getMainWANInstance();
	$cacheKey = $wanCache->makeKey( 'spamblacklist', $listType, 'blacklist-regex' );

	// Only runs on a cache miss.
	$rebuild = function () use ( $blacklist, $listType ) {
		return SpamRegexBatch::regexesFromMessage( "{$listType}-blacklist", $blacklist );
	};

	return $wanCache->getWithSetCallback( $cacheKey, $this->expiryTime, $rebuild );
}
247
/**
 * Returns the (local) whitelist.
 *
 * The regexes are built from the "<type>-whitelist" message and cached
 * in the main WAN cache for $this->expiryTime seconds.
 *
 * @return array Regular expressions
 */
public function getWhitelists() {
	$blacklist = $this;
	$listType = $this->getBlacklistType();
	$wanCache = ObjectCache::getMainWANInstance();
	$cacheKey = $wanCache->makeKey( 'spamblacklist', $listType, 'whitelist-regex' );

	// Only runs on a cache miss.
	$rebuild = function () use ( $blacklist, $listType ) {
		return SpamRegexBatch::regexesFromMessage( "{$listType}-whitelist", $blacklist );
	};

	return $wanCache->getWithSetCallback( $cacheKey, $this->expiryTime, $rebuild );
}
266
/**
 * Fetch (possibly cached) remote blacklists.
 *
 * Returns an empty array straight away when no files are configured;
 * otherwise the regexes come from the WAN cache, being rebuilt through
 * buildSharedBlacklists() on a miss.
 *
 * @return array Regular expressions
 */
private function getSharedBlacklists() {
	$type = $this->getBlacklistType();

	wfDebugLog( 'SpamBlacklist', "Loading $type regex..." );

	if ( count( $this->files ) == 0 ) {
		# No lists
		wfDebugLog( 'SpamBlacklist', "no files specified\n" );
		return [];
	}

	// Flipped by the callback so we can tell a rebuild from a cache hit.
	$rebuilt = false;
	$blacklist = $this;
	$wanCache = ObjectCache::getMainWANInstance();

	// This used to be cached per-site, but that could be bad on a shared
	// server where not all wikis have the same configuration.
	$cacheKey = $wanCache->makeKey( 'spamblacklist', $type, 'shared-blacklist-regex' );
	$onMiss = function () use ( $blacklist, &$rebuilt ) {
		$rebuilt = true;
		return $blacklist->buildSharedBlacklists();
	};
	$sharedRegexes = $wanCache->getWithSetCallback( $cacheKey, $this->expiryTime, $onMiss );

	if ( !$rebuilt ) {
		wfDebugLog( 'SpamBlacklist', "Got shared spam regexes from cache\n" );
	}

	return $sharedRegexes;
}
303
/**
 * Clear all primary blacklist cache keys.
 *
 * Deletes the shared-blacklist, local-blacklist and whitelist regex
 * entries for this blacklist type from the main WAN cache.
 */
public function clearCache() {
	$type = $this->getBlacklistType();
	$wanCache = ObjectCache::getMainWANInstance();

	$suffixes = [ 'shared-blacklist-regex', 'blacklist-regex', 'whitelist-regex' ];
	foreach ( $suffixes as $suffix ) {
		$wanCache->delete( $wanCache->makeKey( 'spamblacklist', $type, $suffix ) );
	}

	wfDebugLog( 'SpamBlacklist', "$type blacklist local cache cleared.\n" );
}
319
/**
 * Build the shared blacklist regexes from every configured source.
 *
 * Each entry of $this->files is loaded according to its form:
 *  - "DB: <wiki> <title>" is read from a wiki database via getArticleText(),
 *  - an http(s) or protocol-relative URL is fetched via getHttpText(),
 *  - anything else is read as a local file.
 *
 * @return array Regular expressions from all sources, in source order
 */
private function buildSharedBlacklists() {
	$regexes = [];
	$listType = $this->getBlacklistType();
	# Load lists
	wfDebugLog( 'SpamBlacklist', "Constructing $listType blacklist\n" );
	foreach ( $this->files as $fileName ) {
		$matches = [];
		if ( preg_match( '/^DB: ([\w-]*) (.*)$/', $fileName, $matches ) ) {
			$text = $this->getArticleText( $matches[1], $matches[2] );
		} elseif ( preg_match( '/^(https?:)?\/\//', $fileName ) ) {
			$text = $this->getHttpText( $fileName );
		} else {
			$text = file_get_contents( $fileName );
			wfDebugLog( 'SpamBlacklist', "got from file $fileName\n" );
		}

		// Build a separate batch of regexes from each source.
		// While in theory we could squeeze a little efficiency
		// out of combining multiple sources in one regex, if
		// there's a bad line in one of them we'll gain more
		// from only having to break that set into smaller pieces.
		// The batch must be merged into the accumulator; otherwise the
		// method would always return the initial empty array.
		$regexes = array_merge(
			$regexes,
			SpamRegexBatch::regexesFromText( $text, $this, $fileName )
		);
	}

	return $regexes;
}
347
/**
 * Fetch a blacklist source over HTTP, going through $messageMemc first.
 *
 * @param string $fileName URL of the blacklist source
 * @return string|bool Source text; false when the request failed
 */
private function getHttpText( $fileName ) {
	global $wgDBname, $messageMemc;
	$type = $this->getBlacklistType();

	# HTTP request
	# To keep requests to a minimum, we save results into $messageMemc, which is
	# similar to $wgMemc except almost certain to exist. By default, it is stored
	# in the database
	# There are two keys, when the warning key expires, a random thread will refresh
	# the real key. This reduces the chance of multiple requests under high traffic
	# conditions.
	$textKey = "{$type}_blacklist_file:$fileName";
	$warnKey = "$wgDBname:{$type}filewarning:$fileName";
	$httpText = $messageMemc->get( $textKey );
	$warned = $messageMemc->get( $warnKey );

	// Refresh when nothing usable is cached, or when the warning key has
	// lapsed and this request draws the zero from mt_rand().
	$mustFetch = !is_string( $httpText )
		|| ( !$warned && !mt_rand( 0, $this->warningChance ) );

	if ( !$mustFetch ) {
		wfDebugLog( 'SpamBlacklist', "Got $type blacklist from HTTP cache for $fileName\n" );
		return $httpText;
	}

	wfDebugLog( 'SpamBlacklist', "Loading $type blacklist from $fileName\n" );
	$httpText = Http::get( $fileName );
	if ( $httpText === false ) {
		wfDebugLog( 'SpamBlacklist', "Error loading $type blacklist from $fileName\n" );
	}
	$messageMemc->set( $warnKey, 1, $this->warningTime );
	$messageMemc->set( $textKey, $httpText, $this->expiryTime );

	return $httpText;
}
377
/**
 * Fetch an article from this or another local MediaWiki database.
 *
 * @param string $wiki Wiki ID passed to wfGetDB() to select the database
 * @param string $article Title text of the page to load
 * @return string|bool Text of the page's latest revision; false when the
 *  title cannot be parsed or no matching row is found
 */
private function getArticleText( $wiki, $article ) {
	wfDebugLog( 'SpamBlacklist',
		"Fetching {$this->getBlacklistType()} blacklist from '$article' on '$wiki'...\n" );

	$title = Title::newFromText( $article );
	if ( !$title ) {
		// A misconfigured source name must not fatal on the method calls
		// below; report it the same way as a missing page.
		wfDebugLog( 'SpamBlacklist', "Invalid title '$article' for wiki '$wiki'\n" );
		return false;
	}

	// Load all the relevant tables from the correct DB.
	// This assumes that old_text is the actual text or
	// that the external store system is at least unified.
	if ( is_callable( [ Revision::class, 'getQueryInfo' ] ) ) {
		$revQuery = Revision::getQueryInfo( [ 'page', 'text' ] );
	} else {
		// Fallback for Revision classes without getQueryInfo(): assemble
		// the same tables/fields/joins structure by hand.
		$revQuery = [
			'tables' => [ 'revision', 'page', 'text' ],
			'fields' => array_merge(
				Revision::selectFields(),
				Revision::selectPageFields(),
				Revision::selectTextFields()
			),
			'joins' => [
				'text' => [ 'JOIN', 'old_id=rev_text_id' ]
			],
		];
	}
	$row = wfGetDB( DB_REPLICA, [], $wiki )->selectRow(
		$revQuery['tables'],
		$revQuery['fields'],
		[
			'page_namespace' => $title->getNamespace(), // assume NS IDs match
			'page_title' => $title->getDBkey(), // assume same case rules
		],
		__METHOD__,
		[],
		[ 'page' => [ 'JOIN', 'rev_id=page_latest' ] ] + $revQuery['joins']
	);

	return $row
		? ContentHandler::getContentText( Revision::newFromRow( $row )->getContent() )
		: false;
}
425
/**
 * Returns the start of the regex for matches.
 *
 * @return string Opening delimiter followed by a run of [a-z0-9_\-.] characters
 */
public function getRegexStart() {
	$prefix = '/[a-z0-9_\-.]*';

	return $prefix;
}
434
/**
 * Returns the end of the regex for matches.
 *
 * @param int $batchSize Size of the batch the regex is built for
 * @return string Closing delimiter plus modifiers; the S modifier is
 *  only included when $batchSize is greater than zero
 */
public function getRegexEnd( $batchSize ) {
	if ( $batchSize > 0 ) {
		return '/Sim';
	}

	return '/im';
}
444
/**
 * Hook point for cache warming ahead of a filter() call.
 *
 * The base implementation does nothing.
 *
 * @param Title $title
 * @param array $entries
 */
public function warmCachesForFilter( Title $title, array $entries ) {
	// Intentionally empty: subclasses override this when they have caches to warm.
}
452}
and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy
c Accompany it with the information you received as to the offer to distribute corresponding source complete source code means all the source code for all modules it plus any associated interface definition files
Definition COPYING.txt:158
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
wfExpandUrl( $url, $defaultProto=PROTO_CURRENT)
Expand a potentially local URL to a fully-qualified URL.
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not.
$messageMemc
Definition Setup.php:769
$wgContLang
Definition Setup.php:790
Base class for different kinds of blacklists.
static array $blacklistTypes
Array containing blacklists that extend BaseBlacklist.
getLocalBlacklists()
Returns the local blacklist.
static getBlacklistTypes()
Return the array of blacklist types currently defined.
array $files
Array of blacklist sources.
static getEmailBlacklist()
__construct( $settings=[])
Constructor.
static getSpamBlacklist()
getBlacklists()
Fetch local and (possibly cached) remote blacklists.
clearCache()
Clear all primary blacklist cache keys.
filter(array $links, Title $title, $preventLog=false)
getWhitelists()
Returns the (local) whitelist.
getSharedBlacklists()
Fetch (possibly cached) remote blacklists.
getBlacklistType()
Returns the code for the blacklist implementation.
getRegexStart()
Returns the start of the regex for matches.
static getInstance( $type)
Returns an instance of the given blacklist.
getArticleText( $wiki, $article)
Fetch an article from this or another local MediaWiki database.
static getTypeFromTitle(Title $title)
Returns the type of blacklist from the given title.
bool array $regexes
Array containing regexes to test against.
static isLocalSource(Title $title)
Check if the given local page title is a spam regex source.
static array $instances
Array of blacklist instances.
getHttpText( $fileName)
static addBlacklistType( $type, $class)
Adds a blacklist class to the registry.
int $warningChance
Chance of receiving a warning when the filter is hit.
getRegexEnd( $batchSize)
Returns the end of the regex for matches.
warmCachesForFilter(Title $title, array $entries)
static get( $url, array $options=[], $caller=__METHOD__)
Simple wrapper for Http::request( 'GET' )
Definition Http.php:98
static selectTextFields()
Return the list of text fields that should be selected to read the revision text.
Definition Revision.php:462
static selectPageFields()
Return the list of page fields that should be selected from page table.
Definition Revision.php:475
static selectFields()
Return the list of revision fields that should be selected to create a new revision.
Definition Revision.php:342
static newFromRow( $row)
Definition Revision.php:222
static getQueryInfo( $options=[])
Return the tables, fields, and join conditions to be selected to create a new revision object.
Definition Revision.php:511
static regexesFromMessage( $message, BaseBlacklist $blacklist)
Build a set of regular expressions from a MediaWiki message.
static regexesFromText( $source, BaseBlacklist $blacklist, $fileName=false)
Build a set of regular expressions from the given multiline input text, with empty lines and comments...
Represents a title within MediaWiki.
Definition Title.php:40
return true to allow those checks to and false if checking is done remove or add to the links of a group of changes in EnhancedChangesList Hook subscribers can return false to omit this line from recentchanges use this to change the tables headers change it to an object instance and return false override the list derivative used the name of the old file & $article
Definition hooks.txt:1580
const PROTO_HTTP
Definition Defines.php:228
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback function
Definition injection.txt:30
$cache
Definition mcc.php:33
controlled by the following MediaWiki still creates a BagOStuff but calls it to it are no ops If the cache daemon can t be it should also disable itself fairly $wgDBname
The wiki should then use memcached to cache various data To use multiple just add more items to the array To increase the weight of a make its entry a array("192.168.0.1:11211", 2))
const DB_REPLICA
Definition defines.php:25