MediaWiki REL1_30
BaseBlacklist.php
Go to the documentation of this file.
1<?php
2
/**
 * Base class for different kinds of blacklists.
 *
 * A blacklist is a set of regular expressions built from local interface
 * messages ("<Type>-blacklist" / "<Type>-whitelist") plus any number of
 * shared sources listed in $files (local database pages, HTTP(S) URLs or
 * plain files). Built regexes are cached for $expiryTime seconds.
 */
abstract class BaseBlacklist {
	/**
	 * Array of blacklist sources.
	 *
	 * Each entry is one of: "DB: <wiki> <page title>", an http(s) or
	 * protocol-relative URL, or a path readable with file_get_contents().
	 *
	 * @var array
	 */
	public $files = [];

	/**
	 * Array containing regexes to test against, or false while not loaded yet.
	 *
	 * @var bool|array
	 */
	protected $regexes = false;

	/**
	 * Controls how often an expired warning key triggers a refresh of an HTTP
	 * source: a refresh happens when mt_rand( 0, $warningChance ) yields 0.
	 *
	 * @var int
	 */
	public $warningChance = 100;

	/**
	 * Lifetime, in seconds, of the warning key set after an HTTP fetch.
	 *
	 * @var int
	 */
	public $warningTime = 600;

	/**
	 * Lifetime, in seconds, of cached regexes and of cached HTTP source text.
	 *
	 * @var int
	 */
	public $expiryTime = 900;

	/**
	 * Array containing blacklists that extend BaseBlacklist (type => class name).
	 *
	 * @var array
	 */
	private static $blacklistTypes = [
		'spam' => 'SpamBlacklist',
		'email' => 'EmailBlacklist',
	];

	/**
	 * Array of blacklist instances, keyed by type.
	 *
	 * @var array
	 */
	private static $instances = [];

	/**
	 * Constructor.
	 *
	 * @param array $settings Map of property name => value; every entry is
	 *  assigned to the property of the same name on this object.
	 */
	public function __construct( $settings = [] ) {
		foreach ( $settings as $name => $value ) {
			$this->$name = $value;
		}
	}

	/**
	 * Run this blacklist against the given entries. Implemented by subclasses.
	 *
	 * @param array $links Entries to check
	 * @param Title $title Page the entries belong to
	 * @param bool $preventLog NOTE(review): presumably suppresses hit logging;
	 *  confirm against the subclasses.
	 * @return mixed Defined by the implementing subclass
	 */
	abstract public function filter( array $links, Title $title, $preventLog = false );

	/**
	 * Adds a blacklist class to the registry.
	 *
	 * @param string $type Type code, e.g. 'spam'
	 * @param string $class Name of a class extending BaseBlacklist
	 */
	public static function addBlacklistType( $type, $class ) {
		self::$blacklistTypes[$type] = $class;
	}

	/**
	 * Return the array of blacklist types currently defined.
	 *
	 * @return array Map of type code => class name
	 */
	public static function getBlacklistTypes() {
		return self::$blacklistTypes;
	}

	/**
	 * Returns an instance of the given blacklist, creating it on first use.
	 *
	 * @param string $type Code for the blacklist
	 * @return BaseBlacklist
	 * @throws Exception If $type has not been registered
	 */
	public static function getInstance( $type ) {
		if ( !isset( self::$blacklistTypes[$type] ) ) {
			throw new Exception( "Invalid blacklist type '$type' passed to " . __METHOD__ );
		}

		if ( !isset( self::$instances[$type] ) ) {
			global $wgBlacklistSettings;

			// Prevent notices
			if ( !isset( $wgBlacklistSettings[$type] ) ) {
				$wgBlacklistSettings[$type] = [];
			}

			$class = self::$blacklistTypes[$type];
			self::$instances[$type] = new $class( $wgBlacklistSettings[$type] );
		}

		return self::$instances[$type];
	}

	/**
	 * Returns the code for the blacklist implementation.
	 *
	 * @return string
	 */
	abstract protected function getBlacklistType();

	/**
	 * Check if the given local page title is a spam regex source.
	 *
	 * A title counts as a source when it is one of the
	 * "<Type>-blacklist" / "<Type>-whitelist" pages in the MediaWiki
	 * namespace, or when any configured 'files' entry refers to it, either
	 * as a "DB: <this wiki> <title>" entry or as its action=raw URL.
	 *
	 * @param Title $title
	 * @return bool
	 */
	public static function isLocalSource( Title $title ) {
		global $wgDBname, $wgBlacklistSettings;

		if ( $title->getNamespace() == NS_MEDIAWIKI ) {
			$sources = [];
			foreach ( self::$blacklistTypes as $type => $class ) {
				$type = ucfirst( $type );
				// Append rather than union: "+" on lists keeps the left-hand
				// value for keys 0 and 1, which dropped every type but the first.
				$sources[] = "$type-blacklist";
				$sources[] = "$type-whitelist";
			}

			if ( in_array( $title->getDBkey(), $sources, true ) ) {
				return true;
			}
		}

		$thisHttp = wfExpandUrl( $title->getFullUrl( 'action=raw' ), PROTO_HTTP );
		$thisHttpRegex = '/^' . preg_quote( $thisHttp, '/' ) . '(?:&.*)?$/';

		$files = [];
		foreach ( self::$blacklistTypes as $type => $class ) {
			if ( isset( $wgBlacklistSettings[$type]['files'] ) ) {
				// array_merge() renumbers list entries instead of discarding
				// the ones whose numeric index is already taken.
				$files = array_merge( $files, (array)$wgBlacklistSettings[$type]['files'] );
			}
		}

		foreach ( $files as $fileName ) {
			$matches = [];
			// Same pattern as buildSharedBlacklists(), so database names
			// containing "-" are recognised here as well.
			if ( preg_match( '/^DB: ([\w-]*) (.*)$/', $fileName, $matches ) ) {
				if ( $wgDBname == $matches[1] && $matches[2] == $title->getPrefixedDbKey() ) {
					// Local DB fetch of this page...
					return true;
				}
			} elseif ( preg_match( $thisHttpRegex, $fileName ) ) {
				// Raw view of this page
				return true;
			}
		}

		return false;
	}

	/**
	 * Returns the type of blacklist from the given title.
	 *
	 * @param Title $title
	 * @return bool|string Registered type code, or false if the title does
	 *  not contain "<Type>-blacklist" or "<Type>-whitelist"
	 */
	public static function getTypeFromTitle( Title $title ) {
		global $wgContLang;

		// Capitalised form as it appears in page names => registered type code
		$typesByPrefix = [];
		foreach ( array_keys( self::$blacklistTypes ) as $type ) {
			$typesByPrefix[$wgContLang->ucfirst( $type )] = $type;
		}

		$alternatives = [];
		foreach ( array_keys( $typesByPrefix ) as $prefix ) {
			// Type names are arbitrary strings; keep them literal in the regex
			$alternatives[] = preg_quote( $prefix, '/' );
		}
		$regex = '/(' . implode( '|', $alternatives ) . ')-(?:blacklist|whitelist)/';

		if ( preg_match( $regex, $title->getDBkey(), $m ) ) {
			// Hand back the key exactly as registered so that it is accepted
			// by getInstance() even when it is not all lower case.
			return isset( $typesByPrefix[$m[1]] ) ? $typesByPrefix[$m[1]] : strtolower( $m[1] );
		}

		return false;
	}

	/**
	 * Fetch local and (possibly cached) remote blacklists.
	 *
	 * The merged result is kept on the instance, so the sources are only
	 * consulted once per object.
	 *
	 * @return array Regexes
	 */
	public function getBlacklists() {
		if ( $this->regexes === false ) {
			$this->regexes = array_merge(
				$this->getLocalBlacklists(),
				$this->getSharedBlacklists()
			);
		}

		return $this->regexes;
	}

	/**
	 * Returns the local blacklist.
	 *
	 * @return array Regular expressions
	 */
	public function getLocalBlacklists() {
		return $this->getCachedMessageRegexes( 'blacklist' );
	}

	/**
	 * Returns the (local) whitelist.
	 *
	 * @return array Regular expressions
	 */
	public function getWhitelists() {
		return $this->getCachedMessageRegexes( 'whitelist' );
	}

	/**
	 * Build the regexes for the "<type>-<kind>" message, cached in the main
	 * WAN cache under the local-wiki key (spamblacklist, <type>, <kind>-regex).
	 *
	 * @param string $kind Either 'blacklist' or 'whitelist'
	 * @return array Regular expressions
	 */
	private function getCachedMessageRegexes( $kind ) {
		$type = $this->getBlacklistType();

		return ObjectCache::getMainWANInstance()->getWithSetCallback(
			wfMemcKey( 'spamblacklist', $type, "{$kind}-regex" ),
			$this->expiryTime,
			function () use ( $type, $kind ) {
				return SpamRegexBatch::regexesFromMessage( "{$type}-{$kind}", $this );
			}
		);
	}

	/**
	 * Fetch (possibly cached) remote blacklists.
	 *
	 * @return array Regexes; empty when no sources are configured
	 */
	public function getSharedBlacklists() {
		$listType = $this->getBlacklistType();

		wfDebugLog( 'SpamBlacklist', "Loading $listType regex..." );

		if ( count( $this->files ) == 0 ) {
			# No lists
			wfDebugLog( 'SpamBlacklist', "no files specified\n" );
			return [];
		}

		// Set by the callback, which only runs when the cache has no value
		$miss = false;

		$regexes = ObjectCache::getMainWANInstance()->getWithSetCallback(
			// This used to be cached per-site, but that could be bad on a shared
			// server where not all wikis have the same configuration.
			wfMemcKey( 'spamblacklist', $listType, 'shared-blacklist-regex' ),
			$this->expiryTime,
			function () use ( &$miss ) {
				$miss = true;
				return $this->buildSharedBlacklists();
			}
		);

		if ( !$miss ) {
			wfDebugLog( 'SpamBlacklist', "Got shared spam regexes from cache\n" );
		}

		return $regexes;
	}

	/**
	 * Clear all primary blacklist cache keys.
	 *
	 * Removes the shared, local-blacklist and whitelist regex entries for
	 * this blacklist type from the main WAN cache.
	 */
	public function clearCache() {
		$listType = $this->getBlacklistType();

		$cache = ObjectCache::getMainWANInstance();
		foreach ( [ 'shared-blacklist-regex', 'blacklist-regex', 'whitelist-regex' ] as $suffix ) {
			$cache->delete( wfMemcKey( 'spamblacklist', $listType, $suffix ) );
		}

		wfDebugLog( 'SpamBlacklist', "$listType blacklist local cache cleared.\n" );
	}

	/**
	 * Load every source listed in $files and turn it into regexes.
	 *
	 * Stays public because it is part of the existing callable surface of
	 * this class.
	 *
	 * @return array Regexes
	 */
	public function buildSharedBlacklists() {
		$regexes = [];
		$listType = $this->getBlacklistType();
		# Load lists
		wfDebugLog( 'SpamBlacklist', "Constructing $listType blacklist\n" );
		foreach ( $this->files as $fileName ) {
			$matches = [];
			if ( preg_match( '/^DB: ([\w-]*) (.*)$/', $fileName, $matches ) ) {
				$text = $this->getArticleText( $matches[1], $matches[2] );
			} elseif ( preg_match( '/^(https?:)?\/\//', $fileName ) ) {
				$text = $this->getHttpText( $fileName );
			} else {
				$text = file_get_contents( $fileName );
				wfDebugLog( 'SpamBlacklist', "got from file $fileName\n" );
			}

			if ( $text === false ) {
				// All three loaders signal failure with false; there is nothing
				// to parse, so do not hand the failure value to the regex builder.
				wfDebugLog( 'SpamBlacklist', "Skipping unreadable $listType blacklist source $fileName\n" );
				continue;
			}

			// Build a separate batch of regexes from each source.
			// While in theory we could squeeze a little efficiency
			// out of combining multiple sources in one regex, if
			// there's a bad line in one of them we'll gain more
			// from only having to break that set into smaller pieces.
			$regexes = array_merge(
				$regexes,
				SpamRegexBatch::regexesFromText( $text, $this, $fileName )
			);
		}

		return $regexes;
	}

	/**
	 * Fetch the text of an HTTP(S) source, going through $messageMemc.
	 *
	 * @param string $fileName URL of the source
	 * @return string|bool Source text, or false if it could not be loaded
	 */
	public function getHttpText( $fileName ) {
		global $wgDBname, $messageMemc;
		$listType = $this->getBlacklistType();

		# HTTP request
		# To keep requests to a minimum, we save results into $messageMemc, which is
		# similar to $wgMemc except almost certain to exist. By default, it is stored
		# in the database
		# There are two keys, when the warning key expires, a random thread will refresh
		# the real key. This reduces the chance of multiple requests under high traffic
		# conditions.
		$key = "{$listType}_blacklist_file:$fileName";
		$warningKey = "$wgDBname:{$listType}filewarning:$fileName";
		$httpText = $messageMemc->get( $key );
		$warning = $messageMemc->get( $warningKey );

		// Refresh when nothing usable is cached, or when the warning key has
		// expired and this request draws 0 from mt_rand( 0, $warningChance ).
		$needsFetch = !is_string( $httpText )
			|| ( !$warning && !mt_rand( 0, $this->warningChance ) );

		if ( !$needsFetch ) {
			wfDebugLog( 'SpamBlacklist', "Got $listType blacklist from HTTP cache for $fileName\n" );
			return $httpText;
		}

		wfDebugLog( 'SpamBlacklist', "Loading $listType blacklist from $fileName\n" );
		$httpText = Http::get( $fileName );
		if ( $httpText === false ) {
			wfDebugLog( 'SpamBlacklist', "Error loading $listType blacklist from $fileName\n" );
		}
		$messageMemc->set( $warningKey, 1, $this->warningTime );
		$messageMemc->set( $key, $httpText, $this->expiryTime );

		return $httpText;
	}

	/**
	 * Fetch an article from this or another local MediaWiki database.
	 *
	 * @param string $wiki Database name of the wiki to read from
	 * @param string $article Page title, as text
	 * @return string|bool Page text, or false if the title is invalid or
	 *  the page was not found
	 */
	public function getArticleText( $wiki, $article ) {
		wfDebugLog( 'SpamBlacklist',
			"Fetching {$this->getBlacklistType()} blacklist from '$article' on '$wiki'...\n" );

		$title = Title::newFromText( $article );
		if ( !$title ) {
			// newFromText() yields no Title object for an invalid title;
			// report "not found" instead of calling methods on null.
			wfDebugLog( 'SpamBlacklist', "Invalid title '$article' for '$wiki'\n" );
			return false;
		}

		// Load all the relevant tables from the correct DB.
		// This assumes that old_text is the actual text or
		// that the external store system is at least unified.
		$row = wfGetDB( DB_SLAVE, [], $wiki )->selectRow(
			[ 'page', 'revision', 'text' ],
			array_merge(
				Revision::selectFields(),
				Revision::selectPageFields(),
				Revision::selectTextFields()
			),
			[
				'page_namespace' => $title->getNamespace(), // assume NS IDs match
				'page_title' => $title->getDBkey(), // assume same case rules
				'rev_id=page_latest',
				'old_id=rev_text_id'
			],
			__METHOD__
		);

		return $row
			? ContentHandler::getContentText( Revision::newFromRow( $row )->getContent() )
			: false;
	}

	/**
	 * Returns the start of the regex for matches.
	 *
	 * @return string
	 */
	public function getRegexStart() {
		return '/[a-z0-9_\-.]*';
	}

	/**
	 * Returns the end of the regex for matches.
	 *
	 * @param int $batchSize Adds the "S" modifier when greater than zero
	 * @return string
	 */
	public function getRegexEnd( $batchSize ) {
		return ( $batchSize > 0 ) ? '/Sim' : '/im';
	}

	/**
	 * Hook for subclasses to warm their caches before filter() runs; the
	 * base implementation does nothing.
	 *
	 * @param Title $title
	 * @param string[] $entries
	 */
	public function warmCachesForFilter( Title $title, array $entries ) {
		// subclass this
	}
}
c Accompany it with the information you received as to the offer to distribute corresponding source complete source code means all the source code for all modules it plus any associated interface definition files
Definition COPYING.txt:158
const DB_SLAVE
Definition Defines.php:37
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
wfExpandUrl( $url, $defaultProto=PROTO_CURRENT)
Expand a potentially local URL to a fully-qualified URL.
wfMemcKey()
Make a cache key for the local wiki.
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not.
Base class for different kinds of blacklists.
static array $blacklistTypes
Array containing blacklists that extend BaseBlacklist.
getLocalBlacklists()
Returns the local blacklist.
static getBlacklistTypes()
Return the array of blacklist types currently defined.
array $files
Array of blacklist sources.
__construct( $settings=[])
Constructor.
getBlacklists()
Fetch local and (possibly cached) remote blacklists.
clearCache()
Clear all primary blacklist cache keys.
filter(array $links, Title $title, $preventLog=false)
getWhitelists()
Returns the (local) whitelist.
getSharedBlacklists()
Fetch (possibly cached) remote blacklists.
getBlacklistType()
Returns the code for the blacklist implementation.
getRegexStart()
Returns the start of the regex for matches.
static getInstance( $type)
Returns an instance of the given blacklist.
getArticleText( $wiki, $article)
Fetch an article from this or another local MediaWiki database.
static getTypeFromTitle(Title $title)
Returns the type of blacklist from the given title.
bool array $regexes
Array containing regexes to test against.
static isLocalSource(Title $title)
Check if the given local page title is a spam regex source.
static array $instances
Array of blacklist instances.
getHttpText( $fileName)
static addBlacklistType( $type, $class)
Adds a blacklist class to the registry.
int $warningChance
Chance of receiving a warning when the filter is hit.
getRegexEnd( $batchSize)
Returns the end of the regex for matches.
warmCachesForFilter(Title $title, array $entries)
static getContentText(Content $content=null)
Convenience function for getting flat text from a Content object.
static selectTextFields()
Return the list of text fields that should be selected to read the revision text.
Definition Revision.php:518
static selectPageFields()
Return the list of page fields that should be selected from page table.
Definition Revision.php:529
static selectFields()
Return the list of revision fields that should be selected to create a new revision.
Definition Revision.php:452
static newFromRow( $row)
Definition Revision.php:238
static regexesFromMessage( $message, BaseBlacklist $blacklist)
Build a set of regular expressions from a MediaWiki message.
static regexesFromText( $source, BaseBlacklist $blacklist, $fileName=false)
Build a set of regular expressions from the given multiline input text, with empty lines and comments...
Represents a title within MediaWiki.
Definition Title.php:39
this class mediates it Skin Encapsulates a look and feel for the wiki All of the functions that render HTML and make choices about how to render it are here and are called from various other places when and is meant to be subclassed with other skins that may override some of its functions The User object contains a reference to a and so rather than having a global skin object we just rely on the global User and get the skin with $wgUser and also has some character encoding functions and other locale stuff The current user interface language is instantiated as and the local content language as $wgContLang
Definition design.txt:57
globals will be eliminated from MediaWiki replaced by an application object which would be passed to constructors Whether that would be an convenient solution remains to be but certainly PHP makes such object oriented programming models easier than they were in previous versions For the time being MediaWiki programmers will have to work in an environment with some global context At the time of globals were initialised on startup by MediaWiki of these were configuration which are documented in DefaultSettings php There is no comprehensive documentation for the remaining however some of the most important ones are listed below They are typically initialised either in index php or in Setup php For a description of the see design txt $wgTitle Title object created from the request URL $wgOut OutputPage object for HTTP response $wgUser User object for the user associated with the current request $wgLang Language object selected by user preferences $wgContLang Language object associated with the wiki being viewed $wgParser Parser object Parser extensions register their hooks here $wgRequest WebRequest to get request data $messageMemc
Definition globals.txt:66
const PROTO_HTTP
Definition Defines.php:220
$cache
Definition mcc.php:33
controlled by $wgMainCacheType controlled by $wgParserCacheType controlled by $wgMessageCacheType If you set CACHE_NONE to one of the three control default value for MediaWiki still create a but requests to it are no ops and we always fall through to the database If the cache daemon can t be it should also disable itself fairly smoothly By $wgMemc is used but when it is $parserMemc or $messageMemc this is mentioned $wgDBname