MediaWiki REL1_31
SpamRegexBatch.php
Go to the documentation of this file.
1<?php
2
/**
 * Build a set of regular expressions matching URLs with the list of regex fragments.
 *
 * Fragments are joined with '|' into batches. A new batch is started when
 * adding the next fragment would push the combined fragment length past
 * $batchSize, so a $batchSize of 0 puts every non-empty fragment in its own
 * regex. Each batch is wrapped in the start/end strings supplied by
 * $blacklist.
 *
 * @param array $lines Regex fragments, one per entry.
 * @param BaseBlacklist $blacklist Supplies getRegexStart() and getRegexEnd().
 * @param int $batchSize Approximate upper bound on the length of the joined
 *  fragments in one batch; also passed to getRegexEnd().
 * @return array Regex strings, potentially empty.
 */
static function buildRegexes( $lines, BaseBlacklist $blacklist, $batchSize=4096 ) {
	$regexStart = $blacklist->getRegexStart();
	$regexEnd = $blacklist->getRegexEnd( $batchSize );

	$regexes = [];
	$build = false;
	foreach ( $lines as $line ) {
		if ( substr( $line, -1, 1 ) == "\\" ) {
			// Final \ will break silently on the batched regexes.
			// Skip it here to avoid breaking the next line;
			// warnings from getBadLines() will still trigger on
			// edit to keep new ones from floating in.
			continue;
		}
		// FIXME: not very robust size check (the '|' separators are not
		// counted), but should work. :)
		if ( $build === false ) {
			$build = $line;
		} elseif ( strlen( $build ) + strlen( $line ) > $batchSize ) {
			$regexes[] = self::wrapBatch( $build, $regexStart, $regexEnd );
			$build = $line;
		} else {
			$build .= '|' . $line;
		}
	}
	if ( $build !== false ) {
		$regexes[] = self::wrapBatch( $build, $regexStart, $regexEnd );
	}
	return $regexes;
}

/**
 * Wrap one batch of '|'-joined fragments into a complete regex string.
 *
 * Any run of backslashes directly in front of a slash is dropped, then every
 * slash is escaped exactly once.
 *
 * @param string $build The joined fragments.
 * @param string $regexStart Text placed in front of the fragments.
 * @param string $regexEnd Text placed after the fragments.
 * @return string
 */
private static function wrapBatch( $build, $regexStart, $regexEnd ) {
	$normalized = preg_replace( '|\\\*/|u', '/', $build );
	if ( $normalized === null ) {
		// preg_replace() returns null on error, e.g. when $build is not
		// valid UTF-8. Carrying that null forward would silently produce a
		// regex with an empty alternation. The pattern is plain ASCII, so
		// redo the replacement byte-wise and keep the fragments intact.
		$normalized = preg_replace( '|\\\*/|', '/', $build );
	}
	return $regexStart . str_replace( '/', '\/', $normalized ) . $regexEnd;
}
54
/**
 * Confirm that a set of regexes is either empty or valid.
 *
 * Every pattern is run once against the empty string; a pattern is treated
 * as invalid when preg_match() returns false for it.
 *
 * @param array $regexes Regex strings.
 * @return bool True if the list is empty or no pattern failed, false as soon
 *  as one pattern fails.
 */
static function validateRegexes( $regexes ) {
	// preg_match() raises an E_WARNING for a pattern it cannot compile.
	// Broken patterns are an expected input here and are reported through
	// the return value, so keep those warnings away from output and logs
	// for the duration of the check only.
	set_error_handler( static function () {
		return true;
	}, E_WARNING );
	try {
		foreach ( $regexes as $regex ) {
			if ( preg_match( $regex, '' ) === false ) {
				return false;
			}
		}
		return true;
	} finally {
		// Always hand control back to the previously installed handler.
		restore_error_handler();
	}
}
73
/**
 * Strip comments and whitespace, then remove blanks.
 *
 * @param array $lines Raw lines.
 * @return array The cleaned lines; entries keep their original keys.
 */
static function stripLines( $lines ) {
	// Cut everything from the first '#' to the end of each line.
	$withoutComments = preg_replace( '/#.*$/', '', $lines );
	$trimmed = array_map( 'trim', $withoutComments );
	// Without a callback, array_filter() discards the entries that are
	// falsy, which removes the lines that ended up empty.
	return array_filter( $trimmed );
}
86
/**
 * Do a sanity check on the batch regex.
 *
 * Comments and blank lines are removed first, so they can never end up as
 * regex alternatives (a blank line would otherwise add an empty alternative
 * to its batch). The batched regexes are returned if they all validate;
 * otherwise the regexes are rebuilt with one line per regex.
 *
 * @param array $lines Unsanitized input lines.
 * @param BaseBlacklist $blacklist
 * @param bool|string $fileName Optional; when set, a rebuild is reported to
 *  the 'SpamBlacklist' debug log under this name.
 * @return array Regex strings, potentially empty.
 */
static function buildSafeRegexes( $lines, BaseBlacklist $blacklist, $fileName=false ) {
	$lines = self::stripLines( $lines );

	$regexes = self::buildRegexes( $lines, $blacklist );
	if ( self::validateRegexes( $regexes ) ) {
		return $regexes;
	}

	// _Something_ broke... rebuild line-by-line; it'll be
	// slower if there's a lot of blacklist lines, but one
	// broken line won't take out hundreds of its brothers.
	if ( $fileName ) {
		wfDebugLog( 'SpamBlacklist', "Spam blacklist warning: bogus line in $fileName\n" );
	}
	return self::buildRegexes( $lines, $blacklist, 0 );
}
110
/**
 * Returns an array of invalid lines.
 *
 * Comments and blank lines are removed before checking, so only real regex
 * fragments are examined and reported.
 *
 * @param array $lines Unsanitized input lines.
 * @param BaseBlacklist $blacklist
 * @return array The fragments that end in a backslash or that do not yield
 *  a valid regex on their own; empty if none were found.
 */
static function getBadLines( $lines, BaseBlacklist $blacklist ) {
	$lines = self::stripLines( $lines );

	$badLines = [];
	foreach ( $lines as $line ) {
		if ( substr( $line, -1, 1 ) == "\\" ) {
			// Final \ will break silently on the batched regexes.
			$badLines[] = $line;
		}
	}

	$regexes = self::buildRegexes( $lines, $blacklist );
	if ( self::validateRegexes( $regexes ) ) {
		// No other problems!
		return $badLines;
	}

	// Something failed in the batch, so check them one by one.
	// buildRegexes() skips lines with a trailing backslash, so the ones
	// collected above are not reported a second time here.
	foreach ( $lines as $line ) {
		$regexes = self::buildRegexes( [ $line ], $blacklist );
		if ( !self::validateRegexes( $regexes ) ) {
			$badLines[] = $line;
		}
	}
	return $badLines;
}
144
/**
 * Build a set of regular expressions from the given multiline input text.
 *
 * The text is split on "\n" and the resulting lines are handed to
 * buildSafeRegexes().
 *
 * @param string $source
 * @param BaseBlacklist $blacklist
 * @param bool|string $fileName Optional; passed through to buildSafeRegexes().
 * @return array Whatever buildSafeRegexes() returns for the lines.
 */
static function regexesFromText( $source, BaseBlacklist $blacklist, $fileName=false ) {
	return self::buildSafeRegexes( explode( "\n", $source ), $blacklist, $fileName );
}
158
/**
 * Build a set of regular expressions from a MediaWiki message.
 *
 * @param string $message Message key, looked up in the content language.
 * @param BaseBlacklist $blacklist
 * @return array An empty array when the message is disabled, otherwise the
 *  result of regexesFromText() on the plain message text.
 */
static function regexesFromMessage( $message, BaseBlacklist $blacklist ) {
	$msg = wfMessage( $message )->inContentLanguage();
	if ( $msg->isDisabled() ) {
		return [];
	}
	return self::regexesFromText( $msg->plain(), $blacklist );
}
175}
wfRestoreWarnings()
wfSuppressWarnings( $end=false)
Reference-counted warning suppression.
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not.
$line
Definition cdb.php:59
Base class for different kinds of blacklists.
getRegexStart()
Returns the start of the regex for matches.
getRegexEnd( $batchSize)
Returns the end of the regex for matches.
Utility class for working with blacklists.
static buildSafeRegexes( $lines, BaseBlacklist $blacklist, $fileName=false)
Do a sanity check on the batch regex.
static buildRegexes( $lines, BaseBlacklist $blacklist, $batchSize=4096)
Build a set of regular expressions matching URLs with the list of regex fragments.
static regexesFromMessage( $message, BaseBlacklist $blacklist)
Build a set of regular expressions from a MediaWiki message.
static regexesFromText( $source, BaseBlacklist $blacklist, $fileName=false)
Build a set of regular expressions from the given multiline input text, with empty lines and comments...
static stripLines( $lines)
Strip comments and whitespace, then remove blanks.
static validateRegexes( $regexes)
Confirm that a set of regexes is either empty or valid.
static getBadLines( $lines, BaseBlacklist $blacklist)
Returns an array of invalid lines.
either an unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage()->params()->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html (usually something like "<div ...>$1</div>"). - flags Integer display flags (NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException': Called before an exception (or PHP error) is logged. This is meant for integration with external error aggregation services
$source
$lines
Definition router.php:61