MediaWiki REL1_32
SpamRegexBatch.php
Go to the documentation of this file.
1<?php
2
/**
 * Build a set of regular expressions matching URLs with the list of regex fragments.
 *
 * Fragments are joined with '|' into batches. A new batch is started
 * whenever appending the next fragment would push the combined byte
 * length of the current batch past $batchSize.
 *
 * @param array $lines Regex fragments, one per entry
 * @param BaseBlacklist $blacklist Supplies the opening and closing parts of each regex
 * @param int $batchSize Approximate upper bound on the length of one batch
 * @return array Complete regexes, one per batch
 */
static function buildRegexes( $lines, BaseBlacklist $blacklist, $batchSize = 4096 ) {
	$regexStart = $blacklist->getRegexStart();
	$regexEnd = $blacklist->getRegexEnd( $batchSize );

	// Turns a '|'-joined run of fragments into a finished regex: any
	// backslashes in front of a slash are collapsed first, then every
	// slash is escaped exactly once.
	$assemble = static function ( $fragments ) use ( $regexStart, $regexEnd ) {
		$unescaped = preg_replace( '|\\\*/|u', '/', $fragments );
		return $regexStart . str_replace( '/', '\/', $unescaped ) . $regexEnd;
	};

	$batches = [];
	$pending = false;
	foreach ( $lines as $line ) {
		if ( substr( $line, -1, 1 ) == "\\" ) {
			// A trailing backslash would silently break the batched
			// regex by escaping whatever follows it, so the line is
			// dropped here; getBadLines() still reports such lines.
			continue;
		}

		if ( $pending === false ) {
			// First usable fragment opens the first batch.
			$pending = $line;
			continue;
		}

		// FIXME: not very robust size check, but should work. :)
		if ( strlen( $pending ) + strlen( $line ) > $batchSize ) {
			$batches[] = $assemble( $pending );
			$pending = $line;
			continue;
		}

		$pending .= '|';
		$pending .= $line;
	}

	// Flush whatever is left in the last, partially filled batch.
	if ( $pending !== false ) {
		$batches[] = $assemble( $pending );
	}

	return $batches;
}
54
/**
 * Confirm that a set of regexes is either empty or valid.
 *
 * Each pattern is compiled by running it against an empty subject.
 * An invalid pattern makes preg_match() return false and raise a PHP
 * warning; since a failure is an expected outcome here, warnings are
 * suppressed around the probe so they do not leak into output or logs.
 *
 * @param array $regexes Complete regex strings, delimiters included
 * @return bool True if every regex compiles (or the set is empty)
 */
static function validateRegexes( $regexes ) {
	foreach ( $regexes as $regex ) {
		wfSuppressWarnings();
		$ok = preg_match( $regex, '' );
		// Restore before any early return so suppression stays balanced.
		wfRestoreWarnings();

		if ( $ok === false ) {
			return false;
		}
	}
	return true;
}
73
/**
 * Strip comments and whitespace, then remove blanks.
 *
 * Original array keys are preserved, so the result may have gaps.
 *
 * @param array $lines Raw input lines
 * @return array Cleaned lines that are still non-empty
 */
static function stripLines( $lines ) {
	// Everything from the first '#' to the end of the line is a comment.
	$withoutComments = preg_replace( '/#.*$/', '', $lines );
	$trimmed = array_map( 'trim', $withoutComments );

	// No callback: array_filter() drops every falsy entry.
	return array_filter( $trimmed );
}
86
/**
 * Do a sanity check on the batch regex.
 *
 * Comments and blank lines are stripped before anything is built:
 * a blank line would otherwise contribute an empty alternative to the
 * batch, and a comment line would be used as a regex fragment.
 *
 * @param array $lines Unsanitized input lines
 * @param BaseBlacklist $blacklist Supplies the opening and closing parts of each regex
 * @param bool|string $fileName Optional source name, used only in the debug log message
 * @return array Regexes; batched if the batches validate, otherwise one per line
 */
static function buildSafeRegexes( $lines, BaseBlacklist $blacklist, $fileName = false ) {
	$lines = self::stripLines( $lines );
	$regexes = self::buildRegexes( $lines, $blacklist );
	if ( self::validateRegexes( $regexes ) ) {
		return $regexes;
	}

	// _Something_ broke... rebuild line-by-line; it'll be
	// slower if there's a lot of blacklist lines, but one
	// broken line won't take out hundreds of its brothers.
	if ( $fileName ) {
		wfDebugLog( 'SpamBlacklist', "Spam blacklist warning: bogus line in $fileName\n" );
	}

	// A batch size of 0 makes buildRegexes() emit one regex per line.
	return self::buildRegexes( $lines, $blacklist, 0 );
}
110
/**
 * Returns an array of invalid lines.
 *
 * Lines are stripped of comments and surrounding whitespace first, so
 * that comment text is not validated as a regex and the trailing
 * backslash check sees the fragment exactly as it would be batched.
 *
 * @param array $lines Unsanitized input lines
 * @param BaseBlacklist $blacklist Supplies the opening and closing parts of each regex
 * @return array Stripped lines that are invalid; empty if none
 */
static function getBadLines( $lines, BaseBlacklist $blacklist ) {
	$lines = self::stripLines( $lines );

	$badLines = [];
	foreach ( $lines as $line ) {
		if ( substr( $line, -1, 1 ) == "\\" ) {
			// Final \ will break silently on the batched regexes.
			$badLines[] = $line;
		}
	}

	$regexes = self::buildRegexes( $lines, $blacklist );
	if ( self::validateRegexes( $regexes ) ) {
		// No other problems!
		return $badLines;
	}

	// Something failed in the batch, so check them one by one.
	// Lines ending in a backslash are skipped by buildRegexes(), so a
	// line already recorded above is not added a second time here.
	foreach ( $lines as $line ) {
		$regexes = self::buildRegexes( [ $line ], $blacklist );
		if ( !self::validateRegexes( $regexes ) ) {
			$badLines[] = $line;
		}
	}
	return $badLines;
}
144
/**
 * Build a set of regular expressions from the given multiline input text.
 *
 * The text is split on "\n" and the resulting lines are handed to
 * buildSafeRegexes().
 *
 * @param string $source Multiline text, one regex fragment per line
 * @param BaseBlacklist $blacklist Passed through to buildSafeRegexes()
 * @param bool|string $fileName Optional source name, passed through to buildSafeRegexes()
 * @return array Regexes
 */
static function regexesFromText( $source, BaseBlacklist $blacklist, $fileName = false ) {
	return self::buildSafeRegexes( explode( "\n", $source ), $blacklist, $fileName );
}
158
/**
 * Build a set of regular expressions from a MediaWiki message.
 *
 * The message is looked up in the content language. A disabled
 * message yields an empty array.
 *
 * @param string $message Message key
 * @param BaseBlacklist $blacklist Passed through to regexesFromText()
 * @return array Regexes
 */
static function regexesFromMessage( $message, BaseBlacklist $blacklist ) {
	$msg = wfMessage( $message )->inContentLanguage();
	if ( $msg->isDisabled() ) {
		return [];
	}

	return self::regexesFromText( $msg->plain(), $blacklist );
}
175}
wfRestoreWarnings()
wfSuppressWarnings( $end=false)
Reference-counted warning suppression.
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not.
$line
Definition cdb.php:59
Base class for different kinds of blacklists.
getRegexStart()
Returns the start of the regex for matches.
getRegexEnd( $batchSize)
Returns the end of the regex for matches.
Utility class for working with blacklists.
static buildSafeRegexes( $lines, BaseBlacklist $blacklist, $fileName=false)
Do a sanity check on the batch regex.
static buildRegexes( $lines, BaseBlacklist $blacklist, $batchSize=4096)
Build a set of regular expressions matching URLs with the list of regex fragments.
static regexesFromMessage( $message, BaseBlacklist $blacklist)
Build a set of regular expressions from a MediaWiki message.
static regexesFromText( $source, BaseBlacklist $blacklist, $fileName=false)
Build a set of regular expressions from the given multiline input text, with empty lines and comments stripped.
static stripLines( $lines)
Strip comments and whitespace, then remove blanks.
static validateRegexes( $regexes)
Confirm that a set of regexes is either empty or valid.
static getBadLines( $lines, BaseBlacklist $blacklist)
Returns an array of invalid lines.
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation use $formDescriptor instead default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html(usually something like "&lt;div ...>$1&lt;/div>"). - flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException':Called before an exception(or PHP error) is logged. This is meant for integration with external error aggregation services
$source
$lines
Definition router.php:61