MediaWiki REL1_33
SpamRegexBatch.php
Go to the documentation of this file.
1<?php
2
/**
 * Build a set of regular expressions matching URLs with the list of regex fragments.
 *
 * Fragments are joined with '|' into batches. A new batch is started whenever
 * the byte length of the current batch plus the next fragment would exceed
 * $batchSize. Every batch is wrapped between the blacklist's regex start and
 * regex end to form a complete pattern.
 *
 * @param string[] $lines Regex fragments, one per entry
 * @param BaseBlacklist $blacklist Provides getRegexStart() and getRegexEnd()
 * @param int $batchSize Rough upper bound, in bytes, for the joined fragments
 *  of one batch; with 0 every non-empty fragment starts a new batch
 * @return string[] Complete regular expressions, one per batch
 */
private static function buildRegexes( $lines, BaseBlacklist $blacklist, $batchSize = 4096 ) {
	# It's faster using the S modifier even though it will usually only be run once
	$regexStart = $blacklist->getRegexStart();
	$regexEnd = $blacklist->getRegexEnd( $batchSize );

	// Turns one batch of '|'-joined fragments into a complete regex.
	$wrap = static function ( $fragments ) use ( $regexStart, $regexEnd ) {
		// First collapse any run of backslashes in front of a slash down to a
		// bare "/", then escape every slash exactly once (presumably because
		// the surrounding regex uses "/" as its delimiter -- confirm against
		// BaseBlacklist::getRegexStart()).
		return $regexStart .
			str_replace( '/', '\/', preg_replace( '|\\\*/|u', '/', $fragments ) ) .
			$regexEnd;
	};

	$regexes = [];
	$build = false;
	foreach ( $lines as $line ) {
		if ( substr( $line, -1, 1 ) === "\\" ) {
			// Final \ will break silently on the batched regexes.
			// Skip it here to avoid breaking the next line;
			// warnings from getBadLines() will still trigger on
			// edit to keep new ones from floating in.
			continue;
		}
		// FIXME: not very robust size check, but should work. :)
		if ( $build === false ) {
			// First usable fragment opens the first batch.
			$build = $line;
		} elseif ( strlen( $build ) + strlen( $line ) > $batchSize ) {
			// Current batch is full: emit it and start a new one.
			$regexes[] = $wrap( $build );
			$build = $line;
		} else {
			$build .= '|' . $line;
		}
	}
	if ( $build !== false ) {
		// Emit the trailing, possibly partial, batch.
		$regexes[] = $wrap( $build );
	}
	return $regexes;
}
54
/**
 * Confirm that a set of regexes is either empty or valid.
 *
 * Each pattern is test-run against the empty string; the check stops at the
 * first pattern for which preg_match() reports failure.
 *
 * @param string[] $regexes Complete regular expressions to check
 * @return bool False if preg_match() returned false for any pattern, true
 *  otherwise (including for an empty list)
 */
private static function validateRegexes( $regexes ) {
	foreach ( $regexes as $pattern ) {
		// @phan-suppress-next-line PhanParamSuspiciousOrder False positive
		if ( preg_match( $pattern, '' ) === false ) {
			return false;
		}
	}
	return true;
}
74
/**
 * Strip comments and whitespace, then remove blanks.
 *
 * @param string[] $lines Raw lines
 * @return string[] Remaining lines, still under their original array keys
 */
private static function stripLines( $lines ) {
	// Drop everything from the first '#' to the end of each line.
	$withoutComments = preg_replace( '/#.*$/', '', $lines );
	$trimmed = array_map( 'trim', $withoutComments );
	// array_filter() without a callback removes falsy entries and keeps keys.
	return array_filter( $trimmed );
}
87
/**
 * Do a sanity check on the batch regex.
 *
 * Comments and blank lines are stripped first, then the remaining fragments
 * are compiled in batches. If any batch fails validation, everything is
 * rebuilt with a batch size of 0 so that fragments are no longer grouped.
 *
 * @param string[] $lines Unsanitized input lines
 * @param BaseBlacklist $blacklist Passed through to buildRegexes()
 * @param string|bool $fileName Optional source name, used only in the debug
 *  log message written when a batch fails validation
 * @return string[] Regular expressions built from the usable lines
 */
private static function buildSafeRegexes( $lines, BaseBlacklist $blacklist, $fileName = false ) {
	// Without this, the blank entries produced by splitting text on "\n"
	// become empty alternatives, and '#' comments are compiled as patterns.
	$lines = self::stripLines( $lines );

	$regexes = self::buildRegexes( $lines, $blacklist );
	if ( self::validateRegexes( $regexes ) ) {
		return $regexes;
	}

	// _Something_ broke... rebuild line-by-line; it'll be
	// slower if there's a lot of blacklist lines, but one
	// broken line won't take out hundreds of its brothers.
	if ( $fileName ) {
		wfDebugLog( 'SpamBlacklist', "Spam blacklist warning: bogus line in $fileName\n" );
	}
	return self::buildRegexes( $lines, $blacklist, 0 );
}
111
/**
 * Returns an array of invalid lines.
 *
 * A line is reported when it ends in a backslash, or, if the batched regex
 * fails validation, when the regex built from that line alone fails too.
 * Comments and blank lines are stripped before checking.
 *
 * @param string[] $lines Unsanitized input lines
 * @param BaseBlacklist $blacklist Passed through to buildRegexes()
 * @return string[] The stripped lines found to be invalid
 */
public static function getBadLines( $lines, BaseBlacklist $blacklist ) {
	// Judge only real fragments; comments and blanks are not patterns.
	$lines = self::stripLines( $lines );

	$badLines = [];
	foreach ( $lines as $line ) {
		if ( substr( $line, -1, 1 ) === "\\" ) {
			// Final \ will break silently on the batched regexes.
			$badLines[] = $line;
		}
	}

	$regexes = self::buildRegexes( $lines, $blacklist );
	if ( self::validateRegexes( $regexes ) ) {
		// No other problems!
		return $badLines;
	}

	// Something failed in the batch, so check them one by one.
	// Lines ending in a backslash are skipped by buildRegexes(), so they
	// produce an empty (valid) set here and are not reported twice.
	foreach ( $lines as $line ) {
		$regexes = self::buildRegexes( [ $line ], $blacklist );
		if ( !self::validateRegexes( $regexes ) ) {
			$badLines[] = $line;
		}
	}
	return $badLines;
}
145
/**
 * Build a set of regular expressions from the given multiline input text.
 *
 * The text is split on "\n" and the resulting lines are handed to
 * buildSafeRegexes().
 *
 * @param string $source Multiline text, one regex fragment per line
 * @param BaseBlacklist $blacklist Passed through to buildSafeRegexes()
 * @param string|bool $fileName Optional source name, passed through to
 *  buildSafeRegexes()
 * @return string[] Regular expressions
 */
public static function regexesFromText( $source, BaseBlacklist $blacklist, $fileName = false ) {
	return self::buildSafeRegexes( explode( "\n", $source ), $blacklist, $fileName );
}
159
/**
 * Build a set of regular expressions from a MediaWiki message.
 *
 * The message is looked up in the content language. A disabled message
 * yields an empty array; otherwise its plain text is passed to
 * regexesFromText().
 *
 * @param string $message Message key handed to wfMessage()
 * @param BaseBlacklist $blacklist Passed through to regexesFromText()
 * @return string[] Regular expressions, or an empty array for a disabled message
 */
public static function regexesFromMessage( $message, BaseBlacklist $blacklist ) {
	$source = wfMessage( $message )->inContentLanguage();
	if ( $source->isDisabled() ) {
		return [];
	}
	return self::regexesFromText( $source->plain(), $blacklist );
}
176}
and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not.
$line
Definition cdb.php:59
Base class for different kinds of blacklists.
getRegexStart()
Returns the start of the regex for matches.
getRegexEnd( $batchSize)
Returns the end of the regex for matches.
Utility class for working with blacklists.
static buildSafeRegexes( $lines, BaseBlacklist $blacklist, $fileName=false)
Do a sanity check on the batch regex.
static buildRegexes( $lines, BaseBlacklist $blacklist, $batchSize=4096)
Build a set of regular expressions matching URLs with the list of regex fragments.
static regexesFromMessage( $message, BaseBlacklist $blacklist)
Build a set of regular expressions from a MediaWiki message.
static regexesFromText( $source, BaseBlacklist $blacklist, $fileName=false)
Build a set of regular expressions from the given multiline input text, with empty lines and comments...
static stripLines( $lines)
Strip comments and whitespace, then remove blanks.
static validateRegexes( $regexes)
Confirm that a set of regexes is either empty or valid.
static getBadLines( $lines, BaseBlacklist $blacklist)
Returns an array of invalid lines.
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation use $formDescriptor instead default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html(usually something like "&lt;div ...>$1&lt;/div>"). - flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException':Called before an exception(or PHP error) is logged. This is meant for integration with external error aggregation services
$source
$lines
Definition router.php:61