Code Coverage

Element                    | Lines             | Functions and Methods | CRAP   | Classes and Traits
Total                      | 17.36% (25 / 144) | 40.00% (4 / 10)       |        | 0.00% (0 / 1)
CdnCacheUpdate             | 17.48% (25 / 143) | 40.00% (4 / 10)       | 939.00 | 0.00% (0 / 1)
  __construct              | 100.00% (8 / 8)   | 100.00% (1 / 1)       | 3      |
  merge                    | 100.00% (3 / 3)   | 100.00% (1 / 1)       | 1      |
  doUpdate                 | 0.00% (0 / 14)    | 0.00% (0 / 1)         | 20     |
  purge                    | 0.00% (0 / 24)    | 0.00% (0 / 1)         | 20     |
  getUrls                  | 100.00% (1 / 1)   | 100.00% (1 / 1)       | 1      |
  resolveReboundDelayByUrl | 100.00% (13 / 13) | 100.00% (1 / 1)       | 5      |
  HTCPPurge                | 0.00% (0 / 52)    | 0.00% (0 / 1)         | 182    |
  naivePurge               | 0.00% (0 / 23)    | 0.00% (0 / 1)         | 20     |
  expand                   | 0.00% (0 / 1)     | 0.00% (0 / 1)         | 2      |
  getRuleForURL            | 0.00% (0 / 4)     | 0.00% (0 / 1)         | 20     |

<?php
/**
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */

namespace MediaWiki\Deferred;

use Exception;
use InvalidArgumentException;
use MediaWiki\JobQueue\Jobs\CdnPurgeJob;
use MediaWiki\MainConfigNames;
use MediaWiki\MediaWikiServices;
use MediaWiki\Page\PageReference;
use RuntimeException;
use Wikimedia\Assert\Assert;
use Wikimedia\IPUtils;

/**
 * Handles purging the appropriate CDN objects given a list of URLs or PageReference instances
 * @ingroup Cache
 */
class CdnCacheUpdate implements DeferrableUpdate, MergeableUpdate {
	/** @var array[] List of (URL, rebound purge delay) tuples */
	private $urlTuples = [];
	/** @var array[] List of (PageReference, rebound purge delay) tuples */
	private $pageTuples = [];

	/** @var int Maximum seconds of rebound purge delay */
	private const MAX_REBOUND_DELAY = 300;

	/**
	 * @param string[]|PageReference[] $targets Collection of URLs/titles to be purged from CDN
	 * @param array $options Options map. Supports:
	 *   - reboundDelay: how many seconds after the first purge to send a rebound purge.
	 *     No rebound purge will be sent if this is not positive. [Default: 0]
	 */
	public function __construct( array $targets, array $options = [] ) {
		$delay = min(
			(int)max( $options['reboundDelay'] ?? 0, 0 ),
			self::MAX_REBOUND_DELAY
		);

		foreach ( $targets as $target ) {
			if ( $target instanceof PageReference ) {
				$this->pageTuples[] = [ $target, $delay ];
			} else {
				$this->urlTuples[] = [ $target, $delay ];
			}
		}
	}
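
	// Illustrative usage sketch (not part of this class): an update like this is
	// normally queued through DeferredUpdates rather than executed directly.
	// The page reference and URL below are hypothetical example values.
	//
	//   DeferredUpdates::addUpdate(
	//       new CdnCacheUpdate(
	//           [ $page, 'https://example.org/wiki/Example_page' ],
	//           [ 'reboundDelay' => 60 ]
	//       ),
	//       DeferredUpdates::PRESEND
	//   );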

	public function merge( MergeableUpdate $update ) {
		/** @var self $update */
		Assert::parameterType( __CLASS__, $update, '$update' );
		'@phan-var self $update';

		$this->urlTuples = array_merge( $this->urlTuples, $update->urlTuples );
		$this->pageTuples = array_merge( $this->pageTuples, $update->pageTuples );
	}

	public function doUpdate() {
		// Resolve the final list of URLs just before purging them (T240083)
		$reboundDelayByUrl = $this->resolveReboundDelayByUrl();

		// Send the immediate purges to CDN
		self::purge( array_keys( $reboundDelayByUrl ) );
		$immediatePurgeTimestamp = time();

		// Get the URLs that need rebound purges, grouped by seconds of purge delay
		$urlsWithReboundByDelay = [];
		foreach ( $reboundDelayByUrl as $url => $delay ) {
			if ( $delay > 0 ) {
				$urlsWithReboundByDelay[$delay][] = $url;
			}
		}
		// Enqueue delayed purge jobs for these URLs (usually only one job)
		$jobs = [];
		foreach ( $urlsWithReboundByDelay as $delay => $urls ) {
			$jobs[] = new CdnPurgeJob( [
				'urls' => $urls,
				'jobReleaseTimestamp' => $immediatePurgeTimestamp + $delay
			] );
		}
		MediaWikiServices::getInstance()->getJobQueueGroup()->lazyPush( $jobs );
	}
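
	// Worked example (hypothetical values, assuming a job queue that supports
	// delayed jobs): given resolved delays of
	//   [ 'https://example.org/wiki/A' => 60, 'https://example.org/wiki/B' => 60,
	//     'https://example.org/wiki/C' => 0 ],
	// doUpdate() purges all three URLs immediately and enqueues a single
	// CdnPurgeJob for A and B, released roughly 60 seconds after the immediate purge.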

	/**
	 * Purge a list of URLs from the CDN nodes defined in $wgCdnServers
	 * and/or $wgHTCPRouting. $urls should contain the full URLs to purge
	 * as values (example: $urls[] = 'http://my.host/something')
	 *
	 * @param string[] $urls List of full URLs to purge
	 */
	public static function purge( array $urls ) {
		$cdnServers = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::CdnServers );
		$htcpRouting = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::HTCPRouting );
		if ( !$urls ) {
			return;
		}

		// Remove duplicate URLs from list
		$urls = array_unique( $urls );

		wfDebugLog( 'squid', __METHOD__ . ': ' . implode( ' ', $urls ) );

		// Reliably broadcast the purge to all edge nodes
		$ts = microtime( true );
		$relayerGroup = MediaWikiServices::getInstance()->getEventRelayerGroup();
		$relayerGroup->getRelayer( 'cdn-url-purges' )->notifyMulti(
			'cdn-url-purges',
			array_map(
				static function ( $url ) use ( $ts ) {
					return [
						'url' => $url,
						'timestamp' => $ts,
					];
				},
				$urls
			)
		);

		// Send lossy UDP broadcasting if enabled
		if ( $htcpRouting ) {
			self::HTCPPurge( $urls );
		}

		// Do direct server purges if enabled (this does not scale very well)
		if ( $cdnServers ) {
			self::naivePurge( $urls );
		}
	}
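
	// Example configuration sketch (LocalSettings.php, illustrative values only):
	// either or both purge paths can be enabled depending on the CDN setup.
	//
	//   $wgCdnServers = [ '127.0.0.1:3128' ];                     // direct HTTP PURGE
	//   $wgHTCPRouting = [
	//       '' => [ 'host' => '239.128.0.112', 'port' => 4827 ],  // HTCP multicast
	//   ];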

	/**
	 * @return string[] List of URLs
	 */
	public function getUrls() {
		return array_keys( $this->resolveReboundDelayByUrl() );
	}

	/**
	 * @return int[] Map of (URL => rebound purge delay)
	 */
	private function resolveReboundDelayByUrl() {
		$services = MediaWikiServices::getInstance();
		/** @var PageReference $page */

		// Avoid multiple queries for HTMLCacheUpdater::getUrls() call
		$lb = $services->getLinkBatchFactory()->newLinkBatch();
		foreach ( $this->pageTuples as [ $page, ] ) {
			$lb->addObj( $page );
		}
		$lb->execute();

		$reboundDelayByUrl = [];

		// Resolve the titles into CDN URLs
		$htmlCacheUpdater = $services->getHtmlCacheUpdater();
		foreach ( $this->pageTuples as [ $page, $delay ] ) {
			foreach ( $htmlCacheUpdater->getUrls( $page ) as $url ) {
				// Use the highest rebound for duplicate URLs in order to handle the most lag
				$reboundDelayByUrl[$url] = max( $reboundDelayByUrl[$url] ?? 0, $delay );
			}
		}

		foreach ( $this->urlTuples as [ $url, $delay ] ) {
			// Use the highest rebound for duplicate URLs in order to handle the most lag
			$reboundDelayByUrl[$url] = max( $reboundDelayByUrl[$url] ?? 0, $delay );
		}

		return $reboundDelayByUrl;
	}

	/**
	 * Send Hyper Text Caching Protocol (HTCP) CLR requests
	 *
	 * @param string[] $urls Collection of URLs to purge
	 */
	private static function HTCPPurge( array $urls ) {
		$htcpRouting = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::HTCPRouting );
		$htcpMulticastTTL = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::HTCPMulticastTTL );
		// HTCP CLR operation
		$htcpOpCLR = 4;

		// @todo FIXME: PHP doesn't support these socket constants (include/linux/in.h)
		if ( !defined( "IPPROTO_IP" ) ) {
			define( "IPPROTO_IP", 0 );
			define( "IP_MULTICAST_LOOP", 34 );
			define( "IP_MULTICAST_TTL", 33 );
		}

		// pfsockopen doesn't work because we need set_sock_opt
		$conn = socket_create( AF_INET, SOCK_DGRAM, SOL_UDP );
		if ( !$conn ) {
			$errstr = socket_strerror( socket_last_error() );
			wfDebugLog( 'squid', __METHOD__ .
				": Error opening UDP socket: $errstr" );

			return;
		}

		// Set socket options
		socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_LOOP, 0 );
		if ( $htcpMulticastTTL != 1 ) {
			// Set multicast time to live (hop count) option on socket
			socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_TTL,
				$htcpMulticastTTL );
		}

		// Get sequential trx IDs for packet loss counting
		$idGenerator = MediaWikiServices::getInstance()->getGlobalIdGenerator();
		$ids = $idGenerator->newSequentialPerNodeIDs(
			'squidhtcppurge',
			32,
			count( $urls )
		);

		foreach ( $urls as $url ) {
			if ( !is_string( $url ) ) {
				throw new InvalidArgumentException( 'Bad purge URL' );
			}
			$url = self::expand( $url );
			$conf = self::getRuleForURL( $url, $htcpRouting );
			if ( !$conf ) {
				wfDebugLog( 'squid', __METHOD__ .
					": No HTCP rule configured for URL {$url}, skipping" );
				continue;
			}

			if ( isset( $conf['host'] ) && isset( $conf['port'] ) ) {
				// Normalize single entries
				$conf = [ $conf ];
			}
			foreach ( $conf as $subconf ) {
				if ( !isset( $subconf['host'] ) || !isset( $subconf['port'] ) ) {
					throw new RuntimeException( "Invalid HTCP rule for URL $url\n" );
				}
			}

			// Construct a minimal HTCP request datagram
			// as per RFC 2756
			// Opcode 'CLR', no response desired, no auth
			$htcpTransID = current( $ids );
			next( $ids );

			$htcpSpecifier = pack( 'na4na*na8n',
				4, 'HEAD', strlen( $url ), $url,
				8, 'HTTP/1.0', 0 );

			$htcpDataLen = 8 + 2 + strlen( $htcpSpecifier );
			$htcpLen = 4 + $htcpDataLen + 2;

			// Note! Squid gets the bit order of the first
			// word wrong, wrt the RFC. Apparently no other
			// implementation exists, so adapt to Squid
			$htcpPacket = pack( 'nxxnCxNxxa*n',
				$htcpLen, $htcpDataLen, $htcpOpCLR,
				$htcpTransID, $htcpSpecifier, 2 );

			wfDebugLog( 'squid', __METHOD__ .
				": Purging URL $url via HTCP" );
			foreach ( $conf as $subconf ) {
				socket_sendto( $conn, $htcpPacket, $htcpLen, 0,
					$subconf['host'], $subconf['port'] );
			}
		}
	}

	/**
	 * Send HTTP PURGE requests for each of the URLs to all of the cache servers
	 *
	 * @param string[] $urls
	 * @throws Exception
	 */
	private static function naivePurge( array $urls ) {
		$cdnServers = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::CdnServers );

		$reqs = [];
		foreach ( $urls as $url ) {
			$url = self::expand( $url );
			$urlInfo = wfGetUrlUtils()->parse( $url ) ?? false;
			$urlHost = strlen( $urlInfo['port'] ?? '' )
				? IPUtils::combineHostAndPort( $urlInfo['host'], (int)$urlInfo['port'] )
				: $urlInfo['host'];
			$baseReq = [
				'method' => 'PURGE',
				'url' => $url,
				'headers' => [
					'Host' => $urlHost,
					'Connection' => 'Keep-Alive',
					'Proxy-Connection' => 'Keep-Alive',
					'User-Agent' => 'MediaWiki/' . MW_VERSION . ' ' . __CLASS__
				]
			];
			foreach ( $cdnServers as $server ) {
				$reqs[] = ( $baseReq + [ 'proxy' => $server ] );
			}
		}

		$http = MediaWikiServices::getInstance()->getHttpRequestFactory()
			->createMultiClient( [ 'maxConnsPerHost' => 8, 'usePipelining' => true ] );
		$http->runMulti( $reqs );
	}

	/**
	 * Expand local URLs to fully-qualified URLs using the internal protocol
	 * and host defined in $wgInternalServer. Input that's already fully-
	 * qualified will be passed through unchanged.
	 *
	 * This is used to generate purge URLs that may be either local to the
	 * main wiki or include a non-native host, such as images hosted on a
	 * second internal server.
	 *
	 * Client functions should not need to call this.
	 *
	 * @param string $url
	 * @return string
	 */
	private static function expand( $url ) {
		return (string)MediaWikiServices::getInstance()->getUrlUtils()->expand( $url, PROTO_INTERNAL );
	}
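
	// Example (hypothetical values): with $wgInternalServer = 'http://internal.example',
	// expand( '/wiki/Example_page' ) yields 'http://internal.example/wiki/Example_page',
	// while an already fully-qualified URL such as 'https://upload.example.org/x.png'
	// is passed through unchanged.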

	/**
	 * Find the HTCP routing rule to use for a given URL.
	 * @param string $url URL to match
	 * @param array $rules Array of rules, see $wgHTCPRouting for format and behavior
	 * @return mixed Element of $rules that matched, or false if nothing matched
	 */
	private static function getRuleForURL( $url, $rules ) {
		foreach ( $rules as $regex => $routing ) {
			if ( $regex === '' || preg_match( $regex, $url ) ) {
				return $routing;
			}
		}

		return false;
	}
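
	// Matching sketch (illustrative rules only): with
	//   $rules = [
	//       '|^https?://upload\.example\.org|' => [ 'host' => '239.128.0.113', 'port' => 4827 ],
	//       '' => [ 'host' => '239.128.0.112', 'port' => 4827 ],
	//   ];
	// an upload URL matches the first rule, and any other URL falls through to the
	// catch-all '' key, which matches everything.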
}

/** @deprecated class alias since 1.42 */
class_alias( CdnCacheUpdate::class, 'CdnCacheUpdate' );