MediaWiki REL1_41
CdnCacheUpdate.php
Go to the documentation of this file.
1<?php
24use Wikimedia\Assert\Assert;
25use Wikimedia\IPUtils;
26
	/** @var array[] List of [ URL string, rebound delay in seconds ] tuples */
	private $urlTuples = [];
	/** @var array[] List of [ PageReference, rebound delay in seconds ] tuples */
	private $pageTuples = [];

	/** @var int Maximum seconds of rebound purge delay */
	private const MAX_REBOUND_DELAY = 300;
39
46 public function __construct( array $targets, array $options = [] ) {
47 $delay = min(
48 (int)max( $options['reboundDelay'] ?? 0, 0 ),
49 self::MAX_REBOUND_DELAY
50 );
51
52 foreach ( $targets as $target ) {
53 if ( $target instanceof PageReference ) {
54 $this->pageTuples[] = [ $target, $delay ];
55 } else {
56 $this->urlTuples[] = [ $target, $delay ];
57 }
58 }
59 }
60
61 public function merge( MergeableUpdate $update ) {
63 Assert::parameterType( __CLASS__, $update, '$update' );
64 '@phan-var self $update';
65
66 $this->urlTuples = array_merge( $this->urlTuples, $update->urlTuples );
67 $this->pageTuples = array_merge( $this->pageTuples, $update->pageTuples );
68 }
69
70 public function doUpdate() {
71 // Resolve the final list of URLs just before purging them (T240083)
72 $reboundDelayByUrl = $this->resolveReboundDelayByUrl();
73
74 // Send the immediate purges to CDN
75 self::purge( array_keys( $reboundDelayByUrl ) );
76 $immediatePurgeTimestamp = time();
77
78 // Get the URLs that need rebound purges, grouped by seconds of purge delay
79 $urlsWithReboundByDelay = [];
80 foreach ( $reboundDelayByUrl as $url => $delay ) {
81 if ( $delay > 0 ) {
82 $urlsWithReboundByDelay[$delay][] = $url;
83 }
84 }
85 // Enqueue delayed purge jobs for these URLs (usually only one job)
86 $jobs = [];
87 foreach ( $urlsWithReboundByDelay as $delay => $urls ) {
88 $jobs[] = new CdnPurgeJob( [
89 'urls' => $urls,
90 'jobReleaseTimestamp' => $immediatePurgeTimestamp + $delay
91 ] );
92 }
93 MediaWikiServices::getInstance()->getJobQueueGroup()->lazyPush( $jobs );
94 }
95
103 public static function purge( array $urls ) {
104 $cdnServers = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::CdnServers );
105 $htcpRouting = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::HTCPRouting );
106 if ( !$urls ) {
107 return;
108 }
109
110 // Remove duplicate URLs from list
111 $urls = array_unique( $urls );
112
113 wfDebugLog( 'squid', __METHOD__ . ': ' . implode( ' ', $urls ) );
114
115 // Reliably broadcast the purge to all edge nodes
116 $ts = microtime( true );
117 $relayerGroup = MediaWikiServices::getInstance()->getEventRelayerGroup();
118 $relayerGroup->getRelayer( 'cdn-url-purges' )->notifyMulti(
119 'cdn-url-purges',
120 array_map(
121 static function ( $url ) use ( $ts ) {
122 return [
123 'url' => $url,
124 'timestamp' => $ts,
125 ];
126 },
127 $urls
128 )
129 );
130
131 // Send lossy UDP broadcasting if enabled
132 if ( $htcpRouting ) {
133 self::HTCPPurge( $urls );
134 }
135
136 // Do direct server purges if enabled (this does not scale very well)
137 if ( $cdnServers ) {
138 self::naivePurge( $urls );
139 }
140 }
141
145 public function getUrls() {
146 return array_keys( $this->resolveReboundDelayByUrl() );
147 }
148
152 private function resolveReboundDelayByUrl() {
153 $services = MediaWikiServices::getInstance();
156 // Avoid multiple queries for HtmlCacheUpdater::getUrls() call
157 $lb = $services->getLinkBatchFactory()->newLinkBatch();
158 foreach ( $this->pageTuples as [ $page, ] ) {
159 $lb->addObj( $page );
160 }
161 $lb->execute();
162
163 $reboundDelayByUrl = [];
164
165 // Resolve the titles into CDN URLs
166 $htmlCacheUpdater = $services->getHtmlCacheUpdater();
167 foreach ( $this->pageTuples as [ $page, $delay ] ) {
168 foreach ( $htmlCacheUpdater->getUrls( $page ) as $url ) {
169 // Use the highest rebound for duplicate URLs in order to handle the most lag
170 $reboundDelayByUrl[$url] = max( $reboundDelayByUrl[$url] ?? 0, $delay );
171 }
172 }
173
174 foreach ( $this->urlTuples as [ $url, $delay ] ) {
175 // Use the highest rebound for duplicate URLs in order to handle the most lag
176 $reboundDelayByUrl[$url] = max( $reboundDelayByUrl[$url] ?? 0, $delay );
177 }
178
179 return $reboundDelayByUrl;
180 }
181
	/**
	 * Broadcast HTCP CLR packets (RFC 2756) over UDP to purge the given URLs.
	 *
	 * Each URL is matched against the HTCPRouting config (first matching rule
	 * wins) and a minimal HTCP "CLR" request is sent to every configured
	 * host/port destination. UDP is fire-and-forget: delivery is not verified.
	 *
	 * @param string[] $urls Fully-qualified URLs to purge
	 * @throws InvalidArgumentException If a non-string URL is passed
	 * @throws RuntimeException If a matched HTCP rule lacks a host or port
	 */
	private static function HTCPPurge( array $urls ) {
		$htcpRouting = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::HTCPRouting );
		$htcpMulticastTTL = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::HTCPMulticastTTL );
		// HTCP CLR operation
		$htcpOpCLR = 4;

		// @todo FIXME: PHP doesn't support these socket constants (include/linux/in.h)
		if ( !defined( "IPPROTO_IP" ) ) {
			define( "IPPROTO_IP", 0 );
			define( "IP_MULTICAST_LOOP", 34 );
			define( "IP_MULTICAST_TTL", 33 );
		}

		// pfsockopen doesn't work because we need set_sock_opt
		$conn = socket_create( AF_INET, SOCK_DGRAM, SOL_UDP );
		if ( !$conn ) {
			// Best-effort semantics: log the socket error and return rather than throw
			$errstr = socket_strerror( socket_last_error() );
			wfDebugLog( 'squid', __METHOD__ .
				": Error opening UDP socket: $errstr" );

			return;
		}

		// Set socket options
		socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_LOOP, 0 );
		if ( $htcpMulticastTTL != 1 ) {
			// Set multicast time to live (hop count) option on socket
			socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_TTL,
				$htcpMulticastTTL );
		}

		// Get sequential trx IDs for packet loss counting
		$idGenerator = MediaWikiServices::getInstance()->getGlobalIdGenerator();
		$ids = $idGenerator->newSequentialPerNodeIDs(
			'squidhtcppurge',
			32,
			count( $urls )
		);

		foreach ( $urls as $url ) {
			if ( !is_string( $url ) ) {
				throw new InvalidArgumentException( 'Bad purge URL' );
			}
			// Expand to a fully-qualified URL before matching routing rules
			$url = self::expand( $url );
			$conf = self::getRuleForURL( $url, $htcpRouting );
			if ( !$conf ) {
				wfDebugLog( 'squid', __METHOD__ .
					"No HTCP rule configured for URL {$url} , skipping" );
				continue;
			}

			if ( isset( $conf['host'] ) && isset( $conf['port'] ) ) {
				// Normalize single entries
				$conf = [ $conf ];
			}
			// Validate every destination for this URL before sending anything
			foreach ( $conf as $subconf ) {
				if ( !isset( $subconf['host'] ) || !isset( $subconf['port'] ) ) {
					throw new RuntimeException( "Invalid HTCP rule for URL $url\n" );
				}
			}

			// Construct a minimal HTCP request diagram
			// as per RFC 2756
			// Opcode 'CLR', no response desired, no auth
			// current()/next() walk the pre-allocated ID list in step with the URLs
			$htcpTransID = current( $ids );
			next( $ids );

			// Specifier block: method ('HEAD'), the URL, and version ('HTTP/1.0'),
			// each prefixed by its 16-bit length; the trailing 0 is the length
			// of the (empty) request headers section
			$htcpSpecifier = pack( 'na4na*na8n',
				4, 'HEAD', strlen( $url ), $url,
				8, 'HTTP/1.0', 0 );

			$htcpDataLen = 8 + 2 + strlen( $htcpSpecifier );
			$htcpLen = 4 + $htcpDataLen + 2;

			// Note! Squid gets the bit order of the first
			// word wrong, wrt the RFC. Apparently no other
			// implementation exists, so adapt to Squid
			$htcpPacket = pack( 'nxxnCxNxxa*n',
				$htcpLen, $htcpDataLen, $htcpOpCLR,
				$htcpTransID, $htcpSpecifier, 2 );

			wfDebugLog( 'squid', __METHOD__ .
				"Purging URL $url via HTCP" );
			// Fire-and-forget UDP send to each configured endpoint
			foreach ( $conf as $subconf ) {
				socket_sendto( $conn, $htcpPacket, $htcpLen, 0,
					$subconf['host'], $subconf['port'] );
			}
		}
	}
276
283 private static function naivePurge( array $urls ) {
284 $cdnServers = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::CdnServers );
285
286 $reqs = [];
287 foreach ( $urls as $url ) {
288 $url = self::expand( $url );
289 $urlInfo = wfParseUrl( $url );
290 $urlHost = strlen( $urlInfo['port'] ?? '' )
291 ? IPUtils::combineHostAndPort( $urlInfo['host'], (int)$urlInfo['port'] )
292 : $urlInfo['host'];
293 $baseReq = [
294 'method' => 'PURGE',
295 'url' => $url,
296 'headers' => [
297 'Host' => $urlHost,
298 'Connection' => 'Keep-Alive',
299 'Proxy-Connection' => 'Keep-Alive',
300 'User-Agent' => 'MediaWiki/' . MW_VERSION . ' ' . __CLASS__
301 ]
302 ];
303 foreach ( $cdnServers as $server ) {
304 $reqs[] = ( $baseReq + [ 'proxy' => $server ] );
305 }
306 }
307
308 $http = MediaWikiServices::getInstance()->getHttpRequestFactory()
309 ->createMultiClient( [ 'maxConnsPerHost' => 8, 'usePipelining' => true ] );
310 $http->runMulti( $reqs );
311 }
312
327 private static function expand( $url ) {
328 return (string)MediaWikiServices::getInstance()->getUrlUtils()->expand( $url, PROTO_INTERNAL );
329 }
330
337 private static function getRuleForURL( $url, $rules ) {
338 foreach ( $rules as $regex => $routing ) {
339 if ( $regex === '' || preg_match( $regex, $url ) ) {
340 return $routing;
341 }
342 }
343
344 return false;
345 }
346}
const MW_VERSION
The running version of MediaWiki.
Definition Defines.php:36
const PROTO_INTERNAL
Definition Defines.php:198
wfParseUrl( $url)
parse_url() work-alike, but non-broken.
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not.
Handles purging the appropriate CDN objects given a list of URLs or PageReference instances.
__construct(array $targets, array $options=[])
static purge(array $urls)
Purges a list of CDN nodes defined in $wgCdnServers.
doUpdate()
Perform the actual work.
merge(MergeableUpdate $update)
Merge this enqueued update with a new MergeableUpdate of the same qualified class name.
Job to purge a set of URLs from CDN.
A class containing constants representing the names of configuration variables.
Service locator for MediaWiki core services.
Interface that deferrable updates should implement.
Interface for objects (potentially) representing a page that can be viewable and linked to on a wiki.
Interface that deferrable updates can implement to signal that updates can be combined.