MediaWiki REL1_37
CdnCacheUpdate.php
Go to the documentation of this file.
1<?php
23use Wikimedia\Assert\Assert;
24
	/** @var array[] List of (URL, rebound purge delay) tuples */
	private $urlTuples = [];
	/** @var array[] List of (PageReference, rebound purge delay) tuples */
	private $pageTuples = [];

	/** @var int Maximum seconds of rebound purge delay */
	private const MAX_REBOUND_DELAY = 300;
37
44 public function __construct( array $targets, array $options = [] ) {
45 $delay = min(
46 (int)max( $options['reboundDelay'] ?? 0, 0 ),
47 self::MAX_REBOUND_DELAY
48 );
49
50 foreach ( $targets as $target ) {
51 if ( $target instanceof PageReference ) {
52 $this->pageTuples[] = [ $target, $delay ];
53 } else {
54 $this->urlTuples[] = [ $target, $delay ];
55 }
56 }
57 }
58
59 public function merge( MergeableUpdate $update ) {
61 Assert::parameterType( __CLASS__, $update, '$update' );
62 '@phan-var self $update';
63
64 $this->urlTuples = array_merge( $this->urlTuples, $update->urlTuples );
65 $this->pageTuples = array_merge( $this->pageTuples, $update->pageTuples );
66 }
67
77 public static function newFromTitles( $pages, $urls = [] ) {
78 return new CdnCacheUpdate( array_merge( $pages, $urls ) );
79 }
80
81 public function doUpdate() {
82 // Resolve the final list of URLs just before purging them (T240083)
83 $reboundDelayByUrl = $this->resolveReboundDelayByUrl();
84
85 // Send the immediate purges to CDN
86 self::purge( array_keys( $reboundDelayByUrl ) );
87 $immediatePurgeTimestamp = time();
88
89 // Get the URLs that need rebound purges, grouped by seconds of purge delay
90 $urlsWithReboundByDelay = [];
91 foreach ( $reboundDelayByUrl as $url => $delay ) {
92 if ( $delay > 0 ) {
93 $urlsWithReboundByDelay[$delay][] = $url;
94 }
95 }
96 // Enqueue delayed purge jobs for these URLs (usually only one job)
97 $jobs = [];
98 foreach ( $urlsWithReboundByDelay as $delay => $urls ) {
99 $jobs[] = new CdnPurgeJob( [
100 'urls' => $urls,
101 'jobReleaseTimestamp' => $immediatePurgeTimestamp + $delay
102 ] );
103 }
104 JobQueueGroup::singleton()->lazyPush( $jobs );
105 }
106
114 public static function purge( array $urls ) {
116
117 if ( !$urls ) {
118 return;
119 }
120
121 // Remove duplicate URLs from list
122 $urls = array_unique( $urls );
123
124 wfDebugLog( 'squid', __METHOD__ . ': ' . implode( ' ', $urls ) );
125
126 // Reliably broadcast the purge to all edge nodes
127 $ts = microtime( true );
128 $relayerGroup = MediaWikiServices::getInstance()->getEventRelayerGroup();
129 $relayerGroup->getRelayer( 'cdn-url-purges' )->notifyMulti(
130 'cdn-url-purges',
131 array_map(
132 static function ( $url ) use ( $ts ) {
133 return [
134 'url' => $url,
135 'timestamp' => $ts,
136 ];
137 },
138 $urls
139 )
140 );
141
142 // Send lossy UDP broadcasting if enabled
143 if ( $wgHTCPRouting ) {
144 self::HTCPPurge( $urls );
145 }
146
147 // Do direct server purges if enabled (this does not scale very well)
148 if ( $wgCdnServers ) {
149 self::naivePurge( $urls );
150 }
151 }
152
156 public function getUrls() {
157 return array_keys( $this->resolveReboundDelayByUrl() );
158 }
159
163 private function resolveReboundDelayByUrl() {
164 $services = MediaWikiServices::getInstance();
167 // Avoid multiple queries for HtmlCacheUpdater::getUrls() call
168 $lb = $services->getLinkBatchFactory()->newLinkBatch();
169 foreach ( $this->pageTuples as list( $page, $delay ) ) {
170 $lb->addObj( $page );
171 }
172 $lb->execute();
173
174 $reboundDelayByUrl = [];
175
176 // Resolve the titles into CDN URLs
177 $htmlCacheUpdater = $services->getHtmlCacheUpdater();
178 foreach ( $this->pageTuples as list( $page, $delay ) ) {
179 foreach ( $htmlCacheUpdater->getUrls( $page ) as $url ) {
180 // Use the highest rebound for duplicate URLs in order to handle the most lag
181 $reboundDelayByUrl[$url] = max( $reboundDelayByUrl[$url] ?? 0, $delay );
182 }
183 }
184
185 foreach ( $this->urlTuples as list( $url, $delay ) ) {
186 // Use the highest rebound for duplicate URLs in order to handle the most lag
187 $reboundDelayByUrl[$url] = max( $reboundDelayByUrl[$url] ?? 0, $delay );
188 }
189
190 return $reboundDelayByUrl;
191 }
192
199 private static function HTCPPurge( array $urls ) {
201
202 // HTCP CLR operation
203 $htcpOpCLR = 4;
204
205 // @todo FIXME: PHP doesn't support these socket constants (include/linux/in.h)
206 if ( !defined( "IPPROTO_IP" ) ) {
207 define( "IPPROTO_IP", 0 );
208 define( "IP_MULTICAST_LOOP", 34 );
209 define( "IP_MULTICAST_TTL", 33 );
210 }
211
212 // pfsockopen doesn't work because we need set_sock_opt
213 $conn = socket_create( AF_INET, SOCK_DGRAM, SOL_UDP );
214 if ( !$conn ) {
215 $errstr = socket_strerror( socket_last_error() );
216 wfDebugLog( 'squid', __METHOD__ .
217 ": Error opening UDP socket: $errstr" );
218
219 return;
220 }
221
222 // Set socket options
223 socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_LOOP, 0 );
224 if ( $wgHTCPMulticastTTL != 1 ) {
225 // Set multicast time to live (hop count) option on socket
226 socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_TTL,
228 }
229
230 // Get sequential trx IDs for packet loss counting
231 $idGenerator = MediaWikiServices::getInstance()->getGlobalIdGenerator();
232 $ids = $idGenerator->newSequentialPerNodeIDs(
233 'squidhtcppurge',
234 32,
235 count( $urls )
236 );
237
238 foreach ( $urls as $url ) {
239 if ( !is_string( $url ) ) {
240 throw new MWException( 'Bad purge URL' );
241 }
242 $url = self::expand( $url );
243 $conf = self::getRuleForURL( $url, $wgHTCPRouting );
244 if ( !$conf ) {
245 wfDebugLog( 'squid', __METHOD__ .
246 "No HTCP rule configured for URL {$url} , skipping" );
247 continue;
248 }
249
250 if ( isset( $conf['host'] ) && isset( $conf['port'] ) ) {
251 // Normalize single entries
252 $conf = [ $conf ];
253 }
254 foreach ( $conf as $subconf ) {
255 if ( !isset( $subconf['host'] ) || !isset( $subconf['port'] ) ) {
256 throw new MWException( "Invalid HTCP rule for URL $url\n" );
257 }
258 }
259
260 // Construct a minimal HTCP request diagram
261 // as per RFC 2756
262 // Opcode 'CLR', no response desired, no auth
263 $htcpTransID = current( $ids );
264 next( $ids );
265
266 $htcpSpecifier = pack( 'na4na*na8n',
267 4, 'HEAD', strlen( $url ), $url,
268 8, 'HTTP/1.0', 0 );
269
270 $htcpDataLen = 8 + 2 + strlen( $htcpSpecifier );
271 $htcpLen = 4 + $htcpDataLen + 2;
272
273 // Note! Squid gets the bit order of the first
274 // word wrong, wrt the RFC. Apparently no other
275 // implementation exists, so adapt to Squid
276 $htcpPacket = pack( 'nxxnCxNxxa*n',
277 $htcpLen, $htcpDataLen, $htcpOpCLR,
278 $htcpTransID, $htcpSpecifier, 2 );
279
280 wfDebugLog( 'squid', __METHOD__ .
281 "Purging URL $url via HTCP" );
282 foreach ( $conf as $subconf ) {
283 socket_sendto( $conn, $htcpPacket, $htcpLen, 0,
284 $subconf['host'], $subconf['port'] );
285 }
286 }
287 }
288
295 private static function naivePurge( array $urls ) {
296 global $wgCdnServers;
297
298 $reqs = [];
299 foreach ( $urls as $url ) {
300 $url = self::expand( $url );
301 $urlInfo = wfParseUrl( $url );
302 $urlHost = strlen( $urlInfo['port'] ?? null )
303 ? IP::combineHostAndPort( $urlInfo['host'], $urlInfo['port'] )
304 : $urlInfo['host'];
305 $baseReq = [
306 'method' => 'PURGE',
307 'url' => $url,
308 'headers' => [
309 'Host' => $urlHost,
310 'Connection' => 'Keep-Alive',
311 'Proxy-Connection' => 'Keep-Alive',
312 'User-Agent' => 'MediaWiki/' . MW_VERSION . ' ' . __CLASS__
313 ]
314 ];
315 foreach ( $wgCdnServers as $server ) {
316 $reqs[] = ( $baseReq + [ 'proxy' => $server ] );
317 }
318 }
319
320 $http = MediaWikiServices::getInstance()->getHttpRequestFactory()
321 ->createMultiClient( [ 'maxConnsPerHost' => 8, 'usePipelining' => true ] );
322 $http->runMulti( $reqs );
323 }
324
339 private static function expand( $url ) {
340 return wfExpandUrl( $url, PROTO_INTERNAL );
341 }
342
349 private static function getRuleForURL( $url, $rules ) {
350 foreach ( $rules as $regex => $routing ) {
351 if ( $regex === '' || preg_match( $regex, $url ) ) {
352 return $routing;
353 }
354 }
355
356 return false;
357 }
358}
$wgHTCPRouting
Routing configuration for HTCP multicast purging.
$wgCdnServers
List of proxy servers to purge on changes; default port is 80.
$wgHTCPMulticastTTL
HTCP multicast TTL.
const MW_VERSION
The running version of MediaWiki.
Definition Defines.php:36
const PROTO_INTERNAL
Definition Defines.php:197
wfParseUrl( $url)
parse_url() work-alike, but non-broken.
wfExpandUrl( $url, $defaultProto=PROTO_CURRENT)
Expand a potentially local URL to a fully-qualified URL.
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not.
Handles purging the appropriate CDN objects given a list of URLs or Title instances.
__construct(array $targets, array $options=[])
static newFromTitles( $pages, $urls=[])
Create an update object from an array of Title objects, or a TitleArray object.
array[] $urlTuples
List of (URL, rebound purge delay) tuples.
static purge(array $urls)
Purges a list of URLs from all configured CDN channels: the event relayer, HTCP multicast (if $wgHTCPRouting is set), and direct PURGE requests to the servers in $wgCdnServers.
static getRuleForURL( $url, $rules)
Find the HTCP routing rule to use for a given URL.
static HTCPPurge(array $urls)
Send Hyper Text Caching Protocol (HTCP) CLR requests.
doUpdate()
Perform the actual work.
array[] $pageTuples
List of (PageReference, rebound purge delay) tuples.
static expand( $url)
Expand local URLs to fully-qualified URLs using the internal protocol and host defined in $wgInternalServer.
merge(MergeableUpdate $update)
Merge this enqueued update with a new MergeableUpdate of the same qualified class name.
static naivePurge(array $urls)
Send HTTP PURGE requests for each of the URLs to all of the cache servers.
Job to purge a set of URLs from CDN.
MediaWiki exception.
MediaWikiServices is the service locator for the application scope of MediaWiki.
Interface that deferrable updates should implement.
Interface for objects (potentially) representing a page that can be viewable and linked to on a wiki.
Interface that deferrable updates can implement to signal that updates can be combined.