MediaWiki REL1_40
CdnCacheUpdate.php
Go to the documentation of this file.
1<?php
24use Wikimedia\Assert\Assert;
25use Wikimedia\IPUtils;
26
	/** @var array[] List of (URL string, rebound purge delay) tuples */
	private $urlTuples = [];
	/** @var array[] List of (PageReference, rebound purge delay) tuples */
	private $pageTuples = [];

	/** @var int Maximum number of seconds for a rebound purge delay */
	private const MAX_REBOUND_DELAY = 300;
46 public function __construct( array $targets, array $options = [] ) {
47 $delay = min(
48 (int)max( $options['reboundDelay'] ?? 0, 0 ),
49 self::MAX_REBOUND_DELAY
50 );
51
52 foreach ( $targets as $target ) {
53 if ( $target instanceof PageReference ) {
54 $this->pageTuples[] = [ $target, $delay ];
55 } else {
56 $this->urlTuples[] = [ $target, $delay ];
57 }
58 }
59 }
60
61 public function merge( MergeableUpdate $update ) {
63 Assert::parameterType( __CLASS__, $update, '$update' );
64 '@phan-var self $update';
65
66 $this->urlTuples = array_merge( $this->urlTuples, $update->urlTuples );
67 $this->pageTuples = array_merge( $this->pageTuples, $update->pageTuples );
68 }
69
70 public function doUpdate() {
71 // Resolve the final list of URLs just before purging them (T240083)
72 $reboundDelayByUrl = $this->resolveReboundDelayByUrl();
73
74 // Send the immediate purges to CDN
75 self::purge( array_keys( $reboundDelayByUrl ) );
76 $immediatePurgeTimestamp = time();
77
78 // Get the URLs that need rebound purges, grouped by seconds of purge delay
79 $urlsWithReboundByDelay = [];
80 foreach ( $reboundDelayByUrl as $url => $delay ) {
81 if ( $delay > 0 ) {
82 $urlsWithReboundByDelay[$delay][] = $url;
83 }
84 }
85 // Enqueue delayed purge jobs for these URLs (usually only one job)
86 $jobs = [];
87 foreach ( $urlsWithReboundByDelay as $delay => $urls ) {
88 $jobs[] = new CdnPurgeJob( [
89 'urls' => $urls,
90 'jobReleaseTimestamp' => $immediatePurgeTimestamp + $delay
91 ] );
92 }
93 MediaWikiServices::getInstance()->getJobQueueGroup()->lazyPush( $jobs );
94 }
95
103 public static function purge( array $urls ) {
104 $cdnServers = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::CdnServers );
105 $htcpRouting = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::HTCPRouting );
106 if ( !$urls ) {
107 return;
108 }
109
110 // Remove duplicate URLs from list
111 $urls = array_unique( $urls );
112
113 wfDebugLog( 'squid', __METHOD__ . ': ' . implode( ' ', $urls ) );
114
115 // Reliably broadcast the purge to all edge nodes
116 $ts = microtime( true );
117 $relayerGroup = MediaWikiServices::getInstance()->getEventRelayerGroup();
118 $relayerGroup->getRelayer( 'cdn-url-purges' )->notifyMulti(
119 'cdn-url-purges',
120 array_map(
121 static function ( $url ) use ( $ts ) {
122 return [
123 'url' => $url,
124 'timestamp' => $ts,
125 ];
126 },
127 $urls
128 )
129 );
130
131 // Send lossy UDP broadcasting if enabled
132 if ( $htcpRouting ) {
133 self::HTCPPurge( $urls );
134 }
135
136 // Do direct server purges if enabled (this does not scale very well)
137 if ( $cdnServers ) {
138 self::naivePurge( $urls );
139 }
140 }
141
145 public function getUrls() {
146 return array_keys( $this->resolveReboundDelayByUrl() );
147 }
148
152 private function resolveReboundDelayByUrl() {
153 $services = MediaWikiServices::getInstance();
156 // Avoid multiple queries for HtmlCacheUpdater::getUrls() call
157 $lb = $services->getLinkBatchFactory()->newLinkBatch();
158 foreach ( $this->pageTuples as [ $page, ] ) {
159 $lb->addObj( $page );
160 }
161 $lb->execute();
162
163 $reboundDelayByUrl = [];
164
165 // Resolve the titles into CDN URLs
166 $htmlCacheUpdater = $services->getHtmlCacheUpdater();
167 foreach ( $this->pageTuples as [ $page, $delay ] ) {
168 foreach ( $htmlCacheUpdater->getUrls( $page ) as $url ) {
169 // Use the highest rebound for duplicate URLs in order to handle the most lag
170 $reboundDelayByUrl[$url] = max( $reboundDelayByUrl[$url] ?? 0, $delay );
171 }
172 }
173
174 foreach ( $this->urlTuples as [ $url, $delay ] ) {
175 // Use the highest rebound for duplicate URLs in order to handle the most lag
176 $reboundDelayByUrl[$url] = max( $reboundDelayByUrl[$url] ?? 0, $delay );
177 }
178
179 return $reboundDelayByUrl;
180 }
181
	/**
	 * Send lossy HTCP (RFC 2756) CLR purge packets over UDP for each URL,
	 * routed to multicast/unicast destinations per $wgHTCPRouting.
	 *
	 * @param string[] $urls Collection of URLs to purge
	 * @throws MWException On a non-string URL or a malformed routing rule
	 */
	private static function HTCPPurge( array $urls ) {
		$htcpRouting = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::HTCPRouting );
		$htcpMulticastTTL = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::HTCPMulticastTTL );
		// HTCP CLR operation
		$htcpOpCLR = 4;

		// @todo FIXME: PHP doesn't support these socket constants (include/linux/in.h)
		if ( !defined( "IPPROTO_IP" ) ) {
			define( "IPPROTO_IP", 0 );
			define( "IP_MULTICAST_LOOP", 34 );
			define( "IP_MULTICAST_TTL", 33 );
		}

		// pfsockopen doesn't work because we need set_sock_opt
		$conn = socket_create( AF_INET, SOCK_DGRAM, SOL_UDP );
		if ( !$conn ) {
			// Best-effort delivery: log the socket failure and bail out
			// instead of throwing
			$errstr = socket_strerror( socket_last_error() );
			wfDebugLog( 'squid', __METHOD__ .
				": Error opening UDP socket: $errstr" );

			return;
		}

		// Set socket options
		socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_LOOP, 0 );
		if ( $htcpMulticastTTL != 1 ) {
			// Set multicast time to live (hop count) option on socket
			socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_TTL,
				$htcpMulticastTTL );
		}

		// Get sequential trx IDs for packet loss counting
		$idGenerator = MediaWikiServices::getInstance()->getGlobalIdGenerator();
		$ids = $idGenerator->newSequentialPerNodeIDs(
			'squidhtcppurge',
			32,
			count( $urls )
		);

		foreach ( $urls as $url ) {
			if ( !is_string( $url ) ) {
				throw new MWException( 'Bad purge URL' );
			}
			$url = self::expand( $url );
			// First routing rule whose regex matches this URL (or '' catch-all)
			$conf = self::getRuleForURL( $url, $htcpRouting );
			if ( !$conf ) {
				wfDebugLog( 'squid', __METHOD__ .
					"No HTCP rule configured for URL {$url} , skipping" );
				continue;
			}

			if ( isset( $conf['host'] ) && isset( $conf['port'] ) ) {
				// Normalize single entries
				$conf = [ $conf ];
			}
			// Validate every destination before sending any packet for this URL
			foreach ( $conf as $subconf ) {
				if ( !isset( $subconf['host'] ) || !isset( $subconf['port'] ) ) {
					throw new MWException( "Invalid HTCP rule for URL $url\n" );
				}
			}

			// Construct a minimal HTCP request diagram
			// as per RFC 2756
			// Opcode 'CLR', no response desired, no auth
			$htcpTransID = current( $ids );
			next( $ids );

			// HTCP SPECIFIER: (method, URI, version, empty request headers)
			$htcpSpecifier = pack( 'na4na*na8n',
				4, 'HEAD', strlen( $url ), $url,
				8, 'HTTP/1.0', 0 );

			$htcpDataLen = 8 + 2 + strlen( $htcpSpecifier );
			$htcpLen = 4 + $htcpDataLen + 2;

			// Note! Squid gets the bit order of the first
			// word wrong, wrt the RFC. Apparently no other
			// implementation exists, so adapt to Squid
			$htcpPacket = pack( 'nxxnCxNxxa*n',
				$htcpLen, $htcpDataLen, $htcpOpCLR,
				$htcpTransID, $htcpSpecifier, 2 );

			wfDebugLog( 'squid', __METHOD__ .
				"Purging URL $url via HTCP" );
			// Send the same packet to every configured destination for this URL
			foreach ( $conf as $subconf ) {
				socket_sendto( $conn, $htcpPacket, $htcpLen, 0,
					$subconf['host'], $subconf['port'] );
			}
		}
	}
277
284 private static function naivePurge( array $urls ) {
285 $cdnServers = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::CdnServers );
286
287 $reqs = [];
288 foreach ( $urls as $url ) {
289 $url = self::expand( $url );
290 $urlInfo = wfParseUrl( $url );
291 $urlHost = strlen( $urlInfo['port'] ?? '' )
292 ? IPUtils::combineHostAndPort( $urlInfo['host'], (int)$urlInfo['port'] )
293 : $urlInfo['host'];
294 $baseReq = [
295 'method' => 'PURGE',
296 'url' => $url,
297 'headers' => [
298 'Host' => $urlHost,
299 'Connection' => 'Keep-Alive',
300 'Proxy-Connection' => 'Keep-Alive',
301 'User-Agent' => 'MediaWiki/' . MW_VERSION . ' ' . __CLASS__
302 ]
303 ];
304 foreach ( $cdnServers as $server ) {
305 $reqs[] = ( $baseReq + [ 'proxy' => $server ] );
306 }
307 }
308
309 $http = MediaWikiServices::getInstance()->getHttpRequestFactory()
310 ->createMultiClient( [ 'maxConnsPerHost' => 8, 'usePipelining' => true ] );
311 $http->runMulti( $reqs );
312 }
313
328 private static function expand( $url ) {
329 return wfExpandUrl( $url, PROTO_INTERNAL );
330 }
331
338 private static function getRuleForURL( $url, $rules ) {
339 foreach ( $rules as $regex => $routing ) {
340 if ( $regex === '' || preg_match( $regex, $url ) ) {
341 return $routing;
342 }
343 }
344
345 return false;
346 }
347}
const MW_VERSION
The running version of MediaWiki.
Definition Defines.php:36
const PROTO_INTERNAL
Definition Defines.php:200
wfParseUrl( $url)
parse_url() work-alike, but non-broken.
wfExpandUrl( $url, $defaultProto=PROTO_CURRENT)
Expand a potentially local URL to a fully-qualified URL.
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not.
Handles purging the appropriate CDN objects given a list of URLs or Title instances.
__construct(array $targets, array $options=[])
static purge(array $urls)
Purges a list of CDN nodes defined in $wgCdnServers.
doUpdate()
Perform the actual work.
merge(MergeableUpdate $update)
Merge this enqueued update with a new MergeableUpdate of the same qualified class name.
Job to purge a set of URLs from CDN.
MediaWiki exception.
A class containing constants representing the names of configuration variables.
Service locator for MediaWiki core services.
Interface that deferrable updates should implement.
Interface for objects (potentially) representing a page that can be viewable and linked to on a wiki.
Interface that deferrable updates can implement to signal that updates can be combined.