MediaWiki master
CdnCacheUpdate.php
<?php
namespace MediaWiki\Deferred;

use CdnPurgeJob;
use Exception;
use InvalidArgumentException;
use MediaWiki\MainConfigNames;
use MediaWiki\MediaWikiServices;
use MediaWiki\Page\PageReference;
use RuntimeException;
use Wikimedia\Assert\Assert;
use Wikimedia\IPUtils;

/**
 * Handles purging the appropriate CDN objects given a list of URLs or Title instances.
 */
class CdnCacheUpdate implements MergeableUpdate {
	/** @var array[] List of (URL, rebound purge delay) tuples */
	private $urlTuples = [];
	/** @var array[] List of (PageReference, rebound purge delay) tuples */
	private $pageTuples = [];

	/** @var int Maximum seconds of rebound purge delay */
	private const MAX_REBOUND_DELAY = 300;

	/**
	 * @param PageReference[]|string[] $targets Page references and/or URLs to purge
	 * @param array $options Supports a 'reboundDelay' key: seconds to wait before sending
	 *  a second, delayed purge for the same URLs
	 */
	public function __construct( array $targets, array $options = [] ) {
		// Clamp the rebound delay to the range [0, MAX_REBOUND_DELAY]
		$delay = min(
			(int)max( $options['reboundDelay'] ?? 0, 0 ),
			self::MAX_REBOUND_DELAY
		);

		foreach ( $targets as $target ) {
			if ( $target instanceof PageReference ) {
				$this->pageTuples[] = [ $target, $delay ];
			} else {
				$this->urlTuples[] = [ $target, $delay ];
			}
		}
	}

	public function merge( MergeableUpdate $update ) {
		Assert::parameterType( __CLASS__, $update, '$update' );
		'@phan-var self $update';

		$this->urlTuples = array_merge( $this->urlTuples, $update->urlTuples );
		$this->pageTuples = array_merge( $this->pageTuples, $update->pageTuples );
	}

	public function doUpdate() {
		// Resolve the final list of URLs just before purging them (T240083)
		$reboundDelayByUrl = $this->resolveReboundDelayByUrl();

		// Send the immediate purges to CDN
		self::purge( array_keys( $reboundDelayByUrl ) );
		$immediatePurgeTimestamp = time();

		// Get the URLs that need rebound purges, grouped by seconds of purge delay
		$urlsWithReboundByDelay = [];
		foreach ( $reboundDelayByUrl as $url => $delay ) {
			if ( $delay > 0 ) {
				$urlsWithReboundByDelay[$delay][] = $url;
			}
		}
		// Enqueue delayed purge jobs for these URLs (usually only one job)
		$jobs = [];
		foreach ( $urlsWithReboundByDelay as $delay => $urls ) {
			$jobs[] = new CdnPurgeJob( [
				'urls' => $urls,
				'jobReleaseTimestamp' => $immediatePurgeTimestamp + $delay
			] );
		}
		MediaWikiServices::getInstance()->getJobQueueGroup()->lazyPush( $jobs );
	}

	/**
	 * Purge a list of URLs from all configured CDN layers: relayed purge events,
	 * HTCP multicast (if $wgHTCPRouting is set), and direct HTTP PURGE requests
	 * (if $wgCdnServers is set).
	 *
	 * @param string[] $urls Fully qualified URLs to purge
	 */
	public static function purge( array $urls ) {
		$cdnServers = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::CdnServers );
		$htcpRouting = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::HTCPRouting );
		if ( !$urls ) {
			return;
		}

		// Remove duplicate URLs from list
		$urls = array_unique( $urls );

		wfDebugLog( 'squid', __METHOD__ . ': ' . implode( ' ', $urls ) );

		// Reliably broadcast the purge to all edge nodes
		$ts = microtime( true );
		$relayerGroup = MediaWikiServices::getInstance()->getEventRelayerGroup();
		$relayerGroup->getRelayer( 'cdn-url-purges' )->notifyMulti(
			'cdn-url-purges',
			array_map(
				static function ( $url ) use ( $ts ) {
					return [
						'url' => $url,
						'timestamp' => $ts,
					];
				},
				$urls
			)
		);

		// Send lossy UDP broadcasting if enabled
		if ( $htcpRouting ) {
			self::HTCPPurge( $urls );
		}

		// Do direct server purges if enabled (this does not scale very well)
		if ( $cdnServers ) {
			self::naivePurge( $urls );
		}
	}

	/**
	 * @return string[] The fully resolved list of URLs that this update would purge
	 */
	public function getUrls() {
		return array_keys( $this->resolveReboundDelayByUrl() );
	}

	/**
	 * @return int[] Map of (URL => rebound purge delay in seconds)
	 */
	private function resolveReboundDelayByUrl() {
		$services = MediaWikiServices::getInstance();

		// Avoid multiple queries for HtmlCacheUpdater::getUrls() call
		$lb = $services->getLinkBatchFactory()->newLinkBatch();
		foreach ( $this->pageTuples as [ $page, ] ) {
			$lb->addObj( $page );
		}
		$lb->execute();

		$reboundDelayByUrl = [];

		// Resolve the titles into CDN URLs
		$htmlCacheUpdater = $services->getHtmlCacheUpdater();
		foreach ( $this->pageTuples as [ $page, $delay ] ) {
			foreach ( $htmlCacheUpdater->getUrls( $page ) as $url ) {
				// Use the highest rebound for duplicate URLs in order to handle the most lag
				$reboundDelayByUrl[$url] = max( $reboundDelayByUrl[$url] ?? 0, $delay );
			}
		}

		foreach ( $this->urlTuples as [ $url, $delay ] ) {
			// Use the highest rebound for duplicate URLs in order to handle the most lag
			$reboundDelayByUrl[$url] = max( $reboundDelayByUrl[$url] ?? 0, $delay );
		}

		return $reboundDelayByUrl;
	}

	/**
	 * Send Hyper Text Caching Protocol (HTCP) CLR requests for a set of URLs.
	 *
	 * @param string[] $urls Collection of URLs to purge
	 */
	private static function HTCPPurge( array $urls ) {
		$htcpRouting = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::HTCPRouting );
		$htcpMulticastTTL = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::HTCPMulticastTTL );
		// HTCP CLR operation
		$htcpOpCLR = 4;

		// @todo FIXME: PHP doesn't support these socket constants (include/linux/in.h)
		if ( !defined( "IPPROTO_IP" ) ) {
			define( "IPPROTO_IP", 0 );
			define( "IP_MULTICAST_LOOP", 34 );
			define( "IP_MULTICAST_TTL", 33 );
		}

		// pfsockopen doesn't work because we need set_sock_opt
		$conn = socket_create( AF_INET, SOCK_DGRAM, SOL_UDP );
		if ( !$conn ) {
			$errstr = socket_strerror( socket_last_error() );
			wfDebugLog( 'squid', __METHOD__ .
				": Error opening UDP socket: $errstr" );

			return;
		}

		// Set socket options
		socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_LOOP, 0 );
		if ( $htcpMulticastTTL != 1 ) {
			// Set multicast time to live (hop count) option on socket
			socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_TTL,
				$htcpMulticastTTL );
		}

		// Get sequential trx IDs for packet loss counting
		$idGenerator = MediaWikiServices::getInstance()->getGlobalIdGenerator();
		$ids = $idGenerator->newSequentialPerNodeIDs(
			'squidhtcppurge',
			32,
			count( $urls )
		);

		foreach ( $urls as $url ) {
			if ( !is_string( $url ) ) {
				throw new InvalidArgumentException( 'Bad purge URL' );
			}
			$url = self::expand( $url );
			$conf = self::getRuleForURL( $url, $htcpRouting );
			if ( !$conf ) {
				wfDebugLog( 'squid', __METHOD__ .
					": No HTCP rule configured for URL {$url}, skipping" );
				continue;
			}

			if ( isset( $conf['host'] ) && isset( $conf['port'] ) ) {
				// Normalize single entries
				$conf = [ $conf ];
			}
			foreach ( $conf as $subconf ) {
				if ( !isset( $subconf['host'] ) || !isset( $subconf['port'] ) ) {
					throw new RuntimeException( "Invalid HTCP rule for URL $url\n" );
				}
			}

			// Construct a minimal HTCP request diagram
			// as per RFC 2756
			// Opcode 'CLR', no response desired, no auth
			$htcpTransID = current( $ids );
			next( $ids );

			$htcpSpecifier = pack( 'na4na*na8n',
				4, 'HEAD', strlen( $url ), $url,
				8, 'HTTP/1.0', 0 );

			$htcpDataLen = 8 + 2 + strlen( $htcpSpecifier );
			$htcpLen = 4 + $htcpDataLen + 2;

			// Note! Squid gets the bit order of the first
			// word wrong, wrt the RFC. Apparently no other
			// implementation exists, so adapt to Squid
			$htcpPacket = pack( 'nxxnCxNxxa*n',
				$htcpLen, $htcpDataLen, $htcpOpCLR,
				$htcpTransID, $htcpSpecifier, 2 );

			wfDebugLog( 'squid', __METHOD__ .
				": Purging URL $url via HTCP" );
			foreach ( $conf as $subconf ) {
				socket_sendto( $conn, $htcpPacket, $htcpLen, 0,
					$subconf['host'], $subconf['port'] );
			}
		}
	}

	/**
	 * Send an HTTP PURGE request for each URL directly to every server in $wgCdnServers.
	 *
	 * @param string[] $urls Collection of URLs to purge
	 */
	private static function naivePurge( array $urls ) {
		$cdnServers = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::CdnServers );

		$reqs = [];
		foreach ( $urls as $url ) {
			$url = self::expand( $url );
			$urlInfo = wfParseUrl( $url );
			$urlHost = strlen( $urlInfo['port'] ?? '' )
				? IPUtils::combineHostAndPort( $urlInfo['host'], (int)$urlInfo['port'] )
				: $urlInfo['host'];
			$baseReq = [
				'method' => 'PURGE',
				'url' => $url,
				'headers' => [
					'Host' => $urlHost,
					'Connection' => 'Keep-Alive',
					'Proxy-Connection' => 'Keep-Alive',
					'User-Agent' => 'MediaWiki/' . MW_VERSION . ' ' . __CLASS__
				]
			];
			foreach ( $cdnServers as $server ) {
				$reqs[] = ( $baseReq + [ 'proxy' => $server ] );
			}
		}

		$http = MediaWikiServices::getInstance()->getHttpRequestFactory()
			->createMultiClient( [ 'maxConnsPerHost' => 8, 'usePipelining' => true ] );
		$http->runMulti( $reqs );
	}

	/**
	 * Expand a URL into a fully-qualified URL using the internal protocol (PROTO_INTERNAL).
	 *
	 * @param string $url
	 * @return string
	 */
	private static function expand( $url ) {
		return (string)MediaWikiServices::getInstance()->getUrlUtils()->expand( $url, PROTO_INTERNAL );
	}

	/**
	 * Find the HTCP routing rule to use for a given URL.
	 *
	 * @param string $url URL to match
	 * @param array $rules Maps a regex to routing information; an empty regex matches any URL
	 * @return mixed Matching routing information, or false if no rule matched
	 */
	private static function getRuleForURL( $url, $rules ) {
		foreach ( $rules as $regex => $routing ) {
			if ( $regex === '' || preg_match( $regex, $url ) ) {
				return $routing;
			}
		}

		return false;
	}
}

class_alias( CdnCacheUpdate::class, 'CdnCacheUpdate' );
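
For context, here is a minimal sketch of how calling code might enqueue this update. It assumes the MediaWiki\Deferred\DeferredUpdates entry point; $pageReference and the extra URL are illustrative placeholders. In practice most purges go through the HtmlCacheUpdater service (used above in resolveReboundDelayByUrl()) rather than constructing this class directly.

	use MediaWiki\Deferred\CdnCacheUpdate;
	use MediaWiki\Deferred\DeferredUpdates;

	// Purge the CDN entries for a page plus one extra URL, and schedule a
	// 20-second rebound purge to catch renders based on lagged replica DBs.
	DeferredUpdates::addUpdate(
		new CdnCacheUpdate(
			[ $pageReference, 'https://example.org/w/index.php?title=Sandbox&action=raw' ],
			[ 'reboundDelay' => 20 ]
		),
		DeferredUpdates::PRESEND
	);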