MediaWiki master
CdnCacheUpdate.php
<?php

namespace MediaWiki\Deferred;

use Exception;
use InvalidArgumentException;
use MediaWiki\JobQueue\Jobs\CdnPurgeJob;
use MediaWiki\MainConfigNames;
use MediaWiki\MediaWikiServices;
use MediaWiki\Page\PageReference;
use RuntimeException;
use Wikimedia\Assert\Assert;
use Wikimedia\IPUtils;

/**
 * Handles purging the appropriate CDN objects given a list of URLs or Title instances.
 */
class CdnCacheUpdate implements DeferrableUpdate, MergeableUpdate {
	/** @var array[] List of (URL, rebound purge delay) tuples */
	private $urlTuples = [];
	/** @var array[] List of (PageReference, rebound purge delay) tuples */
	private $pageTuples = [];

	/** @var int Maximum seconds of rebound purge delay */
	private const MAX_REBOUND_DELAY = 300;

	/**
	 * @param PageReference[]|string[] $targets Collection of pages or URLs to purge from the CDN
	 * @param array $options Map of options; supports "reboundDelay", the number of seconds to
	 *  wait before sending a second "rebound" purge for the same URLs (capped at 300 seconds)
	 */
	public function __construct( array $targets, array $options = [] ) {
		$delay = min(
			(int)max( $options['reboundDelay'] ?? 0, 0 ),
			self::MAX_REBOUND_DELAY
		);

		foreach ( $targets as $target ) {
			if ( $target instanceof PageReference ) {
				$this->pageTuples[] = [ $target, $delay ];
			} else {
				$this->urlTuples[] = [ $target, $delay ];
			}
		}
	}

	public function merge( MergeableUpdate $update ) {
		/** @var self $update */
		Assert::parameterType( __CLASS__, $update, '$update' );
		'@phan-var self $update';

		$this->urlTuples = array_merge( $this->urlTuples, $update->urlTuples );
		$this->pageTuples = array_merge( $this->pageTuples, $update->pageTuples );
	}

	public function doUpdate() {
		// Resolve the final list of URLs just before purging them (T240083)
		$reboundDelayByUrl = $this->resolveReboundDelayByUrl();

		// Send the immediate purges to CDN
		self::purge( array_keys( $reboundDelayByUrl ) );
		$immediatePurgeTimestamp = time();

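		// A "rebound" purge re-sends the same purge after a delay, so that any stale copy
		// re-cached from a lagging replica just after the immediate purge is also evicted.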
		// Get the URLs that need rebound purges, grouped by seconds of purge delay
		$urlsWithReboundByDelay = [];
		foreach ( $reboundDelayByUrl as $url => $delay ) {
			if ( $delay > 0 ) {
				$urlsWithReboundByDelay[$delay][] = $url;
			}
		}
		// Enqueue delayed purge jobs for these URLs (usually only one job)
		$jobs = [];
		foreach ( $urlsWithReboundByDelay as $delay => $urls ) {
			$jobs[] = new CdnPurgeJob( [
				'urls' => $urls,
				'jobReleaseTimestamp' => $immediatePurgeTimestamp + $delay
			] );
		}
		MediaWikiServices::getInstance()->getJobQueueGroup()->lazyPush( $jobs );
	}

	/**
	 * Purges a list of CDN nodes defined in $wgCdnServers.
	 *
	 * $urls should contain the full URLs to purge as values
	 * (example: $urls[] = 'http://my.host/something')
	 *
	 * @param string[] $urls List of full URLs to purge
	 */
	public static function purge( array $urls ) {
		$cdnServers = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::CdnServers );
		$htcpRouting = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::HTCPRouting );
		if ( !$urls ) {
			return;
		}

		// Remove duplicate URLs from list
		$urls = array_unique( $urls );

		wfDebugLog( 'squid', __METHOD__ . ': ' . implode( ' ', $urls ) );

		// Reliably broadcast the purge to all edge nodes
		$ts = microtime( true );
		$relayerGroup = MediaWikiServices::getInstance()->getEventRelayerGroup();
		$relayerGroup->getRelayer( 'cdn-url-purges' )->notifyMulti(
			'cdn-url-purges',
			array_map(
				static function ( $url ) use ( $ts ) {
					return [
						'url' => $url,
						'timestamp' => $ts,
					];
				},
				$urls
			)
		);

		// Send lossy UDP broadcasting if enabled
		if ( $htcpRouting ) {
			self::HTCPPurge( $urls );
		}

		// Do direct server purges if enabled (this does not scale very well)
		if ( $cdnServers ) {
			self::naivePurge( $urls );
		}
	}

	/**
	 * @return string[] List of URLs
	 */
	public function getUrls() {
		return array_keys( $this->resolveReboundDelayByUrl() );
	}

	/**
	 * @return int[] Map of (URL => rebound purge delay)
	 */
	private function resolveReboundDelayByUrl() {
		$services = MediaWikiServices::getInstance();

		// Avoid multiple queries for HTMLCacheUpdater::getUrls() call
		$lb = $services->getLinkBatchFactory()->newLinkBatch()
			->setCaller( __METHOD__ );
		foreach ( $this->pageTuples as [ $page, ] ) {
			$lb->addObj( $page );
		}
		$lb->execute();

		$reboundDelayByUrl = [];

		// Resolve the titles into CDN URLs
		$htmlCacheUpdater = $services->getHtmlCacheUpdater();
		foreach ( $this->pageTuples as [ $page, $delay ] ) {
			foreach ( $htmlCacheUpdater->getUrls( $page ) as $url ) {
				// Use the highest rebound for duplicate URLs in order to handle the most lag
				$reboundDelayByUrl[$url] = max( $reboundDelayByUrl[$url] ?? 0, $delay );
			}
		}

		foreach ( $this->urlTuples as [ $url, $delay ] ) {
			// Use the highest rebound for duplicate URLs in order to handle the most lag
			$reboundDelayByUrl[$url] = max( $reboundDelayByUrl[$url] ?? 0, $delay );
		}

		return $reboundDelayByUrl;
	}

	/**
	 * Send Hyper Text Caching Protocol (HTCP) CLR requests for the given URLs.
	 * @param string[] $urls Collection of URLs to purge
	 */
	private static function HTCPPurge( array $urls ) {
		$htcpRouting = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::HTCPRouting );
		$htcpMulticastTTL = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::HTCPMulticastTTL );
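		// $htcpRouting maps URL regexes ('' matches everything) to either a single
		// [ 'host' => ..., 'port' => ... ] endpoint or a list of such endpoints, e.g.
		// [ '' => [ 'host' => '239.128.0.112', 'port' => 4827 ] ] (illustrative values only);
		// see getRuleForURL() below for how a rule is selected.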
		// HTCP CLR operation
		$htcpOpCLR = 4;

		// @todo FIXME: PHP doesn't support these socket constants (include/linux/in.h)
		if ( !defined( "IPPROTO_IP" ) ) {
			define( "IPPROTO_IP", 0 );
			define( "IP_MULTICAST_LOOP", 34 );
			define( "IP_MULTICAST_TTL", 33 );
		}

		// pfsockopen doesn't work because we need set_sock_opt
		$conn = socket_create( AF_INET, SOCK_DGRAM, SOL_UDP );
		if ( !$conn ) {
			$errstr = socket_strerror( socket_last_error() );
			wfDebugLog( 'squid', __METHOD__ .
				": Error opening UDP socket: $errstr" );

			return;
		}

		// Set socket options
		socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_LOOP, 0 );
		if ( $htcpMulticastTTL != 1 ) {
			// Set multicast time to live (hop count) option on socket
			socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_TTL,
				$htcpMulticastTTL );
		}

		// Get sequential trx IDs for packet loss counting
		$idGenerator = MediaWikiServices::getInstance()->getGlobalIdGenerator();
		$ids = $idGenerator->newSequentialPerNodeIDs(
			'squidhtcppurge',
			32,
			count( $urls )
		);

		foreach ( $urls as $url ) {
			if ( !is_string( $url ) ) {
				throw new InvalidArgumentException( 'Bad purge URL' );
			}
			$url = self::expand( $url );
			$conf = self::getRuleForURL( $url, $htcpRouting );
			if ( !$conf ) {
				wfDebugLog( 'squid', __METHOD__ .
					": No HTCP rule configured for URL {$url}, skipping" );
				continue;
			}

			if ( isset( $conf['host'] ) && isset( $conf['port'] ) ) {
				// Normalize single entries
				$conf = [ $conf ];
			}
			foreach ( $conf as $subconf ) {
				if ( !isset( $subconf['host'] ) || !isset( $subconf['port'] ) ) {
					throw new RuntimeException( "Invalid HTCP rule for URL $url\n" );
				}
			}

			// Construct a minimal HTCP request datagram
			// as per RFC 2756
			// Opcode 'CLR', no response desired, no auth
			$htcpTransID = current( $ids );
			next( $ids );
			$htcpSpecifier = pack( 'na4na*na8n',
				4, 'HEAD', strlen( $url ), $url,
				8, 'HTTP/1.0', 0 );

			$htcpDataLen = 8 + 2 + strlen( $htcpSpecifier );
			$htcpLen = 4 + $htcpDataLen + 2;

			// Note! Squid gets the bit order of the first
			// word wrong, wrt the RFC. Apparently no other
			// implementation exists, so adapt to Squid
			$htcpPacket = pack( 'nxxnCxNxxa*n',
				$htcpLen, $htcpDataLen, $htcpOpCLR,
				$htcpTransID, $htcpSpecifier, 2 );

			wfDebugLog( 'squid', __METHOD__ .
				": Purging URL $url via HTCP" );
			foreach ( $conf as $subconf ) {
				socket_sendto( $conn, $htcpPacket, $htcpLen, 0,
					$subconf['host'], $subconf['port'] );
			}
		}
	}

	/**
	 * Send an HTTP PURGE request for each URL to all configured $wgCdnServers.
	 *
	 * @param string[] $urls Collection of URLs to purge
	 */
	private static function naivePurge( array $urls ) {
		$cdnServers = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::CdnServers );

		$reqs = [];
		foreach ( $urls as $url ) {
			$url = self::expand( $url );
			$urlInfo = wfGetUrlUtils()->parse( $url ) ?? false;
			$urlHost = strlen( $urlInfo['port'] ?? '' )
				? IPUtils::combineHostAndPort( $urlInfo['host'], (int)$urlInfo['port'] )
				: $urlInfo['host'];
			$baseReq = [
				'method' => 'PURGE',
				'url' => $url,
				'headers' => [
					'Host' => $urlHost,
					'Connection' => 'Keep-Alive',
					'Proxy-Connection' => 'Keep-Alive',
					'User-Agent' => 'MediaWiki/' . MW_VERSION . ' ' . __CLASS__
				]
			];
			foreach ( $cdnServers as $server ) {
				$reqs[] = ( $baseReq + [ 'proxy' => $server ] );
			}
		}

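		// At this point, $reqs holds one PURGE request per (URL, CDN server) pair: each is
		// sent as "PURGE <url>" with the Host header of the original URL, routed to the CDN
		// server through the 'proxy' option rather than to the URL's own host.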
		$http = MediaWikiServices::getInstance()->getHttpRequestFactory()
			->createMultiClient( [ 'maxConnsPerHost' => 8, 'usePipelining' => true ] );
		$http->runMulti( $reqs );
	}

	/**
	 * @param string $url Possibly-relative URL
	 * @return string Fully-qualified URL, expanded using PROTO_INTERNAL as expected by the CDN
	 */
	private static function expand( $url ) {
		return (string)MediaWikiServices::getInstance()->getUrlUtils()->expand( $url, PROTO_INTERNAL );
	}

	/**
	 * Find the HTCP routing rule to use for a given URL.
	 *
	 * @param string $url URL to match
	 * @param array $rules Array of rules, see $wgHTCPRouting for format and behavior
	 * @return mixed Element of $rules that matched, or false if nothing matched
	 */
	private static function getRuleForURL( $url, $rules ) {
		foreach ( $rules as $regex => $routing ) {
			if ( $regex === '' || preg_match( $regex, $url ) ) {
				return $routing;
			}
		}

		return false;
	}
}

class_alias( CdnCacheUpdate::class, 'CdnCacheUpdate' );
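
For orientation, here is a minimal, hypothetical usage sketch (not part of this file). It assumes $page is a PageReference obtained elsewhere and that DeferredUpdates lives under MediaWiki\Deferred as on current master; the URL and delay values are examples only.

use MediaWiki\Deferred\CdnCacheUpdate;
use MediaWiki\Deferred\DeferredUpdates;

// Purge the CDN entries for one page plus an extra URL, and schedule a follow-up
// "rebound" purge 20 seconds later to cover replication lag.
DeferredUpdates::addUpdate( new CdnCacheUpdate(
	[ $page, 'https://example.org/wiki/Special:RecentChanges' ],
	[ 'reboundDelay' => 20 ]
) );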