MediaWiki master
CdnCacheUpdate.php
Go to the documentation of this file.
1<?php
7namespace MediaWiki\Deferred;
8
9use Exception;
10use InvalidArgumentException;
15use RuntimeException;
16use Wikimedia\Assert\Assert;
17use Wikimedia\IPUtils;
18
	/** @var array[] List of [ string $url, int $reboundDelay ] tuples queued for purging */
	private $urlTuples = [];
	/** @var array[] List of [ PageReference $page, int $reboundDelay ] tuples queued for purging */
	private $pageTuples = [];

	/** @var int Maximum seconds (5 minutes) that a rebound purge may be delayed */
	private const MAX_REBOUND_DELAY = 300;
31
38 public function __construct( array $targets, array $options = [] ) {
39 $delay = min(
40 (int)max( $options['reboundDelay'] ?? 0, 0 ),
41 self::MAX_REBOUND_DELAY
42 );
43
44 foreach ( $targets as $target ) {
45 if ( $target instanceof PageReference ) {
46 $this->pageTuples[] = [ $target, $delay ];
47 } else {
48 $this->urlTuples[] = [ $target, $delay ];
49 }
50 }
51 }
52
53 public function merge( MergeableUpdate $update ) {
55 Assert::parameterType( __CLASS__, $update, '$update' );
56 '@phan-var self $update';
57
58 $this->urlTuples = array_merge( $this->urlTuples, $update->urlTuples );
59 $this->pageTuples = array_merge( $this->pageTuples, $update->pageTuples );
60 }
61
	/**
	 * Send the immediate CDN purges and enqueue any delayed "rebound" purges.
	 */
	public function doUpdate() {
		// Resolve the final list of URLs just before purging them (T240083)
		$reboundDelayByUrl = $this->resolveReboundDelayByUrl();

		// Send the immediate purges to CDN
		self::purge( array_keys( $reboundDelayByUrl ) );
		// Record when the immediate purge went out; rebound jobs are scheduled
		// relative to this moment
		$immediatePurgeTimestamp = time();

		// Get the URLs that need rebound purges, grouped by seconds of purge delay
		$urlsWithReboundByDelay = [];
		foreach ( $reboundDelayByUrl as $url => $delay ) {
			if ( $delay > 0 ) {
				$urlsWithReboundByDelay[$delay][] = $url;
			}
		}
		// Enqueue delayed purge jobs for these URLs (usually only one job)
		$jobs = [];
		foreach ( $urlsWithReboundByDelay as $delay => $urls ) {
			$jobs[] = new CdnPurgeJob( [
				'urls' => $urls,
				// Job queue releases the job only once this timestamp is reached
				'jobReleaseTimestamp' => $immediatePurgeTimestamp + $delay
			] );
		}
		MediaWikiServices::getInstance()->getJobQueueGroup()->lazyPush( $jobs );
	}
87
95 public static function purge( array $urls ) {
96 $cdnServers = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::CdnServers );
97 $htcpRouting = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::HTCPRouting );
98 if ( !$urls ) {
99 return;
100 }
101
102 // Remove duplicate URLs from list
103 $urls = array_unique( $urls );
104
105 wfDebugLog( 'squid', __METHOD__ . ': ' . implode( ' ', $urls ) );
106
107 // Reliably broadcast the purge to all edge nodes
108 $ts = microtime( true );
109 $relayerGroup = MediaWikiServices::getInstance()->getEventRelayerGroup();
110 $relayerGroup->getRelayer( 'cdn-url-purges' )->notifyMulti(
111 'cdn-url-purges',
112 array_map(
113 static function ( $url ) use ( $ts ) {
114 return [
115 'url' => $url,
116 'timestamp' => $ts,
117 ];
118 },
119 $urls
120 )
121 );
122
123 // Send lossy UDP broadcasting if enabled
124 if ( $htcpRouting ) {
125 self::HTCPPurge( $urls );
126 }
127
128 // Do direct server purges if enabled (this does not scale very well)
129 if ( $cdnServers ) {
130 self::naivePurge( $urls );
131 }
132 }
133
137 public function getUrls() {
138 return array_keys( $this->resolveReboundDelayByUrl() );
139 }
140
	/**
	 * Resolve all targets (pages and literal URLs) into one URL-keyed map of
	 * rebound purge delays.
	 *
	 * @return int[] Map of (URL => seconds of rebound delay; 0 means no rebound)
	 */
	private function resolveReboundDelayByUrl() {
		$services = MediaWikiServices::getInstance();
		// Avoid multiple queries for HTMLCacheUpdater::getUrls() call
		$lb = $services->getLinkBatchFactory()->newLinkBatch()
			->setCaller( __METHOD__ );
		foreach ( $this->pageTuples as [ $page, ] ) {
			$lb->addObj( $page );
		}
		$lb->execute();

		$reboundDelayByUrl = [];

		// Resolve the titles into CDN URLs
		$htmlCacheUpdater = $services->getHtmlCacheUpdater();
		foreach ( $this->pageTuples as [ $page, $delay ] ) {
			foreach ( $htmlCacheUpdater->getUrls( $page ) as $url ) {
				// Use the highest rebound for duplicate URLs in order to handle the most lag
				$reboundDelayByUrl[$url] = max( $reboundDelayByUrl[$url] ?? 0, $delay );
			}
		}

		foreach ( $this->urlTuples as [ $url, $delay ] ) {
			// Use the highest rebound for duplicate URLs in order to handle the most lag
			$reboundDelayByUrl[$url] = max( $reboundDelayByUrl[$url] ?? 0, $delay );
		}

		return $reboundDelayByUrl;
	}
174
180 private static function HTCPPurge( array $urls ) {
181 $htcpRouting = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::HTCPRouting );
182 $htcpMulticastTTL = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::HTCPMulticastTTL );
183 // HTCP CLR operation
184 $htcpOpCLR = 4;
185
186 // @todo FIXME: PHP doesn't support these socket constants (include/linux/in.h)
187 if ( !defined( "IPPROTO_IP" ) ) {
188 define( "IPPROTO_IP", 0 );
189 define( "IP_MULTICAST_LOOP", 34 );
190 define( "IP_MULTICAST_TTL", 33 );
191 }
192
193 // pfsockopen doesn't work because we need set_sock_opt
194 $conn = socket_create( AF_INET, SOCK_DGRAM, SOL_UDP );
195 if ( !$conn ) {
196 $errstr = socket_strerror( socket_last_error() );
197 wfDebugLog( 'squid', __METHOD__ .
198 ": Error opening UDP socket: $errstr" );
199
200 return;
201 }
202
203 // Set socket options
204 socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_LOOP, 0 );
205 if ( $htcpMulticastTTL != 1 ) {
206 // Set multicast time to live (hop count) option on socket
207 socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_TTL,
208 $htcpMulticastTTL );
209 }
210
211 // Get sequential trx IDs for packet loss counting
212 $idGenerator = MediaWikiServices::getInstance()->getGlobalIdGenerator();
213 $ids = $idGenerator->newSequentialPerNodeIDs(
214 'squidhtcppurge',
215 32,
216 count( $urls )
217 );
218
219 foreach ( $urls as $url ) {
220 if ( !is_string( $url ) ) {
221 throw new InvalidArgumentException( 'Bad purge URL' );
222 }
223 $url = self::expand( $url );
224 $conf = self::getRuleForURL( $url, $htcpRouting );
225 if ( !$conf ) {
226 wfDebugLog( 'squid', __METHOD__ .
227 "No HTCP rule configured for URL {$url} , skipping" );
228 continue;
229 }
230
231 if ( isset( $conf['host'] ) && isset( $conf['port'] ) ) {
232 // Normalize single entries
233 $conf = [ $conf ];
234 }
235 foreach ( $conf as $subconf ) {
236 if ( !isset( $subconf['host'] ) || !isset( $subconf['port'] ) ) {
237 throw new RuntimeException( "Invalid HTCP rule for URL $url\n" );
238 }
239 }
240
241 // Construct a minimal HTCP request diagram
242 // as per RFC 2756
243 // Opcode 'CLR', no response desired, no auth
244 $htcpTransID = current( $ids );
245 next( $ids );
246
247 $htcpSpecifier = pack( 'na4na*na8n',
248 4, 'HEAD', strlen( $url ), $url,
249 8, 'HTTP/1.0', 0 );
250
251 $htcpDataLen = 8 + 2 + strlen( $htcpSpecifier );
252 $htcpLen = 4 + $htcpDataLen + 2;
253
254 // Note! Squid gets the bit order of the first
255 // word wrong, wrt the RFC. Apparently no other
256 // implementation exists, so adapt to Squid
257 $htcpPacket = pack( 'nxxnCxNxxa*n',
258 $htcpLen, $htcpDataLen, $htcpOpCLR,
259 $htcpTransID, $htcpSpecifier, 2 );
260
261 wfDebugLog( 'squid', __METHOD__ .
262 "Purging URL $url via HTCP" );
263 foreach ( $conf as $subconf ) {
264 socket_sendto( $conn, $htcpPacket, $htcpLen, 0,
265 $subconf['host'], $subconf['port'] );
266 }
267 }
268 }
269
	/**
	 * Send an HTTP PURGE request for each URL to every server in $wgCdnServers,
	 * using each server as a proxy so the request reaches that specific node.
	 *
	 * @param string[] $urls List of fully-qualified URLs to purge
	 */
	private static function naivePurge( array $urls ) {
		$cdnServers = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::CdnServers );

		$reqs = [];
		foreach ( $urls as $url ) {
			$url = self::expand( $url );
			// NOTE(review): if parse() returns null, $urlInfo becomes false and the
			// $urlInfo['host'] access below misbehaves — confirm parse() cannot fail
			// for expanded URLs, or add a skip for unparseable ones
			$urlInfo = wfGetUrlUtils()->parse( $url ) ?? false;
			// Preserve an explicit port in the Host header, if present
			$urlHost = strlen( $urlInfo['port'] ?? '' )
				? IPUtils::combineHostAndPort( $urlInfo['host'], (int)$urlInfo['port'] )
				: $urlInfo['host'];
			$baseReq = [
				'method' => 'PURGE',
				'url' => $url,
				'headers' => [
					'Host' => $urlHost,
					'Connection' => 'Keep-Alive',
					'Proxy-Connection' => 'Keep-Alive',
					'User-Agent' => 'MediaWiki/' . MW_VERSION . ' ' . __CLASS__
				]
			];
			// One request per CDN server, routed via that server as proxy
			foreach ( $cdnServers as $server ) {
				$reqs[] = ( $baseReq + [ 'proxy' => $server ] );
			}
		}

		$http = MediaWikiServices::getInstance()->getHttpRequestFactory()
			->createMultiClient( [ 'maxConnsPerHost' => 8, 'usePipelining' => true ] );
		$http->runMulti( $reqs );
	}
305
320 private static function expand( $url ) {
321 return (string)MediaWikiServices::getInstance()->getUrlUtils()->expand( $url, PROTO_INTERNAL );
322 }
323
330 private static function getRuleForURL( $url, $rules ) {
331 foreach ( $rules as $regex => $routing ) {
332 if ( $regex === '' || preg_match( $regex, $url ) ) {
333 return $routing;
334 }
335 }
336
337 return false;
338 }
339}
340
342class_alias( CdnCacheUpdate::class, 'CdnCacheUpdate' );
const MW_VERSION
The running version of MediaWiki.
Definition Defines.php:23
const PROTO_INTERNAL
Definition Defines.php:224
wfGetUrlUtils()
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not.
Handles purging the appropriate CDN objects given a list of URLs or Title instances.
doUpdate()
Perform the actual work.
merge(MergeableUpdate $update)
Merge this enqueued update with a new MergeableUpdate of the same qualified class name.
__construct(array $targets, array $options=[])
static purge(array $urls)
Purges a list of CDN nodes defined in $wgCdnServers.
Job to purge a set of URLs from CDN.
A class containing constants representing the names of configuration variables.
const HTCPMulticastTTL
Name constant for the HTCPMulticastTTL setting, for use with Config::get()
const CdnServers
Name constant for the CdnServers setting, for use with Config::get()
const HTCPRouting
Name constant for the HTCPRouting setting, for use with Config::get()
Service locator for MediaWiki core services.
static getInstance()
Returns the global default instance of the top level service locator.
Interface that deferrable updates should implement.
Interface that deferrable updates can implement to signal that updates can be combined.
Interface for objects (potentially) representing a page that can be viewable and linked to on a wiki.