MediaWiki  master
CdnCacheUpdate.php
Go to the documentation of this file.
1 <?php
22 use Wikimedia\Assert\Assert;
23 
/** @var array[] List of (URL, rebound purge delay) tuples */
30  private $urlTuples = [];
/** @var array[] List of (Title, rebound purge delay) tuples */
32  private $titleTuples = [];
33 
/** @var int Upper bound, in seconds, applied to the 'reboundDelay' option */
35  private const MAX_REBOUND_DELAY = 300;
36 
/**
 * Build an update from a mixed list of purge targets.
 *
 * @param array $targets Title instances and/or URLs; anything that is not a Title
 *   instance is stored as a URL
 * @param array $options Supported keys:
 *   - reboundDelay: seconds to wait before a second ("rebound") purge is sent;
 *     clamped to the range [0, MAX_REBOUND_DELAY]. Defaults to 0 (no rebound purge).
 */
public function __construct( array $targets, array $options = [] ) {
    // Negative values become 0, oversized values become MAX_REBOUND_DELAY
    $requestedDelay = $options['reboundDelay'] ?? 0;
    $reboundDelay = min( (int)max( $requestedDelay, 0 ), self::MAX_REBOUND_DELAY );

    foreach ( $targets as $item ) {
        $tuple = [ $item, $reboundDelay ];
        if ( $item instanceof Title ) {
            $this->titleTuples[] = $tuple;
            continue;
        }
        $this->urlTuples[] = $tuple;
    }
}
57 
/**
 * Merge this update with $update by taking over all of its pending targets.
 *
 * @param MergeableUpdate $update Must be an instance of this class
 */
public function merge( MergeableUpdate $update ) {
    Assert::parameterType( __CLASS__, $update, '$update' );
    '@phan-var self $update';

    // Both lists are plain sequential arrays, so appending keeps the merged order:
    // own tuples first, then the tuples of the other update
    foreach ( $update->urlTuples as $urlTuple ) {
        $this->urlTuples[] = $urlTuple;
    }
    foreach ( $update->titleTuples as $titleTuple ) {
        $this->titleTuples[] = $titleTuple;
    }
}
66 
/**
 * Create an update object from an array of Title objects, or a TitleArray object.
 *
 * @param Traversable|Title[] $titles Titles to purge
 * @param string[] $urls Additional URLs to purge
 * @return CdnCacheUpdate
 */
public static function newFromTitles( $titles, $urls = [] ) {
    // array_merge() only accepts real arrays, so flatten iterator-style
    // collections (such as TitleArray) into a plain list first
    if ( $titles instanceof Traversable ) {
        $titles = iterator_to_array( $titles, false );
    }

    return new self( array_merge( $titles, $urls ) );
}
78 
/**
 * Perform the actual work: purge every resolved URL right away, then queue
 * delayed "rebound" purge jobs for the URLs that asked for one.
 */
public function doUpdate() {
    // Resolve the final list of URLs just before purging them (T240083)
    $delayByUrl = $this->resolveReboundDelayByUrl();

    // Immediate purge of everything; the timestamp is taken after the purge was sent
    self::purge( array_keys( $delayByUrl ) );
    $purgedAt = time();

    // Bucket the URLs that need a rebound purge by their delay in seconds
    $urlsByDelay = [];
    foreach ( $delayByUrl as $url => $seconds ) {
        if ( $seconds <= 0 ) {
            continue;
        }
        $urlsByDelay[$seconds][] = $url;
    }

    // One delayed purge job per distinct delay (usually only one job)
    $jobs = [];
    foreach ( $urlsByDelay as $seconds => $bucket ) {
        $jobParams = [
            'urls' => $bucket,
            'jobReleaseTimestamp' => $purgedAt + $seconds
        ];
        $jobs[] = new CdnPurgeJob( $jobParams );
    }

    JobQueueGroup::singleton()->lazyPush( $jobs );
}
104 
/**
 * Purge the given URLs from the CDN layer.
 *
 * The purge is always relayed on the 'cdn-url-purges' event channel. It is additionally
 * sent via HTCP when $wgHTCPRouting is set, and via direct HTTP PURGE requests when
 * $wgCdnServers is set.
 *
 * @param string[] $urls List of URLs to purge; duplicates are removed
 */
public static function purge( array $urls ) {
    // Without this declaration both settings are undefined locals and the
    // HTCP / direct purge branches below can never run
    global $wgCdnServers, $wgHTCPRouting;

    if ( !$urls ) {
        return;
    }

    // Remove duplicate URLs from list
    $urls = array_unique( $urls );

    wfDebugLog( 'squid', __METHOD__ . ': ' . implode( ' ', $urls ) );

    // Reliably broadcast the purge to all edge nodes
    $ts = microtime( true );
    $relayerGroup = MediaWikiServices::getInstance()->getEventRelayerGroup();
    $relayerGroup->getRelayer( 'cdn-url-purges' )->notifyMulti(
        'cdn-url-purges',
        array_map(
            function ( $url ) use ( $ts ) {
                return [
                    'url' => $url,
                    'timestamp' => $ts,
                ];
            },
            $urls
        )
    );

    // Send lossy UDP broadcasting if enabled
    if ( $wgHTCPRouting ) {
        self::HTCPPurge( $urls );
    }

    // Do direct server purges if enabled (this does not scale very well)
    if ( $wgCdnServers ) {
        self::naivePurge( $urls );
    }
}
150 
/**
 * @return string[] The URLs this update resolves to at the time of the call
 */
public function getUrls() {
    $delayByUrl = $this->resolveReboundDelayByUrl();

    return array_keys( $delayByUrl );
}
157 
/**
 * Turn the stored Title and URL tuples into a single map of URLs.
 *
 * @return int[] Map of (URL => rebound purge delay); URLs derived from titles come
 *   first, followed by the explicitly given URLs
 */
private function resolveReboundDelayByUrl() {
    // Load all titles in one batch to avoid multiple queries for the getCdnUrls() calls
    $services = MediaWikiServices::getInstance();
    $batch = $services->getLinkBatchFactory()->newLinkBatch();
    foreach ( $this->titleTuples as $titleTuple ) {
        $batch->addObj( $titleTuple[0] );
    }
    $batch->execute();

    $delayByUrl = [];

    // Expand each title into its CDN URLs
    foreach ( $this->titleTuples as [ $title, $seconds ] ) {
        $cdnUrls = $title->getCdnUrls();
        foreach ( $cdnUrls as $cdnUrl ) {
            // A URL seen more than once keeps its highest delay, to handle the most lag
            $known = $delayByUrl[$cdnUrl] ?? 0;
            $delayByUrl[$cdnUrl] = max( $known, $seconds );
        }
    }

    // Add the explicitly given URLs under the same "highest delay wins" rule
    foreach ( $this->urlTuples as [ $rawUrl, $seconds ] ) {
        $known = $delayByUrl[$rawUrl] ?? 0;
        $delayByUrl[$rawUrl] = max( $known, $seconds );
    }

    return $delayByUrl;
}
188 
/**
 * Send Hyper Text Caching Protocol (HTCP) CLR requests.
 *
 * Each URL is expanded, matched against $wgHTCPRouting and sent as one UDP packet to
 * every host/port pair of the matching rule. URLs without a matching rule are skipped.
 *
 * @param string[] $urls Collection of URLs to purge
 * @throws MWException If a URL is not a string or the matching rule lacks host/port
 */
private static function HTCPPurge( array $urls ) {
    global $wgHTCPMulticastTTL, $wgHTCPRouting;

    // HTCP CLR operation
    $htcpOpCLR = 4;

    // @todo FIXME: PHP doesn't support these socket constants (include/linux/in.h)
    if ( !defined( "IPPROTO_IP" ) ) {
        define( "IPPROTO_IP", 0 );
        define( "IP_MULTICAST_LOOP", 34 );
        define( "IP_MULTICAST_TTL", 33 );
    }

    // pfsockopen doesn't work because we need set_sock_opt
    $conn = socket_create( AF_INET, SOCK_DGRAM, SOL_UDP );
    if ( !$conn ) {
        $errstr = socket_strerror( socket_last_error() );
        wfDebugLog( 'squid', __METHOD__ .
            ": Error opening UDP socket: $errstr" );

        return;
    }

    // Set socket options
    socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_LOOP, 0 );
    if ( $wgHTCPMulticastTTL != 1 ) {
        // Set multicast time to live (hop count) option on socket
        socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_TTL,
            $wgHTCPMulticastTTL );
    }

    // Get sequential trx IDs for packet loss counting
    $idGenerator = MediaWikiServices::getInstance()->getGlobalIdGenerator();
    $ids = $idGenerator->newSequentialPerNodeIDs(
        'squidhtcppurge', 32,
        count( $urls ),
        $idGenerator::QUICK_VOLATILE
    );

    foreach ( $urls as $url ) {
        if ( !is_string( $url ) ) {
            throw new MWException( 'Bad purge URL' );
        }
        $url = self::expand( $url );
        $conf = self::getRuleForURL( $url, $wgHTCPRouting );
        if ( !$conf ) {
            wfDebugLog( 'squid', __METHOD__ .
                ": No HTCP rule configured for URL {$url} , skipping" );
            continue;
        }

        if ( isset( $conf['host'] ) && isset( $conf['port'] ) ) {
            // Normalize single entries
            $conf = [ $conf ];
        }
        // Validate every destination before anything is sent for this URL
        foreach ( $conf as $subconf ) {
            if ( !isset( $subconf['host'] ) || !isset( $subconf['port'] ) ) {
                throw new MWException( "Invalid HTCP rule for URL $url\n" );
            }
        }

        // Construct a minimal HTCP request diagram
        // as per RFC 2756
        // Opcode 'CLR', no response desired, no auth
        $htcpTransID = current( $ids );
        next( $ids );

        $htcpSpecifier = pack( 'na4na*na8n',
            4, 'HEAD', strlen( $url ), $url,
            8, 'HTTP/1.0', 0 );

        $htcpDataLen = 8 + 2 + strlen( $htcpSpecifier );
        $htcpLen = 4 + $htcpDataLen + 2;

        // Note! Squid gets the bit order of the first
        // word wrong, wrt the RFC. Apparently no other
        // implementation exists, so adapt to Squid
        $htcpPacket = pack( 'nxxnCxNxxa*n',
            $htcpLen, $htcpDataLen, $htcpOpCLR,
            $htcpTransID, $htcpSpecifier, 2 );

        wfDebugLog( 'squid', __METHOD__ .
            ": Purging URL $url via HTCP" );
        foreach ( $conf as $subconf ) {
            socket_sendto( $conn, $htcpPacket, $htcpLen, 0,
                $subconf['host'], $subconf['port'] );
        }
    }
}
284 
/**
 * Send HTTP PURGE requests for each of the URLs to all of the cache servers.
 *
 * One request is built per (URL, server) pair, with the server from $wgCdnServers
 * used as the proxy, and all requests are run together through a multi-client.
 *
 * @param string[] $urls List of URLs to purge
 */
private static function naivePurge( array $urls ) {
    global $wgCdnServers;

    $reqs = [];
    foreach ( $urls as $url ) {
        $urlInfo = wfParseUrl( self::expand( $url ) );
        // Default to '' rather than null: passing null to strlen() is deprecated
        // as of PHP 8.1, and an empty string yields the same zero length
        $urlHost = strlen( $urlInfo['port'] ?? '' )
            ? IP::combineHostAndPort( $urlInfo['host'], $urlInfo['port'] )
            : $urlInfo['host'];
        $urlPath = strlen( $urlInfo['query'] ?? '' )
            ? wfAppendQuery( $urlInfo['path'], $urlInfo['query'] )
            : $urlInfo['path'];
        $baseReq = [
            'method' => 'PURGE',
            'url' => $urlPath,
            'headers' => [
                'Host' => $urlHost,
                'Connection' => 'Keep-Alive',
                'Proxy-Connection' => 'Keep-Alive',
                'User-Agent' => 'MediaWiki/' . MW_VERSION . ' ' . __CLASS__
            ]
        ];
        // Same request for every cache server, differing only in the proxy used
        foreach ( $wgCdnServers as $server ) {
            $reqs[] = ( $baseReq + [ 'proxy' => $server ] );
        }
    }

    $http = MediaWikiServices::getInstance()->getHttpRequestFactory()
        ->createMultiClient( [ 'maxConnsPerHost' => 8, 'usePipelining' => true ] );
    $http->runMulti( $reqs );
}
322 
/**
 * Expand a potentially local URL to a fully-qualified URL using PROTO_INTERNAL.
 *
 * @param string $url URL to expand
 * @return mixed Whatever wfExpandUrl() returns for the URL
 */
private static function expand( $url ) {
    $expanded = wfExpandUrl( $url, PROTO_INTERNAL );

    return $expanded;
}
340 
/**
 * Find the HTCP routing rule to use for a given URL.
 *
 * Rules are tried in array order and the first match wins. An empty-string key
 * matches every URL; any other key is used as a regular expression.
 *
 * @param string $url URL to match
 * @param array $rules Map of (regex or '' => routing value)
 * @return mixed The routing value of the first matching rule, or false if none matches
 */
private static function getRuleForURL( $url, $rules ) {
    foreach ( $rules as $pattern => $target ) {
        // The catch-all key short-circuits, so preg_match() is never called for it
        $isMatch = ( $pattern === '' ) ? true : (bool)preg_match( $pattern, $url );
        if ( $isMatch ) {
            return $target;
        }
    }

    return false;
}
356 }
CdnCacheUpdate\HTCPPurge
static HTCPPurge(array $urls)
Send Hyper Text Caching Protocol (HTCP) CLR requests.
Definition: CdnCacheUpdate.php:195
CdnCacheUpdate\$titleTuples
array[] $titleTuples
List of (Title, rebound purge delay) tuples.
Definition: CdnCacheUpdate.php:32
CdnCacheUpdate\getUrls
getUrls()
Definition: CdnCacheUpdate.php:154
$wgCdnServers
$wgCdnServers
List of proxy servers to purge on changes; default port is 80.
Definition: DefaultSettings.php:2986
PROTO_INTERNAL
const PROTO_INTERNAL
Definition: Defines.php:213
MediaWiki\MediaWikiServices
MediaWikiServices is the service locator for the application scope of MediaWiki.
Definition: MediaWikiServices.php:154
$wgHTCPMulticastTTL
$wgHTCPMulticastTTL
HTCP multicast TTL.
Definition: DefaultSettings.php:3072
MergeableUpdate
Interface that deferrable updates can implement to signal that updates can be combined.
Definition: MergeableUpdate.php:20
MW_VERSION
const MW_VERSION
The running version of MediaWiki.
Definition: Defines.php:39
CdnCacheUpdate\doUpdate
doUpdate()
Perform the actual work.
Definition: CdnCacheUpdate.php:79
wfDebugLog
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not.
Definition: GlobalFunctions.php:988
CdnCacheUpdate\getRuleForURL
static getRuleForURL( $url, $rules)
Find the HTCP routing rule to use for a given URL.
Definition: CdnCacheUpdate.php:347
wfAppendQuery
wfAppendQuery( $url, $query)
Append a query string to an existing URL, which may or may not already have query string parameters.
Definition: GlobalFunctions.php:438
CdnCacheUpdate\merge
merge(MergeableUpdate $update)
Merge this update with $update.
Definition: CdnCacheUpdate.php:58
wfParseUrl
wfParseUrl( $url)
parse_url() work-alike, but non-broken.
Definition: GlobalFunctions.php:791
MWException
MediaWiki exception.
Definition: MWException.php:29
CdnCacheUpdate\$urlTuples
array[] $urlTuples
List of (URL, rebound purge delay) tuples.
Definition: CdnCacheUpdate.php:30
CdnCacheUpdate\expand
static expand( $url)
Expand local URLs to fully-qualified URLs using the internal protocol and host defined in $wgInternalServer.
Definition: CdnCacheUpdate.php:337
$wgHTCPRouting
$wgHTCPRouting
Routing configuration for HTCP multicast purging.
Definition: DefaultSettings.php:3066
CdnCacheUpdate\naivePurge
static naivePurge(array $urls)
Send HTTP PURGE requests for each of the URLs to all of the cache servers.
Definition: CdnCacheUpdate.php:291
$title
$title
Definition: testCompression.php:38
CdnCacheUpdate
Handles purging the appropriate CDN objects given a list of URLs or Title instances.
Definition: CdnCacheUpdate.php:28
CdnPurgeJob
Job to purge a set of URLs from CDN.
Definition: CdnPurgeJob.php:30
CdnCacheUpdate\newFromTitles
static newFromTitles( $titles, $urls=[])
Create an update object from an array of Title objects, or a TitleArray object.
Definition: CdnCacheUpdate.php:75
CdnCacheUpdate\__construct
__construct(array $targets, array $options=[])
Definition: CdnCacheUpdate.php:43
Title
Represents a title within MediaWiki.
Definition: Title.php:42
JobQueueGroup\singleton
static singleton( $domain=false)
Definition: JobQueueGroup.php:70
DeferrableUpdate
Interface that deferrable updates should implement.
Definition: DeferrableUpdate.php:11
CdnCacheUpdate\purge
static purge(array $urls)
Purges a list of CDN nodes defined in $wgCdnServers.
Definition: CdnCacheUpdate.php:112
CdnCacheUpdate\resolveReboundDelayByUrl
resolveReboundDelayByUrl()
Definition: CdnCacheUpdate.php:161
wfExpandUrl
wfExpandUrl( $url, $defaultProto=PROTO_CURRENT)
Expand a potentially local URL to a fully-qualified URL.
Definition: GlobalFunctions.php:490