MediaWiki  master
CdnCacheUpdate.php
Go to the documentation of this file.
1 <?php
22 use Wikimedia\Assert\Assert;
23 
// List of (URL, rebound purge delay) tuples.
30  private $urlTuples = [];
// List of (Title, rebound purge delay) tuples.
32  private $titleTuples = [];
33 
// Upper bound applied to the 'reboundDelay' option by the constructor; the delay is later added
// to a time() value, so it is expressed in seconds.
35  private const MAX_REBOUND_DELAY = 300;
36 
/**
 * Build an update from a mixed list of purge targets.
 *
 * @param array $targets List of Title objects and/or URL values; anything that is
 *   not a Title instance is stored as a URL
 * @param array $options Supports:
 *   - reboundDelay: delay of the follow-up purge; negative values are treated as 0
 *     and values above self::MAX_REBOUND_DELAY are capped to it
 */
public function __construct( array $targets, array $options = [] ) {
	// Clamp the requested delay into the range [0, MAX_REBOUND_DELAY]
	$requestedDelay = (int)max( $options['reboundDelay'] ?? 0, 0 );
	$delay = min( $requestedDelay, self::MAX_REBOUND_DELAY );

	foreach ( $targets as $target ) {
		$tuple = [ $target, $delay ];
		if ( $target instanceof Title ) {
			$this->titleTuples[] = $tuple;
			continue;
		}
		$this->urlTuples[] = $tuple;
	}
}
57 
/**
 * Merge this update with $update by appending its URL and Title tuples to ours.
 *
 * @param MergeableUpdate $update Must be an instance of this class (asserted)
 */
public function merge( MergeableUpdate $update ) {
	Assert::parameterType( __CLASS__, $update, '$update' );
	'@phan-var self $update';

	// Compute both merged lists first, then store them
	$mergedUrlTuples = array_merge( $this->urlTuples, $update->urlTuples );
	$mergedTitleTuples = array_merge( $this->titleTuples, $update->titleTuples );

	$this->urlTuples = $mergedUrlTuples;
	$this->titleTuples = $mergedTitleTuples;
}
66 
/**
 * Create an update object from a list of Title objects plus optional extra URLs.
 *
 * @param Traversable|Title[] $titles Array of Title objects, or an iterator over them
 *   (such as a TitleArray)
 * @param string[] $urls Additional URLs to purge
 * @return CdnCacheUpdate
 */
public static function newFromTitles( $titles, $urls = [] ) {
	// array_merge() only accepts arrays, so flatten iterator input first.
	// Keys are discarded because only the Title values matter.
	if ( $titles instanceof Traversable ) {
		$titles = iterator_to_array( $titles, false );
	}

	return new CdnCacheUpdate( array_merge( $titles, $urls ) );
}
78 
/**
 * Perform the actual work: purge every resolved URL now, then queue delayed
 * "rebound" purge jobs for the URLs that have a positive delay.
 */
public function doUpdate() {
	// Resolve the final list of URLs just before purging them (T240083)
	$delayByUrl = $this->resolveReboundDelayByUrl();

	// Send the immediate purges to CDN and remember when that happened
	self::purge( array_keys( $delayByUrl ) );
	$purgedAt = time();

	// Bucket the URLs that need a rebound purge by their delay in seconds
	$urlsByDelay = [];
	foreach ( $delayByUrl as $url => $delay ) {
		if ( $delay <= 0 ) {
			continue;
		}
		$urlsByDelay[$delay][] = $url;
	}

	// One delayed purge job per distinct delay (usually only one job)
	$jobs = [];
	foreach ( $urlsByDelay as $delay => $urls ) {
		$jobParams = [
			'urls' => $urls,
			'jobReleaseTimestamp' => $purgedAt + $delay
		];
		$jobs[] = new CdnPurgeJob( $jobParams );
	}

	JobQueueGroup::singleton()->lazyPush( $jobs );
}
104 
/**
 * Purge a list of URLs from the CDN.
 *
 * The purge is always relayed on the 'cdn-url-purges' event channel. It is
 * additionally sent via HTCP when $wgHTCPRouting is set, and as direct PURGE
 * requests when $wgCdnServers is set.
 *
 * @param string[] $urls List of URLs to purge; duplicates are removed
 */
public static function purge( array $urls ) {
	// Without this declaration both settings are undefined locals and the
	// HTCP / direct purge branches below can never run.
	global $wgCdnServers, $wgHTCPRouting;

	if ( !$urls ) {
		return;
	}

	// Remove duplicate URLs from list
	$urls = array_unique( $urls );

	wfDebugLog( 'squid', __METHOD__ . ': ' . implode( ' ', $urls ) );

	// Reliably broadcast the purge to all edge nodes
	$ts = microtime( true );
	$relayerGroup = MediaWikiServices::getInstance()->getEventRelayerGroup();
	$relayerGroup->getRelayer( 'cdn-url-purges' )->notifyMulti(
		'cdn-url-purges',
		array_map(
			// Every event of this batch carries the same timestamp
			function ( $url ) use ( $ts ) {
				return [
					'url' => $url,
					'timestamp' => $ts,
				];
			},
			$urls
		)
	);

	// Send lossy UDP broadcasting if enabled
	if ( $wgHTCPRouting ) {
		self::HTCPPurge( $urls );
	}

	// Do direct server purges if enabled (this does not scale very well)
	if ( $wgCdnServers ) {
		self::naivePurge( $urls );
	}
}
150 
/**
 * @return string[] The URLs this update resolves to (the keys of the
 *   URL => rebound delay map)
 */
public function getUrls() {
	$delayByUrl = $this->resolveReboundDelayByUrl();

	return array_keys( $delayByUrl );
}
157 
/**
 * Resolve the stored Title and URL tuples into one map of URL => rebound delay.
 *
 * When the same URL is reached more than once, the largest delay is kept.
 *
 * @return int[] Map of (URL => rebound purge delay)
 */
private function resolveReboundDelayByUrl() {
	$services = MediaWikiServices::getInstance();

	// Avoid multiple queries for HtmlCacheUpdater::getUrls() call
	$linkBatch = $services->getLinkBatchFactory()->newLinkBatch();
	foreach ( $this->titleTuples as [ $title ] ) {
		$linkBatch->addObj( $title );
	}
	$linkBatch->execute();

	$htmlCacheUpdater = $services->getHtmlCacheUpdater();
	$delayByUrl = [];

	// Resolve the titles into CDN URLs
	foreach ( $this->titleTuples as [ $title, $delay ] ) {
		$titleUrls = $htmlCacheUpdater->getUrls( $title );
		foreach ( $titleUrls as $url ) {
			// Use the highest rebound for duplicate URLs in order to handle the most lag
			$known = $delayByUrl[$url] ?? 0;
			$delayByUrl[$url] = max( $known, $delay );
		}
	}

	// Then fold in the URLs that were given directly
	foreach ( $this->urlTuples as [ $url, $delay ] ) {
		// Use the highest rebound for duplicate URLs in order to handle the most lag
		$known = $delayByUrl[$url] ?? 0;
		$delayByUrl[$url] = max( $known, $delay );
	}

	return $delayByUrl;
}
190 
/**
 * Send Hyper Text Caching Protocol (HTCP) CLR requests for the given URLs.
 *
 * Each URL is expanded, matched against $wgHTCPRouting, and one UDP packet is
 * sent to every host/port of the matching rule. URLs without a matching rule
 * are skipped with a debug log entry.
 *
 * @param string[] $urls Collection of URLs to purge
 * @throws MWException If a URL is not a string or its routing rule lacks host/port
 */
private static function HTCPPurge( array $urls ) {
	// Both settings are read below and must be imported into function scope
	global $wgHTCPRouting, $wgHTCPMulticastTTL;

	// HTCP CLR operation
	$htcpOpCLR = 4;

	// @todo FIXME: PHP doesn't support these socket constants (include/linux/in.h)
	if ( !defined( "IPPROTO_IP" ) ) {
		define( "IPPROTO_IP", 0 );
		define( "IP_MULTICAST_LOOP", 34 );
		define( "IP_MULTICAST_TTL", 33 );
	}

	// pfsockopen doesn't work because we need set_sock_opt
	$conn = socket_create( AF_INET, SOCK_DGRAM, SOL_UDP );
	if ( !$conn ) {
		$errstr = socket_strerror( socket_last_error() );
		wfDebugLog( 'squid', __METHOD__ .
			": Error opening UDP socket: $errstr" );

		return;
	}

	try {
		// Set socket options
		socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_LOOP, 0 );
		if ( $wgHTCPMulticastTTL != 1 ) {
			// Set multicast time to live (hop count) option on socket
			socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_TTL,
				$wgHTCPMulticastTTL );
		}

		// Get sequential trx IDs for packet loss counting
		$idGenerator = MediaWikiServices::getInstance()->getGlobalIdGenerator();
		$ids = $idGenerator->newSequentialPerNodeIDs(
			'squidhtcppurge', 32,
			count( $urls ),
			$idGenerator::QUICK_VOLATILE
		);

		foreach ( $urls as $url ) {
			if ( !is_string( $url ) ) {
				throw new MWException( 'Bad purge URL' );
			}
			$url = self::expand( $url );
			$conf = self::getRuleForURL( $url, $wgHTCPRouting );
			if ( !$conf ) {
				wfDebugLog( 'squid', __METHOD__ .
					"No HTCP rule configured for URL {$url} , skipping" );
				continue;
			}

			if ( isset( $conf['host'] ) && isset( $conf['port'] ) ) {
				// Normalize single entries
				$conf = [ $conf ];
			}
			// Validate every destination before anything is sent for this URL
			foreach ( $conf as $subconf ) {
				if ( !isset( $subconf['host'] ) || !isset( $subconf['port'] ) ) {
					throw new MWException( "Invalid HTCP rule for URL $url\n" );
				}
			}

			// Construct a minimal HTCP request diagram
			// as per RFC 2756
			// Opcode 'CLR', no response desired, no auth
			$htcpTransID = current( $ids );
			next( $ids );

			$htcpSpecifier = pack( 'na4na*na8n',
				4, 'HEAD', strlen( $url ), $url,
				8, 'HTTP/1.0', 0 );

			$htcpDataLen = 8 + 2 + strlen( $htcpSpecifier );
			$htcpLen = 4 + $htcpDataLen + 2;

			// Note! Squid gets the bit order of the first
			// word wrong, wrt the RFC. Apparently no other
			// implementation exists, so adapt to Squid
			$htcpPacket = pack( 'nxxnCxNxxa*n',
				$htcpLen, $htcpDataLen, $htcpOpCLR,
				$htcpTransID, $htcpSpecifier, 2 );

			wfDebugLog( 'squid', __METHOD__ .
				"Purging URL $url via HTCP" );
			foreach ( $conf as $subconf ) {
				socket_sendto( $conn, $htcpPacket, $htcpLen, 0,
					$subconf['host'], $subconf['port'] );
			}
		}
	} finally {
		// Release the UDP socket even when a bad URL or rule aborts the loop
		socket_close( $conn );
	}
}
286 
/**
 * Send HTTP PURGE requests for each of the URLs to all of the cache servers
 * listed in $wgCdnServers.
 *
 * @param string[] $urls List of URLs to purge
 */
private static function naivePurge( array $urls ) {
	global $wgCdnServers;

	$reqs = [];
	foreach ( $urls as $url ) {
		$urlInfo = wfParseUrl( self::expand( $url ) );
		// NOTE(review): wfParseUrl() is described as a parse_url() work-alike, so it is
		// presumed to return a non-array on malformed input. Skip such URLs rather than
		// sending a PURGE with an empty host and path.
		if ( !is_array( $urlInfo ) || !isset( $urlInfo['host'] ) ) {
			wfDebugLog( 'squid', __METHOD__ . ": Unable to parse URL $url, skipping" );
			continue;
		}

		// Normalize the optional parts to strings; passing null to strlen() is
		// deprecated as of PHP 8.1
		$port = (string)( $urlInfo['port'] ?? '' );
		$query = (string)( $urlInfo['query'] ?? '' );

		$urlHost = $port !== ''
			? IP::combineHostAndPort( $urlInfo['host'], $urlInfo['port'] )
			: $urlInfo['host'];
		$urlPath = $query !== ''
			? wfAppendQuery( $urlInfo['path'], $urlInfo['query'] )
			: $urlInfo['path'];
		$baseReq = [
			'method' => 'PURGE',
			'url' => $urlPath,
			'headers' => [
				'Host' => $urlHost,
				'Connection' => 'Keep-Alive',
				'Proxy-Connection' => 'Keep-Alive',
				'User-Agent' => 'MediaWiki/' . MW_VERSION . ' ' . __CLASS__
			]
		];
		// The same request is sent once through every configured cache server
		foreach ( $wgCdnServers as $server ) {
			$reqs[] = ( $baseReq + [ 'proxy' => $server ] );
		}
	}

	$http = MediaWikiServices::getInstance()->getHttpRequestFactory()
		->createMultiClient( [ 'maxConnsPerHost' => 8, 'usePipelining' => true ] );
	$http->runMulti( $reqs );
}
324 
/**
 * Expand a potentially local URL to a fully-qualified one via
 * wfExpandUrl() with PROTO_INTERNAL.
 *
 * @param string $url
 * @return string|bool Whatever wfExpandUrl() returns for the URL
 */
private static function expand( $url ) {
	$expanded = wfExpandUrl( $url, PROTO_INTERNAL );

	return $expanded;
}
342 
/**
 * Find the HTCP routing rule to use for a given URL.
 *
 * Rules are tried in order; the first one whose key is the empty string
 * (catch-all) or a regex matching the URL wins.
 *
 * @param string $url URL to match
 * @param array $rules Map of (regex or '' => routing value)
 * @return mixed The routing value of the first matching rule, or false if none matches
 */
private static function getRuleForURL( $url, $rules ) {
	foreach ( $rules as $pattern => $routing ) {
		$isCatchAll = ( $pattern === '' );
		if ( !$isCatchAll && !preg_match( $pattern, $url ) ) {
			// Neither a catch-all nor a match: try the next rule
			continue;
		}

		return $routing;
	}

	return false;
}
358 }
CdnCacheUpdate\HTCPPurge
static HTCPPurge(array $urls)
Send Hyper Text Caching Protocol (HTCP) CLR requests.
Definition: CdnCacheUpdate.php:197
CdnCacheUpdate\$titleTuples
array[] $titleTuples
List of (Title, rebound purge delay) tuples.
Definition: CdnCacheUpdate.php:32
CdnCacheUpdate\getUrls
getUrls()
Definition: CdnCacheUpdate.php:154
$wgCdnServers
$wgCdnServers
List of proxy servers to purge on changes; default port is 80.
Definition: DefaultSettings.php:3000
PROTO_INTERNAL
const PROTO_INTERNAL
Definition: Defines.php:213
MediaWiki\MediaWikiServices
MediaWikiServices is the service locator for the application scope of MediaWiki.
Definition: MediaWikiServices.php:165
$wgHTCPMulticastTTL
$wgHTCPMulticastTTL
HTCP multicast TTL.
Definition: DefaultSettings.php:3086
MergeableUpdate
Interface that deferrable updates can implement to signal that updates can be combined.
Definition: MergeableUpdate.php:20
MW_VERSION
const MW_VERSION
The running version of MediaWiki.
Definition: Defines.php:39
CdnCacheUpdate\doUpdate
doUpdate()
Perform the actual work.
Definition: CdnCacheUpdate.php:79
wfDebugLog
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not.
Definition: GlobalFunctions.php:989
CdnCacheUpdate\getRuleForURL
static getRuleForURL( $url, $rules)
Find the HTCP routing rule to use for a given URL.
Definition: CdnCacheUpdate.php:349
wfAppendQuery
wfAppendQuery( $url, $query)
Append a query string to an existing URL, which may or may not already have query string parameters.
Definition: GlobalFunctions.php:438
CdnCacheUpdate\merge
merge(MergeableUpdate $update)
Merge this update with $update.
Definition: CdnCacheUpdate.php:58
wfParseUrl
wfParseUrl( $url)
parse_url() work-alike, but non-broken.
Definition: GlobalFunctions.php:792
MWException
MediaWiki exception.
Definition: MWException.php:29
CdnCacheUpdate\$urlTuples
array[] $urlTuples
List of (URL, rebound purge delay) tuples.
Definition: CdnCacheUpdate.php:30
CdnCacheUpdate\expand
static expand( $url)
Expand local URLs to fully-qualified URLs using the internal protocol and host defined in $wgInternalServer.
Definition: CdnCacheUpdate.php:339
$wgHTCPRouting
$wgHTCPRouting
Routing configuration for HTCP multicast purging.
Definition: DefaultSettings.php:3080
CdnCacheUpdate\naivePurge
static naivePurge(array $urls)
Send HTTP PURGE requests for each of the URLs to all of the cache servers.
Definition: CdnCacheUpdate.php:293
$title
$title
Definition: testCompression.php:38
CdnCacheUpdate
Handles purging the appropriate CDN objects given a list of URLs or Title instances.
Definition: CdnCacheUpdate.php:28
CdnPurgeJob
Job to purge a set of URLs from CDN.
Definition: CdnPurgeJob.php:30
CdnCacheUpdate\newFromTitles
static newFromTitles( $titles, $urls=[])
Create an update object from an array of Title objects, or a TitleArray object.
Definition: CdnCacheUpdate.php:75
CdnCacheUpdate\__construct
__construct(array $targets, array $options=[])
Definition: CdnCacheUpdate.php:43
Title
Represents a title within MediaWiki.
Definition: Title.php:41
JobQueueGroup\singleton
static singleton( $domain=false)
Definition: JobQueueGroup.php:70
DeferrableUpdate
Interface that deferrable updates should implement.
Definition: DeferrableUpdate.php:11
CdnCacheUpdate\purge
static purge(array $urls)
Purges a list of CDN nodes defined in $wgCdnServers.
Definition: CdnCacheUpdate.php:112
CdnCacheUpdate\resolveReboundDelayByUrl
resolveReboundDelayByUrl()
Definition: CdnCacheUpdate.php:161
wfExpandUrl
wfExpandUrl( $url, $defaultProto=PROTO_CURRENT)
Expand a potentially local URL to a fully-qualified URL.
Definition: GlobalFunctions.php:490