CdnCacheUpdate.php (MediaWiki master)

<?php

use MediaWiki\MediaWikiServices;
use MediaWiki\Page\PageReference;
use Wikimedia\Assert\Assert;
use Wikimedia\IPUtils;

/**
 * Handles purging the appropriate CDN objects given a list of URLs or Title instances.
 *
 * @ingroup Cache
 */
class CdnCacheUpdate implements MergeableUpdate {
	/** @var array[] List of (URL, rebound purge delay) tuples */
	private $urlTuples = [];
	/** @var array[] List of (PageReference, rebound purge delay) tuples */
	private $pageTuples = [];

	/** @var int Maximum seconds of rebound purge delay */
	private const MAX_REBOUND_DELAY = 300;

	/**
	 * @param PageReference[]|string[] $targets Pages and/or URLs to purge from the CDN
	 * @param array $options Map of options; supports 'reboundDelay', the number of
	 *  seconds to wait before sending a second ("rebound") purge, capped at
	 *  MAX_REBOUND_DELAY. No rebound purge is sent if this is not positive.
	 */
	public function __construct( array $targets, array $options = [] ) {
		$delay = min(
			(int)max( $options['reboundDelay'] ?? 0, 0 ),
			self::MAX_REBOUND_DELAY
		);

		foreach ( $targets as $target ) {
			if ( $target instanceof PageReference ) {
				$this->pageTuples[] = [ $target, $delay ];
			} else {
				$this->urlTuples[] = [ $target, $delay ];
			}
		}
	}

	public function merge( MergeableUpdate $update ) {
		/** @var self $update */
		Assert::parameterType( __CLASS__, $update, '$update' );
		'@phan-var self $update';

		$this->urlTuples = array_merge( $this->urlTuples, $update->urlTuples );
		$this->pageTuples = array_merge( $this->pageTuples, $update->pageTuples );
	}

	/**
	 * Create an update object from an array of Title objects, or a TitleArray object.
	 *
	 * @param Traversable|Title[] $pages
	 * @param string[] $urls
	 * @return CdnCacheUpdate
	 */
	public static function newFromTitles( $pages, $urls = [] ) {
		return new CdnCacheUpdate( array_merge( $pages, $urls ) );
	}

	public function doUpdate() {
		// Resolve the final list of URLs just before purging them (T240083)
		$reboundDelayByUrl = $this->resolveReboundDelayByUrl();

		// Send the immediate purges to CDN
		self::purge( array_keys( $reboundDelayByUrl ) );
		$immediatePurgeTimestamp = time();

		// Get the URLs that need rebound purges, grouped by seconds of purge delay
		$urlsWithReboundByDelay = [];
		foreach ( $reboundDelayByUrl as $url => $delay ) {
			if ( $delay > 0 ) {
				$urlsWithReboundByDelay[$delay][] = $url;
			}
		}
		// Enqueue delayed purge jobs for these URLs (usually only one job)
		$jobs = [];
		foreach ( $urlsWithReboundByDelay as $delay => $urls ) {
			$jobs[] = new CdnPurgeJob( [
				'urls' => $urls,
				'jobReleaseTimestamp' => $immediatePurgeTimestamp + $delay
			] );
		}
		JobQueueGroup::singleton()->lazyPush( $jobs );
	}

	/**
	 * Purges a list of CDN nodes defined in $wgCdnServers.
	 *
	 * @param string[] $urls List of full URLs to purge
	 */
	public static function purge( array $urls ) {
		$cdnServers = MediaWikiServices::getInstance()->getMainConfig()->get( 'CdnServers' );
		$htcpRouting = MediaWikiServices::getInstance()->getMainConfig()->get( 'HTCPRouting' );
		if ( !$urls ) {
			return;
		}

		// Remove duplicate URLs from list
		$urls = array_unique( $urls );

		wfDebugLog( 'squid', __METHOD__ . ': ' . implode( ' ', $urls ) );

		// Reliably broadcast the purge to all edge nodes
		$ts = microtime( true );
		$relayerGroup = MediaWikiServices::getInstance()->getEventRelayerGroup();
		$relayerGroup->getRelayer( 'cdn-url-purges' )->notifyMulti(
			'cdn-url-purges',
			array_map(
				static function ( $url ) use ( $ts ) {
					return [
						'url' => $url,
						'timestamp' => $ts,
					];
				},
				$urls
			)
		);

		// Send lossy UDP broadcasting if enabled
		if ( $htcpRouting ) {
			self::HTCPPurge( $urls );
		}

		// Do direct server purges if enabled (this does not scale very well)
		if ( $cdnServers ) {
			self::naivePurge( $urls );
		}
	}

	/**
	 * @return string[] List of URLs to be purged
	 */
	public function getUrls() {
		return array_keys( $this->resolveReboundDelayByUrl() );
	}

	/**
	 * @return int[] Map of (URL => rebound purge delay in seconds)
	 */
	private function resolveReboundDelayByUrl() {
		$services = MediaWikiServices::getInstance();

		// Avoid multiple queries for HtmlCacheUpdater::getUrls() call
		$lb = $services->getLinkBatchFactory()->newLinkBatch();
		foreach ( $this->pageTuples as list( $page, $delay ) ) {
			$lb->addObj( $page );
		}
		$lb->execute();

		$reboundDelayByUrl = [];

		// Resolve the titles into CDN URLs
		$htmlCacheUpdater = $services->getHtmlCacheUpdater();
		foreach ( $this->pageTuples as list( $page, $delay ) ) {
			foreach ( $htmlCacheUpdater->getUrls( $page ) as $url ) {
				// Use the highest rebound for duplicate URLs in order to handle the most lag
				$reboundDelayByUrl[$url] = max( $reboundDelayByUrl[$url] ?? 0, $delay );
			}
		}

		foreach ( $this->urlTuples as list( $url, $delay ) ) {
			// Use the highest rebound for duplicate URLs in order to handle the most lag
			$reboundDelayByUrl[$url] = max( $reboundDelayByUrl[$url] ?? 0, $delay );
		}

		return $reboundDelayByUrl;
	}

	/**
	 * Send Hyper Text Caching Protocol (HTCP) CLR requests.
	 *
	 * @param string[] $urls Collection of URLs to purge
	 * @throws MWException
	 */
	private static function HTCPPurge( array $urls ) {
		$htcpRouting = MediaWikiServices::getInstance()->getMainConfig()->get( 'HTCPRouting' );
		$htcpMulticastTTL = MediaWikiServices::getInstance()->getMainConfig()->get( 'HTCPMulticastTTL' );
		// HTCP CLR operation
		$htcpOpCLR = 4;

		// @todo FIXME: PHP doesn't support these socket constants (include/linux/in.h)
		if ( !defined( "IPPROTO_IP" ) ) {
			define( "IPPROTO_IP", 0 );
			define( "IP_MULTICAST_LOOP", 34 );
			define( "IP_MULTICAST_TTL", 33 );
		}

		// pfsockopen doesn't work because we need set_sock_opt
		$conn = socket_create( AF_INET, SOCK_DGRAM, SOL_UDP );
		if ( !$conn ) {
			$errstr = socket_strerror( socket_last_error() );
			wfDebugLog( 'squid', __METHOD__ .
				": Error opening UDP socket: $errstr" );

			return;
		}

		// Set socket options
		socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_LOOP, 0 );
		if ( $htcpMulticastTTL != 1 ) {
			// Set multicast time to live (hop count) option on socket
			socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_TTL,
				$htcpMulticastTTL );
		}

		// Get sequential trx IDs for packet loss counting
		$idGenerator = MediaWikiServices::getInstance()->getGlobalIdGenerator();
		$ids = $idGenerator->newSequentialPerNodeIDs(
			'squidhtcppurge',
			32,
			count( $urls )
		);

		foreach ( $urls as $url ) {
			if ( !is_string( $url ) ) {
				throw new MWException( 'Bad purge URL' );
			}
			$url = self::expand( $url );
			$conf = self::getRuleForURL( $url, $htcpRouting );
			if ( !$conf ) {
				wfDebugLog( 'squid', __METHOD__ .
					": No HTCP rule configured for URL {$url}, skipping" );
				continue;
			}

			if ( isset( $conf['host'] ) && isset( $conf['port'] ) ) {
				// Normalize single entries
				$conf = [ $conf ];
			}
			foreach ( $conf as $subconf ) {
				if ( !isset( $subconf['host'] ) || !isset( $subconf['port'] ) ) {
					throw new MWException( "Invalid HTCP rule for URL $url\n" );
				}
			}

			// Construct a minimal HTCP request diagram
			// as per RFC 2756
			// Opcode 'CLR', no response desired, no auth
			$htcpTransID = current( $ids );
			next( $ids );

			$htcpSpecifier = pack( 'na4na*na8n',
				4, 'HEAD', strlen( $url ), $url,
				8, 'HTTP/1.0', 0 );

			$htcpDataLen = 8 + 2 + strlen( $htcpSpecifier );
			$htcpLen = 4 + $htcpDataLen + 2;

			// Note! Squid gets the bit order of the first
			// word wrong, wrt the RFC. Apparently no other
			// implementation exists, so adapt to Squid
			$htcpPacket = pack( 'nxxnCxNxxa*n',
				$htcpLen, $htcpDataLen, $htcpOpCLR,
				$htcpTransID, $htcpSpecifier, 2 );

			wfDebugLog( 'squid', __METHOD__ .
				": Purging URL $url via HTCP" );
			foreach ( $conf as $subconf ) {
				socket_sendto( $conn, $htcpPacket, $htcpLen, 0,
					$subconf['host'], $subconf['port'] );
			}
		}
	}

	/**
	 * Send HTTP PURGE requests for each of the URLs to all of the cache servers.
	 *
	 * @param string[] $urls
	 */
	private static function naivePurge( array $urls ) {
		$cdnServers = MediaWikiServices::getInstance()->getMainConfig()->get( 'CdnServers' );

		$reqs = [];
		foreach ( $urls as $url ) {
			$url = self::expand( $url );
			$urlInfo = wfParseUrl( $url );
			$urlHost = strlen( $urlInfo['port'] ?? '' )
				? IPUtils::combineHostAndPort( $urlInfo['host'], $urlInfo['port'] )
				: $urlInfo['host'];
			$baseReq = [
				'method' => 'PURGE',
				'url' => $url,
				'headers' => [
					'Host' => $urlHost,
					'Connection' => 'Keep-Alive',
					'Proxy-Connection' => 'Keep-Alive',
					'User-Agent' => 'MediaWiki/' . MW_VERSION . ' ' . __CLASS__
				]
			];
			foreach ( $cdnServers as $server ) {
				$reqs[] = ( $baseReq + [ 'proxy' => $server ] );
			}
		}

		$http = MediaWikiServices::getInstance()->getHttpRequestFactory()
			->createMultiClient( [ 'maxConnsPerHost' => 8, 'usePipelining' => true ] );
		$http->runMulti( $reqs );
	}

	/**
	 * Expand local URLs to fully-qualified URLs using the internal protocol
	 * and host defined in $wgInternalServer. Input that is already
	 * fully-qualified is passed through unchanged.
	 *
	 * @param string $url
	 * @return string
	 */
	private static function expand( $url ) {
		return wfExpandUrl( $url, PROTO_INTERNAL );
	}

	/**
	 * Find the HTCP routing rule to use for a given URL.
	 *
	 * @param string $url URL to match
	 * @param array $rules Array of rules, see $wgHTCPRouting for format and behavior
	 * @return mixed Element of $rules that matched, or false if nothing matched
	 */
	private static function getRuleForURL( $url, $rules ) {
		foreach ( $rules as $regex => $routing ) {
			if ( $regex === '' || preg_match( $regex, $url ) ) {
				return $routing;
			}
		}

		return false;
	}
}
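
For context, here is a minimal usage sketch. DeferredUpdates::addUpdate() and DeferredUpdates::PRESEND are standard MediaWiki APIs, but the $title variable, the example URLs, and the 20-second rebound delay below are placeholders, not values taken from this file. Routing the update through DeferredUpdates lets merge() combine it with other pending CDN purges before doUpdate() runs at the end of the request.

	// Schedule CDN purges for a page object and an extra raw URL, with a
	// 20-second rebound purge to catch replication-lagged re-renders.
	// ($title and the URLs are hypothetical examples.)
	$update = new CdnCacheUpdate(
		[ $title, 'https://example.org/w/index.php?title=Sandbox&action=history' ],
		[ 'reboundDelay' => 20 ]
	);
	DeferredUpdates::addUpdate( $update, DeferredUpdates::PRESEND );

	// Or purge explicit URLs immediately, bypassing the deferred-update queue:
	CdnCacheUpdate::purge( [ 'https://example.org/wiki/Sandbox' ] );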