CdnCacheUpdate.php
<?php

use MediaWiki\MainConfigNames;
use MediaWiki\MediaWikiServices;
use MediaWiki\Page\PageReference;
use Wikimedia\Assert\Assert;
use Wikimedia\IPUtils;

/**
 * Handles purging the appropriate CDN objects given a list of URLs or Title instances.
 */
class CdnCacheUpdate implements MergeableUpdate {
	/** @var array[] List of (URL, rebound purge delay) tuples */
	private $urlTuples = [];
	/** @var array[] List of (PageReference, rebound purge delay) tuples */
	private $pageTuples = [];
	/** @var int Maximum seconds of rebound purge delay */
	private const MAX_REBOUND_DELAY = 300;
	/**
	 * @param PageReference[]|string[] $targets Pages and/or URLs to purge from the CDN
	 * @param array $options Options map; 'reboundDelay' is the number of seconds to wait
	 *  before sending a second (rebound) purge for each target
	 */
	public function __construct( array $targets, array $options = [] ) {
		$delay = min(
			(int)max( $options['reboundDelay'] ?? 0, 0 ),
			self::MAX_REBOUND_DELAY
		);

		foreach ( $targets as $target ) {
			if ( $target instanceof PageReference ) {
				$this->pageTuples[] = [ $target, $delay ];
			} else {
				$this->urlTuples[] = [ $target, $delay ];
			}
		}
	}
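	// Illustrative usage (hypothetical page/URL values): queue a purge of a page's CDN
	// URLs plus one extra URL, with a 20-second rebound purge, deferred until the end
	// of the current request via the deferred updates system:
	//
	//   DeferredUpdates::addUpdate( new CdnCacheUpdate(
	//       [ $pageReference, 'https://cdn.example.org/extra/path' ],
	//       [ 'reboundDelay' => 20 ]
	//   ) );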

	public function merge( MergeableUpdate $update ) {
		Assert::parameterType( __CLASS__, $update, '$update' );
		'@phan-var self $update';

		$this->urlTuples = array_merge( $this->urlTuples, $update->urlTuples );
		$this->pageTuples = array_merge( $this->pageTuples, $update->pageTuples );
	}

	public function doUpdate() {
		// Resolve the final list of URLs just before purging them (T240083)
		$reboundDelayByUrl = $this->resolveReboundDelayByUrl();

		// Send the immediate purges to CDN
		self::purge( array_keys( $reboundDelayByUrl ) );
		$immediatePurgeTimestamp = time();

		// Get the URLs that need rebound purges, grouped by seconds of purge delay
		$urlsWithReboundByDelay = [];
		foreach ( $reboundDelayByUrl as $url => $delay ) {
			if ( $delay > 0 ) {
				$urlsWithReboundByDelay[$delay][] = $url;
			}
		}
		// Enqueue delayed purge jobs for these URLs (usually only one job)
		$jobs = [];
		foreach ( $urlsWithReboundByDelay as $delay => $urls ) {
			$jobs[] = new CdnPurgeJob( [
				'urls' => $urls,
				'jobReleaseTimestamp' => $immediatePurgeTimestamp + $delay
			] );
		}
		MediaWikiServices::getInstance()->getJobQueueGroup()->lazyPush( $jobs );
	}
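	// Illustration of the rebound flow above (hypothetical values): if $reboundDelayByUrl
	// resolved to [ 'https://example.org/wiki/A' => 20, 'https://example.org/wiki/B' => 0 ],
	// both URLs are purged immediately, and a single CdnPurgeJob is queued that re-purges
	// 'A' once its jobReleaseTimestamp (now + 20 seconds) has passed.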

	/**
	 * Purge a list of URLs from the CDN layer.
	 *
	 * Purges are broadcast via the 'cdn-url-purges' event relayer, sent over HTCP if
	 * $wgHTCPRouting is configured, and sent as direct HTTP PURGE requests if
	 * $wgCdnServers is configured.
	 *
	 * @param string[] $urls
	 */
	public static function purge( array $urls ) {
		$cdnServers = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::CdnServers );
		$htcpRouting = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::HTCPRouting );
		if ( !$urls ) {
			return;
		}

		// Remove duplicate URLs from list
		$urls = array_unique( $urls );

		wfDebugLog( 'squid', __METHOD__ . ': ' . implode( ' ', $urls ) );

		// Reliably broadcast the purge to all edge nodes
		$ts = microtime( true );
		$relayerGroup = MediaWikiServices::getInstance()->getEventRelayerGroup();
		$relayerGroup->getRelayer( 'cdn-url-purges' )->notifyMulti(
			'cdn-url-purges',
			array_map(
				static function ( $url ) use ( $ts ) {
					return [
						'url' => $url,
						'timestamp' => $ts,
					];
				},
				$urls
			)
		);

		// Send lossy UDP broadcasting if enabled
		if ( $htcpRouting ) {
			self::HTCPPurge( $urls );
		}

		// Do direct server purges if enabled (this does not scale very well)
		if ( $cdnServers ) {
			self::naivePurge( $urls );
		}
	}
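	// Illustrative configuration (example values, not defaults): the direct purge path
	// requires something like $wgCdnServers = [ '127.0.0.1:3128', '10.0.0.2' ];
	// the HTCP path requires $wgHTCPRouting (see HTCPPurge() below for its shape).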

	/**
	 * @return string[] List of URLs to be purged
	 */
	public function getUrls() {
		return array_keys( $this->resolveReboundDelayByUrl() );
	}

	/**
	 * @return int[] Map of (URL => rebound purge delay in seconds)
	 */
	private function resolveReboundDelayByUrl() {
		$services = MediaWikiServices::getInstance();
		// Avoid multiple queries for HtmlCacheUpdater::getUrls() call
		$lb = $services->getLinkBatchFactory()->newLinkBatch();
		foreach ( $this->pageTuples as [ $page, ] ) {
			$lb->addObj( $page );
		}
		$lb->execute();

		$reboundDelayByUrl = [];

		// Resolve the titles into CDN URLs
		$htmlCacheUpdater = $services->getHtmlCacheUpdater();
		foreach ( $this->pageTuples as [ $page, $delay ] ) {
			foreach ( $htmlCacheUpdater->getUrls( $page ) as $url ) {
				// Use the highest rebound for duplicate URLs in order to handle the most lag
				$reboundDelayByUrl[$url] = max( $reboundDelayByUrl[$url] ?? 0, $delay );
			}
		}

		foreach ( $this->urlTuples as [ $url, $delay ] ) {
			// Use the highest rebound for duplicate URLs in order to handle the most lag
			$reboundDelayByUrl[$url] = max( $reboundDelayByUrl[$url] ?? 0, $delay );
		}

		return $reboundDelayByUrl;
	}
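	// Example of the deduplication above (hypothetical values): if merged updates
	// contributed the same URL with delays 0 and 20, the map keeps a single entry with
	// delay 20, so the URL is purged once now and once more 20 seconds later.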

	/**
	 * Send Hyper Text Caching Protocol (HTCP) CLR requests for a set of URLs.
	 *
	 * @param string[] $urls
	 */
	private static function HTCPPurge( array $urls ) {
		$htcpRouting = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::HTCPRouting );
		$htcpMulticastTTL = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::HTCPMulticastTTL );
		// HTCP CLR operation
		$htcpOpCLR = 4;

		// @todo FIXME: PHP doesn't support these socket constants (include/linux/in.h)
		if ( !defined( "IPPROTO_IP" ) ) {
			define( "IPPROTO_IP", 0 );
			define( "IP_MULTICAST_LOOP", 34 );
			define( "IP_MULTICAST_TTL", 33 );
		}

		// pfsockopen doesn't work because we need set_sock_opt
		$conn = socket_create( AF_INET, SOCK_DGRAM, SOL_UDP );
		if ( !$conn ) {
			$errstr = socket_strerror( socket_last_error() );
			wfDebugLog( 'squid', __METHOD__ .
				": Error opening UDP socket: $errstr" );

			return;
		}

		// Set socket options
		socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_LOOP, 0 );
		if ( $htcpMulticastTTL != 1 ) {
			// Set multicast time to live (hop count) option on socket
			socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_TTL,
				$htcpMulticastTTL );
		}

		// Get sequential trx IDs for packet loss counting
		$idGenerator = MediaWikiServices::getInstance()->getGlobalIdGenerator();
		$ids = $idGenerator->newSequentialPerNodeIDs(
			'squidhtcppurge',
			32,
			count( $urls )
		);

		foreach ( $urls as $url ) {
			if ( !is_string( $url ) ) {
				throw new MWException( 'Bad purge URL' );
			}
			$url = self::expand( $url );
			$conf = self::getRuleForURL( $url, $htcpRouting );
			if ( !$conf ) {
				wfDebugLog( 'squid', __METHOD__ .
					": No HTCP rule configured for URL {$url}, skipping" );
				continue;
			}

			if ( isset( $conf['host'] ) && isset( $conf['port'] ) ) {
				// Normalize single entries
				$conf = [ $conf ];
			}
			foreach ( $conf as $subconf ) {
				if ( !isset( $subconf['host'] ) || !isset( $subconf['port'] ) ) {
					throw new MWException( "Invalid HTCP rule for URL $url\n" );
				}
			}

			// Construct a minimal HTCP request datagram
			// as per RFC 2756
			// Opcode 'CLR', no response desired, no auth
			$htcpTransID = current( $ids );
			next( $ids );

			// CLR specifier: method, URI, version, and (empty) request headers,
			// each as a length-prefixed string
			$htcpSpecifier = pack( 'na4na*na8n',
				4, 'HEAD', strlen( $url ), $url,
				8, 'HTTP/1.0', 0 );

			$htcpDataLen = 8 + 2 + strlen( $htcpSpecifier );
			$htcpLen = 4 + $htcpDataLen + 2;

			// Note! Squid gets the bit order of the first
			// word wrong, wrt the RFC. Apparently no other
			// implementation exists, so adapt to Squid
			$htcpPacket = pack( 'nxxnCxNxxa*n',
				$htcpLen, $htcpDataLen, $htcpOpCLR,
				$htcpTransID, $htcpSpecifier, 2 );

			wfDebugLog( 'squid', __METHOD__ .
				": Purging URL $url via HTCP" );
			foreach ( $conf as $subconf ) {
				socket_sendto( $conn, $htcpPacket, $htcpLen, 0,
					$subconf['host'], $subconf['port'] );
			}
		}
	}
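	// Illustrative $wgHTCPRouting shape (example addresses, not defaults): keys are
	// regexes matched against the expanded URL by getRuleForURL(), with '' as a
	// catch-all; each rule is a host/port pair, or a list of them for several relays:
	//
	//   $wgHTCPRouting = [
	//       '|^https://upload\.example\.org|' => [ 'host' => '239.128.0.113', 'port' => 4827 ],
	//       '' => [
	//           [ 'host' => '239.128.0.112', 'port' => 4827 ],
	//           [ 'host' => '10.64.0.21', 'port' => 4827 ],
	//       ],
	//   ];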

	/**
	 * Send HTTP PURGE requests for the given URLs directly to every server
	 * listed in $wgCdnServers.
	 *
	 * @param string[] $urls
	 */
	private static function naivePurge( array $urls ) {
		$cdnServers = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::CdnServers );

		$reqs = [];
		foreach ( $urls as $url ) {
			$url = self::expand( $url );
			$urlInfo = wfParseUrl( $url );
			$urlHost = strlen( $urlInfo['port'] ?? '' )
				? IPUtils::combineHostAndPort( $urlInfo['host'], (int)$urlInfo['port'] )
				: $urlInfo['host'];
			$baseReq = [
				'method' => 'PURGE',
				'url' => $url,
				'headers' => [
					'Host' => $urlHost,
					'Connection' => 'Keep-Alive',
					'Proxy-Connection' => 'Keep-Alive',
					'User-Agent' => 'MediaWiki/' . MW_VERSION . ' ' . __CLASS__
				]
			];
			foreach ( $cdnServers as $server ) {
				$reqs[] = ( $baseReq + [ 'proxy' => $server ] );
			}
		}

		$http = MediaWikiServices::getInstance()->getHttpRequestFactory()
			->createMultiClient( [ 'maxConnsPerHost' => 8, 'usePipelining' => true ] );
		$http->runMulti( $reqs );
	}
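	// Note on fan-out (hypothetical numbers): every URL is sent to every CDN server, so
	// purging 2 URLs with 3 entries in $wgCdnServers issues 2 x 3 = 6 PURGE requests;
	// this is the "does not scale very well" caveat noted in purge() above.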

	/** Expand a potentially local URL to a fully-qualified URL, using the internal protocol */
	private static function expand( $url ) {
		return wfExpandUrl( $url, PROTO_INTERNAL );
	}

	/**
	 * Find the HTCP routing rule to use for a given URL.
	 *
	 * @param string $url URL to match
	 * @param array $rules Array of rules, keyed by regex; the empty-string key acts as a catch-all
	 * @return mixed The matching rule, or false if nothing matched
	 */
	private static function getRuleForURL( $url, $rules ) {
		foreach ( $rules as $regex => $routing ) {
			if ( $regex === '' || preg_match( $regex, $url ) ) {
				return $routing;
			}
		}

		return false;
	}
}