MediaWiki  master
CdnCacheUpdate.php
Go to the documentation of this file.
1 <?php
25 
	/** @var string[] Collection of URLs to purge */
	protected $urls = [];
33 
	/**
	 * @param string[] $urlArr Collection of URLs to purge
	 */
	public function __construct( array $urlArr ) {
		$this->urls = $urlArr;
	}
40 
	/**
	 * Merge this update with another CdnCacheUpdate.
	 *
	 * @param MergeableUpdate $update Must itself be a CdnCacheUpdate
	 */
	public function merge( MergeableUpdate $update ) {
		/** @var CdnCacheUpdate $update */
		Assert::parameterType( __CLASS__, $update, '$update' );

		// Duplicate URLs are tolerated here; purge() de-duplicates via array_unique()
		$this->urls = array_merge( $this->urls, $update->urls );
	}
47 
55  public static function newFromTitles( $titles, $urlArr = [] ) {
56  ( new LinkBatch( $titles ) )->execute();
58  foreach ( $titles as $title ) {
59  $urlArr = array_merge( $urlArr, $title->getCdnUrls() );
60  }
61 
62  return new CdnCacheUpdate( $urlArr );
63  }
64 
68  public function doUpdate() {
70 
71  self::purge( $this->urls );
72 
73  if ( $wgCdnReboundPurgeDelay > 0 ) {
74  JobQueueGroup::singleton()->lazyPush( new CdnPurgeJob( [
75  'urls' => $this->urls,
76  'jobReleaseTimestamp' => time() + $wgCdnReboundPurgeDelay
77  ] ) );
78  }
79  }
80 
88  public static function purge( array $urlArr ) {
90 
91  if ( !$urlArr ) {
92  return;
93  }
94 
95  // Remove duplicate URLs from list
96  $urlArr = array_unique( $urlArr );
97 
98  wfDebugLog( 'squid', __METHOD__ . ': ' . implode( ' ', $urlArr ) );
99 
100  // Reliably broadcast the purge to all edge nodes
101  $relayer = MediaWikiServices::getInstance()->getEventRelayerGroup()
102  ->getRelayer( 'cdn-url-purges' );
103  $ts = microtime( true );
104  $relayer->notifyMulti(
105  'cdn-url-purges',
106  array_map(
107  function ( $url ) use ( $ts ) {
108  return [
109  'url' => $url,
110  'timestamp' => $ts,
111  ];
112  },
113  $urlArr
114  )
115  );
116 
117  // Send lossy UDP broadcasting if enabled
118  if ( $wgHTCPRouting ) {
119  self::HTCPPurge( $urlArr );
120  }
121 
122  // Do direct server purges if enabled (this does not scale very well)
123  if ( $wgSquidServers ) {
124  // Maximum number of parallel connections per squid
125  $maxSocketsPerSquid = 8;
126  // Number of requests to send per socket
127  // 400 seems to be a good tradeoff, opening a socket takes a while
128  $urlsPerSocket = 400;
129  $socketsPerSquid = ceil( count( $urlArr ) / $urlsPerSocket );
130  if ( $socketsPerSquid > $maxSocketsPerSquid ) {
131  $socketsPerSquid = $maxSocketsPerSquid;
132  }
133 
134  $pool = new SquidPurgeClientPool;
135  $chunks = array_chunk( $urlArr, ceil( count( $urlArr ) / $socketsPerSquid ) );
136  foreach ( $wgSquidServers as $server ) {
137  foreach ( $chunks as $chunk ) {
138  $client = new SquidPurgeClient( $server );
139  foreach ( $chunk as $url ) {
140  $client->queuePurge( $url );
141  }
142  $pool->addClient( $client );
143  }
144  }
145 
146  $pool->run();
147  }
148  }
149 
156  private static function HTCPPurge( array $urlArr ) {
158 
159  // HTCP CLR operation
160  $htcpOpCLR = 4;
161 
162  // @todo FIXME: PHP doesn't support these socket constants (include/linux/in.h)
163  if ( !defined( "IPPROTO_IP" ) ) {
164  define( "IPPROTO_IP", 0 );
165  define( "IP_MULTICAST_LOOP", 34 );
166  define( "IP_MULTICAST_TTL", 33 );
167  }
168 
169  // pfsockopen doesn't work because we need set_sock_opt
170  $conn = socket_create( AF_INET, SOCK_DGRAM, SOL_UDP );
171  if ( !$conn ) {
172  $errstr = socket_strerror( socket_last_error() );
173  wfDebugLog( 'squid', __METHOD__ .
174  ": Error opening UDP socket: $errstr" );
175 
176  return;
177  }
178 
179  // Set socket options
180  socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_LOOP, 0 );
181  if ( $wgHTCPMulticastTTL != 1 ) {
182  // Set multicast time to live (hop count) option on socket
183  socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_TTL,
184  $wgHTCPMulticastTTL );
185  }
186 
187  // Get sequential trx IDs for packet loss counting
189  'squidhtcppurge', 32, count( $urlArr ), UIDGenerator::QUICK_VOLATILE
190  );
191 
192  foreach ( $urlArr as $url ) {
193  if ( !is_string( $url ) ) {
194  throw new MWException( 'Bad purge URL' );
195  }
196  $url = self::expand( $url );
197  $conf = self::getRuleForURL( $url, $wgHTCPRouting );
198  if ( !$conf ) {
199  wfDebugLog( 'squid', __METHOD__ .
200  "No HTCP rule configured for URL {$url} , skipping" );
201  continue;
202  }
203 
204  if ( isset( $conf['host'] ) && isset( $conf['port'] ) ) {
205  // Normalize single entries
206  $conf = [ $conf ];
207  }
208  foreach ( $conf as $subconf ) {
209  if ( !isset( $subconf['host'] ) || !isset( $subconf['port'] ) ) {
210  throw new MWException( "Invalid HTCP rule for URL $url\n" );
211  }
212  }
213 
214  // Construct a minimal HTCP request diagram
215  // as per RFC 2756
216  // Opcode 'CLR', no response desired, no auth
217  $htcpTransID = current( $ids );
218  next( $ids );
219 
220  $htcpSpecifier = pack( 'na4na*na8n',
221  4, 'HEAD', strlen( $url ), $url,
222  8, 'HTTP/1.0', 0 );
223 
224  $htcpDataLen = 8 + 2 + strlen( $htcpSpecifier );
225  $htcpLen = 4 + $htcpDataLen + 2;
226 
227  // Note! Squid gets the bit order of the first
228  // word wrong, wrt the RFC. Apparently no other
229  // implementation exists, so adapt to Squid
230  $htcpPacket = pack( 'nxxnCxNxxa*n',
231  $htcpLen, $htcpDataLen, $htcpOpCLR,
232  $htcpTransID, $htcpSpecifier, 2 );
233 
234  wfDebugLog( 'squid', __METHOD__ .
235  "Purging URL $url via HTCP" );
236  foreach ( $conf as $subconf ) {
237  socket_sendto( $conn, $htcpPacket, $htcpLen, 0,
238  $subconf['host'], $subconf['port'] );
239  }
240  }
241  }
242 
257  public static function expand( $url ) {
258  return wfExpandUrl( $url, PROTO_INTERNAL );
259  }
260 
267  private static function getRuleForURL( $url, $rules ) {
268  foreach ( $rules as $regex => $routing ) {
269  if ( $regex === '' || preg_match( $regex, $url ) ) {
270  return $routing;
271  }
272  }
273 
274  return false;
275  }
276 }
The wiki should then use memcached to cache various data. To use multiple servers, just add more items to the array. To increase the weight of a server, make its entry a nested array, e.g. array( "192.168.0.1:11211", 2 ).
const QUICK_VOLATILE
static HTCPPurge(array $urlArr)
Send Hyper Text Caching Protocol (HTCP) CLR requests.
$wgSquidServers
List of proxy servers to purge on changes; default port is 80.
$batch execute()
static purge(array $urlArr)
Purges a list of CDN nodes defined in $wgSquidServers.
Apache License, Version 2.0, January 2004 — TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION. Definitions: "License" shall mean the terms and conditions for use.
wfExpandUrl( $url, $defaultProto=PROTO_CURRENT)
Expand a potentially local URL to a fully-qualified URL.
static expand( $url)
Expand local URLs to fully-qualified URLs using the internal protocol and host defined in $wgInternal...
Handles purging appropriate CDN URLs given a title (or titles)
Interface that deferrable updates can implement to signal that updates can be combined.
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency MediaWikiServices
Definition: injection.txt:23
In both all secondary updates will be triggered handle like object that caches derived data representing a and can trigger updates of cached copies of that e g in the links the and the CDN layer DerivedPageDataUpdater is used by PageUpdater when creating new but can also be used independently when performing meta data updates during or when puring a page It s a stepping stone on the way to a more complete refactoring of WikiPage we want to define interfaces for the different use cases of particularly providing access to post PST content and ParserOutput to callbacks during revision which currently use and allowing updates to be triggered on purge
Definition: pageupdater.txt:78
static getRuleForURL( $url, $rules)
Find the HTCP routing rule to use for a given URL.
Class representing a list of titles The execute() method checks them all for existence and adds them ...
Definition: LinkBatch.php:34
static newFromTitles( $titles, $urlArr=[])
Create an update object from an array of Title objects, or a TitleArray object.
const PROTO_INTERNAL
Definition: Defines.php:224
string [] $urls
Collection of URLs to purge.
An HTTP 1.0 client built for the purposes of purging Squid and Varnish.
doUpdate()
Purges the list of URLs passed to the constructor.
merge(MergeableUpdate $update)
Merge this update with $update.
__construct(array $urlArr)
namespace and then decline to actually register it file or subcat img or subcat $title
Definition: hooks.txt:925
$wgHTCPMulticastTTL
HTCP multicast TTL.
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
static newSequentialPerNodeIDs( $bucket, $bits, $count, $flags=0)
Return IDs that are sequential only for this node and bucket.
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
linkcache txt The LinkCache class maintains a list of article titles and the information about whether or not the article exists in the database This is used to mark up links when displaying a page If the same link appears more than once on any page then it only has to be looked up once In most cases link lookups are done in batches with the LinkBatch class or the equivalent in so the link cache is mostly useful for short snippets of parsed and for links in the navigation areas of the skin The link cache was formerly used to track links used in a document for the purposes of updating the link tables This application is now deprecated To create a you can use the following $titles
Definition: linkcache.txt:17
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not...
static singleton( $domain=false)
$wgHTCPRouting
Routing configuration for HTCP multicast purging.
Job to purge a set of URLs from CDN.
Definition: CdnPurgeJob.php:30
$wgCdnReboundPurgeDelay
If set, any SquidPurge call on a URL or URLs will send a second purge no less than this many seconds ...