Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
17.93% |
26 / 145 |
|
40.00% |
4 / 10 |
CRAP | |
0.00% |
0 / 1 |
| CdnCacheUpdate | |
18.06% |
26 / 144 |
|
40.00% |
4 / 10 |
920.40 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
3 | |||
| merge | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
| doUpdate | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
20 | |||
| purge | |
0.00% |
0 / 24 |
|
0.00% |
0 / 1 |
20 | |||
| getUrls | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| resolveReboundDelayByUrl | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
5 | |||
| HTCPPurge | |
0.00% |
0 / 52 |
|
0.00% |
0 / 1 |
182 | |||
| naivePurge | |
0.00% |
0 / 23 |
|
0.00% |
0 / 1 |
20 | |||
| expand | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| getRuleForURL | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
20 | |||
| 1 | <?php |
| 2 | /** |
| 3 | * @license GPL-2.0-or-later |
| 4 | * @file |
| 5 | */ |
| 6 | |
| 7 | namespace MediaWiki\Deferred; |
| 8 | |
| 9 | use Exception; |
| 10 | use InvalidArgumentException; |
| 11 | use MediaWiki\JobQueue\Jobs\CdnPurgeJob; |
| 12 | use MediaWiki\MainConfigNames; |
| 13 | use MediaWiki\MediaWikiServices; |
| 14 | use MediaWiki\Page\PageReference; |
| 15 | use RuntimeException; |
| 16 | use Wikimedia\Assert\Assert; |
| 17 | use Wikimedia\IPUtils; |
| 18 | |
| 19 | /** |
| 20 | * Handles purging the appropriate CDN objects given a list of URLs or PageReference instances
| 21 | * @ingroup Cache |
| 22 | */ |
| 23 | class CdnCacheUpdate implements DeferrableUpdate, MergeableUpdate { |
| 24 | /** @var array[] List of (URL, rebound purge delay in seconds) tuples, in the order they were added */
| 25 | private $urlTuples = [];
| 26 | /** @var array[] List of (PageReference, rebound purge delay in seconds) tuples, in the order they were added */
| 27 | private $pageTuples = [];
| 28 |
| 29 | /** @var int Upper bound, in seconds, applied to the 'reboundDelay' constructor option */
| 30 | private const MAX_REBOUND_DELAY = 300;
| 31 | |
/**
 * Sort the purge targets into URL and page tuples, each tagged with the rebound delay.
 *
 * @param string[]|PageReference[] $targets Collection of URLs/titles to be purged from CDN
 * @param array $options Options map. Supports:
 *   - reboundDelay: how many seconds after the first purge to send a rebound purge.
 *      No rebound purge will be sent if this is not positive. The value is capped
 *      at self::MAX_REBOUND_DELAY. [Default: 0]
 */
public function __construct( array $targets, array $options = [] ) {
	// Clamp the requested delay into the range [0, MAX_REBOUND_DELAY]
	$requestedDelay = (int)max( $options['reboundDelay'] ?? 0, 0 );
	$reboundDelay = min( $requestedDelay, self::MAX_REBOUND_DELAY );

	foreach ( $targets as $target ) {
		$tuple = [ $target, $reboundDelay ];
		if ( $target instanceof PageReference ) {
			$this->pageTuples[] = $tuple;
			continue;
		}
		// Anything that is not a PageReference is kept as a URL
		$this->urlTuples[] = $tuple;
	}
}
| 52 | |
/**
 * Append the URL and page tuples of another update to the ones held by this update.
 *
 * @param MergeableUpdate $update Update to absorb; checked against this class
 *   via Assert::parameterType()
 */
public function merge( MergeableUpdate $update ) {
	/** @var self $update */
	Assert::parameterType( __CLASS__, $update, '$update' );
	'@phan-var self $update';

	// Both tuple lists are plain lists, so unpacking appends and reindexes like array_merge()
	$this->urlTuples = [ ...$this->urlTuples, ...$update->urlTuples ];
	$this->pageTuples = [ ...$this->pageTuples, ...$update->pageTuples ];
}
| 61 | |
/**
 * Purge every resolved URL right away, then queue delayed CdnPurgeJob instances
 * for the URLs that asked for a rebound purge.
 */
public function doUpdate() {
	// Resolve the final list of URLs just before purging them (T240083)
	$delayByUrl = $this->resolveReboundDelayByUrl();

	// Send the immediate purges to CDN
	self::purge( array_keys( $delayByUrl ) );
	$purgedAt = time();

	// Bucket the URLs that need a rebound purge by their delay in seconds
	$urlsByDelay = [];
	$withRebound = array_filter(
		$delayByUrl,
		static fn ( $delay ) => $delay > 0
	);
	foreach ( $withRebound as $url => $delay ) {
		$urlsByDelay[$delay][] = $url;
	}

	// Build one delayed purge job per distinct delay (usually only one job)
	$jobs = array_map(
		static function ( $delay, $urls ) use ( $purgedAt ) {
			return new CdnPurgeJob( [
				'urls' => $urls,
				'jobReleaseTimestamp' => $purgedAt + $delay
			] );
		},
		array_keys( $urlsByDelay ),
		$urlsByDelay
	);

	MediaWikiServices::getInstance()->getJobQueueGroup()->lazyPush( $jobs );
}
| 87 | |
/**
 * Purges a list of URLs from the CDN.
 *
 * The de-duplicated URLs are always relayed on the 'cdn-url-purges' event channel.
 * They are additionally sent as HTCP packets when $wgHTCPRouting is set, and as
 * direct HTTP PURGE requests through the servers in $wgCdnServers when that is set.
 * $urls should contain the full URLs to purge as values
 * (example: $urls[] = 'http://my.host/something')
 *
 * @param string[] $urls List of full URLs to purge
 */
public static function purge( array $urls ) {
	// Nothing to purge: bail out before touching the service container
	if ( !$urls ) {
		return;
	}

	$services = MediaWikiServices::getInstance();
	$config = $services->getMainConfig();
	$cdnServers = $config->get( MainConfigNames::CdnServers );
	$htcpRouting = $config->get( MainConfigNames::HTCPRouting );

	// Remove duplicate URLs from list. array_unique() keeps the original keys, so
	// reindex to hand a gap-free list to the relayer and the purge helpers.
	$urls = array_values( array_unique( $urls ) );

	wfDebugLog( 'squid', __METHOD__ . ': ' . implode( ' ', $urls ) );

	// Reliably broadcast the purge to all edge nodes; every event shares one timestamp
	$ts = microtime( true );
	$events = array_map(
		static function ( $url ) use ( $ts ) {
			return [
				'url' => $url,
				'timestamp' => $ts,
			];
		},
		$urls
	);
	$services->getEventRelayerGroup()
		->getRelayer( 'cdn-url-purges' )
		->notifyMulti( 'cdn-url-purges', $events );

	// Send lossy UDP broadcasting if enabled
	if ( $htcpRouting ) {
		self::HTCPPurge( $urls );
	}

	// Do direct server purges if enabled (this does not scale very well)
	if ( $cdnServers ) {
		self::naivePurge( $urls );
	}
}
| 133 | |
/**
 * Resolve all targets of this update and return the distinct URLs they map to.
 *
 * @return string[] List of URLs
 */
public function getUrls() {
	$delayByUrl = $this->resolveReboundDelayByUrl();

	return array_keys( $delayByUrl );
}
| 140 | |
/**
 * Turn the page and URL tuples into a single URL-keyed map of rebound delays.
 *
 * @return int[] Map of (URL => rebound purge delay)
 */
private function resolveReboundDelayByUrl() {
	$services = MediaWikiServices::getInstance();

	// Avoid multiple queries for HTMLCacheUpdater::getUrls() call
	$linkBatch = $services->getLinkBatchFactory()->newLinkBatch()
		->setCaller( __METHOD__ );
	/** @var PageReference $pageRef */
	foreach ( array_column( $this->pageTuples, 0 ) as $pageRef ) {
		$linkBatch->addObj( $pageRef );
	}
	$linkBatch->execute();

	$delayByUrl = [];
	// Use the highest rebound for duplicate URLs in order to handle the most lag
	$keepHighest = static function ( $url, $delay ) use ( &$delayByUrl ) {
		$delayByUrl[$url] = max( $delayByUrl[$url] ?? 0, $delay );
	};

	// Resolve the titles into CDN URLs
	$htmlCacheUpdater = $services->getHtmlCacheUpdater();
	foreach ( $this->pageTuples as [ $pageRef, $pageDelay ] ) {
		foreach ( $htmlCacheUpdater->getUrls( $pageRef ) as $pageUrl ) {
			$keepHighest( $pageUrl, $pageDelay );
		}
	}

	// Then fold in the URLs that were passed in directly
	foreach ( $this->urlTuples as [ $rawUrl, $urlDelay ] ) {
		$keepHighest( $rawUrl, $urlDelay );
	}

	return $delayByUrl;
}
| 174 | |
/**
 * Send Hyper Text Caching Protocol (HTCP) CLR requests
 *
 * For every URL, one UDP packet is sent to each host/port pair of the first
 * $wgHTCPRouting rule that matches it. URLs without a matching rule are logged
 * and skipped. Delivery is not checked: the result of socket_sendto() is ignored.
 *
 * @param string[] $urls Collection of URLs to purge
 * @throws InvalidArgumentException If an entry of $urls is not a string
 * @throws RuntimeException If the matching routing rule lacks a host or a port
 */
private static function HTCPPurge( array $urls ) {
	$services = MediaWikiServices::getInstance();
	$config = $services->getMainConfig();
	$htcpRouting = $config->get( MainConfigNames::HTCPRouting );
	$htcpMulticastTTL = $config->get( MainConfigNames::HTCPMulticastTTL );
	// HTCP CLR operation
	$htcpOpCLR = 4;

	// @todo FIXME: PHP doesn't support these socket constants (include/linux/in.h)
	// Each constant is checked on its own, so a partially defined set is neither
	// redefined nor left incomplete.
	$fallbackConstants = [
		'IPPROTO_IP' => 0,
		'IP_MULTICAST_LOOP' => 34,
		'IP_MULTICAST_TTL' => 33,
	];
	foreach ( $fallbackConstants as $constName => $constValue ) {
		if ( !defined( $constName ) ) {
			define( $constName, $constValue );
		}
	}

	// pfsockopen doesn't work because we need set_sock_opt
	$conn = socket_create( AF_INET, SOCK_DGRAM, SOL_UDP );
	if ( !$conn ) {
		$errstr = socket_strerror( socket_last_error() );
		wfDebugLog( 'squid', __METHOD__ .
			": Error opening UDP socket: $errstr" );

		return;
	}

	try {
		// Set socket options
		socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_LOOP, 0 );
		if ( $htcpMulticastTTL != 1 ) {
			// Set multicast time to live (hop count) option on socket
			socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_TTL,
				$htcpMulticastTTL );
		}

		// Get sequential trx IDs for packet loss counting
		$ids = $services->getGlobalIdGenerator()->newSequentialPerNodeIDs(
			'squidhtcppurge',
			32,
			count( $urls )
		);

		foreach ( $urls as $url ) {
			if ( !is_string( $url ) ) {
				throw new InvalidArgumentException( 'Bad purge URL' );
			}
			$url = self::expand( $url );
			$conf = self::getRuleForURL( $url, $htcpRouting );
			if ( !$conf ) {
				wfDebugLog( 'squid', __METHOD__ .
					": No HTCP rule configured for URL {$url} , skipping" );
				continue;
			}

			if ( isset( $conf['host'] ) && isset( $conf['port'] ) ) {
				// Normalize single entries
				$conf = [ $conf ];
			}
			// Validate every destination before anything is sent for this URL
			foreach ( $conf as $subconf ) {
				if ( !isset( $subconf['host'] ) || !isset( $subconf['port'] ) ) {
					throw new RuntimeException( "Invalid HTCP rule for URL $url\n" );
				}
			}

			// Construct a minimal HTCP request diagram
			// as per RFC 2756
			// Opcode 'CLR', no response desired, no auth
			// A transaction ID is only consumed for URLs that are actually sent
			$htcpTransID = current( $ids );
			next( $ids );

			$htcpSpecifier = pack( 'na4na*na8n',
				4, 'HEAD', strlen( $url ), $url,
				8, 'HTTP/1.0', 0 );

			$htcpDataLen = 8 + 2 + strlen( $htcpSpecifier );
			$htcpLen = 4 + $htcpDataLen + 2;

			// Note! Squid gets the bit order of the first
			// word wrong, wrt the RFC. Apparently no other
			// implementation exists, so adapt to Squid
			$htcpPacket = pack( 'nxxnCxNxxa*n',
				$htcpLen, $htcpDataLen, $htcpOpCLR,
				$htcpTransID, $htcpSpecifier, 2 );

			wfDebugLog( 'squid', __METHOD__ .
				": Purging URL $url via HTCP" );
			foreach ( $conf as $subconf ) {
				socket_sendto( $conn, $htcpPacket, $htcpLen, 0,
					$subconf['host'], $subconf['port'] );
			}
		}
	} finally {
		// Release the UDP socket on every exit path, including the exceptions above
		socket_close( $conn );
	}
}
| 269 | |
/**
 * Send HTTP PURGE requests for each of the URLs to all of the cache servers
 *
 * Every URL is expanded with self::expand() and then requested once per entry
 * of $wgCdnServers, using that entry as the proxy. URLs that cannot be parsed
 * into a host are logged and skipped instead of being sent with an empty Host header.
 *
 * @param string[] $urls
 * @throws Exception
 */
private static function naivePurge( array $urls ) {
	$services = MediaWikiServices::getInstance();
	$cdnServers = $services->getMainConfig()->get( MainConfigNames::CdnServers );
	// Same UrlUtils service that self::expand() uses
	$urlUtils = $services->getUrlUtils();

	$reqs = [];
	foreach ( $urls as $url ) {
		$url = self::expand( $url );
		$urlInfo = $urlUtils->parse( $url );
		if ( !isset( $urlInfo['host'] ) ) {
			// parse() yielded no usable host, so no Host header can be built
			wfDebugLog( 'squid', __METHOD__ . ": Unable to parse URL $url, skipping" );
			continue;
		}
		// Only append the port to the Host header when the URL carries one
		$urlHost = strlen( $urlInfo['port'] ?? '' )
			? IPUtils::combineHostAndPort( $urlInfo['host'], (int)$urlInfo['port'] )
			: $urlInfo['host'];
		$baseReq = [
			'method' => 'PURGE',
			'url' => $url,
			'headers' => [
				'Host' => $urlHost,
				'Connection' => 'Keep-Alive',
				'Proxy-Connection' => 'Keep-Alive',
				'User-Agent' => 'MediaWiki/' . MW_VERSION . ' ' . __CLASS__
			]
		];
		foreach ( $cdnServers as $server ) {
			$reqs[] = ( $baseReq + [ 'proxy' => $server ] );
		}
	}

	if ( !$reqs ) {
		// Every URL was skipped; no need to build an HTTP client
		return;
	}

	$http = $services->getHttpRequestFactory()
		->createMultiClient( [ 'maxConnsPerHost' => 8, 'usePipelining' => true ] );
	$http->runMulti( $reqs );
}
| 305 | |
/**
 * Turn a local URL into a fully-qualified one via UrlUtils::expand() with
 * PROTO_INTERNAL, i.e. using the internal protocol and host defined in
 * $wgInternalServer. Input that's already fully-qualified will be passed
 * through unchanged.
 *
 * Purge URLs may be either local to the main wiki or include a non-native
 * host, such as images hosted on a second internal server; this covers both.
 *
 * Client functions should not need to call this.
 *
 * @param string $url
 * @return string The expanded URL; the result of UrlUtils::expand() is cast to string
 */
private static function expand( $url ) {
	$urlUtils = MediaWikiServices::getInstance()->getUrlUtils();
	$expanded = $urlUtils->expand( $url, PROTO_INTERNAL );

	return (string)$expanded;
}
| 323 | |
/**
 * Pick the first HTCP routing rule whose pattern applies to a URL.
 *
 * Rules are tried in array order. An empty-string key matches every URL;
 * any other key is used as a preg_match() pattern.
 *
 * @param string $url URL to match
 * @param array $rules Array of rules, see $wgHTCPRouting for format and behavior
 * @return mixed Element of $rules that matched, or false if nothing matched
 */
private static function getRuleForURL( $url, $rules ) {
	foreach ( $rules as $pattern => $rule ) {
		// The empty pattern is a catch-all and never reaches preg_match()
		if ( $pattern === '' ) {
			return $rule;
		}
		if ( preg_match( $pattern, $url ) ) {
			return $rule;
		}
	}

	return false;
}
| 339 | } |
| 340 | |
| 341 | /** @deprecated class alias since 1.42; use the namespaced MediaWiki\Deferred\CdnCacheUpdate instead */
| 342 | class_alias( CdnCacheUpdate::class, 'CdnCacheUpdate' );