Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 284 |
|
0.00% |
0 / 16 |
CRAP | |
0.00% |
0 / 1 |
| RSSParser | |
0.00% |
0 / 284 |
|
0.00% |
0 / 16 |
6320 | |
0.00% |
0 / 1 |
| explodeOnSpaces | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
| __construct | |
0.00% |
0 / 40 |
|
0.00% |
0 / 1 |
240 | |||
| insertStripItem | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
| fetch | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
12 | |||
| loadFromCache | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
30 | |||
| storeInCache | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
6 | |||
| fetchRemote | |
0.00% |
0 / 35 |
|
0.00% |
0 / 1 |
56 | |||
| sandboxParse | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
2 | |||
| renderFeed | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
90 | |||
| renderItem | |
0.00% |
0 / 25 |
|
0.00% |
0 / 1 |
56 | |||
| sanitizeUrl | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
20 | |||
| escapeTemplateParameter | |
0.00% |
0 / 45 |
|
0.00% |
0 / 1 |
30 | |||
| responseToXML | |
0.00% |
0 / 24 |
|
0.00% |
0 / 1 |
56 | |||
| canDisplay | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
30 | |||
| filter | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
20 | |||
| highlightTerms | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
6 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace MediaWiki\Extension\RSS; |
| 4 | |
| 5 | use DOMDocument; |
| 6 | use MediaWiki\Content\TextContent; |
| 7 | use MediaWiki\MediaWikiServices; |
| 8 | use MediaWiki\Parser\Parser; |
| 9 | use MediaWiki\Parser\ParserFactory; |
| 10 | use MediaWiki\Parser\PPFrame; |
| 11 | use MediaWiki\Parser\Sanitizer; |
| 12 | use MediaWiki\Status\Status; |
| 13 | use MediaWiki\Title\Title; |
| 14 | use MWHttpRequest; |
| 15 | use Wikimedia\ObjectCache\WANObjectCache; |
| 16 | |
| 17 | class RSSParser { |
| 18 | /** @var int */ |
| 19 | protected $maxheads = 32; |
| 20 | /** @var string */ |
| 21 | protected $date = "Y-m-d H:i:s"; |
| 22 | /** @var int */ |
| 23 | protected $ItemMaxLength = 200; |
| 24 | /** @var bool */ |
| 25 | protected $reversed = false; |
| 26 | /** @var string[] */ |
| 27 | protected $highlight = []; |
| 28 | /** @var string[] */ |
| 29 | protected $filter = []; |
| 30 | /** @var string[] */ |
| 31 | protected $filterOut = []; |
| 32 | /** @var string|null */ |
| 33 | protected $itemTemplate; |
| 34 | /** @var string|null */ |
| 35 | protected $url; |
| 36 | /** @var string */ |
| 37 | protected $etag; |
| 38 | /** @var int */ |
| 39 | protected $lastModified; |
| 40 | /** @var DOMDocument */ |
| 41 | protected $xml; |
| 42 | /** @var string[] */ |
| 43 | protected $displayFields = [ 'author', 'title', 'encodedContent', 'description' ]; |
| 44 | /** @var string[] */ |
| 45 | protected $stripItems; |
| 46 | /** @var string */ |
| 47 | protected $markerString; |
| 48 | |
| 49 | /** |
| 50 | * @var WANObjectCache |
| 51 | */ |
| 52 | private $cache; |
| 53 | |
| 54 | /** |
| 55 | * @var ParserFactory |
| 56 | */ |
| 57 | private $parserFactory; |
| 58 | |
| 59 | /** |
| 60 | * @var RSSData|null |
| 61 | */ |
| 62 | public $rss; |
| 63 | |
| 64 | /** |
| 65 | * @var MWHttpRequest |
| 66 | */ |
| 67 | public $client; |
| 68 | |
/**
 * Convenience function that takes a space-separated string and returns an array of words.
 *
 * Leading, trailing and repeated spaces never produce empty entries. An empty
 * word would later be compiled into a regex alternative that matches every
 * string (see filter() and highlightTerms()), so e.g. filterout="" would hide
 * every item of the feed.
 *
 * @param string $str list of words
 * @return string[] words found (an empty array when there are none)
 */
private static function explodeOnSpaces( $str ) {
	$found = preg_split( '# +#', $str, -1, PREG_SPLIT_NO_EMPTY );
	// preg_split() returns false on a PCRE error
	return is_array( $found ) ? $found : [];
}
| 78 | |
/**
 * Take a bit of WikiText that looks like
 *   <rss max=5>http://example.com/</rss>
 * and return an object that can produce rendered output.
 *
 * Keys read from $args: max, reverse, date, highlight, filter,
 * item-max-length, filterout, template, templatename.
 *
 * @param string $url
 * @param array $args
 */
public function __construct( $url, $args ) {
	global $wgRSSDateDefaultFormat, $wgRSSItemMaxLength;

	$this->url = $url;

	// Quote marks to prevent XSS (T307028)
	$this->markerString = "'\"" . wfRandomString( 32 );
	$this->stripItems = [];
	$this->cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
	$this->parserFactory = MediaWikiServices::getInstance()->getParserFactory();

	# Get max number of headlines from argument-array
	if ( isset( $args['max'] ) ) {
		$this->maxheads = $args['max'];
	}

	# Get reverse flag from argument array
	if ( isset( $args['reverse'] ) ) {
		$this->reversed = true;
	}

	# Get date format from argument array
	# or use a default value
	# @todo FIXME: not used yet
	if ( isset( $args['date'] ) ) {
		$this->date = $args['date'];
	} elseif ( $wgRSSDateDefaultFormat !== null ) {
		$this->date = $wgRSSDateDefaultFormat;
	}

	# Get highlight terms from argument array
	if ( isset( $args['highlight'] ) ) {
		# Terms are stored as typed; highlightTerms() matches them case-insensitively.
		$this->highlight = self::explodeOnSpaces( $args['highlight'] );
	}

	# Get filter terms from argument array
	if ( isset( $args['filter'] ) ) {
		$this->filter = self::explodeOnSpaces( $args['filter'] );
	}

	# Get a maximal length for item texts
	if ( isset( $args['item-max-length'] ) ) {
		$this->ItemMaxLength = $args['item-max-length'];
	} elseif ( is_numeric( $wgRSSItemMaxLength ) ) {
		$this->ItemMaxLength = $wgRSSItemMaxLength;
	}

	if ( isset( $args['filterout'] ) ) {
		$this->filterOut = self::explodeOnSpaces( $args['filterout'] );
	}

	// 'template' is the pagename of a user's itemTemplate including
	// a further pagename for the feedTemplate
	// In that way everything is handled via these two pages
	// and no default pages or templates are used.

	// 'templatename' is an optional pagename of a user's feedTemplate
	// In that way it substitutes $1 (default: RSSPost) in MediaWiki:Rss-item

	if ( isset( $args['template'] ) ) {
		$itemTemplateTitleObject = Title::newFromText( $args['template'], NS_TEMPLATE );

		// Title::newFromText() returns null for an invalid title; treat that
		// like a missing page and leave $this->itemTemplate unset.
		if ( $itemTemplateTitleObject !== null && $itemTemplateTitleObject->exists() ) {
			$itemTemplatePageObject = MediaWikiServices::getInstance()
				->getWikiPageFactory()
				->newFromTitle( $itemTemplateTitleObject );
			$itemTemplateContentObject = $itemTemplatePageObject->getContent();

			if ( $itemTemplateContentObject instanceof TextContent ) {
				$this->itemTemplate = $itemTemplateContentObject->getText();
			}
		}
	} else {
		if ( isset( $args['templatename'] ) ) {
			$feedTemplatePagename = $args['templatename'];
		} else {

			// compatibility patch for rss extension

			$feedTemplatePagename = 'RSSPost';
			$feedTemplateTitleObject = Title::newFromText( $feedTemplatePagename, NS_TEMPLATE );

			if ( !$feedTemplateTitleObject->exists() ) {
				$feedTemplatePagename = Title::makeTitleSafe( NS_MEDIAWIKI, 'Rss-feed' );
			}
		}

		// MediaWiki:Rss-item = {{ feedTemplatePagename | title = {{{title}}} | ... }}

		// if the attribute parameter templatename= is not present
		// then it defaults to
		// {{ Template:RSSPost | title = {{{title}}} | ... }}
		// - if Template:RSSPost exists from pre-1.9 versions
		// {{ MediaWiki:Rss-feed | title = {{{title}}} | ... }}
		// - otherwise

		$this->itemTemplate = wfMessage( 'rss-item', $feedTemplatePagename )->plain();
	}
}
| 186 | |
/**
 * Stash a fragment and return the placeholder that stands in for it.
 *
 * The placeholder is the fragment's position in $this->stripItems wrapped in
 * the per-instance marker string; sandboxParse() swaps it back afterwards.
 *
 * @param string $item
 * @return string
 */
private function insertStripItem( string $item ): string {
	$this->stripItems[] = $item;
	$slot = count( $this->stripItems ) - 1;
	return implode( '-', [ $this->markerString, $slot, $this->markerString ] );
}
| 192 | |
/**
 * Return RSS object for the given URL, maintaining caching.
 *
 * NOTES ON RETRIEVING REMOTE FILES:
 * No attempt will be made to fetch remote files if there is something
 * fresh in cache.
 *
 * NOTES ON FAILED REQUESTS:
 * If fetching or parsing the remote feed fails, the stale cached version
 * is used and a good Status is returned, if such a version exists.
 * Otherwise the failure Status is returned.
 *
 * @return Status object
 */
public function fetch() {
	if ( $this->url === null ) {
		return Status::newFatal( 'rss-fetch-nourl' );
	}

	// Flow
	// 1. check cache
	// 2. if there is a hit, make sure its fresh
	// 3. if cached obj fails freshness check, fetch remote
	// 4. if remote fails, return stale object, or error
	$key = $this->cache->makeKey( 'rss-fetch', $this->url );
	$cachedFeed = $this->loadFromCache( $key );
	if ( $cachedFeed !== false ) {
		wfDebugLog( 'RSS', 'Outputting cached feed for ' . $this->url );
		return Status::newGood();
	}
	wfDebugLog( 'RSS', 'Cache Failed, fetching ' . $this->url . ' from remote.' );

	// loadFromCache() leaves a parseable-but-stale feed in $this->rss even
	// when it reports a miss. Remember it, because a failed parse of the
	// remote response overwrites $this->rss.
	$staleFeed = ( $this->rss !== null && !$this->rss->error ) ? $this->rss : null;

	$status = $this->fetchRemote( $key );
	if ( !$status->isGood() && $staleFeed !== null ) {
		wfDebugLog( 'RSS', 'Remote fetch failed, using stale cached feed for ' . $this->url );
		$this->rss = $staleFeed;
		return Status::newGood();
	}

	return $status;
}
| 225 | |
/**
 * Try to populate this object from the cached copy of the feed.
 *
 * A structurally valid entry is always copied into $this->rss, $this->etag
 * and $this->lastModified, even when it is then reported as too old.
 *
 * @param string $key lookup key to associate with this item
 * @return bool true when a usable entry was loaded and passed the age check
 */
protected function loadFromCache( $key ) {
	global $wgRSSCacheCompare;

	$entry = $this->cache->get( $key );
	if ( !is_array( $entry ) ) {
		return false;
	}

	[ $cachedEtag, $cachedLastModified, $cachedFeed ] = $entry;
	if ( !isset( $cachedFeed->items ) ) {
		return false;
	}

	wfDebugLog( 'RSS', "Got '$key' from cache" );

	# The entry looks useful, so keep it around regardless of its age.
	$this->rss = $cachedFeed;
	$this->etag = $cachedEtag;
	$this->lastModified = $cachedLastModified;

	// The age check only applies when $wgRSSCacheCompare is > 0
	$tooOld = $wgRSSCacheCompare && time() - $wgRSSCacheCompare > $cachedLastModified;
	if ( !$tooOld ) {
		return true;
	}

	wfDebugLog( 'RSS', 'Content is old enough that we need to check cached content' );
	return false;
}
| 259 | |
/**
 * Write the current etag, lastModified and RSS object to the cache as one entry.
 *
 * Nothing is written when no feed has been loaded yet.
 *
 * @param string $key lookup key to associate with this item
 * @return bool true when an entry was written, false when $this->rss is null
 */
protected function storeInCache( $key ) {
	global $wgRSSCacheAge;

	$hasFeed = $this->rss !== null;
	if ( $hasFeed ) {
		$entry = [ $this->etag, $this->lastModified, $this->rss ];
		$this->cache->set( $key, $entry, $wgRSSCacheAge );

		wfDebugLog( 'RSS', "Stored '$key' as in cache" );
	}

	return $hasFeed;
}
| 282 | |
/**
 * Retrieve a feed over HTTP.
 *
 * Sends If-None-Match / If-Modified-Since when an etag or last-modified
 * time is known, stores the request object in $this->client and, on
 * success, hands over to responseToXML().
 *
 * @param string $key Cache key
 * @param array $headers headers to send along with the request
 * @return Status object; the request's own Status when it is not good
 */
protected function fetchRemote( $key, array $headers = [] ) {
	global $wgRSSFetchTimeout, $wgRSSUserAgent, $wgRSSProxy,
		$wgRSSUrlNumberOfAllowedRedirects;

	if ( $this->etag ) {
		wfDebugLog( 'RSS', 'Used etag: ' . $this->etag );
		$headers['If-None-Match'] = $this->etag;
	}
	if ( $this->lastModified ) {
		$lastModified = gmdate( 'r', $this->lastModified );
		wfDebugLog( 'RSS', "Used last modified: $lastModified" );
		$headers['If-Modified-Since'] = $lastModified;
	}

	/**
	 * 'noProxy' can conditionally be set as shown in the commented
	 * example below; in HttpRequest 'noProxy' takes precedence over
	 * any value of 'proxy' and disables the use of a proxy.
	 *
	 * This is useful if you run the wiki in an intranet and need to
	 * access external feed urls through a proxy but internal feed
	 * urls must be accessed without a proxy.
	 *
	 * The general handling of such cases will be subject of a
	 * forthcoming version.
	 */

	$url = $this->url;
	$noProxy = $wgRSSProxy === null;

	// Example for disabling proxy use for certain urls
	// $noProxy = preg_match( '!\.internal\.example\.com$!i', parse_url( $url, PHP_URL_HOST ) );

	// is_numeric( null ) is false, so an unset limit falls back to 0 as well
	$maxRedirects = is_numeric( $wgRSSUrlNumberOfAllowedRedirects )
		? $wgRSSUrlNumberOfAllowedRedirects
		: 0;

	// we set followRedirects intentionally to true to see error messages
	// in cases where the maximum number of redirects is reached
	$client = MediaWikiServices::getInstance()->getHttpRequestFactory()->create( $url,
		[
			'timeout' => $wgRSSFetchTimeout,
			'followRedirects' => true,
			'maxRedirects' => $maxRedirects,
			'proxy' => $wgRSSProxy,
			'noProxy' => $noProxy,
			'userAgent' => $wgRSSUserAgent,
		],
		__METHOD__
	);

	foreach ( $headers as $header => $value ) {
		$client->setHeader( $header, $value );
	}

	$fetch = $client->execute();
	$this->client = $client;

	if ( !$fetch->isGood() ) {
		// wfDebugLog() takes the channel first; wfDebug() would have logged
		// the text 'RSS' and treated the real message as the destination.
		wfDebugLog( 'RSS', 'Request Failed: ' . Status::wrap( $fetch )->getWikitext() );
		return $fetch;
	}

	return $this->responseToXML( $key );
}
| 357 | |
/**
 * Parse wikitext with a separate parser instance, then put the stashed
 * fragments back in place of their placeholders.
 *
 * @see https://bugzilla.wikimedia.org/show_bug.cgi?id=34763
 * @param string $wikiText
 * @param Parser $origParser parser whose title and options are reused
 * @return string
 */
protected function sandboxParse( $wikiText, $origParser ) {
	$sandbox = $this->parserFactory->getInstance();
	$parserOutput = $sandbox->parse(
		$wikiText,
		$origParser->getTitle(),
		$origParser->getOptions()
	);

	$marker = $this->markerString;
	$stash = $this->stripItems;
	$restore = static function ( array $found ) use ( $stash ) {
		// The captured group is the position handed out by insertStripItem()
		return $stash[(int)$found[1]];
	};

	return preg_replace_callback(
		"/{$marker}-(\d+)-{$marker}/",
		$restore,
		$parserOutput->getRawText()
	);
}
| 382 | |
/**
 * Render the entire feed so that each item is passed to the
 * template which the MediaWiki then displays.
 *
 * At most $this->maxheads displayable items are rendered (no limit when
 * that value is not positive). The item list stored in $this->rss is not
 * modified, so rendering twice gives the same order both times.
 *
 * @param Parser $parser
 * @param PPFrame $frame The frame param to pass to recursiveTagParse()
 * @return string rendered feed, or '' when there is no item template
 */
public function renderFeed( $parser, $frame ) {
	$renderedFeed = '';

	if ( $this->itemTemplate !== null && $parser && $frame ) {
		// ?? also covers the case where no feed has been loaded yet
		$items = $this->rss->items ?? [];
		if ( $this->reversed ) {
			// Reverse a local copy only
			$items = array_reverse( $items );
		}

		$wikitextFeed = '';
		$headcnt = 0;
		foreach ( $items as $item ) {
			if ( $this->maxheads > 0 && $headcnt >= $this->maxheads ) {
				// Limit reached: nothing further can be added
				break;
			}

			if ( $this->canDisplay( $item ) ) {
				$wikitextFeed .= $this->renderItem( $item, $parser ) . "\n";
				$headcnt++;
			}
		}

		$renderedFeed = $this->sandboxParse( $wikitextFeed, $parser );
	}

	if ( $parser ) {
		$parser->addTrackingCategory( 'rss-tracking-category' );
	}

	return $renderedFeed;
}
| 420 | |
/**
 * Render one item by filling the {{{name}}} placeholders of the item
 * template, applying escaping, truncation and highlighting.
 *
 * 'id' and 'link' both fill {{{link}}}; 'date' is reformatted with
 * $this->date in UTC; every other field is escaped, truncated to
 * $this->ItemMaxLength, highlighted and inserted via a strip marker.
 * Placeholders without a value, including a date that cannot be parsed,
 * are removed.
 *
 * @param array $item an array produced by RSSData where keys are the names of the RSS elements
 * @param Parser $parser
 * @return string|null wikitext for the item; null only if the final preg_replace() fails
 */
protected function renderItem( $item, $parser ) {
	$renderedItem = $this->itemTemplate ?? '';

	// $info will only be an XML element name, so we're safe using it.
	// $item[$info] is handled by the XML parser --
	// and that means bad RSS with stuff like
	// <description><script>alert("hi")</script></description> will find its
	// rogue <script> tags neutered.
	// use the overloaded multi byte wrapper functions in GlobalFunctions.php

	foreach ( array_keys( $item ) as $info ) {
		if ( $item[$info] != "" ) {
			switch ( $info ) {
				// ATOM <id> elements and RSS <link> elements are item link urls
				case 'id':
					$txt = $this->sanitizeUrl( $item['id'] );
					$renderedItem = str_replace( '{{{link}}}', $txt, $renderedItem );
					break;
				case 'link':
					$txt = $this->sanitizeUrl( $item['link'] );
					$renderedItem = str_replace( '{{{link}}}', $txt, $renderedItem );
					break;
				case 'date':
					// Parse and format in UTC, then restore the previous default timezone
					$tempTimezone = date_default_timezone_get();
					date_default_timezone_set( 'UTC' );
					$timestamp = strtotime( $this->escapeTemplateParameter( $item['date'] ) );
					// strtotime() returns false for an unparseable date; date() would
					// turn that into 1970-01-01, so skip the substitution instead.
					$txt = $timestamp === false ? null : date( $this->date, $timestamp );
					date_default_timezone_set( $tempTimezone );
					if ( $txt !== null ) {
						$renderedItem = str_replace( '{{{date}}}', $txt, $renderedItem );
					}
					break;
				default:
					$str = $this->escapeTemplateParameter( $item[$info] );
					$str = $parser->getTargetLanguage()->truncateHTML( $str, $this->ItemMaxLength );
					$str = $this->highlightTerms( $str );
					$renderedItem = str_replace( '{{{' . $info . '}}}',
						$this->insertStripItem( $str ), $renderedItem );
			}
		}
	}

	// nullify all remaining info items in the template
	// without a corresponding info in the current feed item

	return preg_replace( "!{{{[^}]+}}}!U", "", $renderedItem );
}
| 473 | |
/**
 * Make a URL safe to drop into wikitext as a free or bracketed external link.
 *
 * Surrounding whitespace and all control characters are removed; the
 * characters < > " [ | ] \ { and the space are replaced by their
 * percent-encoded form.
 *
 * @param string $url
 * @return string
 */
protected function sanitizeUrl( $url ) {
	# Remove control characters
	$url = preg_replace( '/[\000-\037\177]/', '', trim( $url ) );

	# Escape other problematic characters
	$percentEncoded = [];
	foreach ( str_split( '<>"[|]\ {' ) as $special ) {
		$percentEncoded[$special] = rawurlencode( $special );
	}

	return strtr( $url, $percentEncoded );
}
| 499 | |
/**
 * Sanitize user input for inclusion as a template parameter.
 *
 * Unlike in wfEscapeWikiText() as of r77127, this escapes }} in addition
 * to the other kinds of markup, to avoid user input ending a template
 * invocation.
 *
 * If you want to allow clickable link Urls (HTML <a> tag) in RSS feeds:
 * $wgRSSAllowLinkTag = true;
 *
 * If you want to allow images (HTML <img> tag) in RSS feeds:
 * $wgRSSAllowImageTag = true;
 *
 * When either setting is enabled the text only goes through
 * Sanitizer::removeSomeTags(); otherwise it is HTML-escaped, wikitext
 * markup is replaced by numeric character references and a few basic
 * layout tags are turned back into wikitext / HTML.
 *
 * @param string $text
 * @return string
 */
protected function escapeTemplateParameter( $text ) {
	global $wgRSSAllowLinkTag, $wgRSSAllowImageTag;

	$extraInclude = [];
	$extraExclude = [ "iframe" ];

	if ( $wgRSSAllowLinkTag ) {
		$extraInclude[] = "a";
	} else {
		$extraExclude[] = "a";
	}

	if ( $wgRSSAllowImageTag ) {
		$extraInclude[] = "img";
	} else {
		$extraExclude[] = "img";
	}

	if ( $wgRSSAllowLinkTag || $wgRSSAllowImageTag ) {
		$ret = Sanitizer::removeSomeTags( $text, [
			'extraTags' => $extraInclude,
			'removeTags' => $extraExclude,
		] );

	} else {
		// use the old escape method for a while

		// Newlines are dropped and HTML is escaped first; the wikitext
		// markup that is left is then replaced by character references
		// so that the parser treats it as plain text.
		$text = str_replace(
			[
				'[', '|', ']', '\'', 'ISBN ',
				'RFC ', '://', "\n=", '{{', '}}',
			],
			[
				'&#91;', '&#124;', '&#93;', '&#39;', 'ISBN&#32;',
				'RFC&#32;', '&#58;//', "\n&#61;", '&#123;&#123;', '&#125;&#125;',
			],
			htmlspecialchars( str_replace( "\n", "", $text ) )
		);

		// keep some basic layout tags
		// (they are in their HTML-escaped form at this point)
		$ret = str_replace(
			[
				'&lt;p&gt;', '&lt;/p&gt;',
				'&lt;br/&gt;', '&lt;br&gt;', '&lt;/br&gt;',
				'&lt;b&gt;', '&lt;/b&gt;',
				'&lt;i&gt;', '&lt;/i&gt;',
				'&lt;u&gt;', '&lt;/u&gt;',
				'&lt;s&gt;', '&lt;/s&gt;',
			],
			[
				"", "<br/>",
				"<br/>", "<br/>", "<br/>",
				"'''", "'''",
				"''", "''",
				"<u>", "</u>",
				"<s>", "</s>",
			],
			$text
		);
	}

	return $ret;
}
| 578 | |
/**
 * Turn the response held by $this->client into $this->rss and cache it.
 *
 * A 304 response re-stores what is already loaded. Any other response is
 * parsed as XML into an RSSData object; on success the Etag and
 * Last-Modified response headers are remembered and everything is cached.
 *
 * @param string $key the key to use to store the parsed response in the cache
 * @return Status good on success, fatal 'rss-parse-error' for an empty body or a parse error
 */
protected function responseToXML( $key ) {
	$httpStatus = $this->client->getStatus();
	wfDebugLog( 'RSS', "Got '" . $httpStatus . "', updating cache for $key" );

	if ( $httpStatus === 304 ) {
		# Not modified, update cache
		wfDebugLog( 'RSS', "Got 304, updating cache for $key" );
		$this->storeInCache( $key );
		return Status::newGood();
	}

	$this->xml = new DOMDocument;
	$rawXML = $this->client->getContent();

	if ( $rawXML == '' ) {
		return Status::newFatal( 'rss-parse-error', 'No XML content' );
	}

	$needsEntityLoaderGuard = LIBXML_VERSION < 20900;
	$oldDisable = false;
	if ( $needsEntityLoaderGuard ) {
		// Prevent loading external entities when parsing the XML (bug 46932)
		$oldDisable = libxml_disable_entity_loader( true );
	}
	// phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
	@$this->xml->loadXML( $rawXML );
	if ( $needsEntityLoaderGuard ) {
		libxml_disable_entity_loader( $oldDisable );
	}

	$this->rss = new RSSData( $this->xml );

	if ( !$this->rss || $this->rss->error ) {
		return Status::newFatal( 'rss-parse-error', $this->rss->error );
	}

	// RSS parsed successfully
	$this->etag = $this->client->getResponseHeader( 'Etag' );
	$lastModifiedHeader = $this->client->getResponseHeader( 'Last-Modified' ) ?? '';
	$this->lastModified = strtotime( $lastModifiedHeader );

	wfDebugLog( 'RSS', 'Stored etag (' . $this->etag . ') and Last-Modified (' .
		$lastModifiedHeader . ') and items (' . count( $this->rss->items ) . ')!' );
	$this->storeInCache( $key );

	return Status::newGood();
}
| 627 | |
/**
 * Decide whether an item is shown.
 *
 * The values of the displayable fields are concatenated and checked
 * against the filterOut list first, then against the filter list.
 *
 * @param array $item associative array that RSSData produced for an <item>
 * @return bool
 */
protected function canDisplay( array $item ) {
	/* Only the displayable fields take part in the check */
	$haystack = '';
	foreach ( $this->displayFields as $field ) {
		$haystack .= $item[$field] ?? '';
	}

	// A filterOut hit wins; in that case the filter list is not consulted.
	return !$this->filter( $haystack, 'filterOut' )
		&& $this->filter( $haystack, 'filter' );
}
| 652 | |
/**
 * Check a text against one of the two term lists, case-insensitively.
 *
 * With an empty list the answer is false for "filterOut" and true for
 * anything else.
 *
 * @param string $text the text to examine
 * @param string $filterType "filterOut" to check for matches in the filterOut member list.
 * Otherwise, uses the filter member list.
 * @return bool whether any term of the chosen list occurs in $text
 */
protected function filter( $text, $filterType ) {
	$excluding = $filterType === 'filterOut';
	$terms = $excluding ? $this->filterOut : $this->filter;

	if ( count( $terms ) == 0 ) {
		return !$excluding;
	}

	/* ':' works as delimiter because preg_quote() escapes it. */
	$alternatives = implode( '|', array_map( 'preg_quote', $terms ) );

	return (bool)preg_match( ':(' . $alternatives . '):i', $text );
}
| 680 | |
/**
 * Wrap every occurrence of a highlight term in a coloured <span>.
 *
 * Matching is case-insensitive. The colour pair is chosen by the term's
 * position in $this->highlight, cycling through ten combinations.
 *
 * @param string $text the text to look in.
 * @return string with matched text highlighted in a <span> element
 */
protected function highlightTerms( $text ) {
	if ( count( $this->highlight ) === 0 ) {
		return $text;
	}

	// lower-cased term => position in the highlight list
	$positions = array_flip( array_map( 'strtolower', $this->highlight ) );

	$quotedTerms = array_map( 'preg_quote', array_values( $this->highlight ) );
	$pattern = ':' . implode( '|', $quotedTerms ) . ':i';

	# bg colors cribbed from Google's highlighting of search terms
	$backgrounds = [ '255, 255, 102', '160, 255, 255', '153, 255, 153',
		'255, 153, 153', '255, 102, 255', '136, 0, 0', '0, 170, 0', '136, 104, 0',
		'0, 70, 153', '153, 0, 153' ];
	# Matching fg colors, spelled out instead of computed
	$foregrounds = [ 'black', 'black', 'black', 'black', 'black',
		'white', 'white', 'white', 'white', 'white' ];

	$wrap = static function ( $found ) use ( $positions, $backgrounds, $foregrounds ) {
		$openTag = "<span style='font-weight: bold; " .
			"background: none repeat scroll 0%% 0%% rgb(%s); color: %s;'>";
		$closeTag = '</span>';

		$slot = $positions[strtolower( $found[0] )] % count( $backgrounds );

		return sprintf( $openTag, $backgrounds[$slot], $foregrounds[$slot] ) . $found[0] . $closeTag;
	};

	return preg_replace_callback( $pattern, $wrap, $text );
}
| 713 | } |