Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 286 |
|
0.00% |
0 / 16 |
CRAP | |
0.00% |
0 / 1 |
RSSParser | |
0.00% |
0 / 286 |
|
0.00% |
0 / 16 |
6320 | |
0.00% |
0 / 1 |
explodeOnSpaces | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
__construct | |
0.00% |
0 / 40 |
|
0.00% |
0 / 1 |
240 | |||
insertStripItem | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
fetch | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
12 | |||
loadFromCache | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
30 | |||
storeInCache | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
6 | |||
fetchRemote | |
0.00% |
0 / 35 |
|
0.00% |
0 / 1 |
56 | |||
sandboxParse | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
2 | |||
renderFeed | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
90 | |||
renderItem | |
0.00% |
0 / 25 |
|
0.00% |
0 / 1 |
56 | |||
sanitizeUrl | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
20 | |||
escapeTemplateParameter | |
0.00% |
0 / 45 |
|
0.00% |
0 / 1 |
30 | |||
responseToXML | |
0.00% |
0 / 27 |
|
0.00% |
0 / 1 |
56 | |||
canDisplay | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
30 | |||
filter | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
20 | |||
highlightTerms | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
6 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Extension\RSS; |
4 | |
5 | use DOMDocument; |
6 | use MediaWiki\MediaWikiServices; |
7 | use MediaWiki\Parser\Sanitizer; |
8 | use MediaWiki\Status\Status; |
9 | use MediaWiki\Title\Title; |
10 | use MWHttpRequest; |
11 | use Parser; |
12 | use ParserFactory; |
13 | use PPFrame; |
14 | use TextContent; |
15 | use WANObjectCache; |
16 | use Wikimedia\AtEase\AtEase; |
17 | |
18 | class RSSParser { |
// Upper bound on rendered items; replaced by the 'max' tag argument when given.
// renderFeed() only enforces it while the value is greater than zero.
protected $maxheads = 32;
// date() format used for the {{{date}}} placeholder; replaced by the 'date'
// tag argument or $wgRSSDateDefaultFormat in the constructor.
protected $date = "Y-m-d H:i:s";
// Length passed to truncateForVisual() for each rendered field; replaced by the
// 'item-max-length' tag argument or $wgRSSItemMaxLength in the constructor.
protected $ItemMaxLength = 200;
// When true, renderFeed() reverses the item order before rendering.
protected $reversed = false;
// Terms wrapped in a coloured <span> by highlightTerms().
protected $highlight = [];
// Terms of which at least one must match for canDisplay() to accept an item.
protected $filter = [];
// Terms of which any match makes canDisplay() reject an item.
protected $filterOut = [];
// Wikitext used for every item; {{{name}}} placeholders are substituted in renderItem().
protected $itemTemplate;
// Feed URL given to the constructor.
protected $url;
// ETag value loaded from cache or taken from the last response.
protected $etag;
// Last-Modified timestamp loaded from cache or parsed from the last response.
protected $lastModified;
// DOMDocument built from the last response body in responseToXML().
protected $xml;
// NOTE(review): never assigned in the code visible here — confirm whether it is still needed.
protected $error;
// Item fields concatenated by canDisplay() to form the text the filters run against.
protected $displayFields = [ 'author', 'title', 'encodedContent', 'description' ];
// Rendered fragments held back from the wikitext parse; see insertStripItem().
protected $stripItems;
// Per-instance random delimiter used to build strip-item placeholders.
protected $markerString;

/**
 * @var WANObjectCache
 */
private $cache;

/**
 * @var ParserFactory
 */
private $parserFactory;

/**
 * Parsed feed; set by loadFromCache() or responseToXML().
 *
 * @var RSSData
 */
public $rss;

/**
 * HTTP request object of the most recent remote fetch; set by fetchRemote().
 *
 * @var MWHttpRequest
 */
public $client;
55 | |
/**
 * Convenience function that takes a space-separated string and returns an array of words.
 *
 * Runs of spaces count as one separator. Empty pieces (from leading or
 * trailing spaces, or from an empty string) are dropped, because an empty
 * term would later turn the filter/highlight regular expressions into
 * patterns that match every text.
 *
 * @param string $str list of words
 * @return string[] words found; empty array when there are none
 */
private static function explodeOnSpaces( $str ) {
    $found = preg_split( '# +#', $str, -1, PREG_SPLIT_NO_EMPTY );
    return is_array( $found ) ? $found : [];
}
65 | |
/**
 * Take a bit of WikiText that looks like
 *   <rss max=5>http://example.com/</rss>
 * and return an object that can produce rendered output.
 *
 * Recognised keys of $args: max, reverse, date, highlight, filter,
 * filterout, item-max-length, template and templatename.
 *
 * @param string $url Feed URL
 * @param array $args Attributes of the <rss> tag
 */
public function __construct( $url, $args ) {
    global $wgRSSDateDefaultFormat, $wgRSSItemMaxLength;

    $this->url = $url;

    // Quote marks to prevent XSS (T307028)
    $this->markerString = "'\"" . wfRandomString( 32 );
    $this->stripItems = [];
    $this->cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
    $this->parserFactory = MediaWikiServices::getInstance()->getParserFactory();

    # Get max number of headlines from argument-array
    if ( isset( $args['max'] ) ) {
        $this->maxheads = $args['max'];
    }

    # Get reverse flag from argument array
    # (the mere presence of the attribute enables it, whatever its value)
    if ( isset( $args['reverse'] ) ) {
        $this->reversed = true;
    }

    # Get date format from argument array
    # or use a default value
    if ( isset( $args['date'] ) ) {
        $this->date = $args['date'];
    } elseif ( isset( $wgRSSDateDefaultFormat ) ) {
        $this->date = $wgRSSDateDefaultFormat;
    }

    # Get highlight terms from argument array
    # (case is kept here; highlightTerms() matches case-insensitively)
    if ( isset( $args['highlight'] ) ) {
        $this->highlight = self::explodeOnSpaces( $args['highlight'] );
    }

    # Get filter terms from argument array
    if ( isset( $args['filter'] ) ) {
        $this->filter = self::explodeOnSpaces( $args['filter'] );
    }

    # Get a maximal length for item texts
    if ( isset( $args['item-max-length'] ) ) {
        $this->ItemMaxLength = $args['item-max-length'];
    } elseif ( is_numeric( $wgRSSItemMaxLength ) ) {
        $this->ItemMaxLength = $wgRSSItemMaxLength;
    }

    if ( isset( $args['filterout'] ) ) {
        $this->filterOut = self::explodeOnSpaces( $args['filterout'] );
    }

    // 'template' is the pagename of a user's itemTemplate including
    // a further pagename for the feedTemplate
    // In that way everything is handled via these two pages
    // and no default pages or templates are used.

    // 'templatename' is an optional pagename of a user's feedTemplate
    // In that way it substitutes $1 (default: RSSPost) in MediaWiki:Rss-item

    if ( isset( $args['template'] ) ) {
        $itemTemplateTitleObject = Title::newFromText( $args['template'], NS_TEMPLATE );

        // newFromText() yields null for an invalid title, so check before
        // dereferencing. An invalid or missing page leaves itemTemplate
        // unset, and renderFeed() then produces no output.
        if ( $itemTemplateTitleObject !== null && $itemTemplateTitleObject->exists() ) {
            $itemTemplatePageObject = MediaWikiServices::getInstance()
                ->getWikiPageFactory()
                ->newFromTitle( $itemTemplateTitleObject );
            $itemTemplateContentObject = $itemTemplatePageObject->getContent();

            if ( $itemTemplateContentObject instanceof TextContent ) {
                $this->itemTemplate = $itemTemplateContentObject->getText();
            }
        }
    } else {
        if ( isset( $args['templatename'] ) ) {
            $feedTemplatePagename = $args['templatename'];
        } else {
            // compatibility patch for rss extension

            $feedTemplatePagename = 'RSSPost';
            $feedTemplateTitleObject = Title::newFromText( $feedTemplatePagename, NS_TEMPLATE );

            if ( $feedTemplateTitleObject === null || !$feedTemplateTitleObject->exists() ) {
                $feedTemplatePagename = Title::makeTitleSafe( NS_MEDIAWIKI, 'Rss-feed' );
            }
        }

        // MediaWiki:Rss-item = {{ feedTemplatePagename | title = {{{title}}} | ... }}

        // if the attribute parameter templatename= is not present
        // then it defaults to
        // {{ Template:RSSPost | title = {{{title}}} | ... }}
        // - if Template:RSSPost exists from pre-1.9 versions
        // {{ MediaWiki:Rss-feed | title = {{{title}}} | ... }}
        // - otherwise

        $this->itemTemplate = wfMessage( 'rss-item', $feedTemplatePagename )->plain();
    }
}
173 | |
/**
 * Set a fragment aside and return the placeholder that stands in for it.
 *
 * The placeholder is the fragment's position in the strip list surrounded by
 * the instance marker string; sandboxParse() swaps the fragment back in later.
 *
 * @param string $item Fragment to hold back
 * @return string Placeholder text
 */
private function insertStripItem( $item ) {
    // The new entry lands at the current end of the list.
    $slot = count( $this->stripItems );
    $this->stripItems[] = $item;

    return $this->markerString . '-' . $slot . '-' . $this->markerString;
}
179 | |
/**
 * Make the feed for the configured URL available, preferring the cache.
 *
 * Steps:
 *  1. Without a URL, fail with 'rss-fetch-nourl'.
 *  2. Ask loadFromCache() for the entry; when it reports a usable entry,
 *     return a good Status without any remote request.
 *  3. Otherwise hand over to fetchRemote() and return its Status as is.
 *
 * Note that loadFromCache() may already have populated $this->rss from an
 * entry it then judged too old; this method does not itself turn a failed
 * remote request into a success based on that data.
 *
 * @return Status
 */
public function fetch() {
    if ( !isset( $this->url ) ) {
        return Status::newFatal( 'rss-fetch-nourl' );
    }

    $key = $this->cache->makeKey( 'rss-fetch', $this->url );

    if ( $this->loadFromCache( $key ) === false ) {
        wfDebugLog( 'RSS', 'Cache Failed, fetching ' . $this->url . ' from remote.' );

        return $this->fetchRemote( $key );
    }

    wfDebugLog( 'RSS', 'Outputting cached feed for ' . $this->url );

    return Status::newGood();
}
212 | |
/**
 * Try to restore the feed, its ETag and its Last-Modified time from the cache.
 *
 * The three values are copied onto this object as soon as the cached entry
 * looks structurally valid, even when the entry is afterwards reported as
 * too old, so that fetchRemote() can send them as conditional headers.
 *
 * @param string $key lookup key to associate with this item
 * @return bool True when a cached feed may be used as is, false otherwise
 */
protected function loadFromCache( $key ) {
    global $wgRSSCacheCompare;

    $cached = $this->cache->get( $key );
    if ( !is_array( $cached ) ) {
        return false;
    }

    [ $cachedEtag, $cachedModified, $cachedFeed ] = $cached;
    if ( !isset( $cachedFeed->items ) ) {
        return false;
    }

    wfDebugLog( 'RSS', "Got '$key' from cache" );

    # The entry is usable, remember its parts.
    $this->rss = $cachedFeed;
    $this->etag = $cachedEtag;
    $this->lastModified = $cachedModified;

    // The age check only applies when $wgRSSCacheCompare is > 0
    $tooOld = $wgRSSCacheCompare && time() - $wgRSSCacheCompare > $cachedModified;
    if ( $tooOld ) {
        wfDebugLog( 'RSS', 'Content is old enough that we need to check cached content' );
    }

    return !$tooOld;
}
246 | |
/**
 * Write the current ETag, Last-Modified time and feed object to the cache.
 *
 * The entry lifetime is $wgRSSCacheAge. Nothing is written when no feed
 * has been set on this object.
 *
 * @param string $key lookup key to associate with this item
 * @return bool True when an entry was written, false when there was no feed
 */
protected function storeInCache( $key ) {
    global $wgRSSCacheAge;

    if ( isset( $this->rss ) ) {
        $entry = [ $this->etag, $this->lastModified, $this->rss ];
        $this->cache->set( $key, $entry, $wgRSSCacheAge );

        wfDebugLog( 'RSS', "Stored '$key' as in cache" );

        return true;
    }

    return false;
}
269 | |
/**
 * Retrieve a feed over HTTP.
 *
 * Sends If-None-Match / If-Modified-Since when an ETag or Last-Modified time
 * is known on this object, stores the request object in $this->client and,
 * on success, hands the response to responseToXML().
 *
 * @param string $key Cache key
 * @param array $headers headers to send along with the request
 * @return Status The failed request status, or the result of responseToXML()
 */
protected function fetchRemote( $key, array $headers = [] ) {
    global $wgRSSFetchTimeout, $wgRSSUserAgent, $wgRSSProxy,
        $wgRSSUrlNumberOfAllowedRedirects;

    if ( $this->etag ) {
        wfDebugLog( 'RSS', 'Used etag: ' . $this->etag );
        $headers['If-None-Match'] = $this->etag;
    }
    if ( $this->lastModified ) {
        // If-Modified-Since must carry an HTTP-date, which ends in the literal
        // "GMT"; gmdate( 'r' ) would produce a "+0000" offset instead, and
        // servers are told to ignore a header with an invalid date.
        $lastModified = gmdate( 'D, d M Y H:i:s', $this->lastModified ) . ' GMT';
        wfDebugLog( 'RSS', "Used last modified: $lastModified" );
        $headers['If-Modified-Since'] = $lastModified;
    }

    /**
     * 'noProxy' can conditionally be set as shown in the commented
     * example below; in HttpRequest 'noProxy' takes precedence over
     * any value of 'proxy' and disables the use of a proxy.
     *
     * This is useful if you run the wiki in an intranet and need to
     * access external feed urls through a proxy but internal feed
     * urls must be accessed without a proxy.
     *
     * The general handling of such cases will be subject of a
     * forthcoming version.
     */

    $url = $this->url;
    $noProxy = !isset( $wgRSSProxy );

    // Example for disabling proxy use for certain urls
    // $noProxy = preg_match( '!\.internal\.example\.com$!i', parse_url( $url, PHP_URL_HOST ) );

    if ( isset( $wgRSSUrlNumberOfAllowedRedirects )
        && is_numeric( $wgRSSUrlNumberOfAllowedRedirects ) ) {
        $maxRedirects = $wgRSSUrlNumberOfAllowedRedirects;
    } else {
        $maxRedirects = 0;
    }

    // we set followRedirects intentionally to true to see error messages
    // in cases where the maximum number of redirects is reached
    $client = MediaWikiServices::getInstance()->getHttpRequestFactory()->create( $url,
        [
            'timeout' => $wgRSSFetchTimeout,
            'followRedirects' => true,
            'maxRedirects' => $maxRedirects,
            'proxy' => $wgRSSProxy,
            'noProxy' => $noProxy,
            'userAgent' => $wgRSSUserAgent,
        ],
        __METHOD__
    );

    foreach ( $headers as $header => $value ) {
        $client->setHeader( $header, $value );
    }

    $fetch = $client->execute();
    $this->client = $client;

    if ( !$fetch->isGood() ) {
        // wfDebugLog() takes the channel first; wfDebug() would have treated
        // 'RSS' as the message and the real message as the destination.
        wfDebugLog( 'RSS', 'Request Failed: ' . Status::wrap( $fetch )->getWikitext() );
        return $fetch;
    }

    return $this->responseToXML( $key );
}
344 | |
/**
 * Parse wikitext with a separate parser instance and put the held-back
 * fragments into the resulting HTML.
 *
 * The text is parsed using the title and options of the original parser.
 * Afterwards every placeholder created by insertStripItem() is replaced
 * with the fragment stored under its index.
 *
 * @see https://bugzilla.wikimedia.org/show_bug.cgi?id=34763
 * @param string $wikiText
 * @param Parser $origParser
 * @return string
 */
protected function sandboxParse( $wikiText, $origParser ) {
    $parserOutput = $this->parserFactory->getInstance()->parse(
        $wikiText,
        $origParser->getTitle(),
        $origParser->getOptions()
    );

    $held = $this->stripItems;
    $restore = static function ( array $hit ) use ( $held ) {
        return $held[(int)$hit[1]];
    };

    return preg_replace_callback(
        "/{$this->markerString}-(\d+)-{$this->markerString}/",
        $restore,
        $parserOutput->getText()
    );
}
369 | |
/**
 * Render the entire feed so that each item is passed to the
 * template which the MediaWiki then displays.
 *
 * Items rejected by canDisplay() are skipped and do not count towards the
 * 'max' limit. The tracking category is added whether or not anything was
 * rendered.
 *
 * @param Parser $parser
 * @param PPFrame $frame Only checked for being set; not otherwise used here
 * @return string Rendered HTML, or an empty string when there is nothing to render
 */
public function renderFeed( $parser, $frame ) {
    $renderedFeed = '';

    // Also require a loaded feed, so that calling this after a failed
    // fetch yields empty output instead of errors on a missing object.
    if ( isset( $this->itemTemplate ) && isset( $parser ) && isset( $frame )
        && isset( $this->rss->items )
    ) {
        $headcnt = 0;
        if ( $this->reversed ) {
            $this->rss->items = array_reverse( $this->rss->items );
        }

        foreach ( $this->rss->items as $item ) {
            // Once the limit is reached no later item can be rendered,
            // so stop instead of walking the rest of the feed.
            if ( $this->maxheads > 0 && $headcnt >= $this->maxheads ) {
                break;
            }

            if ( $this->canDisplay( $item ) ) {
                $renderedFeed .= $this->renderItem( $item, $parser ) . "\n";
                $headcnt++;
            }
        }

        $renderedFeed = $this->sandboxParse( $renderedFeed, $parser );
    }

    $parser->addTrackingCategory( 'rss-tracking-category' );

    return $renderedFeed;
}
406 | |
/**
 * Fill the item template with the values of one feed item.
 *
 * - 'id' and 'link' both fill {{{link}}} after URL sanitising.
 * - 'date' fills {{{date}}} formatted with $this->date in UTC; a date that
 *   cannot be parsed is left out rather than shown as 1970-01-01.
 * - every other field is escaped, truncated, highlighted and then inserted
 *   as a strip-item placeholder, so the wikitext parser never sees it.
 *
 * Placeholders with no matching non-empty field are removed at the end.
 *
 * @param array $item an array produced by RSSData where keys are the names of the RSS elements
 * @param Parser $parser
 * @return string
 */
protected function renderItem( $item, $parser ) {
    $renderedItem = $this->itemTemplate;

    // $info will only be an XML element name, so we're safe using it.
    // $item[$info] is handled by the XML parser --
    // and that means bad RSS with stuff like
    // <description><script>alert("hi")</script></description> will find its
    // rogue <script> tags neutered.

    foreach ( array_keys( $item ) as $info ) {
        if ( $item[$info] != "" ) {
            switch ( $info ) {
                // ATOM <id> elements and RSS <link> elements are item link urls
                case 'id':
                    $txt = $this->sanitizeUrl( $item['id'] );
                    $renderedItem = str_replace( '{{{link}}}', $txt, $renderedItem );
                    break;
                case 'link':
                    $txt = $this->sanitizeUrl( $item['link'] );
                    $renderedItem = str_replace( '{{{link}}}', $txt, $renderedItem );
                    break;
                case 'date':
                    // Both parsing and formatting happen with UTC as the
                    // default timezone; the previous zone is restored after.
                    $tempTimezone = date_default_timezone_get();
                    date_default_timezone_set( 'UTC' );
                    $timestamp = strtotime( $this->escapeTemplateParameter( $item['date'] ) );
                    $txt = $timestamp === false ? null : date( $this->date, $timestamp );
                    date_default_timezone_set( $tempTimezone );
                    // strtotime() returns false for text it cannot parse; in
                    // that case keep the placeholder so it is blanked below.
                    if ( $txt !== null ) {
                        $renderedItem = str_replace( '{{{date}}}', $txt, $renderedItem );
                    }
                    break;
                default:
                    $str = $this->escapeTemplateParameter( $item[$info] );
                    $str = $parser->getTargetLanguage()->truncateForVisual( $str, $this->ItemMaxLength );
                    $str = $this->highlightTerms( $str );
                    $renderedItem = str_replace( '{{{' . $info . '}}}',
                        $this->insertStripItem( $str ), $renderedItem );
            }
        }
    }

    // nullify all remaining info items in the template
    // without a corresponding info in the current feed item

    return preg_replace( "!{{{[^}]+}}}!U", "", $renderedItem );
}
459 | |
/**
 * Make a URL safe to embed in wikitext as a free or bracketed external link.
 *
 * Surrounding whitespace and all ASCII control characters are removed, then
 * each of the characters < > " [ | ] \ { and the space is replaced with its
 * percent-encoded form. Everything else is passed through unchanged.
 *
 * @param string $url
 * @return string
 */
protected function sanitizeUrl( $url ) {
    # Remove control characters
    $cleaned = preg_replace( '/[\000-\037\177]/', '', trim( $url ) );

    # Build a character => percent-escape table for the problematic characters
    $escapes = [];
    foreach ( str_split( '<>"[|]\ {' ) as $char ) {
        $escapes[$char] = rawurlencode( $char );
    }

    return strtr( $cleaned, $escapes );
}
485 | |
/**
 * Sanitize user input for inclusion as a template parameter.
 *
 * Unlike in wfEscapeWikiText() as of r77127, this escapes }} in addition
 * to the other kinds of markup, to avoid user input ending a template
 * invocation.
 *
 * If you want to allow clickable link Urls (HTML <a> tag) in RSS feeds:
 * $wgRSSAllowLinkTag = true;
 *
 * If you want to allow images (HTML <img> tag) in RSS feeds:
 * $wgRSSAllowImageTag = true;
 *
 * With either setting enabled the text goes through
 * Sanitizer::removeSomeTags(). Otherwise newlines are removed, the text is
 * HTML-escaped, wikitext-significant sequences are turned into numeric
 * character references and a small set of layout tags is mapped back to
 * wikitext or HTML.
 *
 * @param string $text
 * @return string
 */
protected function escapeTemplateParameter( $text ) {
    global $wgRSSAllowLinkTag, $wgRSSAllowImageTag;

    $extraInclude = [];
    $extraExclude = [ "iframe" ];

    if ( $wgRSSAllowLinkTag ) {
        $extraInclude[] = "a";
    } else {
        $extraExclude[] = "a";
    }

    if ( $wgRSSAllowImageTag ) {
        $extraInclude[] = "img";
    } else {
        $extraExclude[] = "img";
    }

    if ( $wgRSSAllowLinkTag || $wgRSSAllowImageTag ) {
        $ret = Sanitizer::removeSomeTags( $text, [
            'extraTags' => $extraInclude,
            'removeTags' => $extraExclude,
        ] );

    } else {
        // use the old escape method for a while

        // Replace characters that are special in wikitext with numeric
        // character references, so they display literally and cannot
        // start links, templates, headings or magic links.
        $text = str_replace(
            [
                '[', '|', ']', '\'', 'ISBN ',
                'RFC ', '://', "\n=", '{{', '}}',
            ],
            [
                '&#91;', '&#124;', '&#93;', '&#39;', 'ISBN&#32;',
                'RFC&#32;', '&#58;//', "\n&#61;", '&#123;&#123;', '&#125;&#125;',
            ],
            htmlspecialchars( str_replace( "\n", "", $text ) )
        );

        // keep some basic layout tags
        // (the search strings are in escaped form because htmlspecialchars()
        // has already run on the text)
        $ret = str_replace(
            [
                '&lt;p&gt;', '&lt;/p&gt;',
                '&lt;br/&gt;', '&lt;br&gt;', '&lt;/br&gt;',
                '&lt;b&gt;', '&lt;/b&gt;',
                '&lt;i&gt;', '&lt;/i&gt;',
                '&lt;u&gt;', '&lt;/u&gt;',
                '&lt;s&gt;', '&lt;/s&gt;',
            ],
            [
                "", "<br/>",
                "<br/>", "<br/>", "<br/>",
                "'''", "'''",
                "''", "''",
                "<u>", "</u>",
                "<s>", "</s>",
            ],
            $text
        );
    }

    return $ret;
}
564 | |
/**
 * Turn the HTTP response held in $this->client into feed data and cache it.
 *
 * On a 304 response the data already on this object is written back to the
 * cache. Otherwise the body is parsed as XML into an RSSData object; on
 * success the feed, its ETag and its Last-Modified time are stored.
 *
 * @param string $key the key to use to store the parsed response in the cache
 * @return Status Good on success; fatal 'rss-parse-error' for an empty body
 *  or when RSSData reports an error
 */
protected function responseToXML( $key ) {
    wfDebugLog( 'RSS', "Got '" . $this->client->getStatus() . "', updating cache for $key" );
    if ( $this->client->getStatus() === 304 ) {
        # Not modified, update cache
        wfDebugLog( 'RSS', "Got 304, updating cache for $key" );
        $this->storeInCache( $key );
    } else {
        $this->xml = new DOMDocument;
        $rawXML = $this->client->getContent();

        if ( $rawXML == '' ) {
            return Status::newFatal( 'rss-parse-error', 'No XML content' );
        }

        AtEase::suppressWarnings();

        $oldDisable = false;
        if ( LIBXML_VERSION < 20900 ) {
            // Prevent loading external entities when parsing the XML (bug 46932)
            $oldDisable = libxml_disable_entity_loader( true );
        }
        $this->xml->loadXML( $rawXML );
        if ( LIBXML_VERSION < 20900 ) {
            libxml_disable_entity_loader( $oldDisable );
        }

        AtEase::restoreWarnings();

        $this->rss = new RSSData( $this->xml );

        // if RSS parsed successfully
        if ( $this->rss && !$this->rss->error ) {
            $this->etag = $this->client->getResponseHeader( 'Etag' );

            // The header may be absent (presumably reported as null — confirm
            // against MWHttpRequest). Passing null to strtotime() is
            // deprecated since PHP 8.1, so only parse an actual string and
            // keep false, the value strtotime() gives for unusable input.
            $lastModifiedHeader = $this->client->getResponseHeader( 'Last-Modified' );
            $this->lastModified = is_string( $lastModifiedHeader )
                ? strtotime( $lastModifiedHeader )
                : false;

            wfDebugLog( 'RSS', 'Stored etag (' . $this->etag . ') and Last-Modified (' .
                $lastModifiedHeader . ') and items (' .
                count( $this->rss->items ) . ')!' );
            $this->storeInCache( $key );
        } else {
            return Status::newFatal( 'rss-parse-error', $this->rss->error );
        }
    }
    return Status::newGood();
}
617 | |
/**
 * Decide whether a feed item passes the configured filters.
 *
 * The values of the displayable fields that are present on the item are
 * concatenated into one text. The item is shown only when that text does
 * not hit the filterOut list and does pass the filter list.
 *
 * @param array $item associative array that RSSData produced for an <item>
 * @return bool
 */
protected function canDisplay( array $item ) {
    /* Only the displayable fields take part in the check */
    $haystack = '';
    foreach ( $this->displayFields as $field ) {
        $haystack .= $item[$field] ?? '';
    }

    return !$this->filter( $haystack, 'filterOut' )
        && $this->filter( $haystack, 'filter' );
}
642 | |
/**
 * Test a text against one of the two term lists, case-insensitively.
 *
 * An empty filterOut list never matches, an empty filter list always
 * matches. Otherwise the result says whether any term occurs in the text.
 *
 * @param string $text the text to examine
 * @param string $filterType "filterOut" to check for matches in the filterOut member list.
 *     Otherwise, uses the filter member list.
 * @return bool Whether the text matches the chosen list.
 */
protected function filter( $text, $filterType ) {
    $excluding = ( $filterType === 'filterOut' );
    $terms = $excluding ? $this->filterOut : $this->filter;

    if ( count( $terms ) == 0 ) {
        return !$excluding;
    }

    /* ':' serves as delimiter because preg_quote() escapes it in the terms. */
    $alternatives = implode( '|', array_map( 'preg_quote', $terms ) );

    return (bool)preg_match( ':(' . $alternatives . '):i', $text );
}
670 | |
/**
 * Wrap every occurrence of a highlight term in a coloured <span>.
 *
 * Matching is case-insensitive. The colour pair is chosen by the position
 * of the term in the highlight list and wraps around after ten terms.
 *
 * @param string $text the text to look in.
 * @return string with matched text highlighted in a <span> element
 */
protected function highlightTerms( $text ) {
    $needles = $this->highlight;
    if ( count( $needles ) === 0 ) {
        return $text;
    }

    // lower-cased term => position in the highlight list
    $positions = array_flip( array_map( 'strtolower', $needles ) );
    $pattern = ':' . implode( '|', array_map( 'preg_quote', array_values( $needles ) ) ) . ':i';

    # bg colors cribbed from Google's highlighting of search terms
    $backgrounds = [
        '255, 255, 102', '160, 255, 255', '153, 255, 153', '255, 153, 153', '255, 102, 255',
        '136, 0, 0', '0, 170, 0', '136, 104, 0', '0, 70, 153', '153, 0, 153',
    ];
    # matching fg colors, spelled out rather than computed
    $foregrounds = [
        'black', 'black', 'black', 'black', 'black',
        'white', 'white', 'white', 'white', 'white',
    ];

    $wrap = static function ( $hit ) use ( $positions, $backgrounds, $foregrounds ) {
        $slot = $positions[strtolower( $hit[0] )] % count( $backgrounds );
        $open = sprintf(
            "<span style='font-weight: bold; background: none repeat scroll 0%% 0%% rgb(%s); color: %s;'>",
            $backgrounds[$slot],
            $foregrounds[$slot]
        );

        return $open . $hit[0] . '</span>';
    };

    return preg_replace_callback( $pattern, $wrap, $text );
}
703 | } |