Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 287 |
|
0.00% |
0 / 16 |
CRAP | |
0.00% |
0 / 1 |
RSSParser | |
0.00% |
0 / 287 |
|
0.00% |
0 / 16 |
6320 | |
0.00% |
0 / 1 |
explodeOnSpaces | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
__construct | |
0.00% |
0 / 40 |
|
0.00% |
0 / 1 |
240 | |||
insertStripItem | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
fetch | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
12 | |||
loadFromCache | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
30 | |||
storeInCache | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
6 | |||
fetchRemote | |
0.00% |
0 / 35 |
|
0.00% |
0 / 1 |
56 | |||
sandboxParse | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
2 | |||
renderFeed | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
90 | |||
renderItem | |
0.00% |
0 / 25 |
|
0.00% |
0 / 1 |
56 | |||
sanitizeUrl | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
20 | |||
escapeTemplateParameter | |
0.00% |
0 / 45 |
|
0.00% |
0 / 1 |
30 | |||
responseToXML | |
0.00% |
0 / 27 |
|
0.00% |
0 / 1 |
56 | |||
canDisplay | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
30 | |||
filter | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
20 | |||
highlightTerms | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
6 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Extension\RSS; |
4 | |
5 | use DOMDocument; |
6 | use MediaWiki\Content\TextContent; |
7 | use MediaWiki\MediaWikiServices; |
8 | use MediaWiki\Parser\Parser; |
9 | use MediaWiki\Parser\ParserFactory; |
10 | use MediaWiki\Parser\PPFrame; |
11 | use MediaWiki\Parser\Sanitizer; |
12 | use MediaWiki\Status\Status; |
13 | use MediaWiki\Title\Title; |
14 | use MWHttpRequest; |
15 | use Wikimedia\AtEase\AtEase; |
16 | use Wikimedia\ObjectCache\WANObjectCache; |
17 | |
18 | class RSSParser { |
19 | /** @var int */ |
20 | protected $maxheads = 32; |
21 | /** @var string */ |
22 | protected $date = "Y-m-d H:i:s"; |
23 | /** @var int */ |
24 | protected $ItemMaxLength = 200; |
25 | /** @var bool */ |
26 | protected $reversed = false; |
27 | /** @var string[] */ |
28 | protected $highlight = []; |
29 | /** @var string[] */ |
30 | protected $filter = []; |
31 | /** @var string[] */ |
32 | protected $filterOut = []; |
33 | /** @var string|null */ |
34 | protected $itemTemplate; |
35 | /** @var string|null */ |
36 | protected $url; |
37 | /** @var string */ |
38 | protected $etag; |
39 | /** @var int */ |
40 | protected $lastModified; |
41 | /** @var DOMDocument */ |
42 | protected $xml; |
43 | /** @var string[] */ |
44 | protected $displayFields = [ 'author', 'title', 'encodedContent', 'description' ]; |
45 | /** @var string[] */ |
46 | protected $stripItems; |
47 | /** @var string */ |
48 | protected $markerString; |
49 | |
50 | /** |
51 | * @var WANObjectCache |
52 | */ |
53 | private $cache; |
54 | |
55 | /** |
56 | * @var ParserFactory |
57 | */ |
58 | private $parserFactory; |
59 | |
60 | /** |
61 | * @var RSSData|null |
62 | */ |
63 | public $rss; |
64 | |
65 | /** |
66 | * @var MWHttpRequest |
67 | */ |
68 | public $client; |
69 | |
/**
 * Split a space-separated string into its individual words.
 *
 * Leading, trailing and repeated spaces never yield empty words. An empty
 * word would end up as an empty regex alternative in filter() and
 * highlightTerms(), where it matches every text at every offset.
 *
 * @param string $str list of words
 * @return string[] words found; empty array when the input holds none
 */
private static function explodeOnSpaces( $str ) {
	$found = preg_split( '# +#', (string)$str, -1, PREG_SPLIT_NO_EMPTY );
	// preg_split() returns false on a PCRE failure
	return is_array( $found ) ? $found : [];
}
79 | |
/**
 * Take a bit of WikiText that looks like
 *   <rss max=5>http://example.com/</rss>
 * and return an object that can produce rendered output.
 *
 * Recognised keys in $args: max, reverse, date, highlight, filter,
 * filterout, item-max-length, template, templatename.
 *
 * @param string|null $url feed URL taken from the tag body
 * @param array $args attributes of the <rss> tag
 */
public function __construct( $url, $args ) {
	global $wgRSSDateDefaultFormat, $wgRSSItemMaxLength;

	$this->url = $url;

	// Quote marks to prevent XSS (T307028)
	$this->markerString = "'\"" . wfRandomString( 32 );
	$this->stripItems = [];

	$services = MediaWikiServices::getInstance();
	$this->cache = $services->getMainWANObjectCache();
	$this->parserFactory = $services->getParserFactory();

	# Get max number of headlines from argument-array.
	# Non-numeric input casts to 0, which renderFeed() treats as "no limit".
	if ( isset( $args['max'] ) ) {
		$this->maxheads = (int)$args['max'];
	}

	# Get reverse flag from argument array
	if ( isset( $args['reverse'] ) ) {
		$this->reversed = true;
	}

	# Get date format from argument array
	# or use a default value
	if ( isset( $args['date'] ) ) {
		$this->date = $args['date'];
	} elseif ( isset( $wgRSSDateDefaultFormat ) ) {
		$this->date = $wgRSSDateDefaultFormat;
	}

	# Get highlight terms from argument array.
	# Terms keep their case; highlightTerms() matches case-insensitively.
	if ( isset( $args['highlight'] ) ) {
		$this->highlight = self::explodeOnSpaces( $args['highlight'] );
	}

	# Get filter terms from argument array
	if ( isset( $args['filter'] ) ) {
		$this->filter = self::explodeOnSpaces( $args['filter'] );
	}

	# Get a maximal length for item texts. A non-numeric attribute is ignored
	# so that it never reaches the length arithmetic of truncateHTML().
	if ( isset( $args['item-max-length'] ) && is_numeric( $args['item-max-length'] ) ) {
		$this->ItemMaxLength = (int)$args['item-max-length'];
	} elseif ( is_numeric( $wgRSSItemMaxLength ) ) {
		$this->ItemMaxLength = (int)$wgRSSItemMaxLength;
	}

	if ( isset( $args['filterout'] ) ) {
		$this->filterOut = self::explodeOnSpaces( $args['filterout'] );
	}

	// 'template' is the pagename of a user's itemTemplate including
	// a further pagename for the feedTemplate
	// In that way everything is handled via these two pages
	// and no default pages or templates are used.

	// 'templatename' is an optional pagename of a user's feedTemplate
	// In that way it substitutes $1 (default: RSSPost) in MediaWiki:Rss-item

	if ( isset( $args['template'] ) ) {
		$itemTemplateTitle = Title::newFromText( $args['template'], NS_TEMPLATE );

		// newFromText() yields null for an invalid title; in that case (and
		// when the page does not exist) itemTemplate stays null.
		if ( $itemTemplateTitle !== null && $itemTemplateTitle->exists() ) {
			$itemTemplateContent = $services
				->getWikiPageFactory()
				->newFromTitle( $itemTemplateTitle )
				->getContent();

			if ( $itemTemplateContent instanceof TextContent ) {
				$this->itemTemplate = $itemTemplateContent->getText();
			}
		}

		return;
	}

	if ( isset( $args['templatename'] ) ) {
		$feedTemplatePagename = $args['templatename'];
	} else {
		// compatibility patch for rss extension
		$feedTemplatePagename = 'RSSPost';
		$feedTemplateTitle = Title::newFromText( $feedTemplatePagename, NS_TEMPLATE );

		if ( $feedTemplateTitle === null || !$feedTemplateTitle->exists() ) {
			// Hand the message a plain page name instead of a Title object
			$fallbackTitle = Title::makeTitleSafe( NS_MEDIAWIKI, 'Rss-feed' );
			$feedTemplatePagename = $fallbackTitle !== null
				? $fallbackTitle->getPrefixedText()
				: 'MediaWiki:Rss-feed';
		}
	}

	// MediaWiki:Rss-item = {{ feedTemplatePagename | title = {{{title}}} | ... }}

	// if the attribute parameter templatename= is not present
	// then it defaults to
	// {{ Template:RSSPost | title = {{{title}}} | ... }}
	// - if Template:RSSPost exists from pre-1.9 versions
	// {{ MediaWiki:Rss-feed | title = {{{title}}} | ... }}
	// - otherwise

	$this->itemTemplate = wfMessage( 'rss-item', $feedTemplatePagename )->plain();
}
187 | |
/**
 * Stash a rendered fragment and return the placeholder that stands in for it.
 *
 * The placeholder has the form "<marker>-<index>-<marker>", where <index> is
 * the position of the fragment in $this->stripItems. sandboxParse() swaps the
 * placeholders back for the stashed fragments.
 *
 * @param string $item fragment to keep out of the wikitext parse
 * @return string placeholder to embed in the wikitext
 */
private function insertStripItem( $item ) {
	$position = count( $this->stripItems );
	$this->stripItems[] = $item;
	$marker = $this->markerString;

	return $marker . '-' . $position . '-' . $marker;
}
193 | |
/**
 * Return RSS object for the given URL, maintaining caching.
 *
 * NOTES ON RETRIEVING REMOTE FILES:
 * No attempt will be made to fetch remote files if there is something fresh
 * in cache.
 *
 * NOTES ON FAILED REQUESTS:
 * If fetching or parsing the remote feed fails and a cached copy was loaded
 * (even one that failed the freshness check), that cached copy is kept in
 * $this->rss and a good Status is returned.
 *
 * @return Status good when $this->rss holds a usable feed, fatal otherwise
 */
public function fetch() {
	if ( $this->url === null ) {
		return Status::newFatal( 'rss-fetch-nourl' );
	}

	// Flow
	// 1. check cache
	// 2. if there is a hit, make sure its fresh
	// 3. if cached obj fails freshness check, fetch remote
	// 4. if remote fails, return stale object, or error
	$key = $this->cache->makeKey( 'rss-fetch', $this->url );
	if ( $this->loadFromCache( $key ) ) {
		wfDebugLog( 'RSS', 'Outputting cached feed for ' . $this->url );
		return Status::newGood();
	}
	wfDebugLog( 'RSS', 'Cache Failed, fetching ' . $this->url . ' from remote.' );

	// loadFromCache() assigns $this->rss before its freshness check, so a
	// stale copy may be available here. Remember it: a failed parse in
	// responseToXML() overwrites $this->rss with an errored object.
	$staleFeed = $this->rss;

	$status = $this->fetchRemote( $key );
	if ( !$status->isGood() && $staleFeed !== null ) {
		wfDebugLog( 'RSS', 'Remote fetch failed, outputting stale cached feed for ' . $this->url );
		$this->rss = $staleFeed;
		return Status::newGood();
	}

	return $status;
}
226 | |
/**
 * Load the cached feed, etag and last-modified stamp stored under $key.
 *
 * When the cache entry is usable, $this->rss, $this->etag and
 * $this->lastModified are populated even if the entry then turns out to be
 * too old, so the values are available for a conditional remote request.
 *
 * @param string $key lookup key to associate with this item
 * @return bool true when a cached feed was loaded and needs no re-check,
 *  false on a cache miss, an unusable entry, or an entry older than
 *  $wgRSSCacheCompare seconds
 */
protected function loadFromCache( $key ) {
	global $wgRSSCacheCompare;

	$cached = $this->cache->get( $key );
	if ( !is_array( $cached ) ) {
		return false;
	}

	[ $cachedEtag, $cachedLastModified, $cachedFeed ] = $cached;
	if ( !isset( $cachedFeed->items ) ) {
		return false;
	}

	wfDebugLog( 'RSS', "Got '$key' from cache" );

	# The entry is usable, so remember all three parts.
	$this->rss = $cachedFeed;
	$this->etag = $cachedEtag;
	$this->lastModified = $cachedLastModified;

	// The age comparison only applies when $wgRSSCacheCompare is > 0
	$tooOld = $wgRSSCacheCompare && time() - $wgRSSCacheCompare > $cachedLastModified;
	if ( $tooOld ) {
		wfDebugLog( 'RSS', 'Content is old enough that we need to check cached content' );
		return false;
	}

	return true;
}
260 | |
/**
 * Store the current etag, lastModified and RSS object in the cache.
 *
 * The entry is written with a lifetime of $wgRSSCacheAge.
 *
 * @param string $key lookup key to associate with this item
 * @return bool true when the entry was written; false when there is no feed
 *  to store or the cache backend reported a failure
 */
protected function storeInCache( $key ) {
	global $wgRSSCacheAge;

	if ( $this->rss === null ) {
		return false;
	}

	$stored = (bool)$this->cache->set(
		$key,
		[ $this->etag, $this->lastModified, $this->rss ],
		$wgRSSCacheAge
	);

	// Report the real outcome instead of assuming the write succeeded
	if ( $stored ) {
		wfDebugLog( 'RSS', "Stored '$key' in cache" );
	} else {
		wfDebugLog( 'RSS', "Failed to store '$key' in cache" );
	}

	return $stored;
}
283 | |
/**
 * Retrieve a feed over HTTP and hand the response to responseToXML().
 *
 * When an etag and/or last-modified stamp is known (set by loadFromCache()),
 * the request is made conditional through If-None-Match and
 * If-Modified-Since. The executed request is kept in $this->client.
 *
 * @param string $key Cache key
 * @param array $headers headers to send along with the request
 * @return Status the failed request status, or the result of responseToXML()
 */
protected function fetchRemote( $key, array $headers = [] ) {
	global $wgRSSFetchTimeout, $wgRSSUserAgent, $wgRSSProxy,
		$wgRSSUrlNumberOfAllowedRedirects;

	if ( $this->etag ) {
		wfDebugLog( 'RSS', 'Used etag: ' . $this->etag );
		$headers['If-None-Match'] = $this->etag;
	}
	if ( $this->lastModified ) {
		// An HTTP-date must be expressed in GMT and end in the literal
		// "GMT"; the 'r' format would end in "+0000" instead.
		$lastModified = gmdate( 'D, d M Y H:i:s', (int)$this->lastModified ) . ' GMT';
		wfDebugLog( 'RSS', "Used last modified: $lastModified" );
		$headers['If-Modified-Since'] = $lastModified;
	}

	/**
	 * 'noProxy' can conditionally be set as shown in the commented
	 * example below; in HttpRequest 'noProxy' takes precedence over
	 * any value of 'proxy' and disables the use of a proxy.
	 *
	 * This is useful if you run the wiki in an intranet and need to
	 * access external feed urls through a proxy but internal feed
	 * urls must be accessed without a proxy.
	 *
	 * The general handling of such cases will be subject of a
	 * forthcoming version.
	 */

	$url = $this->url;
	$noProxy = !isset( $wgRSSProxy );

	// Example for disabling proxy use for certain urls
	// $noProxy = preg_match( '!\.internal\.example\.com$!i', parse_url( $url, PHP_URL_HOST ) );

	$maxRedirects = 0;
	if ( isset( $wgRSSUrlNumberOfAllowedRedirects )
		&& is_numeric( $wgRSSUrlNumberOfAllowedRedirects )
	) {
		$maxRedirects = $wgRSSUrlNumberOfAllowedRedirects;
	}

	// we set followRedirects intentionally to true to see error messages
	// in cases where the maximum number of redirects is reached
	$client = MediaWikiServices::getInstance()->getHttpRequestFactory()->create(
		$url,
		[
			'timeout' => $wgRSSFetchTimeout,
			'followRedirects' => true,
			'maxRedirects' => $maxRedirects,
			'proxy' => $wgRSSProxy,
			'noProxy' => $noProxy,
			'userAgent' => $wgRSSUserAgent,
		],
		__METHOD__
	);

	foreach ( $headers as $header => $value ) {
		$client->setHeader( $header, $value );
	}

	$fetch = $client->execute();
	$this->client = $client;

	if ( !$fetch->isGood() ) {
		$failure = Status::wrap( $fetch );
		// wfDebugLog() takes the log group first; wfDebug() does not
		wfDebugLog( 'RSS', 'Request Failed: ' . $failure->getWikitext() );
		return $failure;
	}

	return $this->responseToXML( $key );
}
358 | |
/**
 * Parse wikitext with a separate parser instance and put the stashed
 * fragments back in place of their placeholders.
 *
 * The title and options of the original parser are reused for the parse.
 *
 * @see https://bugzilla.wikimedia.org/show_bug.cgi?id=34763
 * @param string $wikiText wikitext containing insertStripItem() placeholders
 * @param Parser $origParser parser whose title and options are reused
 * @return string parsed text with every placeholder replaced
 */
protected function sandboxParse( $wikiText, $origParser ) {
	$parserOutput = $this->parserFactory->getInstance()->parse(
		$wikiText,
		$origParser->getTitle(),
		$origParser->getOptions()
	);

	$stash = $this->stripItems;
	$marker = $this->markerString;
	$restore = static function ( array $hit ) use ( $stash ) {
		// $hit[1] is the numeric index captured between the two markers
		return $stash[(int)$hit[1]];
	};

	return preg_replace_callback(
		"/{$marker}-(\d+)-{$marker}/",
		$restore,
		$parserOutput->getRawText()
	);
}
383 | |
/**
 * Render the entire feed so that each item is passed to the
 * template which the MediaWiki then displays.
 *
 * At most $this->maxheads displayable items are rendered (no limit when
 * maxheads is 0 or less). Nothing is rendered when there is no item
 * template or no feed has been loaded.
 *
 * @param Parser $parser
 * @param PPFrame $frame The frame param to pass to recursiveTagParse()
 * @return string rendered feed, or an empty string when nothing is rendered
 */
public function renderFeed( $parser, $frame ) {
	$renderedFeed = '';

	if ( $this->itemTemplate !== null && $parser && $frame && $this->rss !== null ) {
		// Reverse a local copy so that rendering twice gives the same order
		$items = $this->reversed
			? array_reverse( $this->rss->items )
			: $this->rss->items;

		$wikitextFeed = '';
		$headcnt = 0;
		foreach ( $items as $item ) {
			if ( $this->maxheads > 0 && $headcnt >= $this->maxheads ) {
				// Limit reached: no later item can be rendered either
				break;
			}

			if ( $this->canDisplay( $item ) ) {
				$wikitextFeed .= $this->renderItem( $item, $parser ) . "\n";
				$headcnt++;
			}
		}

		$renderedFeed = $this->sandboxParse( $wikitextFeed, $parser );
	}

	if ( $parser ) {
		$parser->addTrackingCategory( 'rss-tracking-category' );
	}

	return $renderedFeed;
}
421 | |
/**
 * Render one item by filling the {{{...}}} placeholders of the item
 * template, applying truncation and highlighting to text fields.
 *
 * Placeholders without a matching non-empty field in $item, and a
 * {{{date}}} whose value cannot be parsed, are replaced by an empty string.
 *
 * @param array $item an array produced by RSSData where keys are the names of the RSS elements
 * @param Parser $parser
 * @return string|null wikitext for the item; null only if the final regex replacement fails
 */
protected function renderItem( $item, $parser ) {
	$renderedItem = $this->itemTemplate ?? '';

	// $info will only be an XML element name, so we're safe using it.
	// $item[$info] is handled by the XML parser --
	// and that means bad RSS with stuff like
	// <description><script>alert("hi")</script></description> will find its
	// rogue <script> tags neutered.

	foreach ( array_keys( $item ) as $info ) {
		if ( $item[$info] == "" ) {
			continue;
		}

		switch ( (string)$info ) {
			// ATOM <id> elements and RSS <link> elements are item link urls
			case 'id':
			case 'link':
				$txt = $this->sanitizeUrl( $item[$info] );
				$renderedItem = str_replace( '{{{link}}}', $txt, $renderedItem );
				break;
			case 'date':
				$txt = $this->formatItemDate( $item[$info] );
				if ( $txt !== null ) {
					$renderedItem = str_replace( '{{{date}}}', $txt, $renderedItem );
				}
				break;
			default:
				$str = $this->escapeTemplateParameter( $item[$info] );
				$str = $parser->getTargetLanguage()->truncateHTML( $str, $this->ItemMaxLength );
				$str = $this->highlightTerms( $str );
				// Text fields go in as placeholders so the wikitext parse
				// never sees their content
				$renderedItem = str_replace( '{{{' . $info . '}}}',
					$this->insertStripItem( $str ), $renderedItem );
		}
	}

	// nullify all remaining info items in the template
	// without a corresponding info in the current feed item

	return preg_replace( "!{{{[^}]+}}}!U", "", $renderedItem );
}

/**
 * Format a feed date in UTC using the configured date format.
 *
 * The value is parsed with UTC as the zone for strings that carry no zone
 * of their own, without touching the process-wide default timezone.
 *
 * @param string $rawDate date string taken from the feed item
 * @return string|null formatted date, or null when the value cannot be parsed
 */
private function formatItemDate( $rawDate ) {
	$utc = new \DateTimeZone( 'UTC' );

	try {
		$parsed = new \DateTimeImmutable( $this->escapeTemplateParameter( $rawDate ), $utc );
	} catch ( \Exception $e ) {
		// Unparseable date: let the caller blank the placeholder rather
		// than print the Unix epoch
		return null;
	}

	return $parsed->setTimezone( $utc )->format( $this->date );
}
474 | |
/**
 * Make a URL safe to embed in wikitext as a free or bracketed external link.
 *
 * Surrounding whitespace is trimmed, ASCII control characters are dropped,
 * and each of the characters < > " [ | ] \ space { is replaced by its
 * percent-encoded form.
 *
 * @param string $url
 * @return string
 */
protected function sanitizeUrl( $url ) {
	# Remove control characters
	$cleaned = preg_replace( '/[\000-\037\177]/', '', trim( $url ) );

	# Percent-encode every character that is special in wikitext
	return preg_replace_callback(
		'/[<>"\[|\]\\\\ {]/',
		static function ( array $hit ) {
			return rawurlencode( $hit[0] );
		},
		(string)$cleaned
	);
}
500 | |
/**
 * Sanitize user input for inclusion as a template parameter.
 *
 * Unlike in wfEscapeWikiText() as of r77127, this escapes }} in addition
 * to the other kinds of markup, to avoid user input ending a template
 * invocation.
 *
 * If you want to allow clickable link Urls (HTML <a> tag) in RSS feeds:
 * $wgRSSAllowLinkTag = true;
 *
 * If you want to allow images (HTML <img> tag) in RSS feeds:
 * $wgRSSAllowImageTag = true;
 *
 * When either setting is enabled the text is passed through
 * Sanitizer::removeSomeTags(); otherwise newlines are removed, the text is
 * HTML-escaped, wikitext-significant sequences are turned into numeric
 * character references, and a few basic layout tags are mapped to wikitext
 * or restored.
 *
 * @param string $text
 * @return string
 */
protected function escapeTemplateParameter( $text ) {
	global $wgRSSAllowLinkTag, $wgRSSAllowImageTag;

	if ( $wgRSSAllowLinkTag || $wgRSSAllowImageTag ) {
		$extraInclude = [];
		$extraExclude = [ "iframe" ];

		if ( $wgRSSAllowLinkTag ) {
			$extraInclude[] = "a";
		} else {
			$extraExclude[] = "a";
		}

		if ( $wgRSSAllowImageTag ) {
			$extraInclude[] = "img";
		} else {
			$extraExclude[] = "img";
		}

		return Sanitizer::removeSomeTags( $text, [
			'extraTags' => $extraInclude,
			'removeTags' => $extraExclude,
		] );
	}

	// use the old escape method for a while

	// Replace wikitext-significant sequences by numeric character
	// references so they display literally and cannot act as markup
	$text = str_replace(
		[
			'[', '|', ']', '\'', 'ISBN ',
			'RFC ', '://', "\n=", '{{', '}}',
		],
		[
			'&#91;', '&#124;', '&#93;', '&#39;', 'ISBN&#32;',
			'RFC&#32;', '&#58;//', "\n&#61;", '&#123;&#123;', '&#125;&#125;',
		],
		htmlspecialchars( str_replace( "\n", "", $text ) )
	);

	// keep some basic layout tags: they were escaped by htmlspecialchars()
	// above, so match their escaped form
	return str_replace(
		[
			'&lt;p&gt;', '&lt;/p&gt;',
			'&lt;br/&gt;', '&lt;br&gt;', '&lt;/br&gt;',
			'&lt;b&gt;', '&lt;/b&gt;',
			'&lt;i&gt;', '&lt;/i&gt;',
			'&lt;u&gt;', '&lt;/u&gt;',
			'&lt;s&gt;', '&lt;/s&gt;',
		],
		[
			"", "<br/>",
			"<br/>", "<br/>", "<br/>",
			"'''", "'''",
			"''", "''",
			"<u>", "</u>",
			"<s>", "</s>",
		],
		$text
	);
}
579 | |
/**
 * Turn the HTTP response held by $this->client into RSS data and cache it.
 *
 * On a 304 response the feed already held in $this->rss is written back to
 * the cache. Otherwise the body is parsed into $this->xml / $this->rss, the
 * Etag and Last-Modified response headers are recorded, and the result is
 * stored in the cache.
 *
 * @param string $key the key to use to store the parsed response in the cache
 * @return Status good on success; fatal 'rss-parse-error' when the body is
 *  empty or RSSData reports an error
 */
protected function responseToXML( $key ) {
	$httpStatus = $this->client->getStatus();
	wfDebugLog( 'RSS', "Got '" . $httpStatus . "', updating cache for $key" );

	if ( $httpStatus === 304 ) {
		# Not modified, update cache
		wfDebugLog( 'RSS', "Got 304, updating cache for $key" );
		$this->storeInCache( $key );
		return Status::newGood();
	}

	$this->xml = new DOMDocument;
	$rawXML = $this->client->getContent();

	if ( $rawXML == '' ) {
		return Status::newFatal( 'rss-parse-error', 'No XML content' );
	}

	// Warning suppression and the entity-loader setting are restored in
	// finally blocks so that an exception cannot leave them switched
	AtEase::suppressWarnings();
	try {
		$oldDisable = false;
		if ( LIBXML_VERSION < 20900 ) {
			// Prevent loading external entities when parsing the XML (bug 46932)
			$oldDisable = libxml_disable_entity_loader( true );
		}
		try {
			$this->xml->loadXML( $rawXML );
		} finally {
			if ( LIBXML_VERSION < 20900 ) {
				libxml_disable_entity_loader( $oldDisable );
			}
		}
	} finally {
		AtEase::restoreWarnings();
	}

	$this->rss = new RSSData( $this->xml );
	if ( $this->rss->error ) {
		return Status::newFatal( 'rss-parse-error', $this->rss->error );
	}

	// RSS parsed successfully
	$lastModifiedHeader = $this->client->getResponseHeader( 'Last-Modified' );
	$this->etag = $this->client->getResponseHeader( 'Etag' );

	// A missing or unparseable Last-Modified header is stored as 0, which
	// the rest of the class treats as "unknown"
	$lastModified = is_string( $lastModifiedHeader ) && $lastModifiedHeader !== ''
		? strtotime( $lastModifiedHeader )
		: false;
	$this->lastModified = $lastModified === false ? 0 : $lastModified;

	wfDebugLog( 'RSS', 'Stored etag (' . $this->etag . ') and Last-Modified (' .
		$lastModifiedHeader . ') and items (' .
		count( $this->rss->items ) . ')!' );
	$this->storeInCache( $key );

	return Status::newGood();
}
632 | |
/**
 * Decide whether an item passes the filterout and filter lists.
 *
 * The displayable fields of the item are concatenated and the result is
 * tested first against the filterOut terms, then against the filter terms.
 *
 * @param array $item associative array that RSSData produced for an <item>
 * @return bool true when no filterOut term matches and filter() accepts the text
 */
protected function canDisplay( array $item ) {
	/* Only the displayable fields take part in the check */
	$haystack = '';
	foreach ( $this->displayFields as $field ) {
		$haystack .= $item[$field] ?? '';
	}

	return !$this->filter( $haystack, 'filterOut' )
		&& $this->filter( $haystack, 'filter' );
}
657 | |
/**
 * Test a text against one of the two term lists, case-insensitively.
 *
 * An empty filterOut list matches nothing (false); an empty filter list
 * accepts everything (true).
 *
 * @param string $text the text to examine
 * @param string $filterType "filterOut" to check for matches in the filterOut member list.
 *   Otherwise, uses the filter member list.
 * @return bool whether any term of the selected list occurs in $text
 */
protected function filter( $text, $filterType ) {
	$excluding = ( $filterType === 'filterOut' );
	$terms = $excluding ? $this->filterOut : $this->filter;

	if ( count( $terms ) == 0 ) {
		return !$excluding;
	}

	/* ":" serves as delimiter because preg_quote() escapes it on its own. */
	$alternatives = [];
	foreach ( $terms as $term ) {
		$alternatives[] = preg_quote( $term );
	}

	return (bool)preg_match( ':(' . implode( '|', $alternatives ) . '):i', $text );
}
685 | |
/**
 * Highlight the words we're supposed to be looking for.
 *
 * Matching is case-insensitive and is applied only to the text between
 * HTML tags and character references, so a term can never be wrapped
 * inside a tag, an attribute or an entity. Each term gets a colour pair
 * chosen by its position in the highlight list.
 *
 * @param string $text the text to look in.
 * @return string with matched text highlighted in a <span> element
 */
protected function highlightTerms( $text ) {
	// An empty term would match at every offset, so ignore it
	$needles = array_values( array_filter(
		$this->highlight,
		static function ( $term ) {
			return $term !== '';
		}
	) );
	if ( $needles === [] ) {
		return $text;
	}

	// lower-cased term => position in the list, used to pick a colour
	$terms = array_flip( array_map( 'strtolower', $needles ) );
	$pattern = ':' . implode( '|', array_map(
		static function ( $term ) {
			return preg_quote( $term, ':' );
		},
		$needles
	) ) . ':i';

	$wrapMatch = static function ( array $match ) use ( $terms ) {
		$styleStart = "<span style='font-weight: bold; " .
			"background: none repeat scroll 0%% 0%% rgb(%s); color: %s;'>";
		$styleEnd = '</span>';

		# bg colors cribbed from Google's highlighting of search terms
		$bgcolor = [ '255, 255, 102', '160, 255, 255', '153, 255, 153',
			'255, 153, 153', '255, 102, 255', '136, 0, 0', '0, 170, 0', '136, 104, 0',
			'0, 70, 153', '153, 0, 153' ];
		# Spelling out the fg colors instead of using processing time to create this list
		$color = [ 'black', 'black', 'black', 'black', 'black',
			'white', 'white', 'white', 'white', 'white' ];

		// Fall back to the first colour if the lower-cased match is not a known key
		$index = ( $terms[strtolower( $match[0] )] ?? 0 ) % count( $bgcolor );

		return sprintf( $styleStart, $bgcolor[$index], $color[$index] ) . $match[0] . $styleEnd;
	};

	// Split into plain text (even offsets) and tags / character references
	// (odd offsets, kept through the capture group)
	$pieces = preg_split(
		'/(<[^>]*>|&(?:#[0-9]+|#x[0-9a-f]+|[a-z][a-z0-9]*);)/i',
		$text,
		-1,
		PREG_SPLIT_DELIM_CAPTURE
	);
	if ( !is_array( $pieces ) ) {
		$pieces = [ $text ];
	}

	foreach ( $pieces as $offset => $piece ) {
		if ( $offset % 2 === 0 && $piece !== '' ) {
			$pieces[$offset] = preg_replace_callback( $pattern, $wrapMatch, $piece ) ?? $piece;
		}
	}

	return implode( '', $pieces );
}
718 | } |