MediaWiki  1.34.0
ExtractFormatterTest.php
Go to the documentation of this file.
1 <?php
2 
3 namespace TextExtracts\Test;
4 
5 use MediaWikiTestCase;
7 
/**
 * Data-driven checks of the text returned by the extract formatter, covering
 * both its limited-HTML output and its plain text output.
 *
 * @covers \TextExtracts\ExtractFormatter
 * @group TextExtracts
 */
class ExtractFormatterTest extends MediaWikiTestCase {

	/**
	 * Runs one dataset through the formatter and compares the result with the
	 * expected extract.
	 *
	 * @dataProvider provideExtracts
	 * @param string $expected Text the formatter is expected to return
	 * @param string $text HTML handed to the formatter
	 * @param bool $plainText Second constructor argument; the datasets pass true
	 *  for the plain text cases and false for the HTML cases
	 */
	public function testExtracts( $expected, $text, $plainText ) {
		// Fully qualified so that the class resolves to \TextExtracts\ExtractFormatter
		// regardless of which imports are present in this namespace.
		$formatter = new \TextExtracts\ExtractFormatter( $text, $plainText );
		// .metadata class will be added via $wgExtractsRemoveClasses on WMF
		$formatter->remove( [ 'div', '.metadata' ] );

		$this->assertSame( $expected, $formatter->getText() );
	}

	/**
	 * Datasets for testExtracts().
	 *
	 * @return array[] Each entry is [ expected output, input HTML, plain text flag ]
	 */
	public function provideExtracts() {
		// phpcs:ignore Generic.Files.LineLength
		$dutch = '<b>Dutch</b> (<span class="unicode haudio" style="white-space:nowrap;"><span class="fn"><a href="/wiki/File:Nl-Nederlands.ogg" title="About this sound"><img alt="About this sound" src="https://upload.wikimedia.org/wikipedia/commons/thumb/8/8a/Loudspeaker.svg/11px-Loudspeaker.svg.png" width="11" height="11" srcset="https://upload.wikimedia.org/wikipedia/commons/thumb/8/8a/Loudspeaker.svg/17px-Loudspeaker.svg.png 1.5x, https://upload.wikimedia.org/wikipedia/commons/thumb/8/8a/Loudspeaker.svg/22px-Loudspeaker.svg.png 2x" /></a>&#160;<a href="https://upload.wikimedia.org/wikipedia/commons/d/db/Nl-Nederlands.ogg" class="internal" title="Nl-Nederlands.ogg"><i>Nederlands</i></a></span>&#160;<small class="metadata audiolinkinfo" style="cursor:help;">(<a href="/w/index.php?title=Wikipedia:Media_help&amp;action=edit&amp;redlink=1" class="new" title="Wikipedia:Media help (page does not exist)"><span style="cursor:help;">help</span></a>ยท<a href="/wiki/File:Nl-Nederlands.ogg" title="File:Nl-Nederlands.ogg"><span style="cursor:help;">info</span></a>)</small></span>) is a <a href="/w/index.php?title=West_Germanic_languages&amp;action=edit&amp;redlink=1" class="new" title="West Germanic languages (page does not exist)">West Germanic language</a> and the native language of most of the population of the <a href="/w/index.php?title=Netherlands&amp;action=edit&amp;redlink=1" class="new" title="Netherlands (page does not exist)">Netherlands</a>';
		// The continuation lines of this literal must stay at column 0: any
		// indentation would become part of the fixture.
		$tocText = 'Lead<div id="toc" class="toc">TOC goes here</div>
<h1>Section</h1>
<p>Section text</p>';

		return [
			'Dutch lead paragraph in plain text mode' => [
				'Dutch ( Nederlands ) is a West Germanic language and the native language of ' .
				'most of the population of the Netherlands',
				$dutch,
				true,
			],

			'HTML cleanup in HTML mode' => [
				"\u{00A0}A &amp; <b>B</b>",
				"&#x0A;&nbsp;<a>A</a> &amp; <b>&#x42;</b>\r\n",
				false
			],
			'HTML cleanup in plain text mode' => [
				'A & B',
				"&#x0A;&nbsp;<a>A</a> &amp; <b>&#x42;</b>\r\n",
				true
			],

			'class attribute dropped in HTML mode' => [
				"<span><span lang=\"baz\">qux</span></span>",
				'<span class="foo"><span lang="baz">qux</span></span>',
				false,
			],
			'style attribute dropped in HTML mode' => [
				"<span><span lang=\"baz\">qux</span></span>",
				'<span style="foo: bar;"><span lang="baz">qux</span></span>',
				false,
			],
			'class and style dropped, lang kept in HTML mode' => [
				"<span><span lang=\"qux\">quux</span></span>",
				'<span class="foo"><span style="bar: baz;" lang="qux">quux</span></span>',
				false,
			],
			'TOC removed in HTML mode' => [
				// Verify that TOC is properly removed (HTML mode)
				"Lead\n<h1>Section</h1>\n<p>Section text</p>",
				$tocText,
				false,
			],
			'TOC removed in plain text mode' => [
				// Verify that TOC is properly removed (plain text mode)
				// NOTE(review): the control characters around "1" presumably mark
				// the heading and its level - confirm against ExtractFormatter.
				"Lead\n\n\x01\x021\2\1Section\nSection text",
				$tocText,
				true,
			],
		];
	}

}
true
return true
Definition: router.php:92
TextExtracts\Test\ExtractFormatterTest
@covers \TextExtracts\ExtractFormatter @group TextExtracts
Definition: ExtractFormatterTest.php:14
TextExtracts\Test
Definition: ApiQueryExtractsTest.php:3
TextExtracts\Test\ExtractFormatterTest\testExtracts
testExtracts( $expected, $text, $plainText)
@dataProvider provideExtracts
Definition: ExtractFormatterTest.php:19
TextExtracts\ExtractFormatter
Provides text-only or limited-HTML extracts of page HTML.
Definition: ExtractFormatter.php:13
TextExtracts\Test\ExtractFormatterTest\provideExtracts
provideExtracts()
Definition: ExtractFormatterTest.php:27