MediaWiki
REL1_34
TextTruncatorTest.php
Go to the documentation of this file.
1
<?php
2
3
namespace
TextExtracts\Test
;
4
5
use
MediaWiki\Tidy\TidyDriverBase
;
6
use
TextExtracts\TextTruncator
;
7
14
/**
 * Unit tests for the sentence-based and character-based truncation offered by
 * TextTruncator, with and without a Tidy driver injected.
 *
 * @covers \TextExtracts\TextTruncator
 * @group TextExtracts
 */
class TextTruncatorTest extends \PHPUnit\Framework\TestCase {
	use \PHPUnit4And6Compat;

	/**
	 * Checks that getFirstSentences() returns exactly the expected prefix.
	 *
	 * @dataProvider provideGetFirstSentences
	 * @param string $text Text to truncate
	 * @param int $sentences Number of sentences requested
	 * @param string $expected Expected truncated text
	 */
	public function testGetFirstSentences( $text, $sentences, $expected ) {
		$truncator = new TextTruncator();
		$this->assertSame( $expected, $truncator->getFirstSentences( $text, $sentences ) );
	}

	/**
	 * Data sets for testGetFirstSentences().
	 *
	 * @return array[] Each case is [ input text, sentences requested, expected result ]
	 */
	public function provideGetFirstSentences() {
		// A very long input without any sentence terminator (see the T145231 cases below).
		$longLine = str_repeat( 'word ', 1000000 );
		return [
			[
				'Foo is a bar. Such a smart boy. But completely useless.',
				2,
				'Foo is a bar. Such a smart boy.',
			],
			[
				'Foo is a bar. Such a smart boy. But completely useless.',
				1,
				'Foo is a bar.',
			],
			[
				'Foo is a bar. Such a smart boy.',
				2,
				'Foo is a bar. Such a smart boy.',
			],
			[
				'Foo is a bar.',
				1,
				'Foo is a bar.',
			],
			[
				'Foo is a bar.',
				2,
				'Foo is a bar.',
			],
			[
				'',
				1,
				'',
			],
			'0 sentences mean empty result' => [
				'Foo is a bar. Such a smart boy.',
				0,
				'',
			],
			"Don't explode on negative input" => [
				'Foo is a bar. Such a smart boy.',
				-1,
				'',
			],
			'More sentences requested than is available' => [
				'Foo is a bar. Such a smart boy.',
				3,
				'Foo is a bar. Such a smart boy.',
			],
			// Exclamation points too!!!
			[
				'Foo is a bar! Such a smart boy! But completely useless!',
				1,
				'Foo is a bar!',
			],
			// A tricky one
			[
				"Acid phosphatase (EC 3.1.3.2) is a chemical you don't want to mess with. " .
					"Polyvinyl acetate, however, is another story.",
				1,
				"Acid phosphatase (EC 3.1.3.2) is a chemical you don't want to mess with.",
			],
			// No clear sentences
			[
				"foo\nbar\nbaz",
				2,
				'foo',
			],
			// Bug T118621
			[
				'Foo was born in 1977. He enjoys listening to Siouxsie and the Banshees.',
				1,
				'Foo was born in 1977.',
			],
			// Bug T115795 - Test no cropping after initials
			[
				'P.J. Harvey is a singer. She is awesome!',
				1,
				'P.J. Harvey is a singer.',
			],
			// Bug T115817 - Non-breaking space is not a delimiter.
			// The entity is spelled out so that html_entity_decode() yields a real
			// U+00A0 regardless of how this file is edited or rendered.
			[
				html_entity_decode( 'Pigeons (lat.&nbsp;Columbidae) are birds. ' .
					'They primarily feed on seeds.' ),
				1,
				html_entity_decode( 'Pigeons (lat.&nbsp;Columbidae) are birds.' ),
			],
			// Bug T145231 - various problems with regexes
			[
				$longLine,
				3,
				trim( $longLine ),
			],
			[
				str_repeat( 'Sentence. ', 70000 ),
				65536,
				trim( str_repeat( 'Sentence. ', 65536 ) ),
			],

			'Preserve whitespace before end character' => [
				'Aa . Bb',
				1,
				'Aa .',
			],
		];
	}

	/**
	 * Checks that getFirstChars() returns exactly the expected prefix.
	 *
	 * @dataProvider provideGetFirstChars
	 * @param string $text Text (possibly containing markup) to truncate
	 * @param int $chars Number of characters requested
	 * @param string $expected Expected truncated text
	 */
	public function testGetFirstChars( $text, $chars, $expected ) {
		$truncator = new TextTruncator();
		$this->assertSame( $expected, $truncator->getFirstChars( $text, $chars ) );
	}

	/**
	 * Data sets for testGetFirstChars().
	 *
	 * @return array[] Each case is [ input text, characters requested, expected result ]
	 */
	public function provideGetFirstChars() {
		$text = 'Lullzy lulz are lullzy!';
		$html = 'foo<tag>bar</tag>';
		// Multibyte input: 50000 repetitions of a 4-letter Cyrillic word plus a space.
		$longText = str_repeat( 'тест ', 50000 );
		// 13107 repetitions cover 65535 characters, so character 65536 is the first
		// letter of the 13108th word; the expectation includes that whole word.
		$longTextExpected = trim( str_repeat( 'тест ', 13108 ) );

		return [
			[ $text, -8, '' ],
			[ $text, 0, '' ],
			[ $text, 100, $text ],
			[ $text, 1, 'Lullzy' ],
			[ $text, 6, 'Lullzy' ],
			// NOTE(review): the following case is disabled; the reason is not visible in this file.
			// [ $text, 7, 'Lullzy' ],
			[ $text, 8, 'Lullzy lulz' ],
			// HTML processing
			[ $html, 1, 'foo' ],
			// let HTML sanitizer clean it up later
			[ $html, 4, 'foo<tag>' ],
			[ $html, 12, 'foo<tag>bar</tag>' ],
			[ $html, 13, 'foo<tag>bar</tag>' ],
			[ $html, 16, 'foo<tag>bar</tag>' ],
			[ $html, 17, 'foo<tag>bar</tag>' ],
			// T143178 - previously, characters were extracted using regexps which failed when
			// requesting 64K chars or more.
			[ $longText, 65536, $longTextExpected ],
		];
	}

	/**
	 * Checks that, when a Tidy driver is injected, the truncated text is passed
	 * through its tidy() method and the tidied value is what gets returned.
	 */
	public function testTidyIntegration() {
		$tidy = $this->createMock( TidyDriverBase::class );
		// Wrap whatever the truncator hands over so the assertions can see both
		// that tidy() was invoked and what it was invoked with.
		$tidy->method( 'tidy' )
			->willReturnCallback( static function ( $text ) {
				return "<tidy>$text</tidy>";
			} );
		$truncator = new TextTruncator( $tidy );

		$text = 'Aa. Bb.';
		$this->assertSame( '<tidy>Aa.</tidy>', $truncator->getFirstSentences( $text, 1 ) );
		$this->assertSame( '<tidy>Aa</tidy>', $truncator->getFirstChars( $text, 1 ) );
	}

}
MediaWiki\Tidy\TidyDriverBase
Base class for HTML cleanup utilities.
Definition
TidyDriverBase.php:8
TextExtracts\Test\TextTruncatorTest
@covers \TextExtracts\TextTruncator @group TextExtracts
Definition
TextTruncatorTest.php:14
TextExtracts\Test\TextTruncatorTest\testGetFirstSentences
testGetFirstSentences( $text, $sentences, $expected)
@dataProvider provideGetFirstSentences
Definition
TextTruncatorTest.php:23
TextExtracts\Test\TextTruncatorTest\testGetFirstChars
testGetFirstChars( $text, $chars, $expected)
@dataProvider provideGetFirstChars
Definition
TextTruncatorTest.php:140
TextExtracts\Test\TextTruncatorTest\provideGetFirstSentences
provideGetFirstSentences()
Definition
TextTruncatorTest.php:28
TextExtracts\Test\TextTruncatorTest\testTidyIntegration
testTidyIntegration()
Definition
TextTruncatorTest.php:173
TextExtracts\Test\TextTruncatorTest\provideGetFirstChars
provideGetFirstChars()
Definition
TextTruncatorTest.php:145
TextExtracts\TextTruncator
This class needs to understand HTML as well as plain text.
Definition
TextTruncator.php:14
$chars
if(PHP_SAPI !=='cli' &&PHP_SAPI !=='phpdbg' $chars)
Definition
make-tables.php:8
TextExtracts\Test
Definition
ApiQueryExtractsTest.php:3
extensions
TextExtracts
tests
phpunit
TextTruncatorTest.php
Generated on Mon Nov 25 2024 16:04:45 for MediaWiki by
1.10.0