Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
100.00% |
20 / 20 |
|
100.00% |
3 / 3 |
CRAP | |
100.00% |
1 / 1 |
TextExtractor | |
100.00% |
20 / 20 |
|
100.00% |
3 / 3 |
8 | |
100.00% |
1 / 1 |
__construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
revisionToString | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
4 | |||
contentToString | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
3 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Extension\AbuseFilter; |
4 | |
5 | use Content; |
6 | use MediaWiki\Extension\AbuseFilter\Hooks\AbuseFilterHookRunner; |
7 | use MediaWiki\Permissions\Authority; |
8 | use MediaWiki\Revision\RevisionRecord; |
9 | use TextContent; |
10 | |
11 | /** |
12 | * This service provides an interface to convert RevisionRecord and Content objects to some text |
13 | * suitable for running abuse filters. |
14 | * |
15 | * @internal No external code should rely on this representation |
16 | */ |
class TextExtractor {
	public const SERVICE_NAME = 'AbuseFilterTextExtractor';

	/** @var AbuseFilterHookRunner Runner used to fire the content-to-string hook */
	private $hookRunner;

	/**
	 * @param AbuseFilterHookRunner $hookRunner
	 */
	public function __construct( AbuseFilterHookRunner $hookRunner ) {
		$this->hookRunner = $hookRunner;
	}

	/**
	 * Build a textual representation of a revision.
	 *
	 * The result is only *some* text: there is no guarantee that it resembles
	 * what a user actually sees, nor that it is fit for any particular purpose.
	 *
	 * Currently every slot role of the revision is converted with
	 * contentToString() and the pieces are joined with a blank line in between.
	 * Slots for which getContent() returns null are left out. A better solution
	 * is expected to replace this in the future; see T208769 for discussion.
	 *
	 * @param RevisionRecord|null $revision The revision to convert; null yields an empty string
	 * @param Authority $performer Passed to getContent() together with
	 *   RevisionRecord::FOR_THIS_USER, to check for privileged access
	 * @return string The revision's content as some kind of string, or an empty
	 *   string when no revision was given
	 * @return-taint none
	 */
	public function revisionToString( ?RevisionRecord $revision, Authority $performer ): string {
		if ( $revision === null ) {
			return '';
		}

		$textByRole = [];

		foreach ( $revision->getSlotRoles() as $slotRole ) {
			$slotContent = $revision->getContent(
				$slotRole,
				RevisionRecord::FOR_THIS_USER,
				$performer
			);
			if ( $slotContent !== null ) {
				$textByRole[$slotRole] = $this->contentToString( $slotContent );
			}
		}

		return implode( "\n\n", $textByRole );
	}

	/**
	 * Turn a Content object into a string.
	 *
	 * The AbuseFilterContentToString hook is run first and may override the
	 * conversion. When the hook run returns true, the default is applied:
	 * TextContent::getText() for TextContent instances, and
	 * Content::getTextForSearchIndex() for everything else. Otherwise the value
	 * of $text after the hook run is used.
	 *
	 * In both cases the result is cast to string and its line endings are
	 * normalized.
	 *
	 * @param Content $content
	 *
	 * @return string A suitable string representation of the content.
	 */
	public function contentToString( Content $content ): string {
		$text = null;

		$useDefault = $this->hookRunner->onAbuseFilter_contentToString(
			$content,
			$text
		);

		if ( $useDefault ) {
			if ( $content instanceof TextContent ) {
				$text = $content->getText();
			} else {
				$text = $content->getTextForSearchIndex();
			}
		}

		// T22310: normalize line endings of whatever text was produced
		return TextContent::normalizeLineEndings( (string)$text );
	}
}