Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
100.00% |
19 / 19 |
|
100.00% |
3 / 3 |
CRAP | |
100.00% |
1 / 1 |
| TextExtractor | |
100.00% |
19 / 19 |
|
100.00% |
3 / 3 |
8 | |
100.00% |
1 / 1 |
| __construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| revisionToString | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
4 | |||
| contentToString | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
3 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace MediaWiki\Extension\AbuseFilter; |
| 4 | |
| 5 | use MediaWiki\Content\Content; |
| 6 | use MediaWiki\Content\TextContent; |
| 7 | use MediaWiki\Extension\AbuseFilter\Hooks\AbuseFilterHookRunner; |
| 8 | use MediaWiki\Permissions\Authority; |
| 9 | use MediaWiki\Revision\RevisionRecord; |
| 10 | |
/**
 * Service that turns RevisionRecord and Content objects into plain text which abuse filters
 * can be run against.
 *
 * @internal No external code should rely on this representation
 */
class TextExtractor {
	public const SERVICE_NAME = 'AbuseFilterTextExtractor';

	/**
	 * @param AbuseFilterHookRunner $hookRunner Hook runner consulted by contentToString()
	 */
	public function __construct( private readonly AbuseFilterHookRunner $hookRunner ) {
	}

	/**
	 * Build some text for the given revision.
	 *
	 * This is really just *some* text: there is no guarantee whatsoever that it
	 * resembles what the user actually sees, nor that its form is fit for any
	 * particular purpose.
	 *
	 * If no revision is given (null), an empty string is returned.
	 *
	 * Currently every slot of the revision is converted with contentToString()
	 * and the results are joined with a blank line in between. Slots for which
	 * RevisionRecord::getContent() returns null are skipped. This is meant to be
	 * replaced by a better solution in the future, see T208769 for discussion.
	 *
	 * @param RevisionRecord|null $revision The revision to extract text from, or null
	 * @param Authority $performer Passed to RevisionRecord::getContent() together with
	 *   RevisionRecord::FOR_THIS_USER when reading each slot
	 * @return string The text of all readable slots, or an empty string if there
	 *   is no revision
	 * @return-taint none
	 */
	public function revisionToString( ?RevisionRecord $revision, Authority $performer ): string {
		if ( $revision === null ) {
			return '';
		}

		// Keyed by slot role, in the order getSlotRoles() reports them.
		$textByRole = [];

		foreach ( $revision->getSlotRoles() as $slotRole ) {
			$slotContent = $revision->getContent(
				$slotRole,
				RevisionRecord::FOR_THIS_USER,
				$performer
			);
			if ( $slotContent !== null ) {
				$textByRole[$slotRole] = $this->contentToString( $slotContent );
			}
		}

		return implode( "\n\n", $textByRole );
	}

	/**
	 * Produce a string representation of a Content object.
	 *
	 * By default the text comes from TextContent::getText() when $content is a
	 * TextContent, and from Content::getTextForSearchIndex() for everything else.
	 *
	 * Handlers of the hook run through
	 * AbuseFilterHookRunner::onAbuseFilter_contentToString() may supply their own
	 * text instead: when the hook run returns a falsy value, the default
	 * extraction is skipped and whatever the hook left in $text is used.
	 *
	 * @param Content $content
	 *
	 * @return string A suitable string representation of the content, with line
	 *   endings normalized.
	 */
	public function contentToString( Content $content ): string {
		$text = null;

		$useDefaultExtraction = $this->hookRunner->onAbuseFilter_contentToString( $content, $text );

		if ( $useDefaultExtraction ) {
			if ( $content instanceof TextContent ) {
				$text = $content->getText();
			} else {
				$text = $content->getTextForSearchIndex();
			}
		}

		// Line endings are normalized because of T22310; the cast covers $text still being null.
		return TextContent::normalizeLineEndings( (string)$text );
	}
}