Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
5.26% covered (danger)
5.26%
2 / 38
16.67% covered (danger)
16.67%
1 / 6
CRAP
0.00% covered (danger)
0.00%
0 / 1
StoryContentAnalyzer
5.26% covered (danger)
5.26%
2 / 38
16.67% covered (danger)
16.67%
1 / 6
206.31
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 hasOutdatedText
0.00% covered (danger)
0.00%
0 / 14
0.00% covered (danger)
0.00%
0 / 1
30
 isOutdatedText
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
2
 inText
0.00% covered (danger)
0.00%
0 / 5
0.00% covered (danger)
0.00%
0 / 1
12
 transformText
0.00% covered (danger)
0.00%
0 / 4
0.00% covered (danger)
0.00%
0 / 1
2
 getArticleText
0.00% covered (danger)
0.00%
0 / 12
0.00% covered (danger)
0.00%
0 / 1
12
1<?php
2
3namespace MediaWiki\Extension\Wikistories;
4
5use MediaWiki\Page\WikiPageFactory;
6use MediaWiki\Title\Title;
7
/**
 * Compares the text stored in a story's frames with the current text of the
 * article the story was created from, to detect frames whose text is no longer
 * present in that article.
 */
class StoryContentAnalyzer {

    /**
     * This sentence separator works for many but not all languages.
     * todo: re-visit when deploying to a new wiki
     */
    private const SENTENCE_SEPARATOR_REGEX = '/[.:;]/';

    /**
     * @var array<string,string> Instance cache of transformed article text,
     * indexed by the prefixed (namespace-qualified) DB key of the article title
     */
    private array $cache = [];

    public function __construct(
        private readonly WikiPageFactory $wikiPageFactory,
    ) {
    }

    /**
     * Check whether any frame of the story carries text that can no longer be
     * found in the related article.
     *
     * @param StoryContent $story
     * @return bool True as soon as one frame is outdated. False when the story
     * has no article title, when the article text cannot be obtained, or when
     * no frame is outdated.
     */
    public function hasOutdatedText( StoryContent $story ): bool {
        $articleTitle = $story->getArticleTitle();
        if ( $articleTitle === null ) {
            return false;
        }

        $articleText = $this->getArticleText( $articleTitle );
        if ( $articleText === null ) {
            return false;
        }

        foreach ( $story->getFrames() as $frame ) {
            if ( $this->isOutdatedText(
                $articleText,
                $frame->text->value,
                $frame->text->fromArticle->originalText
            ) ) {
                return true;
            }
        }
        return false;
    }

    /**
     * A frame text is outdated only when neither its current text nor its
     * original text is fully present in the article text.
     *
     * @param string $articleText Article text to search in
     * @param string $currentText Text currently shown in the frame
     * @param string $originalText Text originally taken from the article
     * @return bool
     */
    public function isOutdatedText( string $articleText, string $currentText, string $originalText ): bool {
        return !$this->inText( $currentText, $articleText )
            && !$this->inText( $originalText, $articleText );
    }

    /**
     * @param string $part Block of text containing one or more sentences
     * originally selected from the article text. May have been manually edited
     * by story editor.
     * @param string $text Article text
     * @return bool True if all the sentences in $part are present in the article text
     */
    private function inText( string $part, string $text ): bool {
        $sentences = preg_split( self::SENTENCE_SEPARATOR_REGEX, $part );
        foreach ( $sentences as $sentence ) {
            // Empty fragments (e.g. after a trailing separator) always match,
            // because str_contains() returns true for an empty needle.
            if ( !str_contains( $text, trim( $sentence ) ) ) {
                return false;
            }
        }
        return true;
    }

    /**
     * Remove unnecessary elements from the html text
     *
     * @param string $html
     * @return string Text without tags, with entities decoded, whitespace runs
     * collapsed to a single space and numeric reference markers removed
     */
    public function transformText( string $html ): string {
        // Remove HTML tags and convert entities
        $text = html_entity_decode( strip_tags( $html ) );

        // Convert multiple spaces to single space
        $text = preg_replace( '/\s+/', ' ', $text );

        // Remove references ([1])
        $text = preg_replace( '/\[\d+\]/', '', $text );
        return $text;
    }

    /**
     * Get the transformed text of an article, using the instance cache when
     * the same title was already requested.
     *
     * @param Title $articleTitle
     * @return string|null Transformed article text, or null when the page
     * yields no parser output
     */
    public function getArticleText( Title $articleTitle ): ?string {
        // The cache key must include the namespace: the bare DB key is the
        // same for e.g. "Foo" and "Talk:Foo", which would make both titles
        // share one cache entry and return the wrong page's text.
        $cacheKey = $articleTitle->getPrefixedDBkey();
        if ( isset( $this->cache[ $cacheKey ] ) ) {
            return $this->cache[ $cacheKey ];
        }

        $page = $this->wikiPageFactory->newFromTitle( $articleTitle );
        $parserOptions = $page->makeParserOptions( 'canonical' );
        $parserOutput = $page->getParserOutput( $parserOptions );

        if ( !$parserOutput ) {
            // Nothing to analyze; the miss is intentionally not cached.
            return null;
        }

        $html = $parserOutput->getRawText();
        $text = $this->transformText( $html );
        $this->cache[ $cacheKey ] = $text;

        return $text;
    }

}