Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
100.00% covered (success)
100.00%
27 / 27
100.00% covered (success)
100.00%
3 / 3
CRAP
100.00% covered (success)
100.00%
1 / 1
CategoryExtractor
100.00% covered (success)
100.00%
27 / 27
100.00% covered (success)
100.00%
3 / 3
4
100.00% covered (success)
100.00%
1 / 1
 __construct
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 getCategoriesGrouped
100.00% covered (success)
100.00%
8 / 8
100.00% covered (success)
100.00%
1 / 1
1
 queryHiddenCategories
100.00% covered (success)
100.00%
16 / 16
100.00% covered (success)
100.00%
1 / 1
2
1<?php
2
3namespace FileImporter\Services;
4
5use MediaWiki\Cache\LinkBatchFactory;
6use MediaWiki\Parser\ParserFactory;
7use MediaWiki\Parser\ParserOptions;
8use MediaWiki\Title\Title;
9use MediaWiki\User\UserIdentity;
10use Wikimedia\Rdbms\IConnectionProvider;
11
/**
 * Determines which categories a page text assigns and splits them into
 * visible and hidden ones, based on the `hiddencat` page property stored in
 * the local database.
 *
 * @license GPL-2.0-or-later
 */
class CategoryExtractor {

    /** Provides the parser used to find the categories in the page text. */
    private ParserFactory $parserFactory;
    /** Provides the replica connection used to look up hidden categories. */
    private IConnectionProvider $connectionProvider;
    /** Creates the link batch that restricts the lookup to the found categories. */
    private LinkBatchFactory $linkBatchFactory;

    /**
     * @param ParserFactory $parserFactory
     * @param IConnectionProvider $connectionProvider
     * @param LinkBatchFactory $linkBatchFactory
     */
    public function __construct(
        ParserFactory $parserFactory,
        IConnectionProvider $connectionProvider,
        LinkBatchFactory $linkBatchFactory
    ) {
        $this->parserFactory = $parserFactory;
        $this->connectionProvider = $connectionProvider;
        $this->linkBatchFactory = $linkBatchFactory;
    }

    /**
     * Find categories for a given page.
     *
     * @param string $text Body of the page to scan.
     * @param Title $title Page title for context, because parsing might depend on this
     * @param UserIdentity $user User for context, because parsing might depend on this
     *
     * @return string[][] Two lists of category names, grouped by local visibility.
     *         [ $visibleCategories, $hiddenCategories ]
     *         Both lists are indexed sequentially from 0.
     */
    public function getCategoriesGrouped( string $text, Title $title, UserIdentity $user ): array {
        $parserOutput = $this->parserFactory->getInstance()->parse(
            $text,
            $title,
            new ParserOptions( $user )
        );
        $allCategories = $parserOutput->getCategoryNames();

        $hiddenCategories = $this->queryHiddenCategories( $allCategories );

        // array_diff() keeps the keys of its first argument, so removing a hidden
        // category from the middle would leave a gap in the keys. Re-index to hand
        // out a real list, as promised by the documented return value.
        $visibleCategories = array_values( array_diff( $allCategories, $hiddenCategories ) );

        return [ $visibleCategories, $hiddenCategories ];
    }

    /**
     * Query categories to find which are hidden.
     *
     * @param string[] $categories List of all category names.
     *
     * @return string[] List of hidden categories.
     */
    private function queryHiddenCategories( array $categories ): array {
        // Nothing to look up, and no reason to hit the database with an empty set.
        if ( $categories === [] ) {
            return [];
        }

        // The link batch expects [ namespace => [ title => (anything) ] ], hence the flip.
        $linkBatch = $this->linkBatchFactory->newLinkBatch();
        $linkBatch->setArray( [ NS_CATEGORY => array_flip( $categories ) ] );

        // Fetch categories having the `hiddencat` property.
        $dbr = $this->connectionProvider->getReplicaDatabase();
        return $dbr->newSelectQueryBuilder()
            ->select( 'page_title' )
            ->from( 'page' )
            ->join( 'page_props', null, [
                'pp_propname' => 'hiddencat',
                'pp_page = page_id'
            ] )
            // Only consider the category pages collected in the batch above.
            ->where( $linkBatch->constructSet( 'page', $dbr ) )
            ->caller( __METHOD__ )
            ->fetchFieldValues();
    }

}