Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
100.00% covered (success)
100.00%
25 / 25
100.00% covered (success)
100.00%
3 / 3
CRAP
100.00% covered (success)
100.00%
1 / 1
CategoryExtractor
100.00% covered (success)
100.00%
25 / 25
100.00% covered (success)
100.00%
3 / 3
4
100.00% covered (success)
100.00%
1 / 1
 __construct
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getCategoriesGrouped
100.00% covered (success)
100.00%
8 / 8
100.00% covered (success)
100.00%
1 / 1
1
 queryHiddenCategories
100.00% covered (success)
100.00%
16 / 16
100.00% covered (success)
100.00%
1 / 1
2
1<?php
2
3namespace FileImporter\Services;
4
5use MediaWiki\Page\LinkBatchFactory;
6use MediaWiki\Parser\ParserFactory;
7use MediaWiki\Parser\ParserOptions;
8use MediaWiki\Title\Title;
9use MediaWiki\User\UserIdentity;
10use Wikimedia\Rdbms\IConnectionProvider;
11
/**
 * Extracts the categories that a piece of page text resolves to when parsed, and splits
 * them into visible categories and categories carrying the `hiddencat` page property.
 *
 * @license GPL-2.0-or-later
 */
class CategoryExtractor {

    public function __construct(
        private readonly ParserFactory $parserFactory,
        private readonly IConnectionProvider $connectionProvider,
        private readonly LinkBatchFactory $linkBatchFactory,
    ) {
    }

    /**
     * Find categories for a given page.
     *
     * The text is run through a parser instance and the category names reported by the
     * resulting parser output are then partitioned via a database lookup.
     *
     * @param string $text Body of the page to scan.
     * @param Title $title Page title for context, because parsing might depend on this
     * @param UserIdentity $user User for context, because parsing might depend on this
     *
     * @return string[][] Two lists of category names, grouped by local visibility.
     *         [ $visibleCategories, $hiddenCategories ]
     *         Both are sequentially indexed lists.
     */
    public function getCategoriesGrouped( string $text, Title $title, UserIdentity $user ): array {
        $allCategories = $this->parserFactory->getInstance()->parse(
            $text,
            $title,
            new ParserOptions( $user )
        )->getCategoryNames();

        $hiddenCategories = $this->queryHiddenCategories( $allCategories );

        // array_diff() keeps the keys of its first argument, so removing hidden categories
        // would leave gaps in the integer keys. Reindex so callers really get a list.
        $visibleCategories = array_values( array_diff( $allCategories, $hiddenCategories ) );

        return [ $visibleCategories, $hiddenCategories ];
    }

    /**
     * Query categories to find which are hidden.
     *
     * Looks up, on a replica database, which of the given titles in the category
     * namespace have a `hiddencat` entry in `page_props`.
     *
     * @param string[] $categories List of all category names.
     *
     * @return string[] List of hidden categories (the matching `page_title` values).
     */
    private function queryHiddenCategories( array $categories ): array {
        // Nothing to look up; this also avoids building a query condition from an empty batch.
        if ( $categories === [] ) {
            return [];
        }

        // The link batch takes a [ namespace => [ title => value ] ] map, so the names
        // become keys; the flipped values are not used any further here.
        $linkBatch = $this->linkBatchFactory->newLinkBatch();
        $linkBatch->setArray( [ NS_CATEGORY => array_flip( $categories ) ] );

        // Fetch categories having the `hiddencat` property.
        $dbr = $this->connectionProvider->getReplicaDatabase();
        return $dbr->newSelectQueryBuilder()
            ->select( 'page_title' )
            ->from( 'page' )
            ->join( 'page_props', null, [
                'pp_propname' => 'hiddencat',
                'pp_page = page_id'
            ] )
            ->where( $linkBatch->constructSet( 'page', $dbr ) )
            ->caller( __METHOD__ )
            ->fetchFieldValues();
    }

}