Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
100.00% |
25 / 25 |
|
100.00% |
3 / 3 |
CRAP | |
100.00% |
1 / 1 |
| CategoryExtractor | |
100.00% |
25 / 25 |
|
100.00% |
3 / 3 |
4 | |
100.00% |
1 / 1 |
| __construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| getCategoriesGrouped | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
1 | |||
| queryHiddenCategories | |
100.00% |
16 / 16 |
|
100.00% |
1 / 1 |
2 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace FileImporter\Services; |
| 4 | |
| 5 | use MediaWiki\Page\LinkBatchFactory; |
| 6 | use MediaWiki\Parser\ParserFactory; |
| 7 | use MediaWiki\Parser\ParserOptions; |
| 8 | use MediaWiki\Title\Title; |
| 9 | use MediaWiki\User\UserIdentity; |
| 10 | use Wikimedia\Rdbms\IConnectionProvider; |
| 11 | |
| 12 | /** Extracts the categories used by a piece of page text and groups them into visible and hidden ones.
| 13 | * @license GPL-2.0-or-later
| 14 | */
| 15 | class CategoryExtractor {
| 16 | 
| 17 | public function __construct(
| 18 | private readonly ParserFactory $parserFactory,
| 19 | private readonly IConnectionProvider $connectionProvider,
| 20 | private readonly LinkBatchFactory $linkBatchFactory,
| 21 | ) {
| 22 | }
| 23 | 
| 24 | /**
| 25 | * Find the categories for a given page and split them into visible and hidden ones.
| 26 | * The text is parsed with a parser obtained from the injected ParserFactory; the category names of the parse result are then checked against the local `hiddencat` page property.
| 27 | * @param string $text Body of the page to scan.
| 28 | * @param Title $title Page title for context, because parsing might depend on this
| 29 | * @param UserIdentity $user User for context, because parsing might depend on this (used to build the ParserOptions)
| 30 | *
| 31 | * @return array Two lists of category names, grouped by local visibility. The visible list is computed with array_diff() and therefore keeps the keys of the parsed category list.
| 32 | * [ $visibleCategories, $hiddenCategories ]
| 33 | */
| 34 | public function getCategoriesGrouped( string $text, Title $title, UserIdentity $user ): array {
| 35 | $allCategories = $this->parserFactory->getInstance()->parse(
| 36 | $text,
| 37 | $title,
| 38 | new ParserOptions( $user )
| 39 | )->getCategoryNames();
| 40 | 
| 41 | $hiddenCategories = $this->queryHiddenCategories( $allCategories );
| 42 | $visibleCategories = array_diff( $allCategories, $hiddenCategories ); // Everything that is not hidden counts as visible; array_diff() preserves the original keys.
| 43 | 
| 44 | return [ $visibleCategories, $hiddenCategories ];
| 45 | }
| 46 | 
| 47 | /**
| 48 | * Query the database to find which of the given categories are hidden.
| 49 | * A category counts as hidden when its page has a `hiddencat` entry in the page_props table.
| 50 | * @param string[] $categories List of all category names. An empty list returns early without running a query.
| 51 | *
| 52 | * @return string[] List of hidden categories, as the `page_title` values returned by the query.
| 53 | */
| 54 | private function queryHiddenCategories( array $categories ): array {
| 55 | if ( $categories === [] ) { // Nothing to look up, so skip the database query entirely.
| 56 | return [];
| 57 | }
| 58 | 
| 59 | $arr = [ NS_CATEGORY => array_flip( $categories ) ]; // Category names become array keys, nested under the category namespace.
| 60 | $lb = $this->linkBatchFactory->newLinkBatch();
| 61 | $lb->setArray( $arr );
| 62 | 
| 63 | # Fetch the titles of the batched category pages that have the `hiddencat` property.
| 64 | $dbr = $this->connectionProvider->getReplicaDatabase();
| 65 | return $dbr->newSelectQueryBuilder()
| 66 | ->select( 'page_title' )
| 67 | ->from( 'page' )
| 68 | ->join( 'page_props', null, [
| 69 | 'pp_propname' => 'hiddencat',
| 70 | 'pp_page = page_id'
| 71 | ] )
| 72 | ->where( $lb->constructSet( 'page', $dbr ) ) // Condition built by the link batch for the `page` table; presumably restricts rows to the batched titles.
| 73 | ->caller( __METHOD__ )
| 74 | ->fetchFieldValues();
| 75 | }
| 76 | 
| 77 | }