Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
100.00% |
27 / 27 |
|
100.00% |
3 / 3 |
CRAP | |
100.00% |
1 / 1 |
| CategoryExtractor | |
100.00% |
27 / 27 |
|
100.00% |
3 / 3 |
4 | |
100.00% |
1 / 1 |
| __construct | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
| getCategoriesGrouped | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
1 | |||
| queryHiddenCategories | |
100.00% |
16 / 16 |
|
100.00% |
1 / 1 |
2 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace FileImporter\Services; |
| 4 | |
| 5 | use MediaWiki\Cache\LinkBatchFactory; |
| 6 | use MediaWiki\Parser\ParserFactory; |
| 7 | use MediaWiki\Parser\ParserOptions; |
| 8 | use MediaWiki\Title\Title; |
| 9 | use MediaWiki\User\UserIdentity; |
| 10 | use Wikimedia\Rdbms\IConnectionProvider; |
| 11 | |
| 12 | /** |
| 13 | * @license GPL-2.0-or-later |
| 14 | */ |
class CategoryExtractor {

	/** Factory used to obtain the parser that scans the page text. */
	private ParserFactory $parserFactory;

	/** Provider of the replica database connection used for the hidden-category lookup. */
	private IConnectionProvider $connectionProvider;

	/** Factory for the link batch that builds the category title condition. */
	private LinkBatchFactory $linkBatchFactory;

	/**
	 * @param ParserFactory $parserFactory Source of the parser instance
	 * @param IConnectionProvider $connectionProvider Source of the replica database connection
	 * @param LinkBatchFactory $linkBatchFactory Source of link batches for title conditions
	 */
	public function __construct(
		ParserFactory $parserFactory,
		IConnectionProvider $connectionProvider,
		LinkBatchFactory $linkBatchFactory
	) {
		$this->parserFactory = $parserFactory;
		$this->connectionProvider = $connectionProvider;
		$this->linkBatchFactory = $linkBatchFactory;
	}

	/**
	 * Find categories for a given page.
	 *
	 * The text is parsed, the category names are read from the parser output, and the
	 * names are then split according to whether the local wiki marks them as hidden.
	 *
	 * @param string $text Body of the page to scan.
	 * @param Title $title Page title for context, because parsing might depend on this
	 * @param UserIdentity $user User for context, because parsing might depend on this
	 *
	 * @return array[] Two lists of category names, grouped by local visibility:
	 *  [ $visibleCategories, $hiddenCategories ]. The visible list is re-indexed from 0.
	 */
	public function getCategoriesGrouped( string $text, Title $title, UserIdentity $user ): array {
		$parserOutput = $this->parserFactory->getInstance()->parse(
			$text,
			$title,
			new ParserOptions( $user )
		);
		$allCategories = $parserOutput->getCategoryNames();

		$hiddenCategories = $this->queryHiddenCategories( $allCategories );

		// array_diff() preserves the keys of its first argument, so removing hidden
		// categories would leave gaps in the integer keys. Re-index so that the result
		// really is a list, as documented.
		$visibleCategories = array_values( array_diff( $allCategories, $hiddenCategories ) );

		return [ $visibleCategories, $hiddenCategories ];
	}

	/**
	 * Query categories to find which are hidden.
	 *
	 * @param string[] $categories List of all category names.
	 *
	 * @return string[] List of hidden categories.
	 */
	private function queryHiddenCategories( array $categories ): array {
		// Nothing to look up; this also avoids building a title condition from an empty set.
		if ( $categories === [] ) {
			return [];
		}

		// The link batch takes a [ namespace => [ title => ... ] ] map, hence the flip
		// that turns the category names into array keys.
		$linkBatch = $this->linkBatchFactory->newLinkBatch();
		$linkBatch->setArray( [ NS_CATEGORY => array_flip( $categories ) ] );

		// Fetch categories having the `hiddencat` property.
		$dbr = $this->connectionProvider->getReplicaDatabase();
		return $dbr->newSelectQueryBuilder()
			->select( 'page_title' )
			->from( 'page' )
			->join( 'page_props', null, [
				'pp_propname' => 'hiddencat',
				'pp_page = page_id'
			] )
			->where( $linkBatch->constructSet( 'page', $dbr ) )
			->caller( __METHOD__ )
			->fetchFieldValues();
	}

}