Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
91.30% |
42 / 46 |
|
60.00% |
3 / 5 |
CRAP | |
0.00% |
0 / 1 |
ItemNotabilityFilter | |
91.30% |
42 / 46 |
|
60.00% |
3 / 5 |
17.19 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
getNotableEntityIds | |
86.67% |
13 / 15 |
|
0.00% |
0 / 1 |
8.15 | |||
getPagePropsByItem | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
3 | |||
selectPagePropsPage | |
88.24% |
15 / 17 |
|
0.00% |
0 / 1 |
3.01 | |||
getItemsWithoutArticle | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 |
1 | <?php |
2 | |
3 | namespace ArticlePlaceholder; |
4 | |
5 | use Wikibase\DataModel\Entity\ItemId; |
6 | use Wikibase\Lib\Store\EntityNamespaceLookup; |
7 | use Wikibase\Lib\Store\SiteLinkLookup; |
8 | use Wikimedia\Rdbms\FakeResultWrapper; |
9 | use Wikimedia\Rdbms\IReadableDatabase; |
10 | use Wikimedia\Rdbms\IResultWrapper; |
11 | use Wikimedia\Rdbms\SessionConsistentConnectionManager; |
12 | |
/**
 * Filter a list of items by article placeholder notability.
 *
 * An item is returned by getNotableEntityIds() when
 *  - its "wb-claims" page prop is at least MIN_STATEMENTS,
 *  - its "wb-sitelinks" page prop is at least MIN_SITELINKS, and
 *  - the site link lookup reports no link for it on the configured site.
 *
 * @license GPL-2.0-or-later
 * @author Lucie-Aimée Kaffee
 * @author Marius Hoch
 */
class ItemNotabilityFilter {

	/**
	 * Minimum number of statements for an item to be notable
	 */
	private const MIN_STATEMENTS = 3;

	/**
	 * Minimum number of sitelinks for an item to be notable
	 */
	private const MIN_SITELINKS = 2;

	/**
	 * @var SessionConsistentConnectionManager Source of the read connection used for the page props query
	 */
	private $connectionManager;

	/**
	 * @var EntityNamespaceLookup Used to resolve the namespace of "item" entity pages
	 */
	private $entityNamespaceLookup;

	/**
	 * @var SiteLinkLookup Used to find items that already have a link on the configured site
	 */
	private $siteLinkLookup;

	/**
	 * @var string Site ID passed to the site link lookup
	 */
	private $siteGlobalId;

	/**
	 * @param SessionConsistentConnectionManager $connectionManager
	 * @param EntityNamespaceLookup $entityNamespaceLookup
	 * @param SiteLinkLookup $siteLinkLookup
	 * @param string $siteGlobalId
	 */
	public function __construct(
		SessionConsistentConnectionManager $connectionManager,
		EntityNamespaceLookup $entityNamespaceLookup,
		SiteLinkLookup $siteLinkLookup,
		$siteGlobalId
	) {
		$this->connectionManager = $connectionManager;
		$this->entityNamespaceLookup = $entityNamespaceLookup;
		$this->siteLinkLookup = $siteLinkLookup;
		$this->siteGlobalId = $siteGlobalId;
	}

	/**
	 * Reduce the given items to those that are notable and have no link on the configured site.
	 *
	 * Items without page props rows, or whose "wb-claims" / "wb-sitelinks" values are
	 * below the class thresholds, are dropped. The remaining items are then checked
	 * against the site link lookup.
	 *
	 * @param ItemId[] $itemIds
	 *
	 * @return ItemId[] Re-indexed list; empty when $itemIds is empty or nothing qualifies
	 */
	public function getNotableEntityIds( array $itemIds ) {
		if ( $itemIds === [] ) {
			return [];
		}

		$pagePropsByItem = $this->getPagePropsByItem( $itemIds );

		$byNumericId = [];
		foreach ( $itemIds as $itemId ) {
			// Items without any page props row fall back to an empty map and fail both checks.
			$pageProps = $pagePropsByItem[$itemId->getSerialization()] ?? [];

			// A missing prop counts as 0, which is below either threshold.
			if (
				( $pageProps['wb-claims'] ?? 0 ) >= self::MIN_STATEMENTS &&
				( $pageProps['wb-sitelinks'] ?? 0 ) >= self::MIN_SITELINKS
			) {
				// Keyed by numeric ID, as getItemsWithoutArticle() expects.
				$byNumericId[$itemId->getNumericId()] = $itemId;
			}
		}

		return $this->getItemsWithoutArticle( $byNumericId );
	}

	/**
	 * Load the "wb-claims" and "wb-sitelinks" page props of the given items.
	 *
	 * @param ItemId[] $itemIds
	 *
	 * @return int[][] Map of page_title => propname => numeric value
	 */
	private function getPagePropsByItem( array $itemIds ): array {
		$dbr = $this->connectionManager->getReadConnection();

		$values = [];
		foreach ( $this->selectPagePropsPage( $dbr, $itemIds ) as $row ) {
			// An empty or NULL pp_value is stored as 0.
			$values[$row->page_title][$row->pp_propname] = (int)$row->pp_value;
		}

		return $values;
	}

	/**
	 * Select the page props rows ("wb-sitelinks", "wb-claims") of the given items' pages.
	 *
	 * If no integer namespace is known for "item" entities, a warning is logged
	 * and an empty result is returned instead of querying the database.
	 *
	 * @param IReadableDatabase $dbr
	 * @param ItemId[] $itemIds
	 *
	 * @return IResultWrapper Rows with page_title, pp_propname and pp_value
	 */
	private function selectPagePropsPage( IReadableDatabase $dbr, array $itemIds ): IResultWrapper {
		$entityNamespace = $this->entityNamespaceLookup->getEntityNamespace( 'item' );

		if ( !is_int( $entityNamespace ) ) {
			wfLogWarning( 'The ArticlePlaceholder extension requires an "item" namespace' );
			return new FakeResultWrapper( [] );
		}

		// The query matches pages by title, using the serialized item IDs.
		$itemIdSerializations = [];
		foreach ( $itemIds as $itemId ) {
			$itemIdSerializations[] = $itemId->getSerialization();
		}

		return $dbr->newSelectQueryBuilder()
			->select( [ 'page_title', 'pp_propname', 'pp_value' ] )
			->from( 'page' )
			->join( 'page_props', null, 'page_id=pp_page' )
			->where( [
				'page_namespace' => $entityNamespace,
				'page_title' => $itemIdSerializations,
				'pp_propname' => [ 'wb-sitelinks', 'wb-claims' ]
			] )
			->caller( __METHOD__ )->fetchResultSet();
	}

	/**
	 * Drop every item for which the site link lookup returns a link on the configured site.
	 *
	 * @param ItemId[] $itemIds expected to be indexed by numeric item ID
	 *
	 * @return ItemId[] Remaining items, re-indexed from 0
	 */
	private function getItemsWithoutArticle( array $itemIds ): array {
		if ( $itemIds === [] ) {
			// Nothing to check: skip the lookup instead of handing it an empty ID filter.
			return [];
		}

		$links = $this->siteLinkLookup->getLinks( array_keys( $itemIds ), [ $this->siteGlobalId ] );

		// Only the third element of each returned entry is used; it is treated as the numeric item ID.
		foreach ( $links as [ , , $numericId ] ) {
			unset( $itemIds[$numericId] );
		}

		return array_values( $itemIds );
	}

}
170 | } |