Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
17.33% |
13 / 75 |
|
9.09% |
1 / 11 |
CRAP | |
0.00% |
0 / 1 |
WikiProjectFullLookup | |
17.33% |
13 / 75 |
|
9.09% |
1 / 11 |
378.08 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
getWikiProjects | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
6 | |||
hasWikiProjects | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
hasWikiProjectsAfter | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
12 | |||
isKnownEntity | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getDataForEntities | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
computeDataForEntities | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
12 | |||
queryWikidataAPI | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
queryWikidataAPIBatch | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
12 | |||
buildEntitySiteLink | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
invertDirection | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 |
1 | <?php |
2 | |
3 | declare( strict_types=1 ); |
4 | |
5 | namespace MediaWiki\Extension\WikimediaCampaignEvents\WikiProject; |
6 | |
7 | use InvalidArgumentException; |
8 | use JsonException; |
9 | use MediaWiki\Http\HttpRequestFactory; |
10 | use MediaWiki\WikiMap\WikiMap; |
11 | use Wikimedia\ObjectCache\WANObjectCache; |
12 | |
/**
 * This is the main lookup service for WikiProject data, intended to be used with an interface that supports pagination.
 * Given a number of results and a starting entity to enumerate from, it queries Wikidata to get additional information
 * about WikiProjects (such as labels and descriptions).
 *
 * The list of WikiProject entity IDs comes from WikiProjectIDLookup; this class only slices that list into pages and
 * enriches each page with data fetched from the Wikidata API (cached for one hour).
 */
class WikiProjectFullLookup {
	public const SERVICE_NAME = 'WikimediaCampaignEventsWikiProjectFullLookup';

	/** Scan from the given entity towards the end of the ID list. */
	public const DIR_FORWARDS = 1;
	/** Scan from the given entity towards the start of the ID list. */
	public const DIR_BACKWARDS = 2;

	/** Number of entity IDs sent in a single wbgetentities request. */
	private const API_BATCH_SIZE = 50;
	private const WIKIDATA_API_ENDPOINT = 'https://www.wikidata.org/w/api.php';

	private WikiProjectIDLookup $wikiProjectIDLookup;
	private WANObjectCache $cache;
	private HttpRequestFactory $httpRequestFactory;

	/**
	 * @param WikiProjectIDLookup $wikiProjectIDLookup Source of the full, ordered list of WikiProject entity IDs
	 * @param WANObjectCache $cache Cache used to store the per-page data fetched from Wikidata
	 * @param HttpRequestFactory $httpRequestFactory Used to issue requests to the Wikidata API
	 */
	public function __construct(
		WikiProjectIDLookup $wikiProjectIDLookup,
		WANObjectCache $cache,
		HttpRequestFactory $httpRequestFactory
	) {
		$this->wikiProjectIDLookup = $wikiProjectIDLookup;
		$this->cache = $cache;
		$this->httpRequestFactory = $httpRequestFactory;
	}

	/**
	 * Returns one page of WikiProjects, enriched with label, description and sitelink.
	 *
	 * @param string $languageCode
	 * @param int $limit
	 * @param string|null $lastEntity When paginating results, this is the ID of the last entity shown to the user
	 * (i.e., the last row when going forwards, and the first row when going backwards). If null, or if the ID is
	 * not in the list of known WikiProjects, the first page (forwards) or the last page (backwards) is returned.
	 * @param int $direction In which direction to scan, self::DIR_FORWARDS or self::DIR_BACKWARDS
	 * @return array<array|null> An array of arrays with information about the requested WikiProjects. Elements might be
	 * null if there is no sitelink for the current wiki (might happen when the sitelink is removed after the WDQS query
	 * was last run). QIDs are used as array keys, even for null elements.
	 * @phan-return array<string,array{label:string,description:string,sitelink:string}|null>
	 * @throws CannotQueryWDQSException
	 * @throws CannotQueryWikibaseException
	 */
	public function getWikiProjects(
		string $languageCode,
		int $limit,
		?string $lastEntity = null,
		int $direction = self::DIR_FORWARDS
	): array {
		$allIDs = $this->wikiProjectIDLookup->getWikiProjectIDs();
		$lastPos = $lastEntity !== null ? array_search( $lastEntity, $allIDs, true ) : false;

		if ( $lastPos === false ) {
			// No (known) anchor: start from the beginning going forwards, or take the tail going backwards.
			$offset = $direction === self::DIR_FORWARDS ? 0 : -$limit;
			$wantedIDs = array_slice( $allIDs, $offset, $limit );
		} elseif ( $direction === self::DIR_FORWARDS ) {
			$wantedIDs = array_slice( $allIDs, $lastPos + 1, $limit );
		} else {
			// Take up to $limit elements immediately preceding the anchor, clamping at the start of the list
			// so that a short first page does not wrap around.
			$start = max( 0, $lastPos - $limit );
			$wantedIDs = array_slice( $allIDs, $start, $lastPos - $start );
		}

		return $this->getDataForEntities( $wantedIDs, $languageCode );
	}

	/**
	 * @return bool Whether any WikiProjects exist on the current wiki.
	 * @throws CannotQueryWDQSException
	 */
	public function hasWikiProjects(): bool {
		return $this->wikiProjectIDLookup->getWikiProjectIDs() !== [];
	}

	/**
	 * @param string $lastID Entity ID to check. The caller must verify that this is a valid ID, or an exception
	 * will be thrown.
	 * @param int $direction self::DIR_FORWARDS or self::DIR_BACKWARDS
	 * @return bool Whether any WikiProjects exist after the specified offset in the given direction (for pagination).
	 * @throws InvalidArgumentException If $lastID is not in the list of known WikiProjects.
	 * @throws CannotQueryWDQSException
	 */
	public function hasWikiProjectsAfter( string $lastID, int $direction ): bool {
		$allIDs = $this->wikiProjectIDLookup->getWikiProjectIDs();
		$offsetKey = array_search( $lastID, $allIDs, true );
		if ( $offsetKey === false ) {
			throw new InvalidArgumentException( "Entity $lastID not found." );
		}
		if ( $direction === self::DIR_FORWARDS ) {
			return $offsetKey < array_key_last( $allIDs );
		}
		return $offsetKey > 0;
	}

	/**
	 * @param string $entityID
	 * @return bool Whether the given ID corresponds to a known entity.
	 * @throws CannotQueryWikiProjectsException
	 */
	public function isKnownEntity( string $entityID ): bool {
		return in_array( $entityID, $this->wikiProjectIDLookup->getWikiProjectIDs(), true );
	}

	/**
	 * Cached wrapper around computeDataForEntities().
	 *
	 * @param string[] $entityIDs
	 * @param string $languageCode
	 * @return array<array|null>
	 * @phan-return array<string,array{label:string,description:string,sitelink:string}|null>
	 * @throws CannotQueryWikibaseException
	 */
	private function getDataForEntities( array $entityIDs, string $languageCode ): array {
		if ( $entityIDs === [] ) {
			// Nothing to look up; avoid a pointless cache round trip.
			return [];
		}

		$entitiesHash = sha1( implode( ',', $entityIDs ) );
		// The cached value includes the sitelink for the *current* wiki (see buildEntitySiteLink()), so the key
		// must vary by wiki: the same set of entities can have different sitelinks on different wikis.
		$cacheKey = $this->cache->makeGlobalKey(
			'WikimediaCampaignEvents-WikiProjects',
			WikiMap::getCurrentWikiId(),
			$languageCode,
			$entitiesHash
		);

		return $this->cache->getWithSetCallback(
			$cacheKey,
			WANObjectCache::TTL_HOUR,
			fn () => $this->computeDataForEntities( $entityIDs, $languageCode )
		);
	}

	/**
	 * Fetches the entities from Wikidata and reduces each one to the data needed by callers.
	 *
	 * @param string[] $entityIDs
	 * @param string $languageCode
	 * @return array<array|null> Keyed by entity ID; null for entities without a sitelink for the current wiki.
	 * @phan-return array<string,array{label:string,description:string,sitelink:string}|null>
	 * @throws CannotQueryWikibaseException
	 */
	private function computeDataForEntities( array $entityIDs, string $languageCode ): array {
		$wikiProjects = [];
		foreach ( $this->queryWikidataAPI( $entityIDs, $languageCode ) as $id => $entity ) {
			$siteLink = $this->buildEntitySiteLink( $entity );
			$wikiProjects[$id] = $siteLink
				? [
					'label' => $entity['labels'][$languageCode]['value'] ?? '',
					'description' => $entity['descriptions'][$languageCode]['value'] ?? '',
					'sitelink' => $siteLink,
				]
				: null;
		}
		return $wikiProjects;
	}

	/**
	 * Fetches the given entities from the Wikidata API, splitting the request into batches.
	 *
	 * @param string[] $entityIDs
	 * @param string $languageCode
	 * @return array[] Raw entity data as returned by the API, keyed by entity ID.
	 * @phan-return array<string,array{labels?:array,descriptions?:array,sitelinks?:array}>
	 * @throws CannotQueryWikibaseException
	 */
	private function queryWikidataAPI( array $entityIDs, string $languageCode ): array {
		$entitiesPerBatch = [];
		foreach ( array_chunk( $entityIDs, self::API_BATCH_SIZE ) as $batch ) {
			$entitiesPerBatch[] = $this->queryWikidataAPIBatch( $batch, $languageCode )['entities'];
		}
		// Merge once at the end rather than re-copying the accumulated array on every iteration.
		return array_merge( [], ...$entitiesPerBatch );
	}

	/**
	 * Performs a single wbgetentities request and returns the decoded response.
	 *
	 * @param string[] $entityIDs
	 * @param string $languageCode
	 * @return array Decoded API response; guaranteed to contain an 'entities' array.
	 * @throws CannotQueryWikibaseException If the request fails, or the response is not valid JSON, or it does not
	 * have the expected shape (e.g. because the API reported an error).
	 */
	private function queryWikidataAPIBatch( array $entityIDs, string $languageCode ): array {
		// 'claims' to be added for more data.
		$props = [ 'labels', 'descriptions', 'sitelinks/urls' ];
		$params = [
			'action' => 'wbgetentities',
			'format' => 'json',
			'ids' => implode( '|', $entityIDs ),
			'props' => implode( '|', $props ),
			'languages' => $languageCode,
			'languagefallback' => true,
			'formatversion' => 2,
		];
		$url = self::WIKIDATA_API_ENDPOINT . '?' . http_build_query( $params );
		$req = $this->httpRequestFactory->create( $url, [ 'method' => 'GET' ], __METHOD__ );

		$status = $req->execute();
		if ( !$status->isGood() ) {
			throw new CannotQueryWikibaseException( "Bad status from WD API: $status" );
		}

		try {
			$parsedResponse = json_decode( $req->getContent(), true, 512, JSON_THROW_ON_ERROR );
		} catch ( JsonException $e ) {
			throw new CannotQueryWikibaseException( "Invalid JSON from WD API", 0, $e );
		}

		// The API can answer a successful HTTP request with an error payload (or, in theory, any JSON value),
		// in which case there is no 'entities' element for the caller to consume.
		if ( !is_array( $parsedResponse ) || !is_array( $parsedResponse['entities'] ?? null ) ) {
			$errorCode = is_array( $parsedResponse ) ? ( $parsedResponse['error']['code'] ?? null ) : null;
			$errorInfo = is_string( $errorCode ) ? $errorCode : 'unknown';
			throw new CannotQueryWikibaseException( "Unexpected response from WD API (error: $errorInfo)" );
		}

		return $parsedResponse;
	}

	/**
	 * @param array $entity Raw entity data from the Wikidata API
	 * @return string|null URL of the entity's sitelink for the current wiki, or null if there is none (including
	 * when the entity has no sitelink data at all, e.g. because it is reported as missing).
	 */
	private function buildEntitySiteLink( array $entity ): ?string {
		$siteId = WikiMap::getCurrentWikiId();
		return $entity['sitelinks'][$siteId]['url'] ?? null;
	}

	/**
	 * @param int $direction self::DIR_FORWARDS or self::DIR_BACKWARDS
	 * @return int The opposite direction. Any value other than self::DIR_FORWARDS is treated as backwards.
	 */
	public static function invertDirection( int $direction ): int {
		return $direction === self::DIR_FORWARDS ? self::DIR_BACKWARDS : self::DIR_FORWARDS;
	}
}