Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 162 |
|
0.00% |
0 / 10 |
CRAP | |
0.00% |
0 / 1 |
ApiQueryAutomaticTranslationDenseLanguages | |
0.00% |
0 / 162 |
|
0.00% |
0 / 10 |
702 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
fetchWikipediaSites | |
0.00% |
0 / 36 |
|
0.00% |
0 / 1 |
56 | |||
getWikipediaSites | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
getArticleSiteLinks | |
0.00% |
0 / 26 |
|
0.00% |
0 / 1 |
30 | |||
getArticleSizeInformation | |
0.00% |
0 / 64 |
|
0.00% |
0 / 1 |
56 | |||
execute | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
executeGenerator | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
run | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
getAllowedParams | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
2 | |||
getExamplesMessages | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace ContentTranslation\ActionApi; |
5 | |
6 | use MediaWiki\Api\ApiQuery; |
7 | use MediaWiki\Api\ApiQueryGeneratorBase; |
8 | use MediaWiki\Http\HttpRequestFactory; |
9 | use Wikimedia\LightweightObjectStore\ExpirationAwareness; |
10 | use Wikimedia\ObjectCache\WANObjectCache; |
11 | use Wikimedia\ParamValidator\ParamValidator; |
12 | |
13 | /** |
14 | * Api module for fetching the list of sitelinks for the article that corresponds |
15 | * to the Wikidata ID, ordered by article size, with server-side limiting and searching. |
16 | * |
17 | * @author Nik Gkountas |
18 | * @license GPL-2.0-or-later |
19 | * @since 2024.06 |
20 | */ |
class ApiQueryAutomaticTranslationDenseLanguages extends ApiQueryGeneratorBase {

	/** Endpoint used to retrieve the site matrix (list of language editions). */
	private const WIKIPEDIA_API_URL = 'https://en.wikipedia.org/w/api.php';
	/** Endpoint used to retrieve the sitelinks of a Wikidata entity. */
	private const WIKIDATA_API_URL = 'https://www.wikidata.org/w/api.php';
	/** Substring a site URL must contain to be treated as a Wikipedia site. */
	private const WIKIPEDIA_URL_FRAGMENT = 'wikipedia';
	/** Default value of the "limit" request parameter. */
	private const ARTICLE_SITE_LINK_LIMIT = 15;

	private HttpRequestFactory $httpRequestFactory;
	private WANObjectCache $cache;

	/**
	 * @param ApiQuery $query
	 * @param string $moduleName
	 * @param HttpRequestFactory $httpRequestFactory Used for the sitematrix, Wikidata and parse requests
	 * @param WANObjectCache $cache Used to cache the list of Wikipedia sites
	 */
	public function __construct(
		ApiQuery $query,
		string $moduleName,
		HttpRequestFactory $httpRequestFactory,
		WANObjectCache $cache
	) {
		parent::__construct( $query, $moduleName );
		$this->httpRequestFactory = $httpRequestFactory;
		$this->cache = $cache;
	}

	/**
	 * Fetch Wikipedia sites from the sitematrix API.
	 *
	 * Returns an empty array when the request fails or the response does not
	 * contain a usable "sitematrix" entry.
	 *
	 * @return array[] Map of database name to site information
	 * e.g. [
	 *   'aawiki' => [
	 *     'code' => 'aa', 'url' => 'https://aa.wikipedia.org', 'localname' => 'Afar'
	 *   ]
	 * ]
	 */
	private function fetchWikipediaSites(): array {
		$queryParams = [
			'action' => 'sitematrix',
			'format' => 'json',
			'formatversion' => 2,
			'smtype' => 'language',
			'uselang' => $this->getContext()->getLanguage()->getCode(),
			'smlangprop' => 'site|code|localname',
			'smsiteprop' => 'dbname|url',
			'origin' => '*'
		];

		$apiUrl = wfAppendQuery( self::WIKIPEDIA_API_URL, $queryParams );

		$response = $this->httpRequestFactory->get( $apiUrl, [], __METHOD__ );
		if ( !$response ) {
			// Request failed: nothing to decode. Passing a non-string to
			// json_decode() would be a TypeError under strict_types.
			return [];
		}

		$responseBody = json_decode( $response, true ) ?: [];

		$siteMatrix = $responseBody['sitematrix'] ?? [];
		if ( !is_array( $siteMatrix ) ) {
			return [];
		}
		// "count" is a scalar entry of the matrix, not a language group.
		unset( $siteMatrix['count'] );

		$results = [];

		foreach ( $siteMatrix as $site ) {
			if ( !is_array( $site ) ) {
				continue;
			}

			$code = $site['code'] ?? null;
			$localname = $site['localname'] ?? null;
			if ( $code === null || $localname === null ) {
				// If a site doesn't have a proper localname (or code), skip it
				// as it's probably not properly configured or disabled.
				continue;
			}

			// Keep the first sub-site whose URL looks like a Wikipedia URL.
			foreach ( $site['site'] ?? [] as $subSite ) {
				$subSiteUrl = $subSite['url'] ?? '';
				$subSiteDb = $subSite['dbname'] ?? null;
				if (
					$subSiteDb !== null &&
					$subSiteUrl !== '' &&
					strpos( $subSiteUrl, self::WIKIPEDIA_URL_FRAGMENT ) !== false
				) {
					$results[$subSiteDb] = [
						'code' => $code,
						'url' => $subSiteUrl,
						'localname' => $localname
					];
					break;
				}
			}
		}

		return $results;
	}

	/**
	 * Get the Wikipedia sites, cached for one day under a global cache key.
	 *
	 * An empty result (e.g. because the sitematrix request failed) is returned
	 * to the caller but not stored, so that a transient failure is not served
	 * from cache for a whole day.
	 *
	 * @return array[] See fetchWikipediaSites()
	 */
	private function getWikipediaSites(): array {
		return $this->cache->getWithSetCallback(
			$this->cache->makeGlobalKey( 'ax-wikipedia-sites' ),
			ExpirationAwareness::TTL_DAY,
			function ( $oldValue, &$ttl ) {
				$sites = $this->fetchWikipediaSites();
				if ( !$sites ) {
					$ttl = ExpirationAwareness::TTL_UNCACHEABLE;
				}

				return $sites;
			}
		);
	}

	/**
	 * Fetch site links for a given Wikidata ID.
	 *
	 * Only site links whose site is one of the known Wikipedia sites are returned.
	 * Dies with an API error when the Wikidata request fails.
	 *
	 * @param string $qid
	 * @return array[] List of [
	 *   'site' => string,
	 *   'title' => string,
	 *   'siteUrl' => string,
	 *   'siteCode' => string,
	 *   'localname' => string
	 * ]
	 */
	private function getArticleSiteLinks( string $qid ): array {
		$queryParams = [
			'action' => 'wbgetentities',
			'format' => 'json',
			'props' => 'sitelinks',
			'ids' => $qid,
		];

		$apiUrl = wfAppendQuery( self::WIKIDATA_API_URL, $queryParams );

		$response = $this->httpRequestFactory->get( $apiUrl, [], __METHOD__ );
		if ( !$response ) {
			$this->dieWithError( 'apierror-query+automatictranslationdenselanguages-sitelink-request-failure' );
		}

		$responseBody = json_decode( $response, true ) ?: [];

		$siteLinks = $responseBody['entities'][$qid]['sitelinks'] ?? [];

		$sites = $this->getWikipediaSites();
		$results = [];

		foreach ( $siteLinks as $siteDb => $siteLink ) {
			$currentSite = $sites[$siteDb] ?? null;
			if ( !$currentSite || !isset( $siteLink['title'] ) ) {
				// Not a known Wikipedia site, or a malformed site link.
				continue;
			}

			$results[] = [
				'site' => $siteDb,
				'title' => $siteLink['title'],
				'localname' => $currentSite['localname'],
				'siteUrl' => $currentSite['url'],
				'siteCode' => $currentSite['code']
			];
		}

		return $results;
	}

	/**
	 * Get size information for articles, with server-side limiting.
	 *
	 * One "parse" request (prop=sections) is sent per site link. The byte offset
	 * of the last section is used as the article size, and the results are sorted
	 * by that size in descending order before being truncated to the "limit"
	 * request parameter.
	 *
	 * @param string $qid
	 * @return array ['languages' => array[], 'total' => int] where "total" is the
	 *  number of Wikipedia site links found, before limiting
	 */
	private function getArticleSizeInformation( string $qid ): array {
		$params = $this->extractRequestParams();
		// A negative length would make array_slice() drop items from the end.
		$limit = max( 0, (int)( $params['limit'] ?? self::ARTICLE_SITE_LINK_LIMIT ) );
		$sectionTitlesOn = (bool)( $params['section-titles'] ?? false );

		$siteLinks = $this->getArticleSiteLinks( $qid );
		if ( !$siteLinks ) {
			return [ 'languages' => [], 'total' => 0 ];
		}

		$total = count( $siteLinks );

		$requests = array_map( static function ( $siteLink ) {
			$queryParams = [
				'action' => 'parse',
				'format' => 'json',
				'formatversion' => 2,
				'prop' => 'sections',
				'page' => $siteLink['title']
			];
			$apiUrl = wfAppendQuery( $siteLink['siteUrl'] . '/w/api.php', $queryParams );

			return [ 'method' => 'GET', 'url' => $apiUrl ];
		}, $siteLinks );

		$multiHttpClient = $this->httpRequestFactory->createMultiClient();
		$responses = $multiHttpClient->runMulti( $requests );

		// $responses and $siteLinks are walked in parallel: the n-th response
		// belongs to the n-th site link.
		$sizeInfos = array_map(
			static function ( $response, $siteLink ) use ( $sectionTitlesOn ) {
				$body = $response['response']['body'] ?? '';
				$responseBody = json_decode( (string)$body, true ) ?: [];
				if ( !isset( $responseBody['parse'] ) ) {
					// Failed request or API error for this wiki: drop the entry.
					return null;
				}
				$sections = $responseBody['parse']['sections'] ?? [];
				$lastSection = end( $sections );

				// "byteoffset" may be missing or null; count such articles as size 0
				// so that "size" is always an integer.
				$size = $lastSection ? (int)( $lastSection['byteoffset'] ?? 0 ) : 0;

				// Only top-level sections are counted and listed.
				$topLevelSections = array_filter( $sections, static function ( $section ) {
					return ( $section['toclevel'] ?? null ) === 1;
				} );

				$infos = [
					'siteUrl' => $siteLink['siteUrl'],
					'language' => $siteLink['siteCode'],
					'languageName' => $siteLink['localname'],
					'title' => $siteLink['title'],
					'sections' => count( $topLevelSections ),
					'size' => $size,
				];

				if ( $sectionTitlesOn ) {
					$infos['sectionTitles'] = array_values( array_map( static function ( $section ) {
						return $section['line'];
					}, $topLevelSections ) );
				}

				return $infos;
			},
			$responses,
			$siteLinks
		);

		// Remove the entries for which no parse information was available.
		$sizeInfos = array_filter( $sizeInfos );

		usort( $sizeInfos, static function ( $a, $b ) {
			return $b['size'] <=> $a['size'];
		} );

		return [
			// array_slice() returns the whole list when it is shorter than $limit.
			'languages' => array_slice( $sizeInfos, 0, $limit ),
			'total' => $total
		];
	}

	/** @inheritDoc */
	public function execute() {
		$this->run();
	}

	/**
	 * Note: the given page set is not used; the same data as in execute()
	 * is added to the result.
	 *
	 * @inheritDoc
	 */
	public function executeGenerator( $resultPageSet ) {
		$this->run();
	}

	/**
	 * Compute the size information for the requested Wikidata ID and add the
	 * "languages" and "total" values to the API result under this module's name.
	 */
	private function run() {
		$params = $this->extractRequestParams();
		$qid = $params['qid'];

		$resultData = $this->getArticleSizeInformation( $qid );
		$result = $this->getResult();
		$result->addValue( [ 'query', $this->getModuleName() ], 'languages', $resultData['languages'] );
		$result->addValue( [ 'query', $this->getModuleName() ], 'total', $resultData['total'] );
	}

	/** @inheritDoc */
	public function getAllowedParams() {
		return [
			'qid' => [
				ParamValidator::PARAM_TYPE => 'string',
				ParamValidator::PARAM_REQUIRED => true,
			],
			'section-titles' => [
				ParamValidator::PARAM_TYPE => 'boolean',
				ParamValidator::PARAM_DEFAULT => false
			],
			'limit' => [
				ParamValidator::PARAM_TYPE => 'integer',
				ParamValidator::PARAM_DEFAULT => self::ARTICLE_SITE_LINK_LIMIT
			]
		];
	}

	/** @inheritDoc */
	protected function getExamplesMessages() {
		return [
			'action=query&list=automatictranslationdenselanguages&qid=Q405&limit=15' =>
				'apihelp-query+automatictranslationdenselanguages-example-1',
			'action=query&list=automatictranslationdenselanguages&qid=Q405&section-titles=true' =>
				'apihelp-query+automatictranslationdenselanguages-example-2'
		];
	}
}