Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 148 |
|
0.00% |
0 / 10 |
CRAP | |
0.00% |
0 / 1 |
ApiQueryAutomaticTranslationDenseLanguages | |
0.00% |
0 / 148 |
|
0.00% |
0 / 10 |
650 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
fetchWikipediaSites | |
0.00% |
0 / 36 |
|
0.00% |
0 / 1 |
56 | |||
getWikipediaSites | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
getArticleSiteLinks | |
0.00% |
0 / 26 |
|
0.00% |
0 / 1 |
30 | |||
getArticleSizeInformation | |
0.00% |
0 / 55 |
|
0.00% |
0 / 1 |
42 | |||
execute | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
executeGenerator | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
run | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
getAllowedParams | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
2 | |||
getExamplesMessages | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace ContentTranslation\ActionApi; |
5 | |
6 | use MediaWiki\Api\ApiQuery; |
7 | use MediaWiki\Api\ApiQueryGeneratorBase; |
8 | use MediaWiki\Http\HttpRequestFactory; |
9 | use Wikimedia\LightweightObjectStore\ExpirationAwareness; |
10 | use Wikimedia\ObjectCache\WANObjectCache; |
11 | use Wikimedia\ParamValidator\ParamValidator; |
12 | |
/**
 * Api module for fetching the list of sitelinks for the article that corresponds
 * to the Wikidata ID that is given as request parameter, ordered by article size.
 *
 * For every Wikipedia sitelink of the entity, the module asks the corresponding
 * wiki for the section list of the article and reports the number of top-level
 * sections and an approximate size (byte offset of the last section).
 *
 * @author Nik Gkountas
 * @license GPL-2.0-or-later
 * @since 2024.06
 */
class ApiQueryAutomaticTranslationDenseLanguages extends ApiQueryGeneratorBase {

	private const WIKIPEDIA_API_URL = 'https://en.wikipedia.org/w/api.php';
	private const WIKIDATA_API_URL = 'https://www.wikidata.org/w/api.php';
	private const WIKIPEDIA_URL_FRAGMENT = 'wikipedia';

	private HttpRequestFactory $httpRequestFactory;
	private WANObjectCache $cache;

	/**
	 * @param ApiQuery $query
	 * @param string $moduleName
	 * @param HttpRequestFactory $httpRequestFactory Used for the sitematrix, Wikidata and parse requests
	 * @param WANObjectCache $cache Used to cache the list of Wikipedia sites
	 */
	public function __construct(
		ApiQuery $query,
		string $moduleName,
		HttpRequestFactory $httpRequestFactory,
		WANObjectCache $cache
	) {
		parent::__construct( $query, $moduleName );
		$this->httpRequestFactory = $httpRequestFactory;
		$this->cache = $cache;
	}

	/**
	 * Fetch the site matrix and keep, for each language, its Wikipedia site.
	 *
	 * @param string $languageCode Language in which the "localname" of each site is requested
	 * @return array[] Map of database name to site information, e.g.
	 *  [ 'aawiki' => [ 'code' => 'aa', 'url' => 'https://aa.wikipedia.org', 'localname' => 'Afar' ] ].
	 *  Empty when the request fails or the response cannot be understood.
	 */
	private function fetchWikipediaSites( string $languageCode ): array {
		$queryParams = [
			'action' => 'sitematrix',
			'format' => 'json',
			'formatversion' => 2,
			'smtype' => 'language',
			'uselang' => $languageCode,
			'smlangprop' => 'site|code|localname',
			'smsiteprop' => 'dbname|url',
			'origin' => '*'
		];

		$apiUrl = wfAppendQuery( self::WIKIPEDIA_API_URL, $queryParams );

		$response = $this->httpRequestFactory->get( $apiUrl, [], __METHOD__ );
		if ( !$response ) {
			// The request failed; json_decode() would throw a TypeError on null
			// because this file declares strict types.
			return [];
		}

		$responseBody = json_decode( $response, true );
		$siteMatrix = is_array( $responseBody ) ? ( $responseBody['sitematrix'] ?? [] ) : [];
		if ( !is_array( $siteMatrix ) ) {
			return [];
		}
		// "count" is a sibling of the per-language entries, not a language entry itself.
		unset( $siteMatrix['count'] );

		$results = [];

		foreach ( $siteMatrix as $site ) {
			if ( !is_array( $site ) ) {
				continue;
			}

			$code = $site['code'] ?? null;
			$localname = $site['localname'] ?? null;
			if ( $code === null || $localname === null ) {
				// If a site doesn't have a proper localname, skip it
				// as it's probably not properly configured or disabled.
				continue;
			}

			foreach ( $site['site'] ?? [] as $subSite ) {
				if ( !isset( $subSite['url'], $subSite['dbname'] ) ) {
					continue;
				}

				if ( strpos( $subSite['url'], self::WIKIPEDIA_URL_FRAGMENT ) !== false ) {
					// Only the first Wikipedia site of each language is kept.
					$results[ $subSite['dbname'] ] = [
						'code' => $code,
						'url' => $subSite['url'],
						'localname' => $localname
					];
					break;
				}
			}
		}

		return $results;
	}

	/**
	 * Get the (cached) list of Wikipedia sites, see fetchWikipediaSites() for the format.
	 *
	 * The cache key varies by the request language, because the "localname" of each
	 * site is localized. Empty results are not cached, so that a transient failure of
	 * the sitematrix request does not disable the module for a whole day.
	 *
	 * @return array[]
	 */
	private function getWikipediaSites(): array {
		$languageCode = $this->getContext()->getLanguage()->getCode();

		$sites = $this->cache->getWithSetCallback(
			$this->cache->makeGlobalKey( 'ax-wikipedia-sites', $languageCode ),
			ExpirationAwareness::TTL_DAY,
			function () use ( $languageCode ) {
				// Returning false tells the cache not to store the value.
				return $this->fetchWikipediaSites( $languageCode ) ?: false;
			}
		);

		return is_array( $sites ) ? $sites : [];
	}

	/**
	 * Get the Wikipedia sitelinks of the given Wikidata entity.
	 *
	 * Dies with an API error when the request to Wikidata fails.
	 *
	 * @param string $qid
	 * @return array[] List of
	 *  [ 'site' => string, 'title' => string, 'localname' => string, 'siteUrl' => string, 'siteCode' => string ]
	 */
	private function getArticleSiteLinks( string $qid ): array {
		$queryParams = [
			'action' => 'wbgetentities',
			'format' => 'json',
			'props' => 'sitelinks',
			'ids' => $qid,
		];

		$apiUrl = wfAppendQuery( self::WIKIDATA_API_URL, $queryParams );

		$response = $this->httpRequestFactory->get( $apiUrl, [], __METHOD__ );
		if ( !$response ) {
			$this->dieWithError( 'apierror-query+automatictranslationdenselanguages-sitelink-request-failure' );
		}

		$responseBody = json_decode( $response, true );
		$siteLinks = is_array( $responseBody ) ? ( $responseBody['entities'][$qid]['sitelinks'] ?? [] ) : [];
		if ( !is_array( $siteLinks ) || !$siteLinks ) {
			// Nothing to match, so there is no need to load the list of sites.
			return [];
		}

		$sites = $this->getWikipediaSites();
		$results = [];

		foreach ( $siteLinks as $siteDb => $siteLink ) {
			$currentSite = $sites[$siteDb] ?? null;
			if ( !$currentSite ) {
				// Not a known Wikipedia site (e.g. a sister project sitelink).
				continue;
			}

			$results[] = [
				'site' => $siteDb,
				'title' => $siteLink['title'],
				'localname' => $currentSite['localname'],
				'siteUrl' => $currentSite['url'],
				'siteCode' => $currentSite['code']
			];
		}

		return $results;
	}

	/**
	 * Build the size information of one article from the decoded "parse" API response.
	 *
	 * @param array $responseBody Decoded response of action=parse&prop=sections
	 * @param array $siteLink One item of the list returned by getArticleSiteLinks()
	 * @param bool $withSectionTitles Whether to include the titles of the top-level sections
	 * @return array|null Null when the response does not contain parse data
	 */
	private static function buildSizeInfo( array $responseBody, array $siteLink, bool $withSectionTitles ): ?array {
		if ( !isset( $responseBody['parse'] ) ) {
			return null;
		}

		$sections = $responseBody['parse']['sections'] ?? [];
		if ( !is_array( $sections ) ) {
			$sections = [];
		}

		// The byte offset of the last section is used as an approximation of the article
		// size. The offset is not guaranteed to be an integer for every section (it can be
		// null), so use the last section that actually has one.
		$size = 0;
		foreach ( array_reverse( $sections ) as $section ) {
			if ( is_int( $section['byteoffset'] ?? null ) ) {
				$size = $section['byteoffset'];
				break;
			}
		}

		$topLevelSections = array_filter( $sections, static function ( $section ) {
			return ( $section['toclevel'] ?? null ) === 1;
		} );

		$infos = [
			'siteUrl' => $siteLink['siteUrl'],
			'language' => $siteLink['siteCode'],
			'languageName' => $siteLink['localname'],
			'title' => $siteLink['title'],
			'sections' => count( $topLevelSections ),
			'size' => $size,
		];

		if ( $withSectionTitles ) {
			// array_filter() preserves keys; re-index so that the titles form a list.
			$infos['sectionTitles'] = array_values( array_map( static function ( $section ) {
				return $section['line'];
			}, $topLevelSections ) );
		}

		return $infos;
	}

	/**
	 * Get size information for every Wikipedia article linked to the given entity,
	 * ordered by size, largest first.
	 *
	 * @param string $qid
	 * @return array[] List of [ 'siteUrl' => string, 'language' => string, 'languageName' => string,
	 *  'title' => string, 'sections' => int, 'size' => int ], plus 'sectionTitles' => string[]
	 *  when the "section-titles" request parameter is set.
	 */
	private function getArticleSizeInformation( string $qid ): array {
		$siteLinks = $this->getArticleSiteLinks( $qid );
		if ( !$siteLinks ) {
			return [];
		}

		$requests = array_map( static function ( $siteLink ) {
			$queryParams = [
				'action' => 'parse',
				'format' => 'json',
				'formatversion' => 2,
				'prop' => 'sections',
				'page' => $siteLink['title']
			];
			$apiUrl = wfAppendQuery( $siteLink['siteUrl'] . '/w/api.php', $queryParams );

			return [ 'method' => 'GET', 'url' => $apiUrl ];
		}, $siteLinks );

		$multiHttpClient = $this->httpRequestFactory->createMultiClient();
		$responses = $multiHttpClient->runMulti( $requests );

		$params = $this->extractRequestParams();
		$sectionTitlesOn = (bool)$params['section-titles'];

		$sizeInfos = [];
		// Requests were built with the keys of $siteLinks, so match responses by key.
		foreach ( $siteLinks as $index => $siteLink ) {
			$body = $responses[$index]['response']['body'] ?? '';
			$responseBody = is_string( $body ) ? json_decode( $body, true ) : null;
			if ( !is_array( $responseBody ) ) {
				// Failed request or invalid JSON: leave this article out.
				continue;
			}

			$infos = self::buildSizeInfo( $responseBody, $siteLink, $sectionTitlesOn );
			if ( $infos !== null ) {
				$sizeInfos[] = $infos;
			}
		}

		usort( $sizeInfos, static function ( $a, $b ) {
			return $b['size'] <=> $a['size'];
		} );

		return $sizeInfos;
	}

	/** @inheritDoc */
	public function execute() {
		$this->run();
	}

	/**
	 * @inheritDoc
	 *
	 * NOTE(review): the given page set is not populated; the output is the same as
	 * for execute(). Confirm that this is the intended generator behavior.
	 */
	public function executeGenerator( $resultPageSet ) {
		$this->run();
	}

	/**
	 * Compute the size information for the requested entity and add it to the
	 * result under query.<module name>.sizeInfo.
	 */
	private function run() {
		$params = $this->extractRequestParams();
		$qid = $params['qid'];

		$sizeInformationArray = $this->getArticleSizeInformation( $qid );

		$result = $this->getResult();
		$result->addValue( [ 'query', $this->getModuleName() ], 'sizeInfo', $sizeInformationArray );
	}

	/** @inheritDoc */
	public function getAllowedParams() {
		return [
			// Wikidata entity ID, e.g. Q405
			'qid' => [
				ParamValidator::PARAM_TYPE => 'string',
				ParamValidator::PARAM_REQUIRED => true,
			],
			// Whether the titles of the top-level sections should be returned too
			'section-titles' => [
				ParamValidator::PARAM_TYPE => 'boolean',
				ParamValidator::PARAM_DEFAULT => false
			]
		];
	}

	/** @inheritDoc */
	protected function getExamplesMessages() {
		return [
			'action=query&list=automatictranslationdenselanguages&qid=Q405' =>
				'apihelp-query+automatictranslationdenselanguages-example-1',
			'action=query&list=automatictranslationdenselanguages&qid=Q405&section-titles=true' =>
				'apihelp-query+automatictranslationdenselanguages-example-2'
		];
	}
}