Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 141 |
|
0.00% |
0 / 10 |
CRAP | |
0.00% |
0 / 1 |
ApiQueryAutomaticTranslationDenseLanguages | |
0.00% |
0 / 141 |
|
0.00% |
0 / 10 |
600 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
fetchWikipediaSites | |
0.00% |
0 / 31 |
|
0.00% |
0 / 1 |
42 | |||
getWikipediaSites | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
getArticleSiteLinks | |
0.00% |
0 / 25 |
|
0.00% |
0 / 1 |
30 | |||
getArticleSizeInformation | |
0.00% |
0 / 54 |
|
0.00% |
0 / 1 |
42 | |||
execute | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
executeGenerator | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
run | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
getAllowedParams | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
2 | |||
getExamplesMessages | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace ContentTranslation\ActionApi; |
5 | |
6 | use MediaWiki\Api\ApiQuery; |
7 | use MediaWiki\Api\ApiQueryGeneratorBase; |
8 | use MediaWiki\Http\HttpRequestFactory; |
9 | use Wikimedia\LightweightObjectStore\ExpirationAwareness; |
10 | use Wikimedia\ObjectCache\WANObjectCache; |
11 | use Wikimedia\ParamValidator\ParamValidator; |
12 | |
/**
 * Api module for fetching the list of sitelinks for the article that corresponds
 * to the Wikidata ID that is given as request parameter, ordered by article size.
 *
 * For every Wikipedia sitelink of the given item, the module asks the target wiki
 * for the sections of the linked page and reports the number of top-level sections
 * together with a size figure derived from the section byte offsets.
 *
 * @author Nik Gkountas
 * @license GPL-2.0-or-later
 * @since 2024.06
 */
class ApiQueryAutomaticTranslationDenseLanguages extends ApiQueryGeneratorBase {

	private const WIKIPEDIA_API_URL = 'https://en.wikipedia.org/w/api.php';
	private const WIKIDATA_API_URL = 'https://www.wikidata.org/w/api.php';
	/** Substring that a site URL must contain to be treated as a Wikipedia site */
	private const WIKIPEDIA_URL_FRAGMENT = 'wikipedia';

	private HttpRequestFactory $httpRequestFactory;
	private WANObjectCache $cache;

	/**
	 * @param ApiQuery $query
	 * @param string $moduleName
	 * @param HttpRequestFactory $httpRequestFactory Used for all outgoing HTTP requests
	 * @param WANObjectCache $cache Used to cache the list of Wikipedia sites
	 */
	public function __construct(
		ApiQuery $query,
		string $moduleName,
		HttpRequestFactory $httpRequestFactory,
		WANObjectCache $cache
	) {
		parent::__construct( $query, $moduleName );
		$this->httpRequestFactory = $httpRequestFactory;
		$this->cache = $cache;
	}

	/**
	 * Fetch the site matrix and keep, for each language, the first site whose URL
	 * contains the Wikipedia URL fragment.
	 *
	 * The 'url' value is the site URL exactly as returned by the sitematrix API;
	 * getArticleSizeInformation() appends '/w/api.php' to it.
	 *
	 * @return array[] Map of database name to site information,
	 *  e.g. [ 'aawiki' => [ 'code' => 'aa', 'url' => 'https://aa.wikipedia.org' ] ].
	 *  Empty when the request fails or the response cannot be understood.
	 */
	private function fetchWikipediaSites(): array {
		$queryParams = [
			'action' => 'sitematrix',
			'format' => 'json',
			'formatversion' => 2,
			'smtype' => 'language',
			'smlangprop' => 'site|code',
			'smsiteprop' => 'dbname|url',
			'origin' => '*'
		];

		$apiUrl = wfAppendQuery( self::WIKIPEDIA_API_URL, $queryParams );

		$response = $this->httpRequestFactory->get( $apiUrl, [], __METHOD__ );
		if ( !$response ) {
			// A failed request yields no body; decoding it would raise a TypeError
			// under strict_types, so bail out with an empty list instead.
			return [];
		}

		$responseBody = json_decode( $response, true );
		if ( !is_array( $responseBody ) || !is_array( $responseBody['sitematrix'] ?? null ) ) {
			return [];
		}

		$siteMatrix = $responseBody['sitematrix'];
		// 'count' is a scalar sibling of the per-language entries, not a language group.
		unset( $siteMatrix['count'] );

		$results = [];

		foreach ( $siteMatrix as $languageGroup ) {
			if ( !is_array( $languageGroup ) || !isset( $languageGroup['code'] ) ) {
				continue;
			}

			foreach ( $languageGroup['site'] ?? [] as $subSite ) {
				$url = $subSite['url'] ?? '';
				if ( strpos( $url, self::WIKIPEDIA_URL_FRAGMENT ) === false ) {
					continue;
				}

				// Only the first matching site of each language is considered.
				if ( isset( $subSite['dbname'] ) ) {
					$results[ $subSite['dbname'] ] = [
						'code' => $languageGroup['code'],
						'url' => $url
					];
				}
				break;
			}
		}

		return $results;
	}

	/**
	 * Cached variant of fetchWikipediaSites().
	 *
	 * A non-empty list is cached for one day under a global key. An empty list
	 * (e.g. because the site matrix request failed) is returned but not cached,
	 * so that the next request retries instead of reusing the failure for a day.
	 *
	 * @return array[] See fetchWikipediaSites()
	 */
	private function getWikipediaSites(): array {
		return $this->cache->getWithSetCallback(
			$this->cache->makeGlobalKey( 'ax-wikipedia-sites' ),
			ExpirationAwareness::TTL_DAY,
			function ( $oldValue, &$ttl ) {
				$sites = $this->fetchWikipediaSites();
				if ( !$sites ) {
					$ttl = ExpirationAwareness::TTL_UNCACHEABLE;
				}

				return $sites;
			}
		);
	}

	/**
	 * Fetch the sitelinks of the given Wikidata item and keep those that point
	 * to a known Wikipedia site.
	 *
	 * Dies with an API error when the Wikidata request returns no response.
	 *
	 * @param string $qid
	 * @return array[] List of
	 *  ['site' => string, 'title' => string, 'siteUrl' => string, 'siteCode' => string]
	 */
	private function getArticleSiteLinks( string $qid ): array {
		$queryParams = [
			'action' => 'wbgetentities',
			'format' => 'json',
			'props' => 'sitelinks',
			'ids' => $qid,
		];

		$apiUrl = wfAppendQuery( self::WIKIDATA_API_URL, $queryParams );

		$response = $this->httpRequestFactory->get( $apiUrl, [], __METHOD__ );
		if ( !$response ) {
			$this->dieWithError( 'apierror-query+automatictranslationdenselanguages-sitelink-request-failure' );
		}

		$responseBody = json_decode( $response, true ) ?: [];

		$siteLinks = $responseBody['entities'][$qid]['sitelinks'] ?? [];

		$sites = $this->getWikipediaSites();
		$results = [];

		foreach ( $siteLinks as $siteDb => $siteLink ) {
			$currentSite = $sites[$siteDb] ?? null;
			// Skip sitelinks to non-Wikipedia sites and entries without a page title.
			if ( !$currentSite || !isset( $siteLink['title'] ) ) {
				continue;
			}

			$results[] = [
				'site' => $siteDb,
				'title' => $siteLink['title'],
				'siteUrl' => $currentSite['url'],
				'siteCode' => $currentSite['code']
			];
		}

		return $results;
	}

	/**
	 * Turn the response of one "action=parse&prop=sections" request into a size summary.
	 *
	 * @param array $response One entry of the array returned by MultiHttpClient::runMulti()
	 * @param array $siteLink The sitelink (see getArticleSiteLinks()) the request was made for
	 * @param bool $withSectionTitles Whether to include the titles of the top-level sections
	 * @return array|null Null when the response contains no 'parse' data, otherwise
	 *  ['siteUrl' => string, 'language' => string, 'title' => string, 'sections' => int,
	 *  'size' => int] plus 'sectionTitles' => string[] when requested
	 */
	private static function buildSizeInfo( array $response, array $siteLink, bool $withSectionTitles ): ?array {
		$body = $response['response']['body'] ?? '';
		$responseBody = ( is_string( $body ) && $body !== '' ) ? json_decode( $body, true ) : null;
		if ( !is_array( $responseBody ) || !isset( $responseBody['parse'] ) ) {
			return null;
		}

		$sections = $responseBody['parse']['sections'] ?? [];

		// The byte offset of the last section is used as the size figure. The offset
		// is not guaranteed to be set for every section, so walk backwards to the
		// last section that has one; pages without such a section get size 0.
		$size = 0;
		foreach ( array_reverse( $sections ) as $section ) {
			if ( isset( $section['byteoffset'] ) ) {
				$size = (int)$section['byteoffset'];
				break;
			}
		}

		$topLevelSections = array_filter( $sections, static function ( $section ) {
			return ( $section['toclevel'] ?? null ) === 1;
		} );

		$infos = [
			'siteUrl' => $siteLink['siteUrl'],
			'language' => $siteLink['siteCode'],
			'title' => $siteLink['title'],
			'sections' => count( $topLevelSections ),
			'size' => $size,
		];

		if ( $withSectionTitles ) {
			// array_column() returns a re-indexed list, as needed for the output.
			$infos['sectionTitles'] = array_column( $topLevelSections, 'line' );
		}

		return $infos;
	}

	/**
	 * Collect size information for every Wikipedia article linked to the given
	 * Wikidata item, sorted by size in descending order.
	 *
	 * One "action=parse" request per sitelink is issued through a multi HTTP client;
	 * sitelinks whose request yields no 'parse' data are left out of the result.
	 *
	 * @param string $qid
	 * @return array[] List of ['siteUrl' => string, 'language' => string, 'title' => string,
	 *  'sections' => int, 'size' => int], each with an additional 'sectionTitles' => string[]
	 *  when the "section-titles" request parameter is set
	 */
	private function getArticleSizeInformation( string $qid ): array {
		$siteLinks = $this->getArticleSiteLinks( $qid );
		if ( !$siteLinks ) {
			return [];
		}

		$requests = array_map( static function ( $siteLink ) {
			$queryParams = [
				'action' => 'parse',
				'format' => 'json',
				'formatversion' => 2,
				'prop' => 'sections',
				'page' => $siteLink['title']
			];
			$apiUrl = wfAppendQuery( $siteLink['siteUrl'] . '/w/api.php', $queryParams );

			return [ 'method' => 'GET', 'url' => $apiUrl ];
		}, $siteLinks );

		$multiHttpClient = $this->httpRequestFactory->createMultiClient();
		$responses = $multiHttpClient->runMulti( $requests );

		$params = $this->extractRequestParams();
		$sectionTitlesOn = (bool)$params['section-titles'];

		// Responses and sitelinks are paired by position.
		$sizeInfos = array_map(
			static function ( $response, $siteLink ) use ( $sectionTitlesOn ) {
				return self::buildSizeInfo( (array)$response, $siteLink, $sectionTitlesOn );
			},
			$responses,
			$siteLinks
		);

		// Drop the sitelinks for which no size information could be built.
		$sizeInfos = array_filter( $sizeInfos );

		usort( $sizeInfos, static function ( $a, $b ) {
			return $b['size'] <=> $a['size'];
		} );

		return $sizeInfos;
	}

	/**
	 * @inheritDoc
	 */
	public function execute() {
		$this->run();
	}

	/**
	 * Runs the same logic as execute(); the given page set is not used.
	 *
	 * @inheritDoc
	 */
	public function executeGenerator( $resultPageSet ) {
		$this->run();
	}

	/**
	 * Read the "qid" request parameter and add the size information of the linked
	 * articles to the API result, under [ 'query', <module name> ][ 'sizeInfo' ].
	 */
	private function run() {
		$params = $this->extractRequestParams();
		$qid = $params['qid'];

		$sizeInformationArray = $this->getArticleSizeInformation( $qid );

		$result = $this->getResult();
		$result->addValue( [ 'query', $this->getModuleName() ], 'sizeInfo', $sizeInformationArray );
	}

	/**
	 * @inheritDoc
	 */
	public function getAllowedParams() {
		return [
			'qid' => [
				ParamValidator::PARAM_TYPE => 'string',
				ParamValidator::PARAM_REQUIRED => true,
			],
			'section-titles' => [
				ParamValidator::PARAM_TYPE => 'boolean',
				ParamValidator::PARAM_DEFAULT => false
			]
		];
	}

	/**
	 * @inheritDoc
	 */
	protected function getExamplesMessages() {
		return [
			'action=query&list=automatictranslationdenselanguages&qid=Q405' =>
				'apihelp-query+automatictranslationdenselanguages-example-1',
			'action=query&list=automatictranslationdenselanguages&qid=Q405&section-titles=true' =>
				'apihelp-query+automatictranslationdenselanguages-example-2'
		];
	}
}