Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 137 |
|
0.00% |
0 / 10 |
CRAP | |
0.00% |
0 / 1 |
ApiQueryAutomaticTranslationDenseLanguages | |
0.00% |
0 / 137 |
|
0.00% |
0 / 10 |
506 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
fetchWikipediaSites | |
0.00% |
0 / 31 |
|
0.00% |
0 / 1 |
42 | |||
getWikipediaSites | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
getArticleSiteLinks | |
0.00% |
0 / 23 |
|
0.00% |
0 / 1 |
20 | |||
getArticleSizeInformation | |
0.00% |
0 / 52 |
|
0.00% |
0 / 1 |
30 | |||
execute | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
executeGenerator | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
run | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
getAllowedParams | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
2 | |||
getExamplesMessages | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace ContentTranslation\ActionApi; |
5 | |
6 | use ApiQueryGeneratorBase; |
7 | use MediaWiki\Http\HttpRequestFactory; |
8 | use WANObjectCache; |
9 | use Wikimedia\LightweightObjectStore\ExpirationAwareness; |
10 | use Wikimedia\ParamValidator\ParamValidator; |
11 | |
12 | /** |
13 | * Api module for fetching the list of sitelinks for the article that corresponds |
14 | * to the Wikidata ID that is given as request parameter, ordered by article size. |
15 | * |
16 | * @author Nik Gkountas |
17 | * @license GPL-2.0-or-later |
18 | * @since 2024.06 |
19 | */ |
class ApiQueryAutomaticTranslationDenseLanguages extends ApiQueryGeneratorBase {

	private const WIKIPEDIA_API_URL = 'https://en.wikipedia.org/w/api.php';
	private const WIKIDATA_API_URL = 'https://www.wikidata.org/w/api.php';
	private const WIKIPEDIA_URL_FRAGMENT = 'wikipedia';

	private HttpRequestFactory $httpRequestFactory;
	private WANObjectCache $cache;

	/**
	 * @param mixed $query Forwarded unchanged to the parent constructor
	 * @param mixed $moduleName Forwarded unchanged to the parent constructor
	 * @param HttpRequestFactory $httpRequestFactory Used for all outgoing API requests
	 * @param WANObjectCache $cache Used to cache the list of Wikipedia sites
	 */
	public function __construct(
		$query,
		$moduleName,
		HttpRequestFactory $httpRequestFactory,
		WANObjectCache $cache
	) {
		parent::__construct( $query, $moduleName );
		$this->httpRequestFactory = $httpRequestFactory;
		$this->cache = $cache;
	}

	/**
	 * Perform a GET request and decode the JSON response body into an array.
	 *
	 * HttpRequestFactory::get() yields null when the request fails. Under
	 * strict_types, handing that null to json_decode() would throw a TypeError,
	 * so failures (and non-array payloads) are normalised to an empty array.
	 *
	 * @param string $url Fully built request URL
	 * @param string $caller Name of the calling method, passed on to the HTTP layer
	 * @return array Decoded response, or an empty array on any failure
	 */
	private function fetchJson( string $url, string $caller ): array {
		$response = $this->httpRequestFactory->get( $url, [], $caller );
		if ( !is_string( $response ) ) {
			return [];
		}

		$decoded = json_decode( $response, true );

		return is_array( $decoded ) ? $decoded : [];
	}

	/**
	 * Fetch the site matrix and keep, for every language, the first sub-site
	 * whose URL contains the "wikipedia" fragment.
	 *
	 * @return array Map of database name to site data,
	 *  e.g. [ 'aawiki' => [ 'code' => 'aa', 'url' => 'https://aa.wikipedia.org' ] ].
	 *  'url' is the sub-site URL exactly as reported by the sitematrix API
	 *  (presumably the site base URL, since callers append '/w/api.php' to it).
	 *  Empty when the request fails or the response has no site matrix.
	 */
	private function fetchWikipediaSites(): array {
		$queryParams = [
			'action' => 'sitematrix',
			'format' => 'json',
			'formatversion' => 2,
			'smtype' => 'language',
			'smlangprop' => 'site|code',
			'smsiteprop' => 'dbname|url',
			'origin' => '*'
		];

		$apiUrl = wfAppendQuery( self::WIKIPEDIA_API_URL, $queryParams );
		$responseBody = $this->fetchJson( $apiUrl, __METHOD__ );

		$siteMatrix = $responseBody['sitematrix'] ?? [];
		if ( !is_array( $siteMatrix ) ) {
			return [];
		}
		// 'count' is a scalar sibling of the per-language entries, not a language itself.
		unset( $siteMatrix['count'] );

		$results = [];

		foreach ( $siteMatrix as $site ) {
			if ( !is_array( $site ) || !isset( $site['code'] ) ) {
				continue;
			}

			foreach ( $site['site'] ?? [] as $subSite ) {
				$url = $subSite['url'] ?? '';
				$dbName = $subSite['dbname'] ?? null;
				if ( $url === '' || $dbName === null ) {
					continue;
				}

				if ( strpos( $url, self::WIKIPEDIA_URL_FRAGMENT ) !== false ) {
					$results[ $dbName ] = [
						'code' => $site['code'],
						'url' => $url
					];
					// Only the first matching sub-site per language is used.
					break;
				}
			}
		}

		return $results;
	}

	/**
	 * Cached variant of fetchWikipediaSites(), stored under a global key for one day.
	 *
	 * @return array Same shape as fetchWikipediaSites(); empty if the list could not be fetched
	 */
	private function getWikipediaSites(): array {
		$sites = $this->cache->getWithSetCallback(
			$this->cache->makeGlobalKey( 'ax-wikipedia-sites' ),
			ExpirationAwareness::TTL_DAY,
			// An empty list means the fetch failed; returning false keeps
			// WANObjectCache from storing it, so a transient outage is not
			// cached for a whole day.
			fn () => $this->fetchWikipediaSites() ?: false
		);

		return is_array( $sites ) ? $sites : [];
	}

	/**
	 * Fetch the sitelinks of the given Wikidata item, restricted to the known
	 * Wikipedia sites.
	 *
	 * @param string $qid
	 * @return array[] List of
	 *  ['site' => string, 'title' => string, 'siteUrl' => string, 'siteCode' => string];
	 *  empty when the request fails or the entity has no sitelinks
	 */
	private function getArticleSiteLinks( string $qid ): array {
		$queryParams = [
			'action' => 'wbgetentities',
			'format' => 'json',
			'props' => 'sitelinks',
			'ids' => $qid,
		];

		$apiUrl = wfAppendQuery( self::WIKIDATA_API_URL, $queryParams );
		$responseBody = $this->fetchJson( $apiUrl, __METHOD__ );

		// The entity (or its sitelinks) is absent for unknown/invalid ids and on request failure.
		$siteLinks = $responseBody['entities'][$qid]['sitelinks'] ?? [];
		if ( !is_array( $siteLinks ) || !$siteLinks ) {
			return [];
		}

		$sites = $this->getWikipediaSites();
		$results = [];

		foreach ( $siteLinks as $siteDb => $siteLink ) {
			$currentSite = $sites[$siteDb] ?? null;
			if ( !$currentSite || !isset( $siteLink['title'] ) ) {
				// Not one of the known Wikipedia sites, or a malformed sitelink.
				continue;
			}

			$results[] = [
				'site' => $siteDb,
				'title' => $siteLink['title'],
				'siteUrl' => $currentSite['url'],
				'siteCode' => $currentSite['code']
			];
		}

		return $results;
	}

	/**
	 * Turn one "action=parse&prop=sections" response into a size-info entry.
	 *
	 * @param array $response One element of the array returned by MultiHttpClient::runMulti()
	 * @param array $siteLink The matching entry from getArticleSiteLinks()
	 * @param bool $withSectionTitles Whether to include the titles of the top-level sections
	 * @return array|null Null when the response carries no 'parse' data
	 */
	private static function buildSizeInfo( array $response, array $siteLink, bool $withSectionTitles ): ?array {
		$body = $response['response']['body'] ?? '';
		$responseBody = is_string( $body ) ? json_decode( $body, true ) : null;
		if ( !is_array( $responseBody ) || !isset( $responseBody['parse'] ) ) {
			return null;
		}

		$allSections = $responseBody['parse']['sections'] ?? [];

		// The byte offset of the last section is used as the measure of article size.
		// It may be missing or null, in which case the size falls back to 0.
		$lastSection = end( $allSections );
		$size = $lastSection ? ( $lastSection['byteoffset'] ?? 0 ) : 0;

		$topLevelSections = array_filter( $allSections, static function ( $section ) {
			return ( $section['toclevel'] ?? null ) === 1;
		} );

		$info = [
			'siteUrl' => $siteLink['siteUrl'],
			'language' => $siteLink['siteCode'],
			'title' => $siteLink['title'],
			'sections' => count( $topLevelSections ),
			'size' => $size,
		];

		if ( $withSectionTitles ) {
			// array_filter() preserves keys; re-index so the titles form a plain list.
			$info['sectionTitles'] = array_values( array_map( static function ( $section ) {
				return $section['line'];
			}, $topLevelSections ) );
		}

		return $info;
	}

	/**
	 * Collect size information for every Wikipedia article linked to the given
	 * Wikidata item, largest article first.
	 *
	 * @param string $qid
	 * @return array[] List of [ 'siteUrl' => string, 'language' => string, 'title' => string,
	 *  'sections' => int, 'size' => int ], each with an additional
	 *  'sectionTitles' => string[] when the "section-titles" parameter is set
	 */
	private function getArticleSizeInformation( string $qid ): array {
		$siteLinks = $this->getArticleSiteLinks( $qid );
		if ( !$siteLinks ) {
			// Nothing to request; the result would be empty anyway.
			return [];
		}

		$requests = array_map( static function ( $siteLink ) {
			$queryParams = [
				'action' => 'parse',
				'format' => 'json',
				'formatversion' => 2,
				'prop' => 'sections',
				'page' => $siteLink['title']
			];

			return [
				'method' => 'GET',
				'url' => wfAppendQuery( $siteLink['siteUrl'] . '/w/api.php', $queryParams )
			];
		}, $siteLinks );

		$multiHttpClient = $this->httpRequestFactory->createMultiClient();
		$responses = $multiHttpClient->runMulti( $requests );

		$params = $this->extractRequestParams();
		$sectionTitlesOn = (bool)$params['section-titles'];

		// Responses are paired with their site links by position.
		$sizeInfos = array_map(
			static function ( $response, $siteLink ) use ( $sectionTitlesOn ) {
				return self::buildSizeInfo( $response, $siteLink, $sectionTitlesOn );
			},
			$responses,
			$siteLinks
		);

		// Drop the sites whose response could not be used.
		$sizeInfos = array_filter( $sizeInfos );

		usort( $sizeInfos, static function ( $a, $b ) {
			return $b['size'] <=> $a['size'];
		} );

		return $sizeInfos;
	}

	public function execute() {
		$this->run();
	}

	/**
	 * NOTE(review): $resultPageSet is not used; the generator path produces the
	 * same output as execute(). Confirm this is intended.
	 *
	 * @param mixed $resultPageSet
	 */
	public function executeGenerator( $resultPageSet ) {
		$this->run();
	}

	/**
	 * Read the request parameters and add the size information for the
	 * requested Wikidata item to the API result under "sizeInfo".
	 */
	private function run() {
		$params = $this->extractRequestParams();
		$qid = $params['qid'];

		$sizeInformationArray = $this->getArticleSizeInformation( $qid );

		$result = $this->getResult();
		$result->addValue( [ 'query', $this->getModuleName() ], 'sizeInfo', $sizeInformationArray );
	}

	public function getAllowedParams() {
		return [
			'qid' => [
				ParamValidator::PARAM_TYPE => 'string',
				ParamValidator::PARAM_REQUIRED => true,
			],
			'section-titles' => [
				ParamValidator::PARAM_TYPE => 'boolean',
				ParamValidator::PARAM_DEFAULT => false
			]
		];
	}

	protected function getExamplesMessages() {
		return [
			'action=query&list=automatictranslationdenselanguages&qid=Q405' =>
				'apihelp-query+automatictranslationdenselanguages-example-1',
			'action=query&list=automatictranslationdenselanguages&qid=Q405&section-titles=true' =>
				'apihelp-query+automatictranslationdenselanguages-example-2'
		];
	}
}