Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 162
0.00% covered (danger)
0.00%
0 / 10
CRAP
0.00% covered (danger)
0.00%
0 / 1
ApiQueryAutomaticTranslationDenseLanguages
0.00% covered (danger)
0.00%
0 / 162
0.00% covered (danger)
0.00%
0 / 10
702
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 3
0.00% covered (danger)
0.00%
0 / 1
2
 fetchWikipediaSites
0.00% covered (danger)
0.00%
0 / 36
0.00% covered (danger)
0.00%
0 / 1
56
 getWikipediaSites
0.00% covered (danger)
0.00%
0 / 5
0.00% covered (danger)
0.00%
0 / 1
2
 getArticleSiteLinks
0.00% covered (danger)
0.00%
0 / 26
0.00% covered (danger)
0.00%
0 / 1
30
 getArticleSizeInformation
0.00% covered (danger)
0.00%
0 / 64
0.00% covered (danger)
0.00%
0 / 1
56
 execute
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 executeGenerator
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 run
0.00% covered (danger)
0.00%
0 / 6
0.00% covered (danger)
0.00%
0 / 1
2
 getAllowedParams
0.00% covered (danger)
0.00%
0 / 14
0.00% covered (danger)
0.00%
0 / 1
2
 getExamplesMessages
0.00% covered (danger)
0.00%
0 / 6
0.00% covered (danger)
0.00%
0 / 1
2
1<?php
2declare( strict_types = 1 );
3
4namespace ContentTranslation\ActionApi;
5
6use MediaWiki\Api\ApiQuery;
7use MediaWiki\Api\ApiQueryGeneratorBase;
8use MediaWiki\Http\HttpRequestFactory;
9use Wikimedia\LightweightObjectStore\ExpirationAwareness;
10use Wikimedia\ObjectCache\WANObjectCache;
11use Wikimedia\ParamValidator\ParamValidator;
12
13/**
14 * Api module for fetching the list of sitelinks for the article that corresponds
15 * to the Wikidata ID, ordered by article size, with server-side limiting and searching.
16 *
17 * @author Nik Gkountas
18 * @license GPL-2.0-or-later
19 * @since 2024.06
20 */
class ApiQueryAutomaticTranslationDenseLanguages extends ApiQueryGeneratorBase {

    /** Endpoint queried for the sitematrix (list of language wikis). */
    private const WIKIPEDIA_API_URL = 'https://en.wikipedia.org/w/api.php';
    /** Endpoint queried for the sitelinks of a Wikidata entity. */
    private const WIKIDATA_API_URL = 'https://www.wikidata.org/w/api.php';
    /** Substring used to recognise the Wikipedia site among a language's sister sites. */
    private const WIKIPEDIA_URL_FRAGMENT = 'wikipedia';
    /** Default value of the "limit" request parameter. */
    private const ARTICLE_SITE_LINK_LIMIT = 15;

    private HttpRequestFactory $httpRequestFactory;
    private WANObjectCache $cache;

    /**
     * @param ApiQuery $query
     * @param string $moduleName
     * @param HttpRequestFactory $httpRequestFactory Used for all outgoing API requests
     * @param WANObjectCache $cache Used to cache the sitematrix-derived list of Wikipedia sites
     */
    public function __construct(
        ApiQuery $query,
        string $moduleName,
        HttpRequestFactory $httpRequestFactory,
        WANObjectCache $cache
    ) {
        parent::__construct( $query, $moduleName );
        $this->httpRequestFactory = $httpRequestFactory;
        $this->cache = $cache;
    }

    /**
     * Fetch Wikipedia sites from the sitematrix API.
     *
     * Returns an empty array when the request fails or the response cannot be decoded,
     * instead of passing a null response to json_decode() or reading a missing key.
     *
     * @param string $languageCode Language in which the sites' "localname" is requested
     * @return array Map of database name to site information,
     * e.g. [
     *  'aawiki' => [
     *    'code' => 'aa', 'url' => 'https://aa.wikipedia.org/w/api.php', 'localname' => 'Afar'
     *    ]
     *  ]
     */
    private function fetchWikipediaSites( string $languageCode ): array {
        $queryParams = [
            'action' => 'sitematrix',
            'format' => 'json',
            'formatversion' => 2,
            'smtype' => 'language',
            'uselang' => $languageCode,
            'smlangprop' => 'site|code|localname',
            'smsiteprop' => 'dbname|url',
            'origin' => '*'
        ];

        $apiUrl = wfAppendQuery( self::WIKIPEDIA_API_URL, $queryParams );

        $response = $this->httpRequestFactory->get( $apiUrl, [], __METHOD__ );
        if ( !$response ) {
            // The request failed; there is nothing to decode.
            return [];
        }

        $responseBody = json_decode( $response, true ) ?: [];

        $siteMatrix = $responseBody['sitematrix'] ?? [];
        if ( !is_array( $siteMatrix ) ) {
            return [];
        }
        // "count" is a scalar that sits next to the per-language entries.
        unset( $siteMatrix['count'] );

        $results = [];

        foreach ( $siteMatrix as $site ) {
            if ( !is_array( $site ) ) {
                continue;
            }

            $code = $site['code'] ?? null;
            $localname = $site['localname'] ?? null;
            if ( $code === null || $localname === null ) {
                // If a site doesn't have a proper localname, skip it
                // as it's probably not properly configured or disabled.
                continue;
            }

            // Only the first sister site whose URL contains the Wikipedia fragment is used.
            foreach ( $site['site'] ?? [] as $subSite ) {
                $subSiteUrl = $subSite['url'] ?? '';
                if (
                    $subSiteUrl !== '' &&
                    isset( $subSite['dbname'] ) &&
                    strpos( $subSiteUrl, self::WIKIPEDIA_URL_FRAGMENT ) !== false
                ) {
                    $results[ $subSite['dbname'] ] = [
                        'code' => $code,
                        'url' => $subSiteUrl,
                        'localname' => $localname
                    ];
                    break;
                }
            }
        }

        return $results;
    }

    /**
     * Get the (cached) list of Wikipedia sites, keyed by database name.
     *
     * The cache key includes the request language, because the "localname" of each
     * site is fetched in that language. Empty results are not cached, so that a
     * failed sitematrix request is retried on the next call.
     *
     * @return array See fetchWikipediaSites() for the shape
     */
    private function getWikipediaSites(): array {
        $languageCode = $this->getContext()->getLanguage()->getCode();

        return $this->cache->getWithSetCallback(
            $this->cache->makeGlobalKey( 'ax-wikipedia-sites', $languageCode ),
            ExpirationAwareness::TTL_DAY,
            function ( $oldValue, &$ttl ) use ( $languageCode ) {
                $sites = $this->fetchWikipediaSites( $languageCode );
                if ( !$sites ) {
                    $ttl = ExpirationAwareness::TTL_UNCACHEABLE;
                }

                return $sites;
            }
        );
    }

    /**
     * Fetch site links for a given Wikidata ID.
     *
     * Only sitelinks whose site is present in the list of Wikipedia sites are returned.
     * Dies with an API error when the Wikidata request fails.
     *
     * @param string $qid
     * @return array[] List of [
     *  'site' => string,
     *  'title' => string,
     *  'siteUrl' => string,
     *  'siteCode' => string,
     *  'localname' => string
     * ]
     */
    private function getArticleSiteLinks( string $qid ): array {
        $queryParams = [
            'action' => 'wbgetentities',
            'format' => 'json',
            'props' => 'sitelinks',
            'ids' => $qid,
        ];

        $apiUrl = wfAppendQuery( self::WIKIDATA_API_URL, $queryParams );

        $response = $this->httpRequestFactory->get( $apiUrl, [], __METHOD__ );
        if ( !$response ) {
            $this->dieWithError( 'apierror-query+automatictranslationdenselanguages-sitelink-request-failure' );
        }

        $responseBody = json_decode( $response, true ) ?: [];

        $siteLinks = $responseBody['entities'][$qid]['sitelinks'] ?? [];
        if ( !$siteLinks ) {
            // Nothing to match, so the site list is not needed.
            return [];
        }

        $sites = $this->getWikipediaSites();
        $results = [];

        foreach ( $siteLinks as $siteDb => $siteLink ) {
            $currentSite = $sites[$siteDb] ?? null;
            if ( !$currentSite || !isset( $siteLink['title'] ) ) {
                continue;
            }

            $results[] = [
                'site' => $siteDb,
                'title' => $siteLink['title'],
                'localname' => $currentSite['localname'],
                'siteUrl' => $currentSite['url'],
                'siteCode' => $currentSite['code']
            ];
        }

        return $results;
    }

    /**
     * Turn the outcome of one "parse" request into a size entry for its site link.
     *
     * The size is the byte offset of the last section; the section count and the
     * optional section titles only consider sections with "toclevel" 1.
     *
     * @param array $siteLink One entry of getArticleSiteLinks()
     * @param array $request The request entry returned by the multi HTTP client;
     *  the decoded JSON is read from $request['response']['body']
     * @param bool $withSectionTitles Whether to add the 'sectionTitles' key
     * @return array|null Null when the response contains no "parse" result
     */
    private static function buildSizeInfo( array $siteLink, array $request, bool $withSectionTitles ): ?array {
        $body = $request['response']['body'] ?? '';
        $responseBody = is_string( $body ) ? ( json_decode( $body, true ) ?: [] ) : [];
        if ( !isset( $responseBody['parse'] ) ) {
            return null;
        }

        $sections = $responseBody['parse']['sections'] ?? [];
        // end() returns false for an empty list, in which case the size stays 0.
        $lastSection = end( $sections );
        $size = $lastSection ? ( $lastSection['byteoffset'] ?? 0 ) : 0;

        $topLevelSections = array_filter( $sections, static function ( $section ) {
            return ( $section['toclevel'] ?? null ) === 1;
        } );

        $info = [
            'siteUrl' => $siteLink['siteUrl'],
            'language' => $siteLink['siteCode'],
            'languageName' => $siteLink['localname'],
            'title' => $siteLink['title'],
            'sections' => count( $topLevelSections ),
            'size' => $size,
        ];

        if ( $withSectionTitles ) {
            // array_filter() preserves keys, so re-index to get a plain list.
            $info['sectionTitles'] = array_values( array_map( static function ( $section ) {
                return $section['line'] ?? '';
            }, $topLevelSections ) );
        }

        return $info;
    }

    /**
     * Get size information for articles, with server-side filtering and limiting.
     *
     * One "parse" request is sent per site link; entries are sorted by size in
     * descending order and cut to the "limit" request parameter. "total" is the
     * number of site links before limiting.
     *
     * @param string $qid
     * @return array ['languages' => array, 'total' => int]
     */
    private function getArticleSizeInformation( string $qid ): array {
        $params = $this->extractRequestParams();
        // A negative length would make array_slice() cut from the end instead.
        $limit = max( 0, (int)( $params['limit'] ?? self::ARTICLE_SITE_LINK_LIMIT ) );
        $sectionTitlesOn = (bool)( $params['section-titles'] ?? false );

        $siteLinks = $this->getArticleSiteLinks( $qid );
        if ( !$siteLinks ) {
            return [ 'languages' => [], 'total' => 0 ];
        }

        $total = count( $siteLinks );

        $requests = array_map( static function ( $siteLink ) {
            $queryParams = [
                'action' => 'parse',
                'format' => 'json',
                'formatversion' => 2,
                'prop' => 'sections',
                'page' => $siteLink['title']
            ];
            $apiUrl = wfAppendQuery( $siteLink['siteUrl'] . '/w/api.php', $queryParams );

            return [ 'method' => 'GET', 'url' => $apiUrl ];
        }, $siteLinks );

        $multiHttpClient = $this->httpRequestFactory->createMultiClient();
        $responses = $multiHttpClient->runMulti( $requests );

        $sizeInfos = [];
        // $requests was built with the keys of $siteLinks, so pair the results by key.
        foreach ( $siteLinks as $index => $siteLink ) {
            $request = $responses[$index] ?? null;
            if ( !is_array( $request ) ) {
                continue;
            }

            $info = self::buildSizeInfo( $siteLink, $request, $sectionTitlesOn );
            if ( $info !== null ) {
                $sizeInfos[] = $info;
            }
        }

        usort( $sizeInfos, static function ( $a, $b ) {
            return $b['size'] <=> $a['size'];
        } );

        return [
            'languages' => array_slice( $sizeInfos, 0, $limit ),
            'total' => $total
        ];
    }

    /** @inheritDoc */
    public function execute() {
        $this->run();
    }

    /**
     * @inheritDoc
     * NOTE(review): $resultPageSet is not used; generator mode produces the same
     * output as execute(). Confirm this is intended.
     */
    public function executeGenerator( $resultPageSet ) {
        $this->run();
    }

    /**
     * Read the "qid" parameter and add the "languages" list and the "total"
     * count under [ 'query', <module name> ] in the API result.
     */
    private function run(): void {
        $params = $this->extractRequestParams();
        $qid = $params['qid'];

        $resultData = $this->getArticleSizeInformation( $qid );
        $result = $this->getResult();
        $path = [ 'query', $this->getModuleName() ];
        $result->addValue( $path, 'languages', $resultData['languages'] );
        $result->addValue( $path, 'total', $resultData['total'] );
    }

    /** @inheritDoc */
    public function getAllowedParams() {
        return [
            'qid' => [
                ParamValidator::PARAM_TYPE => 'string',
                ParamValidator::PARAM_REQUIRED => true,
            ],
            'section-titles' => [
                ParamValidator::PARAM_TYPE => 'boolean',
                ParamValidator::PARAM_DEFAULT => false
            ],
            'limit' => [
                ParamValidator::PARAM_TYPE => 'integer',
                ParamValidator::PARAM_DEFAULT => self::ARTICLE_SITE_LINK_LIMIT
            ]
        ];
    }

    /** @inheritDoc */
    protected function getExamplesMessages() {
        return [
            'action=query&list=automatictranslationdenselanguages&qid=Q405&limit=15' =>
                'apihelp-query+automatictranslationdenselanguages-example-1',
            'action=query&list=automatictranslationdenselanguages&qid=Q405&section-titles=true' =>
                'apihelp-query+automatictranslationdenselanguages-example-2'
        ];
    }
}