Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 158
0.00% covered (danger)
0.00%
0 / 8
CRAP
0.00% covered (danger)
0.00%
0 / 1
ApiQueryAutomaticTranslationDenseLanguages
0.00% covered (danger)
0.00%
0 / 158
0.00% covered (danger)
0.00%
0 / 8
600
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 fetchWikipediaSites
0.00% covered (danger)
0.00%
0 / 36
0.00% covered (danger)
0.00%
0 / 1
56
 getWikipediaSites
0.00% covered (danger)
0.00%
0 / 5
0.00% covered (danger)
0.00%
0 / 1
2
 getArticleSiteLinks
0.00% covered (danger)
0.00%
0 / 26
0.00% covered (danger)
0.00%
0 / 1
30
 getArticleSizeInformation
0.00% covered (danger)
0.00%
0 / 64
0.00% covered (danger)
0.00%
0 / 1
56
 execute
0.00% covered (danger)
0.00%
0 / 6
0.00% covered (danger)
0.00%
0 / 1
2
 getAllowedParams
0.00% covered (danger)
0.00%
0 / 14
0.00% covered (danger)
0.00%
0 / 1
2
 getExamplesMessages
0.00% covered (danger)
0.00%
0 / 6
0.00% covered (danger)
0.00%
0 / 1
2
1<?php
2declare( strict_types = 1 );
3
4namespace ContentTranslation\ActionApi;
5
6use MediaWiki\Api\ApiQuery;
7use MediaWiki\Api\ApiQueryBase;
8use MediaWiki\Http\HttpRequestFactory;
9use Wikimedia\LightweightObjectStore\ExpirationAwareness;
10use Wikimedia\ObjectCache\WANObjectCache;
11use Wikimedia\ParamValidator\ParamValidator;
12
13/**
14 * Api module for fetching the list of sitelinks for the article that corresponds
15 * to the Wikidata ID, ordered by article size, with server-side limiting and searching.
16 *
17 * @author Nik Gkountas
18 * @license GPL-2.0-or-later
19 * @since 2024.06
20 */
class ApiQueryAutomaticTranslationDenseLanguages extends ApiQueryBase {
    /** API endpoint queried for the sitematrix (the list of language sites). */
    private const WIKIPEDIA_API_URL = 'https://en.wikipedia.org/w/api.php';
    /** API endpoint queried for the sitelinks of a Wikidata entity. */
    private const WIKIDATA_API_URL = 'https://www.wikidata.org/w/api.php';
    /** Substring a sub-site URL must contain to be treated as the Wikipedia of a language. */
    private const WIKIPEDIA_URL_FRAGMENT = 'wikipedia';
    /** Default for the "limit" request parameter. */
    private const ARTICLE_SITE_LINK_LIMIT = 15;

    public function __construct(
        ApiQuery $query,
        string $moduleName,
        private readonly HttpRequestFactory $httpRequestFactory,
        private readonly WANObjectCache $cache
    ) {
        parent::__construct( $query, $moduleName );
    }

    /**
     * Fetch Wikipedia sites from the sitematrix API.
     *
     * Languages without a "localname", and languages without a sub-site whose URL
     * contains "wikipedia", are skipped. A failed or malformed response yields an
     * empty array instead of raising warnings or a TypeError.
     *
     * @param string $languageCode Language in which the "localname" values are requested
     * @return array[] Map of database name to site information, e.g.
     * [
     *  'aawiki' => [
     *    'code' => 'aa', 'url' => 'https://aa.wikipedia.org', 'localname' => 'Afar'
     *    ]
     *  ]
     */
    private function fetchWikipediaSites( string $languageCode ): array {
        $queryParams = [
            'action' => 'sitematrix',
            'format' => 'json',
            'formatversion' => 2,
            'smtype' => 'language',
            'uselang' => $languageCode,
            'smlangprop' => 'site|code|localname',
            'smsiteprop' => 'dbname|url',
            'origin' => '*'
        ];

        $apiUrl = wfAppendQuery( self::WIKIPEDIA_API_URL, $queryParams );

        $response = $this->httpRequestFactory->get( $apiUrl, [], __METHOD__ );
        if ( !$response ) {
            // The request failed; with strict types json_decode( null ) would throw.
            return [];
        }

        $responseBody = json_decode( $response, true );
        $siteMatrix = is_array( $responseBody ) ? ( $responseBody['sitematrix'] ?? [] ) : [];
        if ( !is_array( $siteMatrix ) ) {
            return [];
        }
        // "count" is a sibling of the per-language entries, not a language entry itself.
        unset( $siteMatrix['count'] );

        $results = [];

        foreach ( $siteMatrix as $site ) {
            if ( !is_array( $site ) ) {
                continue;
            }

            $code = $site['code'] ?? null;
            $localname = $site['localname'] ?? null;
            if ( $code === null || $localname === null ) {
                // If a site doesn't have a proper localname, skip it
                // as it's probably not properly configured or disabled.
                continue;
            }

            foreach ( $site['site'] ?? [] as $subSite ) {
                $subSiteUrl = $subSite['url'] ?? '';
                if ( !isset( $subSite['dbname'] ) || !str_contains( $subSiteUrl, self::WIKIPEDIA_URL_FRAGMENT ) ) {
                    continue;
                }

                // Only the first matching sub-site of each language is used.
                $results[ $subSite['dbname'] ] = [
                    'code' => $code,
                    'url' => $subSiteUrl,
                    'localname' => $localname
                ];
                break;
            }
        }

        return $results;
    }

    /**
     * Get the Wikipedia sites, cached for a day.
     *
     * The cache key includes the request language, because the cached "localname"
     * values are localized to that language.
     *
     * @return array[] See fetchWikipediaSites()
     */
    private function getWikipediaSites(): array {
        $languageCode = $this->getContext()->getLanguage()->getCode();

        $sites = $this->cache->getWithSetCallback(
            $this->cache->makeGlobalKey( 'ax-wikipedia-sites', $languageCode ),
            ExpirationAwareness::TTL_DAY,
            // Returning false for an empty (failed) fetch keeps it from being cached for a day.
            fn () => $this->fetchWikipediaSites( $languageCode ) ?: false
        );

        return is_array( $sites ) ? $sites : [];
    }

    /**
     * Fetch site links for a given Wikidata ID.
     *
     * Only site links whose database name is a known Wikipedia site are returned.
     * Dies with an API error when the Wikidata request fails.
     *
     * @param string $qid
     * @return array[] List of [
     *  'site' => string,
     *  'title' => string,
     *  'siteUrl' => string,
     *  'siteCode' => string,
     *  'localname' => string
     * ]
     */
    private function getArticleSiteLinks( string $qid ): array {
        $queryParams = [
            'action' => 'wbgetentities',
            'format' => 'json',
            'props' => 'sitelinks',
            'ids' => $qid,
        ];

        $apiUrl = wfAppendQuery( self::WIKIDATA_API_URL, $queryParams );

        $response = $this->httpRequestFactory->get( $apiUrl, [], __METHOD__ );
        if ( !$response ) {
            $this->dieWithError( 'apierror-query+automatictranslationdenselanguages-sitelink-request-failure' );
        }

        $responseBody = json_decode( $response, true ) ?: [];

        $siteLinks = $responseBody['entities'][$qid]['sitelinks'] ?? [];

        $sites = $this->getWikipediaSites();
        $results = [];

        foreach ( $siteLinks as $siteDb => $siteLink ) {
            $currentSite = $sites[$siteDb] ?? null;
            if ( !$currentSite || !isset( $siteLink['title'] ) ) {
                continue;
            }

            $results[] = [
                'site' => $siteDb,
                'title' => $siteLink['title'],
                'localname' => $currentSite['localname'],
                'siteUrl' => $currentSite['url'],
                'siteCode' => $currentSite['code']
            ];
        }

        return $results;
    }

    /**
     * Turn one "parse" API response into the size information of an article.
     *
     * @param array $response One entry of the multi-client result
     * @param array $siteLink The site link (see getArticleSiteLinks()) the request was made for
     * @param bool $withSectionTitles Whether to include the titles of the top-level sections
     * @return array|null Null when the response carries no "parse" result
     */
    private static function buildSizeInfo( array $response, array $siteLink, bool $withSectionTitles ): ?array {
        $body = $response['response']['body'] ?? '';
        $decoded = is_string( $body ) ? json_decode( $body, true ) : null;
        if ( !is_array( $decoded ) || !isset( $decoded['parse'] ) ) {
            return null;
        }

        $sections = $decoded['parse']['sections'] ?? [];
        if ( !is_array( $sections ) ) {
            $sections = [];
        }

        // The byte offset of the last section is used as the article size.
        // It may be absent or null, in which case the size is reported as 0.
        $lastSection = end( $sections );
        $size = $lastSection ? (int)( $lastSection['byteoffset'] ?? 0 ) : 0;

        $topLevelSections = array_filter(
            $sections,
            static fn ( $section ) => ( $section['toclevel'] ?? null ) === 1
        );

        $info = [
            'siteUrl' => $siteLink['siteUrl'],
            'language' => $siteLink['siteCode'],
            'languageName' => $siteLink['localname'],
            'title' => $siteLink['title'],
            'sections' => count( $topLevelSections ),
            'size' => $size,
        ];

        if ( $withSectionTitles ) {
            // array_filter() preserves keys; re-index so the titles form a list.
            $info['sectionTitles'] = array_values( array_map(
                static fn ( $section ) => $section['line'] ?? '',
                $topLevelSections
            ) );
        }

        return $info;
    }

    /**
     * Get size information for articles, with server-side filtering and limiting.
     *
     * Every site link of the entity is queried; the results are ordered by size,
     * largest first, and cut down to the "limit" request parameter.
     *
     * @param string $qid
     * @return array ['languages' => array, 'total' => int] where "total" is the number
     *  of Wikipedia site links found, before limiting
     */
    private function getArticleSizeInformation( string $qid ): array {
        $params = $this->extractRequestParams();
        // A negative length would make array_slice() trim from the end, so clamp it.
        $limit = max( 0, (int)( $params['limit'] ?? self::ARTICLE_SITE_LINK_LIMIT ) );
        $sectionTitlesOn = (bool)( $params['section-titles'] ?? false );

        $siteLinks = $this->getArticleSiteLinks( $qid );
        if ( !$siteLinks ) {
            return [ 'languages' => [], 'total' => 0 ];
        }

        $requests = array_map( static function ( $siteLink ) {
            $queryParams = [
                'action' => 'parse',
                'format' => 'json',
                'formatversion' => 2,
                'prop' => 'sections',
                'page' => $siteLink['title']
            ];
            $apiUrl = wfAppendQuery( $siteLink['siteUrl'] . '/w/api.php', $queryParams );

            return [ 'method' => 'GET', 'url' => $apiUrl ];
        }, $siteLinks );

        $responses = $this->httpRequestFactory->createMultiClient()->runMulti( $requests );

        $sizeInfos = [];
        foreach ( $responses as $index => $response ) {
            // Pair each response with its site link by request key rather than by position.
            if ( !isset( $siteLinks[$index] ) || !is_array( $response ) ) {
                continue;
            }

            $info = self::buildSizeInfo( $response, $siteLinks[$index], $sectionTitlesOn );
            if ( $info !== null ) {
                $sizeInfos[] = $info;
            }
        }

        usort( $sizeInfos, static fn ( $a, $b ) => $b['size'] <=> $a['size'] );

        return [
            'languages' => array_slice( $sizeInfos, 0, $limit ),
            'total' => count( $siteLinks )
        ];
    }

    /**
     * Add the size-ordered languages and the total number of site links for the
     * requested Wikidata ID to the API result.
     */
    public function execute() {
        $params = $this->extractRequestParams();
        $resultData = $this->getArticleSizeInformation( $params['qid'] );

        $path = [ 'query', $this->getModuleName() ];
        $result = $this->getResult();
        $result->addValue( $path, 'languages', $resultData['languages'] );
        $result->addValue( $path, 'total', $resultData['total'] );
    }

    /** @inheritDoc */
    public function getAllowedParams() {
        return [
            'qid' => [
                ParamValidator::PARAM_TYPE => 'string',
                ParamValidator::PARAM_REQUIRED => true,
            ],
            'section-titles' => [
                ParamValidator::PARAM_TYPE => 'boolean',
                ParamValidator::PARAM_DEFAULT => false
            ],
            'limit' => [
                ParamValidator::PARAM_TYPE => 'integer',
                ParamValidator::PARAM_DEFAULT => self::ARTICLE_SITE_LINK_LIMIT
            ]
        ];
    }

    /** @inheritDoc */
    protected function getExamplesMessages() {
        return [
            'action=query&list=automatictranslationdenselanguages&qid=Q405&limit=15' =>
                'apihelp-query+automatictranslationdenselanguages-example-1',
            'action=query&list=automatictranslationdenselanguages&qid=Q405&section-titles=true' =>
                'apihelp-query+automatictranslationdenselanguages-example-2'
        ];
    }
}