Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 148
0.00% covered (danger)
0.00%
0 / 10
CRAP
0.00% covered (danger)
0.00%
0 / 1
ApiQueryAutomaticTranslationDenseLanguages
0.00% covered (danger)
0.00%
0 / 148
0.00% covered (danger)
0.00%
0 / 10
650
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 3
0.00% covered (danger)
0.00%
0 / 1
2
 fetchWikipediaSites
0.00% covered (danger)
0.00%
0 / 36
0.00% covered (danger)
0.00%
0 / 1
56
 getWikipediaSites
0.00% covered (danger)
0.00%
0 / 5
0.00% covered (danger)
0.00%
0 / 1
2
 getArticleSiteLinks
0.00% covered (danger)
0.00%
0 / 26
0.00% covered (danger)
0.00%
0 / 1
30
 getArticleSizeInformation
0.00% covered (danger)
0.00%
0 / 55
0.00% covered (danger)
0.00%
0 / 1
42
 execute
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 executeGenerator
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 run
0.00% covered (danger)
0.00%
0 / 5
0.00% covered (danger)
0.00%
0 / 1
2
 getAllowedParams
0.00% covered (danger)
0.00%
0 / 10
0.00% covered (danger)
0.00%
0 / 1
2
 getExamplesMessages
0.00% covered (danger)
0.00%
0 / 6
0.00% covered (danger)
0.00%
0 / 1
2
1<?php
2declare( strict_types = 1 );
3
4namespace ContentTranslation\ActionApi;
5
6use MediaWiki\Api\ApiQuery;
7use MediaWiki\Api\ApiQueryGeneratorBase;
8use MediaWiki\Http\HttpRequestFactory;
9use Wikimedia\LightweightObjectStore\ExpirationAwareness;
10use Wikimedia\ObjectCache\WANObjectCache;
11use Wikimedia\ParamValidator\ParamValidator;
12
/**
 * Api module for fetching the list of sitelinks for the article that corresponds
 * to the Wikidata ID that is given as request parameter, ordered by article size.
 *
 * For every Wikipedia sitelink of the given item, the module requests the sections
 * of the linked article (action=parse&prop=sections) from the respective wiki and
 * uses the byte offset of the last section as an estimation of the article size.
 *
 * @author Nik Gkountas
 * @license GPL-2.0-or-later
 * @since 2024.06
 */
class ApiQueryAutomaticTranslationDenseLanguages extends ApiQueryGeneratorBase {

    private const WIKIPEDIA_API_URL = 'https://en.wikipedia.org/w/api.php';
    private const WIKIDATA_API_URL = 'https://www.wikidata.org/w/api.php';
    /** Substring that identifies the Wikipedia project among the sub-sites of a language */
    private const WIKIPEDIA_URL_FRAGMENT = 'wikipedia';

    private HttpRequestFactory $httpRequestFactory;
    private WANObjectCache $cache;

    public function __construct(
        ApiQuery $query,
        string $moduleName,
        HttpRequestFactory $httpRequestFactory,
        WANObjectCache $cache
    ) {
        parent::__construct( $query, $moduleName );
        $this->httpRequestFactory = $httpRequestFactory;
        $this->cache = $cache;
    }

    /**
     * Fetch the list of Wikipedia sites from the "sitematrix" API.
     *
     * Languages without a "localname" and languages without a Wikipedia sub-site are skipped.
     * An empty array is returned when the request fails or the response cannot be parsed.
     *
     * @param string $languageCode Language code used to localize the "localname" values
     * @return array[] Map of database name to site information, e.g.
     *  [ 'aawiki' => [ 'code' => 'aa', 'url' => <url reported by sitematrix>, 'localname' => 'Afar' ] ]
     */
    private function fetchWikipediaSites( string $languageCode ): array {
        $queryParams = [
            'action' => 'sitematrix',
            'format' => 'json',
            'formatversion' => 2,
            'smtype' => 'language',
            'uselang' => $languageCode,
            'smlangprop' => 'site|code|localname',
            'smsiteprop' => 'dbname|url',
            'origin' => '*'
        ];

        $apiUrl = wfAppendQuery( self::WIKIPEDIA_API_URL, $queryParams );
        $response = $this->httpRequestFactory->get( $apiUrl, [], __METHOD__ );
        // The request returns no string on failure. Under strict types, passing
        // such a value to json_decode() would throw a TypeError.
        if ( !is_string( $response ) || $response === '' ) {
            return [];
        }

        $responseBody = json_decode( $response, true );
        $siteMatrix = is_array( $responseBody ) ? ( $responseBody['sitematrix'] ?? null ) : null;
        if ( !is_array( $siteMatrix ) ) {
            return [];
        }
        // "count" is a sibling of the language entries, not a language entry itself
        unset( $siteMatrix['count'] );

        $results = [];
        foreach ( $siteMatrix as $site ) {
            if ( !is_array( $site ) ) {
                continue;
            }

            $localname = $site['localname'] ?? null;
            $code = $site['code'] ?? null;
            if ( $localname === null || $code === null ) {
                // If a site doesn't have a proper localname, skip it
                // as it's probably not properly configured or disabled.
                continue;
            }

            foreach ( $site['site'] ?? [] as $subSite ) {
                $subSiteUrl = $subSite['url'] ?? null;
                $subSiteDb = $subSite['dbname'] ?? null;
                if ( !is_string( $subSiteUrl ) || $subSiteUrl === '' || $subSiteDb === null ) {
                    continue;
                }

                if ( strpos( $subSiteUrl, self::WIKIPEDIA_URL_FRAGMENT ) !== false ) {
                    // Only the first matching sub-site of each language is used
                    $results[ $subSiteDb ] = [
                        'code' => $code,
                        'url' => $subSiteUrl,
                        'localname' => $localname
                    ];
                    break;
                }
            }
        }

        return $results;
    }

    /**
     * Get the (cached) list of Wikipedia sites, as returned by fetchWikipediaSites().
     *
     * The cache key varies by the language of the current request, because the
     * "localname" values are localized in that language. Empty results (i.e. failed
     * fetches) are not stored in the cache, so that the next request retries.
     *
     * @return array[] Map of database name to [ 'code', 'url', 'localname' ]
     */
    private function getWikipediaSites(): array {
        $languageCode = $this->getContext()->getLanguage()->getCode();

        return $this->cache->getWithSetCallback(
            $this->cache->makeGlobalKey( 'ax-wikipedia-sites', $languageCode ),
            ExpirationAwareness::TTL_DAY,
            function ( $oldValue, &$ttl ) use ( $languageCode ) {
                $sites = $this->fetchWikipediaSites( $languageCode );
                if ( !$sites ) {
                    // Do not keep a failed/empty lookup around for a whole day
                    $ttl = WANObjectCache::TTL_UNCACHEABLE;
                }

                return $sites;
            }
        );
    }

    /**
     * Get the Wikipedia sitelinks of the Wikidata item with the given ID.
     *
     * Dies with an API error when the request to Wikidata returns no response.
     * Sitelinks to sites that are not included in getWikipediaSites() are ignored.
     *
     * @param string $qid
     * @return array[] List of
     *  [ 'site' => string, 'title' => string, 'localname' => string, 'siteUrl' => string, 'siteCode' => string ]
     */
    private function getArticleSiteLinks( string $qid ): array {
        $queryParams = [
            'action' => 'wbgetentities',
            'format' => 'json',
            'props' => 'sitelinks',
            'ids' => $qid,
        ];

        $apiUrl = wfAppendQuery( self::WIKIDATA_API_URL, $queryParams );

        $response = $this->httpRequestFactory->get( $apiUrl, [], __METHOD__ );
        if ( !$response ) {
            $this->dieWithError( 'apierror-query+automatictranslationdenselanguages-sitelink-request-failure' );
        }

        $responseBody = json_decode( $response, true ) ?: [];

        $siteLinks = $responseBody['entities'][$qid]['sitelinks'] ?? [];
        if ( !is_array( $siteLinks ) || !$siteLinks ) {
            // Nothing to match, so there is no need to look up the site list at all
            return [];
        }

        $sites = $this->getWikipediaSites();
        $results = [];

        foreach ( $siteLinks as $siteDb => $siteLink ) {
            $currentSite = $sites[$siteDb] ?? null;
            if ( !$currentSite || !isset( $siteLink['title'] ) ) {
                continue;
            }

            $results[] = [
                'site' => $siteDb,
                'title' => $siteLink['title'],
                'localname' => $currentSite['localname'],
                'siteUrl' => $currentSite['url'],
                'siteCode' => $currentSite['code']
            ];
        }

        return $results;
    }

    /**
     * Build the size information of a single article from the response of
     * the "parse" API request that was sent for it.
     *
     * @param array $response Request entry, expected to hold the body in [ 'response' ][ 'body' ]
     * @param array $siteLink Sitelink entry, as returned by getArticleSiteLinks()
     * @param bool $sectionTitlesOn Whether the titles of the top-level sections should be included
     * @return array|null Null if the response is missing or doesn't contain any "parse" data
     */
    private static function extractSizeInfo( array $response, array $siteLink, bool $sectionTitlesOn ): ?array {
        $body = $response['response']['body'] ?? null;
        if ( !is_string( $body ) || $body === '' ) {
            return null;
        }

        $responseBody = json_decode( $body, true );
        if ( !is_array( $responseBody ) || !isset( $responseBody['parse'] ) ) {
            return null;
        }

        $sections = $responseBody['parse']['sections'] ?? [];
        if ( !is_array( $sections ) ) {
            $sections = [];
        }

        // The byte offset of the last section is used as size estimation. Sections without
        // a usable offset are skipped, so that a null offset is never reported as size.
        $size = 0;
        foreach ( array_reverse( $sections ) as $section ) {
            if ( isset( $section['byteoffset'] ) ) {
                $size = (int)$section['byteoffset'];
                break;
            }
        }

        // Only top-level sections are counted and listed
        $topLevelSections = array_values( array_filter( $sections, static function ( $section ) {
            return ( $section['toclevel'] ?? null ) === 1;
        } ) );

        $infos = [
            'siteUrl' => $siteLink['siteUrl'],
            'language' => $siteLink['siteCode'],
            'languageName' => $siteLink['localname'],
            'title' => $siteLink['title'],
            'sections' => count( $topLevelSections ),
            'size' => $size,
        ];

        if ( $sectionTitlesOn ) {
            $infos['sectionTitles'] = array_map( static function ( $section ) {
                return $section['line'] ?? '';
            }, $topLevelSections );
        }

        return $infos;
    }

    /**
     * Get the size information for all Wikipedia articles that are linked to the
     * given Wikidata item, sorted by size in descending order.
     *
     * @param string $qid
     * @return array[] List of [ 'siteUrl' => string, 'language' => string, 'languageName' => string,
     *  'title' => string, 'sections' => int, 'size' => int ], plus 'sectionTitles' => string[]
     *  when the "section-titles" request parameter is set
     */
    private function getArticleSizeInformation( string $qid ): array {
        $siteLinks = $this->getArticleSiteLinks( $qid );
        if ( !$siteLinks ) {
            return [];
        }

        $requests = array_map( static function ( array $siteLink ): array {
            $queryParams = [
                'action' => 'parse',
                'format' => 'json',
                'formatversion' => 2,
                'prop' => 'sections',
                'page' => $siteLink['title']
            ];

            return [
                'method' => 'GET',
                'url' => wfAppendQuery( $siteLink['siteUrl'] . '/w/api.php', $queryParams )
            ];
        }, $siteLinks );

        $multiHttpClient = $this->httpRequestFactory->createMultiClient();
        $responses = $multiHttpClient->runMulti( $requests );

        $params = $this->extractRequestParams();
        $sectionTitlesOn = (bool)$params['section-titles'];

        $sizeInfos = [];
        // Requests were built from $siteLinks with array_map(), so they share the same keys
        foreach ( $siteLinks as $index => $siteLink ) {
            $response = $responses[$index] ?? null;
            if ( !is_array( $response ) ) {
                continue;
            }

            $infos = self::extractSizeInfo( $response, $siteLink, $sectionTitlesOn );
            if ( $infos !== null ) {
                $sizeInfos[] = $infos;
            }
        }

        usort( $sizeInfos, static function ( $a, $b ) {
            return $b['size'] <=> $a['size'];
        } );

        return $sizeInfos;
    }

    public function execute() {
        $this->run();
    }

    /**
     * The given page set is not used; the results are added to the
     * regular API result, exactly like execute() does.
     *
     * @inheritDoc
     */
    public function executeGenerator( $resultPageSet ) {
        $this->run();
    }

    /**
     * Read the "qid" request parameter and add the size information of the
     * respective articles to the API result, under query.<module name>.sizeInfo
     */
    private function run(): void {
        $params = $this->extractRequestParams();
        $qid = $params['qid'];

        $sizeInformationArray = $this->getArticleSizeInformation( $qid );

        $result = $this->getResult();
        $result->addValue( [ 'query', $this->getModuleName() ], 'sizeInfo', $sizeInformationArray );
    }

    /** @inheritDoc */
    public function getAllowedParams() {
        return [
            'qid' => [
                ParamValidator::PARAM_TYPE => 'string',
                ParamValidator::PARAM_REQUIRED => true,
            ],
            'section-titles' => [
                ParamValidator::PARAM_TYPE => 'boolean',
                ParamValidator::PARAM_DEFAULT => false
            ]
        ];
    }

    /** @inheritDoc */
    protected function getExamplesMessages() {
        return [
            'action=query&list=automatictranslationdenselanguages&qid=Q405' =>
                'apihelp-query+automatictranslationdenselanguages-example-1',
            'action=query&list=automatictranslationdenselanguages&qid=Q405&section-titles=true' =>
                'apihelp-query+automatictranslationdenselanguages-example-2'
        ];
    }
}