Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 137
0.00% covered (danger)
0.00%
0 / 10
CRAP
0.00% covered (danger)
0.00%
0 / 1
ApiQueryAutomaticTranslationDenseLanguages
0.00% covered (danger)
0.00%
0 / 137
0.00% covered (danger)
0.00%
0 / 10
506
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 3
0.00% covered (danger)
0.00%
0 / 1
2
 fetchWikipediaSites
0.00% covered (danger)
0.00%
0 / 31
0.00% covered (danger)
0.00%
0 / 1
42
 getWikipediaSites
0.00% covered (danger)
0.00%
0 / 5
0.00% covered (danger)
0.00%
0 / 1
2
 getArticleSiteLinks
0.00% covered (danger)
0.00%
0 / 23
0.00% covered (danger)
0.00%
0 / 1
20
 getArticleSizeInformation
0.00% covered (danger)
0.00%
0 / 52
0.00% covered (danger)
0.00%
0 / 1
30
 execute
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 executeGenerator
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 run
0.00% covered (danger)
0.00%
0 / 5
0.00% covered (danger)
0.00%
0 / 1
2
 getAllowedParams
0.00% covered (danger)
0.00%
0 / 10
0.00% covered (danger)
0.00%
0 / 1
2
 getExamplesMessages
0.00% covered (danger)
0.00%
0 / 6
0.00% covered (danger)
0.00%
0 / 1
2
1<?php
2declare( strict_types = 1 );
3
4namespace ContentTranslation\ActionApi;
5
6use ApiQueryGeneratorBase;
7use MediaWiki\Http\HttpRequestFactory;
8use WANObjectCache;
9use Wikimedia\LightweightObjectStore\ExpirationAwareness;
10use Wikimedia\ParamValidator\ParamValidator;
11
12/**
13 * Api module for fetching the list of sitelinks for the article that corresponds
14 * to the Wikidata ID that is given as request parameter, ordered by article size.
15 *
16 * @author Nik Gkountas
17 * @license GPL-2.0-or-later
18 * @since 2024.06
19 */
class ApiQueryAutomaticTranslationDenseLanguages extends ApiQueryGeneratorBase {

    /** Action API endpoint queried for the site matrix (the list of language sites). */
    private const WIKIPEDIA_API_URL = 'https://en.wikipedia.org/w/api.php';
    /** Action API endpoint queried for the sitelinks of a Wikidata item. */
    private const WIKIDATA_API_URL = 'https://www.wikidata.org/w/api.php';
    /** Substring searched for in a sub-site URL to recognise the Wikipedia site of a language. */
    private const WIKIPEDIA_URL_FRAGMENT = 'wikipedia';

    private HttpRequestFactory $httpRequestFactory;
    private WANObjectCache $cache;

    /**
     * The first two arguments are forwarded unchanged to the parent constructor.
     *
     * @param mixed $query
     * @param mixed $moduleName
     * @param HttpRequestFactory $httpRequestFactory Used for all outgoing API requests
     * @param WANObjectCache $cache Used to cache the Wikipedia site list
     */
    public function __construct(
        $query,
        $moduleName,
        HttpRequestFactory $httpRequestFactory,
        WANObjectCache $cache
    ) {
        parent::__construct( $query, $moduleName );
        $this->httpRequestFactory = $httpRequestFactory;
        $this->cache = $cache;
    }

    /**
     * Decode a JSON response body into an associative array.
     *
     * Never throws and never returns a non-array, so callers can safely index the result.
     *
     * @param string|null $json Raw response body. Null is accepted because
     *  HttpRequestFactory::get() is documented to return null on failure, and passing null
     *  straight to json_decode() is a TypeError under strict_types.
     * @return array The decoded value, or an empty array when the body is missing,
     *  is not valid JSON, or does not decode to an array
     */
    private static function decodeJsonBody( ?string $json ): array {
        if ( $json === null || $json === '' ) {
            return [];
        }

        $decoded = json_decode( $json, true );

        return is_array( $decoded ) ? $decoded : [];
    }

    /**
     * Fetch the site matrix and keep, for every language, the first sub-site whose URL
     * contains WIKIPEDIA_URL_FRAGMENT.
     *
     * The stored 'url' is the sub-site URL exactly as returned by the site matrix;
     * getArticleSizeInformation() appends '/w/api.php' to it.
     *
     * @return array Map of database name to site info,
     *  e.g. [ 'aawiki' => [ 'code' => 'aa', 'url' => 'https://aa.wikipedia.org' ] ].
     *  Empty when the request fails or the response has no usable site matrix.
     */
    private function fetchWikipediaSites(): array {
        $queryParams = [
            'action' => 'sitematrix',
            'format' => 'json',
            'formatversion' => 2,
            'smtype' => 'language',
            'smlangprop' => 'site|code',
            'smsiteprop' => 'dbname|url',
            'origin' => '*'
        ];

        $apiUrl = wfAppendQuery( self::WIKIPEDIA_API_URL, $queryParams );
        $responseBody = self::decodeJsonBody( $this->httpRequestFactory->get( $apiUrl ) );

        $siteMatrix = $responseBody['sitematrix'] ?? [];
        if ( !is_array( $siteMatrix ) ) {
            return [];
        }
        // 'count' sits next to the language entries but is not one of them
        unset( $siteMatrix['count'] );

        $results = [];

        foreach ( $siteMatrix as $site ) {
            if ( !is_array( $site ) || !isset( $site['code'] ) ) {
                continue;
            }

            foreach ( $site['site'] ?? [] as $subSite ) {
                $url = $subSite['url'] ?? '';
                $dbName = $subSite['dbname'] ?? null;
                if (
                    $dbName !== null &&
                    is_string( $url ) &&
                    strpos( $url, self::WIKIPEDIA_URL_FRAGMENT ) !== false
                ) {
                    $results[ $dbName ] = [
                        'code' => $site['code'],
                        'url' => $url
                    ];
                    // Only the first matching sub-site of each language is used
                    break;
                }
            }
        }

        return $results;
    }

    /**
     * Cached wrapper around fetchWikipediaSites().
     *
     * A non-empty site list is cached for one day. An empty list (failed or malformed
     * upstream response) is returned but not stored, so that a transient failure is not
     * served from cache for the next day.
     *
     * @return array Same shape as the return value of fetchWikipediaSites()
     */
    private function getWikipediaSites(): array {
        return $this->cache->getWithSetCallback(
            $this->cache->makeGlobalKey( 'ax-wikipedia-sites' ),
            ExpirationAwareness::TTL_DAY,
            function ( $oldValue, &$ttl ) {
                $sites = $this->fetchWikipediaSites();
                if ( !$sites ) {
                    $ttl = WANObjectCache::TTL_UNCACHEABLE;
                }

                return $sites;
            }
        );
    }

    /**
     * Fetch the sitelinks of the given Wikidata item and keep those that point to a
     * site present in the Wikipedia site list.
     *
     * @param string $qid Used both as the 'ids' request parameter and as the key under
     *  which the entity is looked up in the response
     * @return array[] List of
     *  [ 'site' => string, 'title' => string, 'siteUrl' => string, 'siteCode' => string ].
     *  Empty when the request fails or the entity has no sitelinks.
     */
    private function getArticleSiteLinks( string $qid ): array {
        $queryParams = [
            'action' => 'wbgetentities',
            'format' => 'json',
            'props' => 'sitelinks',
            'ids' => $qid,
        ];

        $apiUrl = wfAppendQuery( self::WIKIDATA_API_URL, $queryParams );
        $responseBody = self::decodeJsonBody( $this->httpRequestFactory->get( $apiUrl ) );

        $siteLinks = $responseBody['entities'][$qid]['sitelinks'] ?? [];
        if ( !is_array( $siteLinks ) || !$siteLinks ) {
            // Nothing to match, so the site list does not need to be loaded at all
            return [];
        }

        $sites = $this->getWikipediaSites();
        $results = [];

        foreach ( $siteLinks as $siteDb => $siteLink ) {
            $currentSite = $sites[$siteDb] ?? null;
            if ( !$currentSite || !isset( $siteLink['title'] ) ) {
                continue;
            }

            $results[] = [
                'site' => $siteDb,
                'title' => $siteLink['title'],
                'siteUrl' => $currentSite['url'],
                'siteCode' => $currentSite['code']
            ];
        }

        return $results;
    }

    /**
     * Build one action=parse (prop=sections) GET request per sitelink.
     *
     * @param array[] $siteLinks As returned by getArticleSiteLinks()
     * @return array[] List of [ 'method' => 'GET', 'url' => string ], in the order of $siteLinks
     */
    private static function buildSectionRequests( array $siteLinks ): array {
        return array_map( static function ( $siteLink ) {
            $queryParams = [
                'action' => 'parse',
                'format' => 'json',
                'formatversion' => 2,
                'prop' => 'sections',
                'page' => $siteLink['title']
            ];
            $apiUrl = wfAppendQuery( $siteLink['siteUrl'] . '/w/api.php', $queryParams );

            return [ 'method' => 'GET', 'url' => $apiUrl ];
        }, $siteLinks );
    }

    /**
     * Turn the response of one action=parse request into a size summary.
     *
     * @param array $response One entry of the multi-client result; the JSON body is read
     *  from $response['response']['body']
     * @param array $siteLink The sitelink the request was built from
     * @param bool $withSectionTitles Whether to add the 'sectionTitles' key
     * @return array|null Null when the decoded body has no 'parse' key, otherwise
     *  [ 'siteUrl', 'language', 'title', 'sections' => int, 'size' => int ]
     *  plus 'sectionTitles' => string[] when requested
     */
    private static function summarizeParseResponse(
        array $response,
        array $siteLink,
        bool $withSectionTitles
    ): ?array {
        $responseBody = self::decodeJsonBody( $response['response']['body'] ?? null );
        if ( !isset( $responseBody['parse'] ) ) {
            return null;
        }

        $sections = $responseBody['parse']['sections'] ?? [];
        if ( !is_array( $sections ) ) {
            $sections = [];
        }

        // The size is the byte offset of the last section. Offsets that are missing or
        // not integers are skipped so that 'size' is always an int and sorts predictably.
        $size = 0;
        foreach ( $sections as $section ) {
            $offset = $section['byteoffset'] ?? null;
            if ( is_int( $offset ) ) {
                $size = $offset;
            }
        }

        // Only sections with toclevel 1 are counted and listed
        $topLevelSections = array_filter( $sections, static function ( $section ) {
            return ( $section['toclevel'] ?? null ) === 1;
        } );

        $infos = [
            'siteUrl' => $siteLink['siteUrl'],
            'language' => $siteLink['siteCode'],
            'title' => $siteLink['title'],
            'sections' => count( $topLevelSections ),
            'size' => $size,
        ];

        if ( $withSectionTitles ) {
            $infos['sectionTitles'] = array_column( $topLevelSections, 'line' );
        }

        return $infos;
    }

    /**
     * Collect section count and size for every Wikipedia article linked to the given
     * Wikidata item, sorted by size in descending order.
     *
     * @param string $qid
     * @return array[] List of
     *  [ 'siteUrl', 'language', 'title', 'sections' => int, 'size' => int ],
     *  each with an additional 'sectionTitles' => string[] when the 'section-titles'
     *  request parameter is set. Sites whose response has no 'parse' key are omitted.
     */
    private function getArticleSizeInformation( string $qid ): array {
        $siteLinks = $this->getArticleSiteLinks( $qid );
        if ( !$siteLinks ) {
            return [];
        }

        $requests = self::buildSectionRequests( $siteLinks );

        $multiHttpClient = $this->httpRequestFactory->createMultiClient();
        // Responses are paired with sitelinks by position, so both are used as plain lists
        $responses = array_values( $multiHttpClient->runMulti( $requests ) );

        $params = $this->extractRequestParams();
        $sectionTitlesOn = (bool)$params['section-titles'];

        $sizeInfos = [];
        foreach ( array_values( $siteLinks ) as $index => $siteLink ) {
            $response = $responses[$index] ?? null;
            if ( !is_array( $response ) ) {
                continue;
            }

            $infos = self::summarizeParseResponse( $response, $siteLink, $sectionTitlesOn );
            if ( $infos !== null ) {
                $sizeInfos[] = $infos;
            }
        }

        usort( $sizeInfos, static function ( $a, $b ) {
            return $b['size'] <=> $a['size'];
        } );

        return $sizeInfos;
    }

    /**
     * Entry point when the module is used as a list module.
     */
    public function execute() {
        $this->run();
    }

    /**
     * Entry point when the module is used as a generator.
     *
     * Produces the same output as execute(); $resultPageSet is not used.
     *
     * @param mixed $resultPageSet
     */
    public function executeGenerator( $resultPageSet ) {
        $this->run();
    }

    /**
     * Read the 'qid' parameter and add the size information to the API result
     * under [ 'query', <module name>, 'sizeInfo' ].
     */
    private function run() {
        $params = $this->extractRequestParams();
        $qid = $params['qid'];

        $sizeInformationArray = $this->getArticleSizeInformation( $qid );

        $result = $this->getResult();
        $result->addValue( [ 'query', $this->getModuleName() ], 'sizeInfo', $sizeInformationArray );
    }

    /**
     * @return array Definitions of the required 'qid' string parameter and the
     *  optional 'section-titles' boolean parameter (default false)
     */
    public function getAllowedParams() {
        return [
            'qid' => [
                ParamValidator::PARAM_TYPE => 'string',
                ParamValidator::PARAM_REQUIRED => true,
            ],
            'section-titles' => [
                ParamValidator::PARAM_TYPE => 'boolean',
                ParamValidator::PARAM_DEFAULT => false
            ]
        ];
    }

    /**
     * @return array Map of example query string to the message key describing it
     */
    protected function getExamplesMessages() {
        return [
            'action=query&list=automatictranslationdenselanguages&qid=Q405' =>
                'apihelp-query+automatictranslationdenselanguages-example-1',
            'action=query&list=automatictranslationdenselanguages&qid=Q405&section-titles=true' =>
                'apihelp-query+automatictranslationdenselanguages-example-2'
        ];
    }
}