Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 158 |
|
0.00% |
0 / 8 |
CRAP | |
0.00% |
0 / 1 |
| ApiQueryAutomaticTranslationDenseLanguages | |
0.00% |
0 / 158 |
|
0.00% |
0 / 8 |
600 | |
0.00% |
0 / 1 |
| __construct | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| fetchWikipediaSites | |
0.00% |
0 / 36 |
|
0.00% |
0 / 1 |
56 | |||
| getWikipediaSites | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
| getArticleSiteLinks | |
0.00% |
0 / 26 |
|
0.00% |
0 / 1 |
30 | |||
| getArticleSizeInformation | |
0.00% |
0 / 64 |
|
0.00% |
0 / 1 |
56 | |||
| execute | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
| getAllowedParams | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
2 | |||
| getExamplesMessages | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
| 1 | <?php |
| 2 | declare( strict_types = 1 ); |
| 3 | |
| 4 | namespace ContentTranslation\ActionApi; |
| 5 | |
| 6 | use MediaWiki\Api\ApiQuery; |
| 7 | use MediaWiki\Api\ApiQueryBase; |
| 8 | use MediaWiki\Http\HttpRequestFactory; |
| 9 | use Wikimedia\LightweightObjectStore\ExpirationAwareness; |
| 10 | use Wikimedia\ObjectCache\WANObjectCache; |
| 11 | use Wikimedia\ParamValidator\ParamValidator; |
| 12 | |
/**
 * Api module for fetching the list of sitelinks for the article that corresponds
 * to the Wikidata ID, ordered by article size, with server-side limiting.
 *
 * @author Nik Gkountas
 * @license GPL-2.0-or-later
 * @since 2024.06
 */
class ApiQueryAutomaticTranslationDenseLanguages extends ApiQueryBase {
	private const WIKIPEDIA_API_URL = 'https://en.wikipedia.org/w/api.php';
	private const WIKIDATA_API_URL = 'https://www.wikidata.org/w/api.php';
	private const WIKIPEDIA_URL_FRAGMENT = 'wikipedia';
	private const ARTICLE_SITE_LINK_LIMIT = 15;

	public function __construct(
		ApiQuery $query,
		string $moduleName,
		private readonly HttpRequestFactory $httpRequestFactory,
		private readonly WANObjectCache $cache
	) {
		parent::__construct( $query, $moduleName );
	}

	/**
	 * Fetch Wikipedia sites from the sitematrix API.
	 *
	 * Returns an empty array when the HTTP request fails or the response
	 * cannot be decoded into the expected structure.
	 *
	 * @return array[] Map of database name to site information,
	 * e.g. [
	 *   'aawiki' => [
	 *     'code' => 'aa', 'url' => 'https://aa.wikipedia.org', 'localname' => 'Afar'
	 *   ]
	 * ]
	 */
	private function fetchWikipediaSites(): array {
		$queryParams = [
			'action' => 'sitematrix',
			'format' => 'json',
			'formatversion' => 2,
			'smtype' => 'language',
			'uselang' => $this->getContext()->getLanguage()->getCode(),
			'smlangprop' => 'site|code|localname',
			'smsiteprop' => 'dbname|url',
			'origin' => '*'
		];

		$apiUrl = wfAppendQuery( self::WIKIPEDIA_API_URL, $queryParams );

		$response = $this->httpRequestFactory->get( $apiUrl, [], __METHOD__ );
		if ( !$response ) {
			// A failed request yields no body; decoding it would throw under strict_types.
			return [];
		}

		$responseBody = json_decode( $response, true );
		$siteMatrix = is_array( $responseBody ) ? ( $responseBody['sitematrix'] ?? null ) : null;
		if ( !is_array( $siteMatrix ) ) {
			return [];
		}
		// 'count' is a scalar sibling of the per-language entries, not a language entry.
		unset( $siteMatrix['count'] );

		$results = [];

		foreach ( $siteMatrix as $site ) {
			if ( !is_array( $site ) ) {
				continue;
			}

			$code = $site['code'] ?? null;
			$localname = $site['localname'] ?? null;
			if ( $code === null || $localname === null ) {
				// If a site doesn't have a proper localname, skip it
				// as it's probably not properly configured or disabled.
				continue;
			}

			// Pick the first sub-site of this language whose URL looks like a Wikipedia.
			foreach ( $site['site'] ?? [] as $subSite ) {
				$url = $subSite['url'] ?? '';
				$dbName = $subSite['dbname'] ?? null;
				if ( $dbName !== null && $url && str_contains( $url, self::WIKIPEDIA_URL_FRAGMENT ) ) {
					$results[ $dbName ] = [
						'code' => $code,
						'url' => $url,
						'localname' => $localname
					];
					break;
				}
			}
		}

		return $results;
	}

	/**
	 * Get the Wikipedia sites, cached for a day under a global cache key.
	 *
	 * An empty result (e.g. caused by a failed sitematrix request) is returned
	 * to the caller but not stored, so that a transient failure is not kept
	 * in the cache for a whole day.
	 *
	 * @return array[] Same structure as the result of fetchWikipediaSites()
	 */
	private function getWikipediaSites(): array {
		return $this->cache->getWithSetCallback(
			$this->cache->makeGlobalKey( 'ax-wikipedia-sites' ),
			ExpirationAwareness::TTL_DAY,
			function ( $oldValue, &$ttl ) {
				$sites = $this->fetchWikipediaSites();
				if ( !$sites ) {
					$ttl = WANObjectCache::TTL_UNCACHEABLE;
				}

				return $sites;
			}
		);
	}

	/**
	 * Fetch site links for a given Wikidata ID.
	 *
	 * Only site links that belong to a known Wikipedia site (see getWikipediaSites())
	 * are returned. Dies with an API error when the Wikidata request fails.
	 *
	 * @param string $qid
	 * @return array[] List of arrays with the following shape: [
	 *   'site' => string,
	 *   'title' => string,
	 *   'siteUrl' => string,
	 *   'siteCode' => string,
	 *   'localname' => string
	 * ]
	 */
	private function getArticleSiteLinks( string $qid ): array {
		$queryParams = [
			'action' => 'wbgetentities',
			'format' => 'json',
			'props' => 'sitelinks',
			'ids' => $qid,
		];

		$apiUrl = wfAppendQuery( self::WIKIDATA_API_URL, $queryParams );

		$response = $this->httpRequestFactory->get( $apiUrl, [], __METHOD__ );
		if ( !$response ) {
			$this->dieWithError( 'apierror-query+automatictranslationdenselanguages-sitelink-request-failure' );
		}

		$responseBody = json_decode( $response, true );
		$siteLinks = is_array( $responseBody ) ? ( $responseBody['entities'][$qid]['sitelinks'] ?? [] ) : [];
		if ( !is_array( $siteLinks ) || !$siteLinks ) {
			return [];
		}

		$sites = $this->getWikipediaSites();
		$results = [];

		foreach ( $siteLinks as $siteDb => $siteLink ) {
			$currentSite = $sites[$siteDb] ?? null;
			$title = $siteLink['title'] ?? null;
			if ( !$currentSite || $title === null ) {
				continue;
			}

			$results[] = [
				'site' => $siteDb,
				'title' => $title,
				'localname' => $currentSite['localname'],
				'siteUrl' => $currentSite['url'],
				'siteCode' => $currentSite['code']
			];
		}

		return $results;
	}

	/**
	 * Build the size information of a single article from the response of its "parse" request.
	 *
	 * @param array $response A single entry of the array returned by MultiHttpClient::runMulti()
	 * @param array $siteLink A single entry of the array returned by getArticleSiteLinks()
	 * @param bool $sectionTitlesOn Whether the titles of the top-level sections should be included
	 * @return array|null Null when the response doesn't contain a "parse" result
	 */
	private static function buildSizeInfo( array $response, array $siteLink, bool $sectionTitlesOn ): ?array {
		$body = $response['response']['body'] ?? '';
		$responseBody = is_string( $body ) ? json_decode( $body, true ) : null;
		if ( !is_array( $responseBody ) || !isset( $responseBody['parse'] ) ) {
			return null;
		}

		$sections = $responseBody['parse']['sections'] ?? [];

		// The article size is approximated by the byte offset of its last section.
		// Sections without a usable offset are skipped, so that the size doesn't end up being null.
		$size = 0;
		foreach ( array_reverse( $sections ) as $section ) {
			if ( isset( $section['byteoffset'] ) ) {
				$size = $section['byteoffset'];
				break;
			}
		}

		$topLevelSections = array_values( array_filter(
			$sections,
			static fn ( array $section ): bool => ( $section['toclevel'] ?? null ) === 1
		) );

		$infos = [
			'siteUrl' => $siteLink['siteUrl'],
			'language' => $siteLink['siteCode'],
			'languageName' => $siteLink['localname'],
			'title' => $siteLink['title'],
			'sections' => count( $topLevelSections ),
			'size' => $size,
		];

		if ( $sectionTitlesOn ) {
			$infos['sectionTitles'] = array_column( $topLevelSections, 'line' );
		}

		return $infos;
	}

	/**
	 * Get size information for articles, with server-side limiting.
	 *
	 * One "parse" request is sent for every site link of the given Wikidata ID.
	 * The results are ordered by size (largest first) and cut down to the
	 * requested "limit".
	 *
	 * @param string $qid
	 * @return array ['languages' => array, 'total' => int], where "total" is the number
	 *  of site links found, regardless of the limit
	 */
	private function getArticleSizeInformation( string $qid ): array {
		$params = $this->extractRequestParams();
		$limit = (int)( $params['limit'] ?? self::ARTICLE_SITE_LINK_LIMIT );
		$sectionTitlesOn = (bool)( $params['section-titles'] ?? false );

		$siteLinks = $this->getArticleSiteLinks( $qid );
		if ( !$siteLinks ) {
			return [ 'languages' => [], 'total' => 0 ];
		}

		$total = count( $siteLinks );

		$requests = array_map( static function ( array $siteLink ): array {
			$queryParams = [
				'action' => 'parse',
				'format' => 'json',
				'formatversion' => 2,
				'prop' => 'sections',
				'page' => $siteLink['title']
			];

			return [
				'method' => 'GET',
				'url' => wfAppendQuery( $siteLink['siteUrl'] . '/w/api.php', $queryParams )
			];
		}, $siteLinks );

		$multiHttpClient = $this->httpRequestFactory->createMultiClient();
		$responses = $multiHttpClient->runMulti( $requests );

		// Requests were built with the keys of $siteLinks, so match responses by key.
		$sizeInfos = [];
		foreach ( $siteLinks as $index => $siteLink ) {
			$sizeInfo = self::buildSizeInfo( $responses[$index] ?? [], $siteLink, $sectionTitlesOn );
			if ( $sizeInfo !== null ) {
				$sizeInfos[] = $sizeInfo;
			}
		}

		usort( $sizeInfos, static fn ( array $a, array $b ): int => $b['size'] <=> $a['size'] );

		return [
			// Guard against negative limits, which array_slice() would interpret differently.
			'languages' => array_slice( $sizeInfos, 0, max( 0, $limit ) ),
			'total' => $total
		];
	}

	/**
	 * Add the size information of the articles that correspond to the given
	 * "qid" parameter, under the "query" path of this module in the API result.
	 */
	public function execute() {
		$params = $this->extractRequestParams();
		$resultData = $this->getArticleSizeInformation( $params['qid'] );

		$path = [ 'query', $this->getModuleName() ];
		$result = $this->getResult();
		$result->addValue( $path, 'languages', $resultData['languages'] );
		$result->addValue( $path, 'total', $resultData['total'] );
	}

	/** @inheritDoc */
	public function getAllowedParams() {
		return [
			'qid' => [
				ParamValidator::PARAM_TYPE => 'string',
				ParamValidator::PARAM_REQUIRED => true,
			],
			'section-titles' => [
				ParamValidator::PARAM_TYPE => 'boolean',
				ParamValidator::PARAM_DEFAULT => false
			],
			'limit' => [
				ParamValidator::PARAM_TYPE => 'integer',
				ParamValidator::PARAM_DEFAULT => self::ARTICLE_SITE_LINK_LIMIT
			]
		];
	}

	/** @inheritDoc */
	protected function getExamplesMessages() {
		return [
			'action=query&list=automatictranslationdenselanguages&qid=Q405&limit=15' =>
				'apihelp-query+automatictranslationdenselanguages-example-1',
			'action=query&list=automatictranslationdenselanguages&qid=Q405&section-titles=true' =>
				'apihelp-query+automatictranslationdenselanguages-example-2'
		];
	}
}