Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
74.36% covered (warning)
74.36%
58 / 78
50.00% covered (danger)
50.00%
3 / 6
CRAP
0.00% covered (danger)
0.00%
0 / 1
WikibaseEntityAndLexemeFetcher
74.36% covered (warning)
74.36%
58 / 78
50.00% covered (danger)
50.00%
3 / 6
30.16
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
10 / 10
100.00% covered (success)
100.00%
1 / 1
1
 fetch
44.83% covered (danger)
44.83%
13 / 29
0.00% covered (danger)
0.00%
0 / 1
26.79
 getClaimValueByLang
100.00% covered (success)
100.00%
10 / 10
100.00% covered (success)
100.00%
1 / 1
5
 getCachedLanguageEntityCode
90.91% covered (success)
90.91%
10 / 11
0.00% covered (danger)
0.00%
0 / 1
2.00
 fetchWikibaseItem
82.35% covered (warning)
82.35%
14 / 17
0.00% covered (danger)
0.00%
0 / 1
3.05
 isValidEntityOrLexeme
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
1<?php
2
3namespace MediaWiki\Extension\Phonos\Wikibase;
4
5use LanguageCode;
6use MediaWiki\Config\Config;
7use MediaWiki\Extension\Phonos\Exception\PhonosException;
8use MediaWiki\Http\HttpRequestFactory;
9use RepoGroup;
10use stdClass;
11use WANObjectCache;
12
13/**
14 * Wikibase item fetcher for Phonos
15 * @newable
16 */
class WikibaseEntityAndLexemeFetcher {

    /** @var string Base URL of the Wikibase repo; "Special:EntityData/<id>.json" is appended to it. */
    private $wikibaseUrl;

    /** @var string Property ID whose claims hold the pronunciation audio file. */
    private $wikibasePronunciationAudioProp;

    /** @var string Property ID of the qualifier that points at a claim's language entity. */
    private $wikibaseLangNameProp;

    /** @var string Property ID that holds the IETF language tag on a language entity. */
    private $wikibaseIETFLangTagProp;

    /** @var string Property ID whose claims hold the IPA transcription. */
    private $wikibaseIPATranscriptionProp;

    /** @var HttpRequestFactory */
    private $requestFactory;

    /** @var RepoGroup */
    private $repoGroup;

    /** @var WANObjectCache */
    private $wanCache;

    /** @var string Proxy passed to HTTP requests; a falsy value means no proxy option is set. */
    private $apiProxy;

    /**
     * @param HttpRequestFactory $requestFactory Used to build the requests to the Wikibase repo.
     * @param RepoGroup $repoGroup Used to resolve audio file names to file objects.
     * @param WANObjectCache $wanCache Used to cache language entity => IETF tag lookups.
     * @param Config $config Must provide PhonosWikibaseUrl, PhonosApiProxy and
     *  PhonosWikibaseProperties.
     */
    public function __construct(
        HttpRequestFactory $requestFactory,
        RepoGroup $repoGroup,
        WANObjectCache $wanCache,
        Config $config
    ) {
        // Dependencies.
        $this->requestFactory = $requestFactory;
        $this->repoGroup = $repoGroup;
        $this->wanCache = $wanCache;

        // General configuration.
        $this->wikibaseUrl = $config->get( 'PhonosWikibaseUrl' );
        $this->apiProxy = $config->get( 'PhonosApiProxy' );

        // PhonosWikibaseProperties configuration.
        $phonosWikibaseProperties = $config->get( 'PhonosWikibaseProperties' );
        $this->wikibasePronunciationAudioProp = $phonosWikibaseProperties['wikibasePronunciationAudioProp'];
        $this->wikibaseLangNameProp = $phonosWikibaseProperties['wikibaseLangNameProp'];
        $this->wikibaseIETFLangTagProp = $phonosWikibaseProperties['wikibaseIETFLangTagProp'];
        $this->wikibaseIPATranscriptionProp = $phonosWikibaseProperties['wikibaseIPATranscriptionProp'];
    }

    /**
     * Fetch the IPA transcription and pronunciation audio file of a Wikibase item or lexeme.
     *
     * For a lexeme, the claims are taken from the first form that has a representation
     * equal to $text; for anything else they are taken from the entity itself.
     *
     * @param string $wikibaseEntity Item or lexeme ID (Q… or L…, case-insensitive).
     * @param string $text Form representation to look for; only used for lexemes.
     * @param string $lang User-provided IETF language code used to select the claims.
     * @return Entity Empty when the entity is a lexeme and $text is the empty string.
     * @throws PhonosException If the ID is malformed, the entity is not found, or the
     *  request to the Wikibase repo fails.
     */
    public function fetch(
        string $wikibaseEntity,
        string $text,
        string $lang
    ): Entity {
        // Validate Wikibase ID.
        if ( !$this->isValidEntityOrLexeme( $wikibaseEntity ) ) {
            throw new PhonosException( 'phonos-wikibase-invalid-entity-lexeme',
                [ $wikibaseEntity ] );
        }

        // Fetch entity data.
        $item = $this->fetchWikibaseItem( $wikibaseEntity );

        // Entity not found.
        if ( $item === null ) {
            throw new PhonosException( 'phonos-wikibase-not-found', [ $wikibaseEntity ] );
        }

        $entity = new Entity();
        $audioFiles = [];
        $ipaTranscriptions = [];

        if ( ( $item->type ?? null ) === 'lexeme' ) {
            // If lexeme, we need the $text representation for the audio file
            if ( $text === '' ) {
                return $entity;
            }
            // Missing "forms"/"representations" are treated as empty rather than
            // raising undefined-property warnings.
            foreach ( $item->forms ?? [] as $form ) {
                foreach ( $form->representations ?? [] as $representation ) {
                    // check if $text value is found in representation
                    if ( ( $representation->value ?? null ) === $text ) {
                        $audioFiles = $form->claims->{$this->wikibasePronunciationAudioProp} ?? [];
                        $ipaTranscriptions = $form->claims->{$this->wikibaseIPATranscriptionProp} ?? [];
                        // Only the first matching form is used.
                        break 2;
                    }
                }
            }
        } else {
            $audioFiles = $item->claims->{$this->wikibasePronunciationAudioProp} ?? [];
            $ipaTranscriptions = $item->claims->{$this->wikibaseIPATranscriptionProp} ?? [];
        }

        $entity->setIPATranscription( $this->getClaimValueByLang( $ipaTranscriptions, $lang ) );
        $claimValue = $this->getClaimValueByLang( $audioFiles, $lang );
        if ( $claimValue ) {
            $file = $this->repoGroup->findFile( $claimValue );
            if ( $file ) {
                $entity->setAudioFile( $file );
            }
        }

        return $entity;
    }

    /**
     * Look through a set of claims to find the first value in the specified language.
     *
     * A claim matches when one of its language qualifiers points at an entity whose
     * IETF tag equals the normalized form of $lang.
     *
     * @param mixed[] $claims Set of claims.
     * @param string $lang User-provided IETF language code.
     * @return string|null The main value of the first matching claim, or null if none match.
     */
    private function getClaimValueByLang( array $claims, string $lang ): ?string {
        $normalizedLang = LanguageCode::bcp47( $lang );
        foreach ( $claims as $claim ) {
            // A claim without a string main value can never be returned, so skip it
            // before spending any language lookups on its qualifiers.
            $value = $claim->mainsnak->datavalue->value ?? null;
            if ( !is_string( $value ) ) {
                continue;
            }

            $qualLangs = $claim->qualifiers->{$this->wikibaseLangNameProp} ?? [];
            foreach ( $qualLangs as $qualLang ) {
                $langId = $qualLang->datavalue->value->id ?? false;
                if ( $langId && $this->getCachedLanguageEntityCode( $langId ) === $normalizedLang ) {
                    return $value;
                }
            }
        }
        return null;
    }

    /**
     * Resolve a language entity to the value of its IETF language tag claim, going
     * through the WAN cache.
     *
     * @param string $IETFLangEntity ID of the language entity.
     * @return string|false The value of the entity's first IETF language tag claim, or
     *  false if the entity or the claim could not be found.
     */
    private function getCachedLanguageEntityCode( string $IETFLangEntity ) {
        return $this->wanCache->getWithSetCallback(
            $this->wanCache->makeKey( 'IETF-lang', $IETFLangEntity ),
            WANObjectCache::TTL_INDEFINITE,
            function () use ( $IETFLangEntity ) {
                $langEntity = $this->fetchWikibaseItem( $IETFLangEntity );
                if ( $langEntity ) {
                    return $langEntity->claims->{$this->wikibaseIETFLangTagProp}[0]->mainsnak->datavalue->value
                        ?? false;
                }
                return false;
            }
        );
    }

    /**
     * Request the JSON entity data of a single entity from the Wikibase repo.
     *
     * @param string $wikibaseEntity Entity ID; matched case-insensitively.
     * @return stdClass|null The decoded entity, or null if the response does not contain it.
     * @throws PhonosException If the HTTP request does not succeed.
     */
    private function fetchWikibaseItem( string $wikibaseEntity ): ?stdClass {
        // isValidEntityOrLexeme() accepts lower-case IDs, but the lookup below is an exact
        // property match and Wikibase keys the response by the canonical (upper-case) ID.
        $wikibaseEntity = strtoupper( $wikibaseEntity );

        $url = $this->wikibaseUrl . "Special:EntityData/$wikibaseEntity.json";
        $options = [
            'method' => 'GET'
        ];

        if ( $this->apiProxy ) {
            $options['proxy'] = $this->apiProxy;
        }

        $request = $this->requestFactory->create(
            $url,
            $options,
            __METHOD__
        );
        $status = $request->execute();

        if ( !$status->isOK() ) {
            $error = $status->getMessage()->text();
            throw new PhonosException( 'phonos-wikibase-api-error', [ $error ] );
        }
        $content = json_decode( $request->getContent() );

        // json_decode() yields null for an unparsable body; "??" turns that, and a missing
        // key, into null. Anything that is not an object is treated as "not found" too.
        $item = $content->entities->{$wikibaseEntity} ?? null;
        return $item instanceof stdClass ? $item : null;
    }

    /**
     * Check that a string is shaped like an item (Q…) or lexeme (L…) ID.
     *
     * @param string $wikibaseEntity
     * @return bool True if the string is "Q" or "L" (either case) followed only by digits.
     */
    private function isValidEntityOrLexeme( string $wikibaseEntity ): bool {
        // preg_match() returns int|false; compare explicitly to return a real bool.
        return preg_match( '/^[QL][0-9]+$/i', $wikibaseEntity ) === 1;
    }
}