Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
74.36% |
58 / 78 |
|
50.00% |
3 / 6 |
CRAP | |
0.00% |
0 / 1 |
WikibaseEntityAndLexemeFetcher | |
74.36% |
58 / 78 |
|
50.00% |
3 / 6 |
30.16 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
1 | |||
fetch | |
44.83% |
13 / 29 |
|
0.00% |
0 / 1 |
26.79 | |||
getClaimValueByLang | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
5 | |||
getCachedLanguageEntityCode | |
90.91% |
10 / 11 |
|
0.00% |
0 / 1 |
2.00 | |||
fetchWikibaseItem | |
82.35% |
14 / 17 |
|
0.00% |
0 / 1 |
3.05 | |||
isValidEntityOrLexeme | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Extension\Phonos\Wikibase; |
4 | |
5 | use LanguageCode; |
6 | use MediaWiki\Config\Config; |
7 | use MediaWiki\Extension\Phonos\Exception\PhonosException; |
8 | use MediaWiki\Http\HttpRequestFactory; |
9 | use RepoGroup; |
10 | use stdClass; |
11 | use WANObjectCache; |
12 | |
/**
 * Wikibase item fetcher for Phonos.
 *
 * Requests an item or lexeme from the configured Wikibase through
 * "Special:EntityData/<id>.json" and extracts, for a requested language, the
 * IPA transcription and the pronunciation audio file attached to it.
 *
 * @newable
 */
class WikibaseEntityAndLexemeFetcher {

	/** @var string Prefix that "Special:EntityData/<id>.json" is appended to */
	private $wikibaseUrl;

	/** @var string Claim key under which pronunciation audio values are read */
	private $wikibasePronunciationAudioProp;

	/** @var string Qualifier key that references the language entity of a claim */
	private $wikibaseLangNameProp;

	/** @var string Claim key on a language entity that holds its IETF language tag */
	private $wikibaseIETFLangTagProp;

	/** @var string Claim key under which IPA transcription values are read */
	private $wikibaseIPATranscriptionProp;

	/** @var HttpRequestFactory */
	private $requestFactory;

	/** @var RepoGroup */
	private $repoGroup;

	/** @var WANObjectCache */
	private $wanCache;

	/** @var string Proxy handed to outgoing HTTP requests; skipped when empty */
	private $apiProxy;

	/**
	 * @param HttpRequestFactory $requestFactory Creates the requests sent to the Wikibase.
	 * @param RepoGroup $repoGroup Used to resolve audio file names to files.
	 * @param WANObjectCache $wanCache Caches the language tag of language entities.
	 * @param Config $config Source of PhonosWikibaseUrl, PhonosApiProxy and
	 *  PhonosWikibaseProperties.
	 */
	public function __construct(
		HttpRequestFactory $requestFactory,
		RepoGroup $repoGroup,
		WANObjectCache $wanCache,
		Config $config
	) {
		// Dependencies.
		$this->requestFactory = $requestFactory;
		$this->repoGroup = $repoGroup;
		$this->wanCache = $wanCache;

		// General configuration.
		$this->wikibaseUrl = $config->get( 'PhonosWikibaseUrl' );
		$this->apiProxy = $config->get( 'PhonosApiProxy' );

		// PhonosWikibaseProperties configuration.
		$phonosWikibaseProperties = $config->get( 'PhonosWikibaseProperties' );
		$this->wikibasePronunciationAudioProp = $phonosWikibaseProperties['wikibasePronunciationAudioProp'];
		$this->wikibaseLangNameProp = $phonosWikibaseProperties['wikibaseLangNameProp'];
		$this->wikibaseIETFLangTagProp = $phonosWikibaseProperties['wikibaseIETFLangTagProp'];
		$this->wikibaseIPATranscriptionProp = $phonosWikibaseProperties['wikibaseIPATranscriptionProp'];
	}

	/**
	 * Fetch the IPA transcription and audio file of an item or lexeme.
	 *
	 * For a lexeme, the claims are taken from the first form that has a
	 * representation equal to $text; with an empty $text an empty Entity is
	 * returned. For any other entity type the claims of the entity itself are used.
	 *
	 * @param string $wikibaseEntity Item or lexeme ID ("Q…" or "L…", any letter case).
	 * @param string $text Form representation to look for when the entity is a lexeme.
	 * @param string $lang Language code the returned values must be qualified with.
	 * @return Entity
	 * @throws PhonosException If the ID is malformed, the request fails or the
	 *  entity is missing from the response.
	 */
	public function fetch(
		string $wikibaseEntity,
		string $text,
		string $lang
	): Entity {
		// Validate Wikibase ID.
		if ( !$this->isValidEntityOrLexeme( $wikibaseEntity ) ) {
			throw new PhonosException( 'phonos-wikibase-invalid-entity-lexeme',
				[ $wikibaseEntity ] );
		}

		// Validation is case-insensitive, but the response is looked up by the exact
		// ID string, so use the canonical upper-case form from here on.
		$wikibaseEntity = strtoupper( $wikibaseEntity );

		// Fetch entity data.
		$item = $this->fetchWikibaseItem( $wikibaseEntity );

		// Entity not found.
		if ( $item === null ) {
			throw new PhonosException( 'phonos-wikibase-not-found', [ $wikibaseEntity ] );
		}

		$entity = new Entity();
		$audioFiles = [];
		$ipaTranscriptions = [];

		if ( ( $item->type ?? null ) === 'lexeme' ) {
			// If lexeme, we need the $text representation for the audio file
			if ( $text === '' ) {
				return $entity;
			}
			foreach ( $item->forms ?? [] as $form ) {
				foreach ( $form->representations ?? [] as $representation ) {
					// Only the first form with a representation equal to $text is used.
					if ( ( $representation->value ?? null ) === $text ) {
						$audioFiles = $form->claims->{$this->wikibasePronunciationAudioProp} ?? [];
						$ipaTranscriptions = $form->claims->{$this->wikibaseIPATranscriptionProp} ?? [];
						break 2;
					}
				}
			}
		} else {
			$audioFiles = $item->claims->{$this->wikibasePronunciationAudioProp} ?? [];
			$ipaTranscriptions = $item->claims->{$this->wikibaseIPATranscriptionProp} ?? [];
		}

		$entity->setIPATranscription( $this->getClaimValueByLang( $ipaTranscriptions, $lang ) );
		$claimValue = $this->getClaimValueByLang( $audioFiles, $lang );
		if ( $claimValue ) {
			$file = $this->repoGroup->findFile( $claimValue );
			if ( $file ) {
				$entity->setAudioFile( $file );
			}
		}

		return $entity;
	}

	/**
	 * Look through a set of claims to find the first value in the specified language.
	 *
	 * A claim matches when one of its language qualifiers references an entity
	 * whose cached IETF tag equals the normalized $lang.
	 *
	 * @param mixed[] $claims Set of claims.
	 * @param string $lang User-provided IETF language code.
	 * @return string|null The main value of the first matching claim, or null.
	 * @throws PhonosException If a language entity has to be fetched and the request fails.
	 */
	private function getClaimValueByLang( array $claims, string $lang ): ?string {
		$normalizedLang = LanguageCode::bcp47( $lang );
		foreach ( $claims as $claim ) {
			// A claim whose main snak carries no string value can never be returned,
			// so skip it before spending any language lookups on it.
			$value = $claim->mainsnak->datavalue->value ?? null;
			if ( !is_string( $value ) ) {
				continue;
			}

			$qualLangs = $claim->qualifiers->{$this->wikibaseLangNameProp} ?? [];
			foreach ( $qualLangs as $qualLang ) {
				$langId = $qualLang->datavalue->value->id ?? false;
				if ( !is_string( $langId ) || $langId === '' ) {
					continue;
				}
				if ( $this->getCachedLanguageEntityCode( $langId ) === $normalizedLang ) {
					return $value;
				}
			}
		}
		return null;
	}

	/**
	 * Get the IETF language tag stored on a language entity, going through the WAN cache.
	 *
	 * @param string $IETFLangEntity ID of the language entity.
	 * @return string|false The value of the entity's first IETF tag claim, or false
	 *  if the entity or that claim is missing.
	 * @throws PhonosException If the entity has to be fetched and the request fails.
	 */
	private function getCachedLanguageEntityCode( string $IETFLangEntity ) {
		return $this->wanCache->getWithSetCallback(
			$this->wanCache->makeKey( 'IETF-lang', $IETFLangEntity ),
			WANObjectCache::TTL_INDEFINITE,
			function () use ( $IETFLangEntity ) {
				$langEntity = $this->fetchWikibaseItem( $IETFLangEntity );
				if ( $langEntity ) {
					return $langEntity->claims->{$this->wikibaseIETFLangTagProp}[0]->mainsnak->datavalue->value
						?? false;
				}
				return false;
			}
		);
	}

	/**
	 * Request the JSON data of one entity from the Wikibase.
	 *
	 * @param string $wikibaseEntity Entity ID, used both in the URL and as lookup key
	 *  in the response.
	 * @return stdClass|null The decoded entity, or null if the response does not
	 *  contain an object for that ID.
	 * @throws PhonosException If the HTTP request does not succeed.
	 */
	private function fetchWikibaseItem( string $wikibaseEntity ): ?stdClass {
		$url = $this->wikibaseUrl . "Special:EntityData/$wikibaseEntity.json";
		$options = [
			'method' => 'GET'
		];

		if ( $this->apiProxy ) {
			$options['proxy'] = $this->apiProxy;
		}

		$request = $this->requestFactory->create(
			$url,
			$options,
			__METHOD__
		);
		$status = $request->execute();

		if ( !$status->isOK() ) {
			$error = $status->getMessage()->text();
			throw new PhonosException( 'phonos-wikibase-api-error', [ $error ] );
		}
		$content = json_decode( $request->getContent() );

		// An undecodable or unexpectedly shaped payload is reported as "not found"
		// instead of violating the declared return type.
		$entity = $content->entities->{$wikibaseEntity} ?? null;

		return $entity instanceof stdClass ? $entity : null;
	}

	/**
	 * Check that an ID looks like an item ("Q" + digits) or a lexeme ("L" + digits).
	 *
	 * @param string $wikibaseEntity
	 * @return bool
	 */
	private function isValidEntityOrLexeme( string $wikibaseEntity ): bool {
		// "D" makes "$" match only at the very end, so an ID followed by a newline is
		// rejected; "=== 1" turns preg_match()'s int|false result into a real bool.
		return preg_match( '/^[QL][0-9]+$/iD', $wikibaseEntity ) === 1;
	}
}