Press n or j to go to the next uncovered block, b, p or k for the previous block.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 | 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 6x 6x 1x 5x 5x 5x 5x 1x 4x 4x 4x 4x 4x 4x 4x 4x 4x 24x 4x 4x 4x 4x 22x 4x 4x 54x 54x 98x 118x 118x 100x 54x 118x 108x 108x 108x 100x 100x 54x 46x 2x 44x 44x 26x 26x 6x 6x 12x 12x 108x 100x 100x 100x 100x 100x 100x 100x 100x 100x 18x 25x 25x 1x 24x 24x 24x 24x 24x 24x 24x 24x 24x 24x 34x 24x 24x 24x 24x 27x 27x 1x 26x 26x 26x 26x 26x 26x 26x 26x 54x 
'use strict';

const {
	convertArrayToKnownTypedList,
	wrapInZ6,
	wrapInZ9,
	isLexemeId,
	isLexemeFormId,
	isLexemeSenseId,
	isObject
} = require( '../function-schemata/javascript/src/utils.js' );
const { getLanguageMap } = require( './builtins.js' );

// Object keyed by language code, whose values are the ZIDs of persistent Z60s.
const languageMap = getLanguageMap();

// ZIDs for the Wikidata-based types and their keys
const STATEMENT_TYPE = 'Z6003';
const STATEMENT_SUBJECT = 'Z6003K1';
const STATEMENT_PREDICATE = 'Z6003K2';
const STATEMENT_VALUE = 'Z6003K3';
const STATEMENT_RANK = 'Z6003K4';
const LEXEME_FORM_TYPE = 'Z6004';
const LEXEME_FORM_IDENTITY = 'Z6004K1';
const LEXEME_FORM_LEXEME = 'Z6004K2';
const LEXEME_FORM_REPRESENTATIONS = 'Z6004K3';
const LEXEME_FORM_GRAMMATICAL_FEATURES = 'Z6004K4';
const LEXEME_FORM_CLAIMS = 'Z6004K5';
const LEXEME_TYPE = 'Z6005';
const LEXEME_IDENTITY = 'Z6005K1';
const LEXEME_LEMMAS = 'Z6005K2';
const LEXEME_LANGUAGE = 'Z6005K3';
const LEXEME_LEXICAL_CATEGORY = 'Z6005K4';
const LEXEME_CLAIMS = 'Z6005K5';
const LEXEME_SENSES = 'Z6005K6';
const LEXEME_FORMS = 'Z6005K7';
const LEXEME_SENSE_TYPE = 'Z6006';
const LEXEME_SENSE_IDENTITY = 'Z6006K1';
const LEXEME_SENSE_GLOSSES = 'Z6006K2';
const LEXEME_SENSE_CLAIMS = 'Z6006K3';

// ZIDs for the instances of Z6040 / Wikidata statement rank
const STATEMENT_RANK_PREFERRED = 'Z6041';
const STATEMENT_RANK_NORMAL = 'Z6042';
const STATEMENT_RANK_DEPRECATED = 'Z6043';

// ZIDs for the Wikidata reference types
const ITEM_REF = 'Z6091';
const PROPERTY_REF = 'Z6092';
const LEXEME_FORM_REF = 'Z6094';
const LEXEME_REF = 'Z6095';
const LEXEME_SENSE_REF = 'Z6096';

// TODO( T370072, T341628 ) Use these to support a more comprehensive retrieval & instantiation:
// const ITEM_TYPE = 'Z6001';
// const PROPERTY_TYPE = 'Z6002';

// Splits a lexeme form ID (e.g. L12345-F1) into its lexeme part and its form part.
// Deliberately not global/sticky, so sharing one instance across calls is stateless.
const LEXEME_FORM_ID_PATTERN = /^(L[1-9]\d*)-(F[1-9]\d*)$/;

/**
 * Given the JSON returned from Wikidata for a lexeme, construct the corresponding ZObject
 * of type Wikidata lexeme.
 *
 * The thrown errors, indicating an invalid input, are defensive programming;
 * not supposed to happen. A Lexeme should always have an ID, language, lexical
 * category, and at least one lemma. (But there can be Lexemes without forms and/or
 * senses.)
 *
 * See also:
 *   https://www.mediawiki.org/wiki/Wikibase/DataModel
 *   https://www.wikidata.org/wiki/Wikidata:Lexicographical_data/Documentation
 *   https://www.wikidata.org/wiki/Wikidata:Data_access
 *   https://www.mediawiki.org/wiki/Extension:WikibaseLexeme/Data_Model
 *   https://doc.wikimedia.org/WikibaseLexeme/master/php/md_docs_2topics_2json.html
 *   https://phabricator.wikimedia.org/T368654
 *
 * @param {Object} wikidataLexeme The JSON returned from Wikidata for a lexeme
 * @param {Function|null} [logThis=null] Logging function taking 2 arguments, level and
 *   message. It is handed on to every nested conversion (lemmas, language, claims, senses
 *   and forms).
 * @return {Object} A ZObject of type Wikidata lexeme, in normal form
 * @throws {Error} If wikidataLexeme is invalid (shouldn't happen)
 */
function convertLexeme( wikidataLexeme, logThis = null ) {
	const wikifunctionsLexeme = { Z1K1: wrapInZ9( LEXEME_TYPE ) };

	if ( !wikidataLexeme.id ) {
		throw new Error( 'Lexeme from Wikidata has no id' );
	}
	if ( !isLexemeId( wikidataLexeme.id ) ) {
		throw new Error( `Lexeme from Wikidata has an invalid id: <${ wikidataLexeme.id }>` );
	}
	const wikifunctionsLexemeRef = makeWDRef( LEXEME_REF, wikidataLexeme.id );
	wikifunctionsLexeme[ LEXEME_IDENTITY ] = wikifunctionsLexemeRef;

	if ( !wikidataLexeme.lemmas || Object.keys( wikidataLexeme.lemmas ).length === 0 ) {
		throw new Error( `Lexeme <${ wikidataLexeme.id }> has no lemmas` );
	}
	wikifunctionsLexeme[ LEXEME_LEMMAS ] =
		convertToMultilingualText( wikidataLexeme.lemmas, logThis );

	// We ignore the language element & take the language of the first lemma.
	wikifunctionsLexeme[ LEXEME_LANGUAGE ] = getLexemeLanguage( wikidataLexeme, logThis );

	if ( !wikidataLexeme.lexicalCategory ) {
		throw new Error( `Lexeme <${ wikidataLexeme.id }> has no lexicalCategory` );
	}
	wikifunctionsLexeme[ LEXEME_LEXICAL_CATEGORY ] =
		makeWDRef( ITEM_REF, wikidataLexeme.lexicalCategory );

	// Not sure whether there is guaranteed to be a claims property; we provide
	// an empty object if needed, to avoid having a missing LEXEME_CLAIMS.
	const wikidataClaims = wikidataLexeme.claims || {};
	wikifunctionsLexeme[ LEXEME_CLAIMS ] =
		convertClaims( wikifunctionsLexemeRef, wikidataClaims, logThis );

	const wikifunctionsLexemeSenses = [];
	if ( wikidataLexeme.senses ) {
		for ( const wikidataLexemeSense of wikidataLexeme.senses ) {
			wikifunctionsLexemeSenses.push( convertLexemeSense( wikidataLexemeSense, logThis ) );
		}
	}
	wikifunctionsLexeme[ LEXEME_SENSES ] =
		convertArrayToKnownTypedList( wikifunctionsLexemeSenses, wrapInZ9( LEXEME_SENSE_TYPE ) );

	const wikifunctionsLexemeForms = [];
	if ( wikidataLexeme.forms ) {
		for ( const wikidataLexemeForm of wikidataLexeme.forms ) {
			wikifunctionsLexemeForms.push( convertLexemeForm( wikidataLexemeForm, logThis ) );
		}
	}
	wikifunctionsLexeme[ LEXEME_FORMS ] =
		convertArrayToKnownTypedList( wikifunctionsLexemeForms, wrapInZ9( LEXEME_FORM_TYPE ) );

	return wikifunctionsLexeme;
}

/**
 * Given the JSON returned from Wikidata for the value of a "claims" property, extract statements
 * we currently handle and transform them into a list of Z6003/'Wikidata statement'.
 *
 * The Wikidata JSON (wikidataClaims) is organized by property ID; e.g.:
 *   { "P123": [ list of statement objects involving P123 ],
 *     "P456": [ list of statement objects involving P456 ],
 *     etc. }
 * The statement objects do not mention the subject ID (which comes from the object containing the
 * claims), so we pass in wikifunctionsSubjectRef from above.
 *
 * Each lexeme, lexeme form, lexeme sense, and possibly other entity types, can contain a "claims"
 * property, which can be an empty object {}.
 *
 * @param {Object} wikifunctionsSubjectRef Instance of reference type, for the subject of the
 *   statements
 * @param {Object} wikidataClaims JSON returned from Wikidata for the value of a "claims" property
 * @param {Function|null} [logThis=null] Logging function taking 2 arguments, level and message
 * @return {Object} A ZList of Z6003/'Wikidata statement'
 */
function convertClaims( wikifunctionsSubjectRef, wikidataClaims, logThis = null ) {
	const wikifunctionsStatements = [];
	for ( const key of Object.keys( wikidataClaims ) ) {
		for ( const wikidataStatement of wikidataClaims[ key ] ) {
			const wikifunctionsStatement =
				convertStatement( wikifunctionsSubjectRef, wikidataStatement, logThis );
			// Statements we do not (yet) handle come back as null and are dropped.
			if ( wikifunctionsStatement ) {
				wikifunctionsStatements.push( wikifunctionsStatement );
			}
		}
	}
	return convertArrayToKnownTypedList( wikifunctionsStatements, wrapInZ9( STATEMENT_TYPE ) );
}

/**
 * Map a Wikidata statement rank string onto the ZID of the matching rank instance.
 *
 * @param {string} wikidataRank The "rank" value of a Wikidata statement
 * @return {string|null} The rank ZID, or null if the rank is not one we recognize
 */
function getStatementRankZid( wikidataRank ) {
	switch ( wikidataRank ) {
		case 'preferred':
			return STATEMENT_RANK_PREFERRED;
		case 'normal':
			return STATEMENT_RANK_NORMAL;
		case 'deprecated':
			return STATEMENT_RANK_DEPRECATED;
		default:
			return null;
	}
}

/**
 * Convert the value carried by the main snak of a Wikidata statement, if it is of a kind we
 * currently handle: a string, a monolingual text, or a reference to an item, lexeme, lexeme
 * form or lexeme sense.
 *
 * @param {Object} snak The "mainsnak" of a Wikidata statement
 * @param {Function|null} [logThis=null] Logging function taking 2 arguments, level and message
 * @return {Object|null} The converted value, or null if there is no value we want to keep
 */
function convertStatementValue( snak, logThis = null ) {
	if ( snak.snaktype !== 'value' || !snak.datavalue || !isObject( snak.datavalue ) ) {
		return null;
	}
	const datavalue = snak.datavalue;

	if ( datavalue.type === 'string' ) {
		return wrapInZ6( datavalue.value );
	}
	if ( datavalue.type === 'monolingualtext' && isObject( datavalue.value ) ) {
		return convertToMonolingualText(
			datavalue.value.language, datavalue.value.text, logThis );
	}
	if ( datavalue.type === 'wikibase-entityid' && isObject( datavalue.value ) ) {
		switch ( datavalue.value[ 'entity-type' ] ) {
			case 'item':
				return makeWDRef( ITEM_REF, datavalue.value.id );
			case 'form':
				return makeWDRef( LEXEME_FORM_REF, datavalue.value.id );
			case 'lexeme':
				return makeWDRef( LEXEME_REF, datavalue.value.id );
			case 'sense':
				return makeWDRef( LEXEME_SENSE_REF, datavalue.value.id );
			default:
				return null;
		}
	}
	return null;
}

/**
 * Given a single Wikidata statement object, construct the corresponding
 * Z6003/'Wikidata statement', or return null if it is a statement we do not handle.
 *
 * @param {Object} wikifunctionsSubjectRef Instance of reference type, for the subject of the
 *   statement
 * @param {Object} wikidataStatement One statement object from a Wikidata "claims" value
 * @param {Function|null} [logThis=null] Logging function taking 2 arguments, level and message
 * @return {Object|null} A Z6003/'Wikidata statement' in normal form, or null
 */
function convertStatement( wikifunctionsSubjectRef, wikidataStatement, logThis = null ) {
	// For now, we ignore statements with qualifiers. We also ignore statements without a rank (we
	// don't expect any, but if we encounter one we consider it to be a defective statement.)
	if ( wikidataStatement.type !== 'statement' || !wikidataStatement.mainsnak ||
		!wikidataStatement.rank || wikidataStatement.qualifiers ) {
		return null;
	}

	// A rank outside the three known values is treated like a missing one; otherwise we
	// would emit a statement whose rank reference wraps undefined.
	const wikifunctionsRank = getStatementRankZid( wikidataStatement.rank );
	if ( !wikifunctionsRank ) {
		return null;
	}

	const snak = wikidataStatement.mainsnak;

	// See if there's a value we want to keep
	const statementValue = convertStatementValue( snak, logThis );
	if ( !statementValue ) {
		return null;
	}

	// There's a value, so construct & return the statement
	const wikifunctionsStatement = { Z1K1: wrapInZ9( STATEMENT_TYPE ) };
	wikifunctionsStatement[ STATEMENT_SUBJECT ] = wikifunctionsSubjectRef;
	wikifunctionsStatement[ STATEMENT_PREDICATE ] = makeWDRef( PROPERTY_REF, snak.property );
	wikifunctionsStatement[ STATEMENT_VALUE ] = statementValue;
	wikifunctionsStatement[ STATEMENT_RANK ] = wrapInZ9( wikifunctionsRank );
	return wikifunctionsStatement;
}

/**
 * Given the JSON returned from Wikidata for a lexeme form, construct the corresponding
 * ZObject of type Wikidata lexeme form.
 *
 * @param {Object} wikidataLexemeForm The JSON returned from Wikidata for a lexeme form
 * @param {Function|null} [logThis=null] Logging function taking 2 arguments, level and message
 * @return {Object} A ZObject of type Wikidata lexeme form, in normal form
 * @throws {Error} If wikidataLexemeForm has no id, or an invalid one
 */
function convertLexemeForm( wikidataLexemeForm, logThis = null ) {
	const wikifunctionsLexemeForm = { Z1K1: wrapInZ9( LEXEME_FORM_TYPE ) };

	if ( !wikidataLexemeForm.id ) {
		throw new Error( 'Lexeme form from Wikidata has no id' );
	}
	if ( !isLexemeFormId( wikidataLexemeForm.id ) ) {
		throw new Error( `Lexeme form from Wikidata has an invalid id: <${ wikidataLexemeForm.id }>` );
	}
	const wikifunctionsLexemeFormRef = makeWDRef( LEXEME_FORM_REF, wikidataLexemeForm.id );
	wikifunctionsLexemeForm[ LEXEME_FORM_IDENTITY ] = wikifunctionsLexemeFormRef;

	// The containing lexeme's ID is the part of the form ID before the hyphen. Should the
	// pattern here ever disagree with isLexemeFormId, fail with a readable message instead
	// of a TypeError on a null match.
	const idParts = wikidataLexemeForm.id.match( LEXEME_FORM_ID_PATTERN );
	if ( !idParts ) {
		throw new Error( `Lexeme form from Wikidata has an invalid id: <${ wikidataLexemeForm.id }>` );
	}
	wikifunctionsLexemeForm[ LEXEME_FORM_LEXEME ] = makeWDRef( LEXEME_REF, idParts[ 1 ] );

	// Not sure whether there is guaranteed to be at least one representation; we provide
	// an empty object if needed, to avoid having a missing K3.
	const wikidataRepresentations = wikidataLexemeForm.representations || {};
	wikifunctionsLexemeForm[ LEXEME_FORM_REPRESENTATIONS ] =
		convertToMultilingualText( wikidataRepresentations, logThis );

	// Each grammatical feature is a QID
	const wikifunctionsFeatures = [];
	if ( wikidataLexemeForm.grammaticalFeatures ) {
		for ( const feature of wikidataLexemeForm.grammaticalFeatures ) {
			wikifunctionsFeatures.push( makeWDRef( ITEM_REF, feature ) );
		}
	}
	wikifunctionsLexemeForm[ LEXEME_FORM_GRAMMATICAL_FEATURES ] =
		convertArrayToKnownTypedList( wikifunctionsFeatures, wrapInZ9( ITEM_REF ) );

	// Not sure whether there is guaranteed to be a claims property; we provide
	// an empty object if needed, to avoid having a missing LEXEME_FORM_CLAIMS.
	const wikidataClaims = wikidataLexemeForm.claims || {};
	wikifunctionsLexemeForm[ LEXEME_FORM_CLAIMS ] =
		convertClaims( wikifunctionsLexemeFormRef, wikidataClaims, logThis );

	return wikifunctionsLexemeForm;
}

/**
 * Given the JSON returned from Wikidata for a lexeme sense, construct the corresponding
 * ZObject of type Wikidata lexeme sense.
 *
 * @param {Object} wikidataLexemeSense The JSON returned from Wikidata for a lexeme sense
 * @param {Function|null} [logThis=null] Logging function taking 2 arguments, level and message
 * @return {Object} A ZObject of type Wikidata lexeme sense, in normal form
 * @throws {Error} If wikidataLexemeSense has no id, or an invalid one
 */
function convertLexemeSense( wikidataLexemeSense, logThis = null ) {
	const wikifunctionsLexemeSense = { Z1K1: wrapInZ9( LEXEME_SENSE_TYPE ) };

	if ( !wikidataLexemeSense.id ) {
		throw new Error( 'Lexeme sense from Wikidata has no id' );
	}
	if ( !isLexemeSenseId( wikidataLexemeSense.id ) ) {
		throw new Error( `Lexeme sense from Wikidata has an invalid id: <${ wikidataLexemeSense.id }>` );
	}
	const wikifunctionsLexemeSenseRef = makeWDRef( LEXEME_SENSE_REF, wikidataLexemeSense.id );
	wikifunctionsLexemeSense[ LEXEME_SENSE_IDENTITY ] = wikifunctionsLexemeSenseRef;

	// Not sure whether there is guaranteed to be at least one gloss; we provide
	// an empty object if needed, to avoid having a missing K2.
	const wikidataGlosses = wikidataLexemeSense.glosses || {};
	wikifunctionsLexemeSense[ LEXEME_SENSE_GLOSSES ] =
		convertToMultilingualText( wikidataGlosses, logThis );

	// Not sure whether there is guaranteed to be a claims property; we provide
	// an empty object if needed, to avoid having a missing LEXEME_SENSE_CLAIMS.
	const wikidataClaims = wikidataLexemeSense.claims || {};
	wikifunctionsLexemeSense[ LEXEME_SENSE_CLAIMS ] =
		convertClaims( wikifunctionsLexemeSenseRef, wikidataClaims, logThis );

	return wikifunctionsLexemeSense;
}

/**
 * Given the JSON returned from Wikidata for a group of lemmas, representations, or glosses,
 * construct the corresponding ZObject of type Z12 / Multilingual text. Each such group
 * is a JavaScript Object, and each top-level value is an Object (wikidataPair) containing
 * a "language" key and a "value" key - and the values of those are strings. The top-level keys
 * duplicate the language keys.
 *
 * @param {Object} wikidataObject The JSON returned from Wikidata for a group of lemmas,
 *   representations, or glosses
 * @param {Function|null} [logThis=null] Logging function taking 2 arguments, level and message
 * @return {Object} An instance of Z12 / Multilingual text, in normal form
 */
function convertToMultilingualText( wikidataObject, logThis = null ) {
	const monolingualTexts = [];
	for ( const key of Object.keys( wikidataObject ) ) {
		const wikidataPair = wikidataObject[ key ];
		monolingualTexts.push(
			convertToMonolingualText( wikidataPair.language, wikidataPair.value, logThis ) );
	}
	return {
		Z1K1: wrapInZ9( 'Z12' ), // Multilingual text
		Z12K1: convertArrayToKnownTypedList( monolingualTexts, wrapInZ9( 'Z11' ) )
	};
}

/**
 * Whether languageMap has an entry of its own for the given language code.
 *
 * An own-property check is used rather than the "in" operator so that names inherited
 * from Object.prototype (e.g. "constructor") are never mistaken for language codes.
 *
 * @param {string} languageCode A language code, as found in Wikidata JSON
 * @return {boolean}
 */
function isKnownLanguageCode( languageCode ) {
	return Object.prototype.hasOwnProperty.call( languageMap, languageCode );
}

/**
 * Construct an instance of Z11 / Monolingual text from a language code and a string.
 *
 * If languageMap has no entry for the language code, the language is represented by a
 * non-persistent Z60 and, when a logging function is given, an 'info' message is logged.
 *
 * @param {string} languageCode A language code, as found in Wikidata JSON
 * @param {string} text The text in that language
 * @param {Function|null} [logThis=null] Logging function taking 2 arguments, level and message
 * @return {Object} An instance of Z11 / Monolingual text, in normal form
 */
function convertToMonolingualText( languageCode, text, logThis = null ) {
	let wikifunctionsLanguage;
	if ( isKnownLanguageCode( languageCode ) ) {
		wikifunctionsLanguage = wrapInZ9( languageMap[ languageCode ] );
	} else {
		wikifunctionsLanguage = makeZ60( languageCode );
		if ( logThis ) {
			const logMessage = `No persistent Z60 for <${ languageCode }>; making an inline Z60`;
			logThis( 'info', logMessage );
		}
	}
	return {
		Z1K1: wrapInZ9( 'Z11' ), // Monolingual text
		Z11K1: wikifunctionsLanguage,
		Z11K2: wrapInZ6( text )
	};
}

/**
 * Given the JSON returned from Wikidata for a lexeme, determine the most appropriate
 * Wikifunctions ZObject (of type Z60) representing the language of the lexeme.
 *
 * wikidataLexeme.language contains a QID. Because there's no perfect mapping from
 * wikidataLexeme.language to our Z60 instances, and it would be some effort to maintain
 * such a mapping anyway, we currently use a heuristic that should work for the vast
 * majority of lexemes.
 *
 * If we fail to determine a known ZID (which should be very unusual), we return a
 * non-persistent Z60.
 *
 * See also: https://www.wikidata.org/wiki/Wikidata:Lexicographical_data/Documentation/Lexeme_languages
 * TODO( T373598 ): consider whether any other possible approaches might be better.
 *
 * @param {Object} wikidataLexeme The JSON returned from Wikidata for a lexeme
 * @param {Function|null} [logThis=null] Logging function taking 2 arguments, level and message
 * @return {Object} Z9/Reference or an instance of Z60 / Natural language
 */
function getLexemeLanguage( wikidataLexeme, logThis = null ) {
	const wikidataLemmas = wikidataLexeme.lemmas;
	// Get the IETF code from the first Lemma
	const code = Object.keys( wikidataLemmas )[ 0 ];
	if ( isKnownLanguageCode( code ) ) {
		return wrapInZ9( languageMap[ code ] );
	}
	if ( logThis ) {
		const logMessage = `No persistent Z60 for <${ code }> (lexeme <${ wikidataLexeme.id }>); making an inline Z60`;
		logThis( 'info', logMessage );
	}
	return makeZ60( code );
}

/**
 * Construct a non-persistent (inline) instance of Z60 / Natural language.
 *
 * @param {string} code The language code to store in Z60K1
 * @return {Object} An instance of Z60 / Natural language
 */
function makeZ60( code ) {
	return {
		Z1K1: wrapInZ9( 'Z60' ), // Natural language
		Z60K1: wrapInZ6( code )
	};
}

/**
 * @param {string} refTypeZID The ZID for a Wikidata reference type (Z609x)
 * @param {string} wikidataID A Wikidata ID (e.g. L12345, L12345-F1, etc.)
 * @return {Object} An instance of the given reference type, containing the given ID
 */
function makeWDRef( refTypeZID, wikidataID ) {
	// Every reference type keeps its ID under its first key, <type ZID>K1.
	const wikidataIDKey = refTypeZID + 'K1';
	const ref = { Z1K1: wrapInZ9( refTypeZID ) };
	ref[ wikidataIDKey ] = wrapInZ6( wikidataID );
	return ref;
}

module.exports = { convertLexeme, convertLexemeForm, convertLexemeSense };