Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
95.73% |
157 / 164 |
|
72.22% |
13 / 18 |
CRAP | |
0.00% |
0 / 1 |
JsonLdRdfWriter | |
95.73% |
157 / 164 |
|
72.22% |
13 / 18 |
64 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
1 | |||
encode | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
compactify | |
90.91% |
10 / 11 |
|
0.00% |
0 / 1 |
7.04 | |||
toIRI | |
80.00% |
4 / 5 |
|
0.00% |
0 / 1 |
2.03 | |||
getCurrentTerm | |
80.00% |
12 / 15 |
|
0.00% |
0 / 1 |
7.39 | |||
beginJson | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
2 | |||
finishJson | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
5 | |||
finishDocument | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
5 | |||
writeSubject | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
finishSubject | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
writePredicate | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
writeResource | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
writeText | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
2 | |||
writeValue | |
100.00% |
24 / 24 |
|
100.00% |
1 / 1 |
8 | |||
addTypedValue | |
94.74% |
18 / 19 |
|
0.00% |
0 / 1 |
10.01 | |||
finishPredicate | |
94.44% |
17 / 18 |
|
0.00% |
0 / 1 |
7.01 | |||
newSubWriter | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
getMimeType | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | |
3 | namespace Wikimedia\Purtle; |
4 | |
5 | use LogicException; |
6 | |
7 | /** |
8 | * RdfWriter implementation for generating JSON-LD output. |
9 | * |
10 | * @license GPL-2.0-or-later |
11 | * @author C. Scott Ananian |
12 | */ |
class JsonLdRdfWriter extends RdfWriterBase {

	/**
	 * The JSON-LD "@context", which maps terms to IRIs. This is shared with all sub-writers, and a
	 * single context is emitted when the writer is finalized.
	 *
	 * Entries are keyed by term. Prefix entries (added in #compactify()) hold the namespace IRI
	 * as a plain string; predicate entries (added in #getCurrentTerm() and #addTypedValue())
	 * hold an array with optional "@id" and "@type" keys.
	 *
	 * @see https://www.w3.org/TR/json-ld/#the-context
	 *
	 * @var array
	 */
	protected $context = [];

	/**
	 * A set of predicates which rely on the default typing rules for
	 * JSON-LD; that is, values for the predicate have been emitted which
	 * would be broken if an explicit "@type" was added to the context
	 * for the predicate. Keyed by term; shared with all sub-writers.
	 *
	 * @var bool[]
	 */
	protected $defaulted = [];

	/**
	 * The JSON-LD "@graph", which lists all the nodes described by this JSON-LD object.
	 * We apply an optimization eliminating the "@graph" entry if it consists
	 * of a single node; in that case, we will set $this->graph to null in
	 * #finishJson() to ensure that the deferred callback in #finishDocument()
	 * doesn't later emit "@graph".
	 *
	 * @see https://www.w3.org/TR/json-ld/#named-graphs
	 *
	 * @var array|null
	 */
	private $graph = [];

	/**
	 * A collection of predicates about a specific subject. The
	 * subject is identified by the "@id" key in this array; the other
	 * keys identify JSON-LD properties.
	 *
	 * @see https://www.w3.org/TR/json-ld/#dfn-edge
	 *
	 * @var array
	 */
	private $predicates = [];

	/**
	 * A sequence of zero or more IRIs, nodes, or values, which are the
	 * destination targets of the current predicates.
	 *
	 * @see https://www.w3.org/TR/json-ld/#dfn-list
	 *
	 * @var array
	 */
	private $values = [];

	/**
	 * True iff we have written the opening of the "@graph" field.
	 *
	 * @var bool
	 */
	private $wroteGraph = false;

	/**
	 * JSON-LD objects describing a single node can omit the "@graph" field;
	 * this variable remains false only so long as we can guarantee that
	 * only a single node has been described.
	 *
	 * @var bool
	 */
	private $disableGraphOpt = false;

	/**
	 * The IRI for the RDF `type` property.
	 */
	private const RDF_TYPE_IRI = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';

	/**
	 * The type internally used for "default type", which is a string or
	 * otherwise default-coerced type.
	 */
	private const DEFAULT_TYPE = '@purtle@default@';

	/**
	 * Set up the writer and register the JSON-LD specific state transition
	 * callbacks on top of whatever the parent constructor installs.
	 *
	 * @param string $role
	 * @param BNodeLabeler|null $labeler
	 */
	public function __construct( $role = parent::DOCUMENT_ROLE, ?BNodeLabeler $labeler = null ) {
		parent::__construct( $role, $labeler );

		// The following named methods are protected, not private, so we
		// can invoke them directly w/o function wrappers.
		$this->transitionTable[self::STATE_START][self::STATE_DOCUMENT] =
			[ $this, 'beginJson' ];
		$this->transitionTable[self::STATE_DOCUMENT][self::STATE_FINISH] =
			[ $this, 'finishJson' ];
		$this->transitionTable[self::STATE_OBJECT][self::STATE_PREDICATE] =
			[ $this, 'finishPredicate' ];
		$this->transitionTable[self::STATE_OBJECT][self::STATE_SUBJECT] =
			[ $this, 'finishSubject' ];
		$this->transitionTable[self::STATE_OBJECT][self::STATE_DOCUMENT] =
			[ $this, 'finishDocument' ];
	}

	/**
	 * Emit $val as pretty-printed JSON with its outermost pair of
	 * braces/brackets removed, with $indent extra indentations on each line.
	 *
	 * The outer delimiters are stripped so that the caller can splice the
	 * members into an object or array it is writing piecewise.
	 *
	 * @param array $val
	 * @param int $indent Number of extra indentation units to prepend to
	 *  every line; values <= 0 add nothing.
	 * @return string the JSON string for $val
	 */
	public function encode( $val, $indent ) {
		$str = json_encode( $val, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES );
		// Strip outermost open/close braces/brackets
		$str = preg_replace( '/^[[{]\n?|\n?[}\]]$/', '', $str );

		if ( $indent > 0 ) {
			// add extra indentation
			// NOTE(review): the indentation unit below is reproduced exactly
			// as it appears in the listing this was taken from, which seems
			// to have had runs of whitespace collapsed - confirm the intended
			// width against the canonical source.
			$str = preg_replace( '/^/m', str_repeat( ' ', $indent ), $str );
		}

		return $str;
	}

	/**
	 * Return a "compact IRI" corresponding to the given base/local pair.
	 * This adds entries to the "@context" key when needed to allow use
	 * of a given prefix.
	 * @see https://www.w3.org/TR/json-ld/#dfn-compact-iri
	 *
	 * @param string $base A QName prefix if $local is given, or an IRI if $local is null.
	 * @param string|null $local A QName suffix, or null if $base is an IRI.
	 *
	 * @return string A compact IRI, or a full IRI when the prefix is empty
	 *  or its name is already taken in the context by something else.
	 */
	private function compactify( $base, $local = null ) {
		// Inherited helper; presumably rewrites $base/$local in place.
		$this->expandShorthand( $base, $local );

		if ( $local === null ) {
			return $base;
		}

		// "_" denotes a blank node and is never registered in the context.
		if ( $base !== '_' && isset( $this->prefixes[ $base ] ) ) {
			if ( $base === '' ) {
				// Empty prefixes are not supported; use full IRI
				return $this->prefixes[ $base ] . $local;
			}
			if ( !isset( $this->context[ $base ] ) ) {
				$this->context[ $base ] = $this->prefixes[ $base ];
			}
			if ( $this->context[ $base ] !== $this->prefixes[ $base ] ) {
				// Context name conflict; use full IRI
				return $this->prefixes[ $base ] . $local;
			}
		}

		return $base . ':' . $local;
	}

	/**
	 * Return an absolute IRI from the given base/local pair.
	 * @see https://www.w3.org/TR/json-ld/#dfn-absolute-iri
	 *
	 * @param string $base A QName prefix if $local is given, or an IRI if $local is null.
	 * @param string|null $local A QName suffix, or null if $base is an IRI.
	 *
	 * @return string An absolute IRI.
	 * @throws LogicException if $local is still set after expansion, i.e.
	 *  the prefix could not be resolved.
	 */
	private function toIRI( $base, $local ) {
		// Both helpers are inherited; they presumably update $base/$local in
		// place, leaving $local === null once a full IRI has been built.
		$this->expandShorthand( $base, $local );
		$this->expandQName( $base, $local );
		if ( $local !== null ) {
			throw new LogicException( 'Unknown prefix: ' . $base );
		}
		return $base;
	}

	/**
	 * Return a appropriate term for the current predicate value.
	 *
	 * For rdf:type the full IRI (self::RDF_TYPE_IRI) is returned so callers
	 * can recognise it. Otherwise the bare local name is preferred, and is
	 * registered in the context on first use; if that name is already bound
	 * to a different predicate (or is a known prefix), the compact IRI is
	 * returned instead.
	 *
	 * @return string
	 * @throws LogicException if the predicate's prefix is unknown (via #toIRI()).
	 */
	private function getCurrentTerm() {
		[ $base, $local ] = $this->currentPredicate;
		$predIRI = $this->toIRI( $base, $local );
		if ( $predIRI === self::RDF_TYPE_IRI ) {
			return $predIRI;
		}
		$this->expandShorthand( $base, $local );
		if ( $local === null ) {
			return $base;
		} elseif ( $base !== '_' && !isset( $this->prefixes[ $local ] ) ) {
			// Prefixes get priority over field names in @context
			$pred = $this->compactify( $base, $local );
			if ( !isset( $this->context[ $local ] ) ) {
				$this->context[ $local ] = [ '@id' => $pred ];
			}
			if ( $this->context[ $local ][ '@id' ] === $pred ) {
				return $local;
			}
			// The short name is taken by another predicate.
			return $pred;
		}
		return $this->compactify( $base, $local );
	}

	/**
	 * Write document header.
	 *
	 * Only the document-role writer opens the top-level JSON object.
	 */
	protected function beginJson() {
		if ( $this->role === self::DOCUMENT_ROLE ) {
			$this->write( "{\n" );
			$this->write( function () {
				// If this buffer is drained early, disable @graph optimization
				$this->disableGraphOpt = true;
				return '';
			} );
		}
	}

	/**
	 * Write document footer: the single hoisted node or the end of the
	 * "@graph" array, then the "@context" (if any) and the closing brace.
	 */
	protected function finishJson() {
		// Whether a "," is required before "@context"; false only when the
		// document writer has emitted nothing after the opening brace.
		$needsSeparator = true;

		// If we haven't drained yet, and @graph has only 1 element, then we
		// can optimize our output and hoist the single node to top level.
		if ( $this->role === self::DOCUMENT_ROLE ) {
			if ( ( !$this->disableGraphOpt ) && count( $this->graph ) === 1 ) {
				$this->write( $this->encode( $this->graph[0], 0 ) );
				$this->graph = null; // We're done with @graph.
			} else {
				$this->disableGraphOpt = true;
				if ( $this->wroteGraph || count( $this->graph ) > 0 ) {
					// "@graph" has been opened already, or the deferred
					// callback from #finishDocument() will open it when the
					// buffer is drained; either way it must be closed here.
					// NOTE(review): leading whitespace reproduced as found;
					// see the note in #encode().
					$this->write( "\n ]" );
				} else {
					// No node was ever written, so "@graph" was never opened;
					// emitting "]" would produce invalid JSON.
					$needsSeparator = false;
				}
			}
		}

		if ( count( $this->context ) ) {
			// Write @context field.
			if ( $needsSeparator ) {
				$this->write( ",\n" );
			}
			$this->write( $this->encode( [
				'@context' => $this->context
			], 0 ) );
		}

		$this->write( "\n}" );
	}

	/**
	 * Close the current subject and queue a deferred callback that emits
	 * the nodes collected so far if the buffer is drained before
	 * #finishJson() runs.
	 */
	protected function finishDocument() {
		$this->finishSubject();
		$this->write( function () {
			// if this is drained before finishJson(), then disable
			// the graph optimization and dump what we've got so far.
			$str = '';
			if ( $this->graph !== null && count( $this->graph ) > 0 ) {
				$this->disableGraphOpt = true;
				if ( $this->role === self::DOCUMENT_ROLE && !$this->wroteGraph ) {
					// NOTE(review): leading whitespace reproduced as found;
					// see the note in #encode().
					$str .= " \"@graph\": [\n";
					$this->wroteGraph = true;
				} else {
					// Continuing an array that has already been opened.
					$str .= ",\n";
				}
				$str .= $this->encode( $this->graph, 1 );
				$this->graph = [];
				return $str;
			}
			// Delay; maybe we'll be able to optimize this later.
			return $str;
		} );
	}

	/**
	 * Start collecting predicates for a new subject node.
	 *
	 * @param string $base
	 * @param string|null $local
	 */
	protected function writeSubject( $base, $local = null ) {
		$this->predicates = [
			'@id' => $this->compactify( $base, $local )
		];
	}

	/**
	 * Flush the pending predicate and append the finished node to the graph.
	 */
	protected function finishSubject() {
		$this->finishPredicate();
		$this->graph[] = $this->predicates;
	}

	/**
	 * Nothing is written when a predicate starts; the term is derived from
	 * $this->currentPredicate once values arrive (see #getCurrentTerm()).
	 *
	 * @param string $base
	 * @param string|null $local
	 */
	protected function writePredicate( $base, $local = null ) {
		// no op
	}

	/**
	 * Add a resource (IRI) as a value of the current predicate.
	 *
	 * @param string $base
	 * @param string|null $local
	 */
	protected function writeResource( $base, $local = null ) {
		$pred = $this->getCurrentTerm();
		$value = $this->compactify( $base, $local );
		// rdf:type values are always kept in expanded form, because
		// #finishPredicate() extracts their "@id" to build "@type".
		$this->addTypedValue( '@id', $value, [
			'@id' => $value
		], ( $pred === self::RDF_TYPE_IRI ) );
	}

	/**
	 * Add a text literal, optionally language-tagged, as a value of the
	 * current predicate. The language is dropped when the inherited
	 * isValidLanguageCode() check rejects it.
	 *
	 * @param string $text
	 * @param string|null $language
	 */
	protected function writeText( $text, $language = null ) {
		if ( !$this->isValidLanguageCode( $language ) ) {
			$this->addTypedValue( self::DEFAULT_TYPE, $text );
		} else {
			$expanded = [
				'@language' => $language,
				'@value' => $text
			];
			$this->addTypedValue( self::DEFAULT_TYPE, $expanded, $expanded );
		}
	}

	/**
	 * Add a typed literal as a value of the current predicate.
	 *
	 * xsd:string, xsd:integer, xsd:boolean and xsd:double literals are
	 * emitted as native JSON values when that can be done without changing
	 * the value; every other literal is emitted with an explicit "@type",
	 * keeping its lexical form as a string.
	 *
	 * @param string $literal
	 * @param string|null $typeBase
	 * @param string|null $typeLocal
	 *
	 * @throws LogicException if the datatype prefix is unknown (via #toIRI()).
	 */
	public function writeValue( $literal, $typeBase, $typeLocal = null ) {
		if ( $typeBase === null && $typeLocal === null ) {
			$this->addTypedValue( self::DEFAULT_TYPE, $literal );
			return;
		}

		switch ( $this->toIRI( $typeBase, $typeLocal ) ) {
			case 'http://www.w3.org/2001/XMLSchema#string':
				$this->addTypedValue( self::DEFAULT_TYPE, strval( $literal ) );
				return;
			case 'http://www.w3.org/2001/XMLSchema#integer':
				// xsd:integer is unbounded while a PHP int is not, and
				// intval() saturates on overflow and maps non-numeric text
				// to 0. Only use a native number if it round-trips.
				$lexical = trim( strval( $literal ) );
				if ( preg_match( '/^([+-]?)0*([0-9]+)$/', $lexical, $m ) ) {
					$int = intval( $lexical );
					// Sign and leading zeros removed, for comparison with
					// PHP's own rendering of the parsed number.
					$canonical = ( $m[1] === '-' && $m[2] !== '0' ? '-' : '' ) . $m[2];
					if ( strval( $int ) === $canonical ) {
						$this->addTypedValue( self::DEFAULT_TYPE, $int );
						return;
					}
				}
				// Out of range or malformed: emit as an explicitly typed literal.
				break;
			case 'http://www.w3.org/2001/XMLSchema#boolean':
				// The lexical space of xsd:boolean is "true", "false", "1" and "0".
				$lexical = is_bool( $literal )
					? ( $literal ? 'true' : 'false' )
					: trim( strval( $literal ) );
				if ( $lexical === 'true' || $lexical === '1' ) {
					$this->addTypedValue( self::DEFAULT_TYPE, true );
					return;
				}
				if ( $lexical === 'false' || $lexical === '0' ) {
					$this->addTypedValue( self::DEFAULT_TYPE, false );
					return;
				}
				// Unrecognized: emit as an explicitly typed literal rather
				// than silently turning it into false.
				break;
			case 'http://www.w3.org/2001/XMLSchema#double':
				$v = floatval( $literal );
				// Only "numbers with fractions" are xsd:double. We need
				// to verify that the JSON string will contain a decimal
				// point, otherwise the value would be interpreted as an
				// xsd:integer.
				// TODO: consider instead using JSON_PRESERVE_ZERO_FRACTION
				// in $this->encode(); this class already relies on newer
				// PHP syntax (nullable types, constant visibility), so the
				// flag is available.
				// OTOH, the spec language is ambiguous about whether "5."
				// would be considered an integer or a double.
				if ( strpos( json_encode( $v ), '.' ) !== false ) {
					$this->addTypedValue( self::DEFAULT_TYPE, $v );
					return;
				}
				break;
		}

		// Generic case: keep the lexical form and state the type explicitly.
		$type = $this->compactify( $typeBase, $typeLocal );
		$literal = strval( $literal );
		$this->addTypedValue( $type, $literal, [
			'@type' => $type,
			'@value' => $literal
		] );
	}

	/**
	 * Add a typed value for the given predicate. If possible, adds a
	 * default type to the context to avoid having to repeat type information
	 * in each value for this predicate. If there is already a default
	 * type which conflicts with this one, or if $forceExpand is true,
	 * then use the "expanded" value which will explicitly override any
	 * default type.
	 *
	 * @param string $type The compactified JSON-LD @type for this value, or
	 *  self::DEFAULT_TYPE to indicate the default JSON-LD type coercion rules
	 *  should be used.
	 * @param string|int|float|bool $simpleVal The "simple" representation
	 *  for this value, used if the type can be hoisted into the context.
	 * @param array|null $expandedVal The "expanded" representation for this
	 *  value, used if the context @type conflicts with this value; or null
	 *  to use "@value" for the expanded representation.
	 * @param bool $forceExpand If true, don't try to add this type to the
	 *  context. Defaults to false.
	 */
	protected function addTypedValue( $type, $simpleVal, $expandedVal = null, $forceExpand = false ) {
		if ( !$forceExpand ) {
			$pred = $this->getCurrentTerm();
			if ( $type === self::DEFAULT_TYPE ) {
				if ( !isset( $this->context[ $pred ][ '@type' ] ) ) {
					// No explicit type yet: pin this predicate to the
					// default coercion rules from now on.
					$this->defaulted[ $pred ] = true;
				}
				if ( isset( $this->defaulted[ $pred ] ) ) {
					$this->values[] = $simpleVal;
					return;
				}
			} elseif ( !isset( $this->defaulted[ $pred ] ) ) {
				if ( !isset( $this->context[ $pred ] ) ) {
					$this->context[ $pred ] = [];
				}
				if ( !isset( $this->context[ $pred ][ '@type' ] ) ) {
					$this->context[ $pred ][ '@type' ] = $type;
				}
				if ( $this->context[ $pred ][ '@type' ] === $type ) {
					$this->values[] = $simpleVal;
					return;
				}
			}
		}
		// The simple form would be misread here; write the expanded form.
		if ( $expandedVal === null ) {
			$this->values[] = [ '@value' => $simpleVal ];
		} else {
			$this->values[] = $expandedVal;
		}
	}

	/**
	 * Store the values collected for the current predicate on the current
	 * subject, merging with values written earlier under the same term.
	 *
	 * @throws LogicException if no value has been collected.
	 */
	protected function finishPredicate() {
		$name = $this->getCurrentTerm();

		if ( $name === self::RDF_TYPE_IRI ) {
			$name = '@type';
			// rdf:type values are expected in expanded [ '@id' => ... ] form
			// (see #writeResource()); "@type" takes the bare identifiers.
			$this->values = array_map( static function ( array $val ) {
				return $val[ '@id' ];
			}, $this->values );
		}
		if ( isset( $this->predicates[$name] ) ) {
			$was = $this->predicates[$name];
			// Wrap $was into a numeric indexed array if it isn't already.
			// Note that $was could have non-numeric indices, eg
			// [ "@id" => "foo" ], in which case it still needs to be wrapped.
			if ( !( is_array( $was ) && isset( $was[0] ) ) ) {
				$was = [ $was ];
			}
			$this->values = array_merge( $was, $this->values );
		}

		$cnt = count( $this->values );
		if ( $cnt === 0 ) {
			throw new LogicException( 'finishPredicate can\'t be called without at least one value' );
		} elseif ( $cnt === 1 ) {
			// A single value is stored bare rather than as a one-element list.
			$this->predicates[$name] = $this->values[0];
		} else {
			$this->predicates[$name] = $this->values;
		}

		$this->values = [];
	}

	/**
	 * Create a sub-writer which shares this writer's context bookkeeping.
	 *
	 * @param string $role
	 * @param BNodeLabeler $labeler
	 *
	 * @return RdfWriterBase
	 */
	protected function newSubWriter( $role, BNodeLabeler $labeler ) {
		$writer = new self( $role, $labeler );

		// Have subwriter share context with this parent.
		$writer->context = &$this->context;
		$writer->defaulted = &$this->defaulted;

		// We can't use the @graph optimization.
		$this->disableGraphOpt = true;

		return $writer;
	}

	/**
	 * @return string a MIME type
	 */
	public function getMimeType() {
		return 'application/ld+json; charset=UTF-8';
	}

}