Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
100.00% |
54 / 54 |
|
100.00% |
3 / 3 |
CRAP | |
100.00% |
1 / 1 |
N3Quoter | |
100.00% |
54 / 54 |
|
100.00% |
3 / 3 |
6 | |
100.00% |
1 / 1 |
setEscapeUnicode | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
2 | |||
escapeIRI | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
1 | |||
escapeLiteral | |
100.00% |
41 / 41 |
|
100.00% |
1 / 1 |
3 |
1 | <?php |
2 | |
3 | namespace Wikimedia\Purtle; |
4 | |
5 | /** |
6 | * Helper class for quoting literals and URIs in N3 output. |
7 | * Unicode escaping of literals can optionally be enabled via setEscapeUnicode(). |
8 | * |
9 | * @license GPL-2.0-or-later |
10 | * @author Daniel Kinzler |
11 | */ |
12 | class N3Quoter { |
13 | |
14 | /** |
15 | * @var UnicodeEscaper|null Escaper applied by escapeLiteral(); null means no unicode escaping is applied. |
16 | */ |
17 | private $escaper = null; |
18 | |
19 | /** |
20 | * @param bool $escapeUnicode True to create a UnicodeEscaper for use by escapeLiteral(), false to clear it. |
21 | */ |
22 | public function setEscapeUnicode( $escapeUnicode ) { |
23 | $this->escaper = $escapeUnicode ? new UnicodeEscaper() : null; |
24 | } |
25 | |
26 | /** |
27 | * @param string $iri IRI in which space and the characters " < > \ ` ^ | { } get percent-encoded. |
28 | * |
29 | * @return string The IRI with each of those characters replaced by its %XX form. |
30 | */ |
31 | public function escapeIRI( $iri ) { |
32 | // FIXME: apply unicode escaping?! (The escaper set via setEscapeUnicode() is not used here.) |
33 | return strtr( $iri, [ |
34 | ' ' => '%20', |
35 | '"' => '%22', |
36 | '<' => '%3C', |
37 | '>' => '%3E', |
38 | '\\' => '%5C', |
39 | '`' => '%60', |
40 | '^' => '%5E', |
41 | '|' => '%7C', |
42 | '{' => '%7B', |
43 | '}' => '%7D', |
44 | ] ); |
45 | } |
46 | |
47 | /** |
48 | * @param string $s Raw literal text to escape. |
49 | * |
50 | * @return string The escaped text; no surrounding quotes are added. |
51 | */ |
52 | public function escapeLiteral( $s ) { |
53 | // Performance: If the entire string is just (a safe subset) of ASCII, let it through. |
54 | // Ok are space (32), ! (33), # (35) - [ (91) and ] (93) to ~ (126), excludes " (34) and \ (92). |
55 | if ( preg_match( '/^[ !#-[\]-~]*\z/', $s ) ) { |
56 | return $s; |
57 | } |
58 | |
59 | // String escapes. Note that the N3 spec is more restrictive than the Turtle and N-Triples |
60 | // specifications, see <https://www.w3.org/TeamSubmission/n3/#escaping> |
61 | // and <https://www.w3.org/TR/turtle/#string> |
62 | // and <https://www.w3.org/TR/n-triples/#grammar-production-literal>. |
63 | // Allowed escapes according to the N3 spec are: |
64 | // ECHAR ::= '\' [tbnrf"'\] |
65 | // The single quote is left as is (no escaping needed inside double quotes); other control characters below 0x20 become \uXXXX. |
66 | $escaped = strtr( $s, [ |
67 | "\x00" => '\u0000', |
68 | "\x01" => '\u0001', |
69 | "\x02" => '\u0002', |
70 | "\x03" => '\u0003', |
71 | "\x04" => '\u0004', |
72 | "\x05" => '\u0005', |
73 | "\x06" => '\u0006', |
74 | "\x07" => '\u0007', |
75 | "\x08" => '\b', |
76 | "\x09" => '\t', |
77 | "\x0A" => '\n', |
78 | "\x0B" => '\u000B', |
79 | "\x0C" => '\f', |
80 | "\x0D" => '\r', |
81 | "\x0E" => '\u000E', |
82 | "\x0F" => '\u000F', |
83 | "\x10" => '\u0010', |
84 | "\x11" => '\u0011', |
85 | "\x12" => '\u0012', |
86 | "\x13" => '\u0013', |
87 | "\x14" => '\u0014', |
88 | "\x15" => '\u0015', |
89 | "\x16" => '\u0016', |
90 | "\x17" => '\u0017', |
91 | "\x18" => '\u0018', |
92 | "\x19" => '\u0019', |
93 | "\x1A" => '\u001A', |
94 | "\x1B" => '\u001B', |
95 | "\x1C" => '\u001C', |
96 | "\x1D" => '\u001D', |
97 | "\x1E" => '\u001E', |
98 | "\x1F" => '\u001F', |
99 | '"' => '\"', |
100 | '\\' => '\\\\', |
101 | ] ); |
102 | |
103 | if ( $this->escaper !== null ) { |
104 | $escaped = $this->escaper->escapeString( $escaped ); |
105 | } |
106 | |
107 | return $escaped; |
108 | } |
109 | |
110 | } |