Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
100.00% covered (success)
100.00%
54 / 54
100.00% covered (success)
100.00%
3 / 3
CRAP
100.00% covered (success)
100.00%
1 / 1
N3Quoter
100.00% covered (success)
100.00%
54 / 54
100.00% covered (success)
100.00%
3 / 3
6
100.00% covered (success)
100.00%
1 / 1
 setEscapeUnicode
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
2
 escapeIRI
100.00% covered (success)
100.00%
12 / 12
100.00% covered (success)
100.00%
1 / 1
1
 escapeLiteral
100.00% covered (success)
100.00%
41 / 41
100.00% covered (success)
100.00%
1 / 1
3
1<?php
2
3namespace Wikimedia\Purtle;
4
/**
 * Helper class for quoting literals and IRIs in N3 output.
 *
 * IRIs are made safe by percent-encoding the characters that may not appear
 * inside an IRI reference; literals are made safe by backslash-escaping.
 * Literals can optionally be passed through a UnicodeEscaper as well.
 *
 * @license GPL-2.0-or-later
 * @author Daniel Kinzler
 */
class N3Quoter {

    /**
     * Lazily built lookup table for escapeIRI(), shared by all instances.
     *
     * @var string[]|null Map of raw character => percent-encoded form, null until first use.
     */
    private static $iriEscapes = null;

    /**
     * @var UnicodeEscaper|null Escaper applied to literals, or null if Unicode escaping is off.
     */
    private $escaper = null;

    /**
     * Turns additional Unicode escaping of literals on or off.
     *
     * @param bool $escapeUnicode If truthy, escapeLiteral() passes its result through
     *  a fresh UnicodeEscaper; otherwise no such post-processing is done.
     */
    public function setEscapeUnicode( $escapeUnicode ) {
        $this->escaper = $escapeUnicode ? new UnicodeEscaper() : null;
    }

    /**
     * Percent-encodes all characters that are not allowed inside an IRI reference.
     *
     * Covered are the control characters and space (U+0000 to U+0020) as well as
     * " < > \ ` ^ | { }. All other bytes, including non-ASCII ones, are returned unchanged.
     *
     * @param string $iri
     *
     * @return string
     */
    public function escapeIRI( $iri ) {
        // FIXME: apply unicode escaping?! Unlike escapeLiteral(), this never uses $this->escaper.
        return strtr( $iri, self::getIriEscapes() );
    }

    /**
     * Builds (once) and returns the replacement table used by escapeIRI().
     *
     * @return string[] Map of raw character => percent-encoded form.
     */
    private static function getIriEscapes() {
        if ( self::$iriEscapes === null ) {
            $map = [
                '"' => '%22',
                '<' => '%3C',
                '>' => '%3E',
                '\\' => '%5C',
                '`' => '%60',
                '^' => '%5E',
                '|' => '%7C',
                '{' => '%7B',
                '}' => '%7D',
            ];

            // The IRIREF grammar excludes everything from U+0000 up to and including
            // the space (U+0020), so raw tabs, newlines etc. must be encoded as well.
            for ( $byte = 0x00; $byte <= 0x20; $byte++ ) {
                $map[chr( $byte )] = sprintf( '%%%02X', $byte );
            }

            self::$iriEscapes = $map;
        }

        return self::$iriEscapes;
    }

    /**
     * Escapes a string for use inside a double-quoted N3 literal.
     *
     * Double quotes and backslashes are backslash-escaped, and the control characters
     * U+0000 to U+001F are written as short escapes (\b \t \n \f \r) or as \uXXXX.
     * If Unicode escaping was enabled via setEscapeUnicode(), the result is additionally
     * passed through UnicodeEscaper::escapeString().
     *
     * @param string $s
     *
     * @return string The escaped text, without surrounding quotes.
     */
    public function escapeLiteral( $s ) {
        // Performance: If the entire string is just (a safe subset) of ASCII, let it through.
        // Ok are space (32), ! (33), # (35) to [ (91), and ] (93) to ~ (126).
        // This excludes " (34) and \ (92), which always need escaping.
        if ( preg_match( '/^[ !#-[\]-~]*\z/', $s ) ) {
            return $s;
        }

        // String escapes. Note that the N3 spec is more restrictive than the Turtle and
        // N-Triples specifications, see <https://www.w3.org/TeamSubmission/n3/#escaping>
        // and <https://www.w3.org/TR/turtle/#string>
        // and <https://www.w3.org/TR/n-triples/#grammar-production-literal>.
        // Allowed escapes according to the N3 spec are:
        // ECHAR    ::=    '\' [tbnrf"'\]
        // The single quote however does not require escaping when used in double quotes.
        // Control characters without a short escape are written in \uXXXX form.
        $escaped = strtr( $s, [
            "\x00" => '\u0000',
            "\x01" => '\u0001',
            "\x02" => '\u0002',
            "\x03" => '\u0003',
            "\x04" => '\u0004',
            "\x05" => '\u0005',
            "\x06" => '\u0006',
            "\x07" => '\u0007',
            "\x08" => '\b',
            "\x09" => '\t',
            "\x0A" => '\n',
            "\x0B" => '\u000B',
            "\x0C" => '\f',
            "\x0D" => '\r',
            "\x0E" => '\u000E',
            "\x0F" => '\u000F',
            "\x10" => '\u0010',
            "\x11" => '\u0011',
            "\x12" => '\u0012',
            "\x13" => '\u0013',
            "\x14" => '\u0014',
            "\x15" => '\u0015',
            "\x16" => '\u0016',
            "\x17" => '\u0017',
            "\x18" => '\u0018',
            "\x19" => '\u0019',
            "\x1A" => '\u001A',
            "\x1B" => '\u001B',
            "\x1C" => '\u001C',
            "\x1D" => '\u001D',
            "\x1E" => '\u001E',
            "\x1F" => '\u001F',
            '"' => '\"',
            '\\' => '\\\\',
        ] );

        // Optional second pass; only active after setEscapeUnicode( true ).
        if ( $this->escaper !== null ) {
            $escaped = $this->escaper->escapeString( $escaped );
        }

        return $escaped;
    }

}