Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
97.96% |
48 / 49 |
|
66.67% |
2 / 3 |
CRAP | |
0.00% |
0 / 1 |
UnicodeEscaper | |
97.96% |
48 / 49 |
|
66.67% |
2 / 3 |
18 | |
0.00% |
0 / 1 |
escapeString | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
3 | |||
unicodeCharNo | |
100.00% |
21 / 21 |
|
100.00% |
1 / 1 |
5 | |||
escapedChar | |
95.00% |
19 / 20 |
|
0.00% |
0 / 1 |
10 |
1 | <?php |
2 | |
3 | namespace Wikimedia\Purtle; |
4 | |
5 | /** |
6 | * Helper class for turning non-ascii characters into Python-style unicode escape sequences. |
7 | * |
8 | * @author Daniel Kinzler |
9 | * |
10 | * Most of this class was copied from EasyRdf's Ntriples.php. |
11 | * The following licensing terms apply to the copied code: |
12 | * |
13 | * Copyright (c) 2009-2013 Nicholas J Humfrey. All rights reserved. |
14 | * |
15 | * Redistribution and use in source and binary forms, with or without |
16 | * modification, are permitted provided that the following conditions are met: |
17 | * 1. Redistributions of source code must retain the above copyright |
18 | * notice, this list of conditions and the following disclaimer. |
19 | * 2. Redistributions in binary form must reproduce the above copyright notice, |
20 | * this list of conditions and the following disclaimer in the documentation |
21 | * and/or other materials provided with the distribution. |
22 | * 3. The name of the author 'Nicholas J Humfrey" may be used to endorse or |
23 | * promote products derived from this software without specific prior |
24 | * written permission. |
25 | * |
26 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
27 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
28 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
29 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
30 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
31 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
32 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
33 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
34 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
35 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
36 | * POSSIBILITY OF SUCH DAMAGE. |
37 | * |
38 | * @copyright Copyright (c) 2009-2013 Nicholas J Humfrey |
39 | * @license http://www.opensource.org/licenses/bsd-license.php BSD-2-Clause |
40 | */ |
class UnicodeEscaper {

	/**
	 * @var string[] Memoised escape sequences, keyed by the UTF-8 encoded character
	 *  they were computed for. Filled lazily by escapeString().
	 */
	private $escChars = [];

	/**
	 * Escapes a UTF-8 string character by character.
	 *
	 * Printable ASCII (U+0020 to U+007E) is passed through unchanged, which includes
	 * the double quote and the backslash. Tab, line feed and carriage return become
	 * the two-character sequences \t, \n and \r. Every other character becomes
	 * \uXXXX (up to U+FFFF) or \UXXXXXXXX (above U+FFFF), with upper-case hex digits.
	 *
	 * The string is split into characters once up front. Indexing it with
	 * mb_substr() inside a loop would re-scan the string from the beginning for
	 * every character, which makes long inputs quadratically slow.
	 *
	 * NOTE: how malformed UTF-8 is split is left to the mbstring extension.
	 *
	 * @param string $str UTF-8 encoded text
	 *
	 * @return string
	 */
	public function escapeString( $str ) {
		$result = '';
		foreach ( mb_str_split( $str, 1, 'UTF-8' ) as $char ) {
			// escapedChar() always returns a string, so ??= only computes on a cache miss.
			$this->escChars[$char] ??= $this->escapedChar( $char );
			$result .= $this->escChars[$char];
		}
		return $result;
	}

	/**
	 * Decodes a single UTF-8 encoded character into its code point number.
	 *
	 * The decoding is driven purely by the byte length of the input; the lead and
	 * continuation bytes are not validated. Inputs that are not 1 to 4 bytes long
	 * yield 0.
	 *
	 * @param string $cUtf One UTF-8 encoded character
	 *
	 * @return int
	 */
	private function unicodeCharNo( string $cUtf ): int {
		switch ( strlen( $cUtf ) ) {
			case 1:
				// 0xxxxxxx
				return ord( $cUtf );
			case 2:
				// 110xxxxx 10xxxxxx = 192+x 128+x
				return ( ( ord( $cUtf[0] ) - 192 ) * 64 )
					+ ( ord( $cUtf[1] ) - 128 );
			case 3:
				// 1110xxxx 10xxxxxx 10xxxxxx = 224+x 128+x 128+x
				return ( ( ord( $cUtf[0] ) - 224 ) * 4096 )
					+ ( ( ord( $cUtf[1] ) - 128 ) * 64 )
					+ ( ord( $cUtf[2] ) - 128 );
			case 4:
				// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx = 240+x 128+x 128+x 128+x
				return ( ( ord( $cUtf[0] ) - 240 ) * 262144 )
					+ ( ( ord( $cUtf[1] ) - 128 ) * 4096 )
					+ ( ( ord( $cUtf[2] ) - 128 ) * 64 )
					+ ( ord( $cUtf[3] ) - 128 );
			default:
				return 0;
		}
	}

	/**
	 * Produces the escaped representation of a single character.
	 *
	 * @see http://www.w3.org/TR/rdf-testcases/#ntrip_strings
	 *
	 * @param string $c One UTF-8 encoded character
	 *
	 * @return string
	 */
	private function escapedChar( string $c ): string {
		$no = $this->unicodeCharNo( $c );

		// Control characters that have a dedicated short escape.
		switch ( $no ) {
			case 9:
				return '\t';
			case 10:
				return '\n';
			case 13:
				return '\r';
		}

		if ( $no >= 32 && $no < 127 ) {
			// #x20-#x7E (32-126): printable ASCII is emitted as is.
			return $c;
		}

		if ( $no < 65536 ) {
			// Remaining control characters below #x20, and #x7F-#xFFFF (127-65535).
			return '\u' . sprintf( '%04X', $no );
		}

		if ( $no < 1114112 ) {
			// #x10000-#x10FFFF (65536-1114111)
			return '\U' . sprintf( '%08X', $no );
		}

		// Not a defined code point => ignore (also probably unreachable since PHP 8.3).
		return '';
	}

}