Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
93.98% |
78 / 83 |
|
75.00% |
3 / 4 |
CRAP | |
0.00% |
0 / 1 |
FormatJson | |
93.98% |
78 / 83 |
|
75.00% |
3 / 4 |
43.40 | |
0.00% |
0 / 1 |
encode | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
8 | |||
decode | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
parse | |
84.85% |
28 / 33 |
|
0.00% |
0 / 1 |
15.78 | |||
stripComments | |
100.00% |
38 / 38 |
|
100.00% |
1 / 1 |
19 |
1 | <?php |
2 | /** |
3 | * Wrapper for json_encode and json_decode. |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License along |
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
18 | * http://www.gnu.org/copyleft/gpl.html |
19 | * |
20 | * @file |
21 | */ |
22 | |
23 | use MediaWiki\Status\Status; |
24 | |
25 | /** |
26 | * JSON formatter wrapper class |
27 | */ |
class FormatJson {
	/**
	 * Skip escaping most characters above U+007F for readability and compactness.
	 * This encoding option saves 3 to 8 bytes (uncompressed) for each such character;
	 * however, it could break compatibility with systems that incorrectly handle UTF-8.
	 *
	 * @since 1.22
	 */
	public const UTF8_OK = 1;

	/**
	 * Skip escaping the characters '<', '>', and '&', which have special meanings in
	 * HTML and XML.
	 *
	 * @warning Do not use this option for JSON that could end up in inline scripts.
	 * - HTML 5.2, §4.12.1.3 Restrictions for contents of script elements
	 * - XML 1.0 (5th Ed.), §2.4 Character Data and Markup
	 *
	 * @since 1.22
	 */
	public const XMLMETA_OK = 2;

	/**
	 * Skip escaping as many characters as reasonably possible.
	 *
	 * @warning When generating inline script blocks, use FormatJson::UTF8_OK instead.
	 *
	 * @since 1.22
	 */
	public const ALL_OK = self::UTF8_OK | self::XMLMETA_OK;

	/**
	 * If set, treat JSON objects '{...}' as associative arrays. Without this option,
	 * JSON objects will be converted to stdClass.
	 *
	 * @since 1.24
	 */
	public const FORCE_ASSOC = 0x100;

	/**
	 * If set, attempt to fix invalid JSON.
	 *
	 * @since 1.24
	 */
	public const TRY_FIXING = 0x200;

	/**
	 * If set, strip comments from input before parsing as JSON.
	 *
	 * @since 1.25
	 */
	public const STRIP_COMMENTS = 0x400;

	/**
	 * Message keys reported by parse() for the decode errors it knows about.
	 * Any other non-zero error code is reported as 'json-error-unknown'.
	 *
	 * JSON_ERROR_RECURSION, JSON_ERROR_INF_OR_NAN and JSON_ERROR_UNSUPPORTED_TYPE
	 * are encode-only errors, so they are deliberately absent.
	 */
	private const DECODE_ERROR_MESSAGES = [
		JSON_ERROR_DEPTH => 'json-error-depth',
		JSON_ERROR_STATE_MISMATCH => 'json-error-state-mismatch',
		JSON_ERROR_CTRL_CHAR => 'json-error-ctrl-char',
		JSON_ERROR_SYNTAX => 'json-error-syntax',
		JSON_ERROR_UTF8 => 'json-error-utf8',
		JSON_ERROR_INVALID_PROPERTY_NAME => 'json-error-invalid-property-name',
		JSON_ERROR_UTF16 => 'json-error-utf16',
	];

	/**
	 * Returns the JSON representation of a value.
	 *
	 * @note Empty arrays are encoded as numeric arrays, not as objects, so cast any associative
	 *  array that might be empty to an object before encoding it.
	 *
	 * @note In pre-1.22 versions of MediaWiki, using this function for generating inline script
	 *  blocks may result in an XSS vulnerability, and quite likely will in XML documents
	 *  (cf. FormatJson::XMLMETA_OK). Use Xml::encodeJsVar() instead in such cases.
	 *
	 * @param mixed $value The value to encode. Can be any type except a resource.
	 * @param string|bool $pretty If a string, add non-significant whitespace to improve
	 *  readability, using that string for indentation (must consist only of whitespace
	 *  characters). If true, use the default indent string (four spaces).
	 * @param int $escaping Bitfield consisting of _OK class constants
	 * @return string|false String if successful; false upon failure
	 */
	public static function encode( $value, $pretty = false, $escaping = 0 ) {
		// PHP escapes '/' to prevent breaking out of inline script blocks using '</script>',
		// which is hardly useful when '<' and '>' are escaped (and inadequate), and such
		// escaping negatively impacts the human readability of URLs and similar strings.
		$options = JSON_UNESCAPED_SLASHES;
		// Any string (even an empty one) requests pretty-printing, as does boolean true.
		if ( $pretty || is_string( $pretty ) ) {
			$options |= JSON_PRETTY_PRINT;
		}
		if ( $escaping & self::UTF8_OK ) {
			$options |= JSON_UNESCAPED_UNICODE;
		}
		if ( !( $escaping & self::XMLMETA_OK ) ) {
			$options |= JSON_HEX_TAG | JSON_HEX_AMP;
		}
		$json = json_encode( $value, $options );

		// json_encode() always indents with four spaces. The indent is built with
		// str_repeat() so that the literal cannot be silently shortened by tools that
		// collapse runs of whitespace; comparing against anything other than exactly
		// four spaces would wrongly skip the replacement for a caller-supplied indent
		// (e.g. a single space).
		$defaultIndent = str_repeat( ' ', 4 );

		if ( is_string( $pretty ) && $pretty !== $defaultIndent && $json !== false ) {
			// Change the four-space indent to the provided indent.
			// The regex matches four spaces either at the start of a line or immediately
			// after the previous match. $pretty should contain only whitespace characters,
			// so there should be no need to call StringUtils::escapeRegexReplacement().
			$json = preg_replace( '/ {4}|.*+\n\K {4}/A', $pretty, $json );
		}

		return $json;
	}

	/**
	 * Decodes a JSON string. It is recommended to use FormatJson::parse(),
	 * which returns a more comprehensive result in case of an error, and has
	 * more parsing options.
	 *
	 * In PHP versions before 7.1, decoding a JSON string containing an empty key
	 * without passing $assoc as true results in a return object with a property
	 * named "_empty_" (because true empty properties were not supported pre-PHP-7.1).
	 * Instead, consider passing $assoc as true to return an associative array.
	 *
	 * But be aware that in all supported PHP versions, decoding an empty JSON object
	 * with $assoc = true returns an array, not an object, breaking round-trip consistency.
	 *
	 * See https://phabricator.wikimedia.org/T206411 for more details on these quirks.
	 *
	 * @param string $value The JSON string being decoded
	 * @param bool $assoc When true, returned objects will be converted into associative arrays.
	 *
	 * @return mixed The value encoded in JSON in appropriate PHP type.
	 *  `null` is returned if $value represented `null`, if $value could not be decoded,
	 *  or if the encoded data was deeper than the recursion limit.
	 *  Use FormatJson::parse() to distinguish between types of `null` and to get proper error code.
	 */
	public static function decode( $value, $assoc = false ) {
		return json_decode( $value, $assoc );
	}

	/**
	 * Decodes a JSON string.
	 * Unlike FormatJson::decode(), if $value represents null value, it will be
	 * properly decoded as valid.
	 *
	 * @param string $value The JSON string being decoded
	 * @param int $options A bit field that allows FORCE_ASSOC, TRY_FIXING,
	 *  STRIP_COMMENTS
	 * @return Status If valid JSON, the value is available in $result->getValue()
	 */
	public static function parse( $value, $options = 0 ) {
		if ( $options & self::STRIP_COMMENTS ) {
			$value = self::stripComments( $value );
		}
		$assoc = ( $options & self::FORCE_ASSOC ) !== 0;
		$result = json_decode( $value, $assoc );
		$code = json_last_error();

		if ( $code === JSON_ERROR_NONE ) {
			return Status::newGood( $result );
		}

		if ( $code === JSON_ERROR_SYNTAX && ( $options & self::TRY_FIXING ) !== 0 ) {
			// The most common error is the trailing comma in a list or an object.
			// We cannot simply replace /,\s*[}\]]/ because it could be inside a string value.
			// But we could use the fact that JSON does not allow multi-line string values,
			// and remove trailing commas if they are at the end of a line.
			// JSON only allows 4 control characters: [ \t\r\n]. So we must not use '\s' for matching.
			// Regex match ,]<any non-quote chars>\n or ,\n] with optional spaces/tabs.
			$count = 0;
			$fixed = preg_replace(
				'/,([ \t]*[}\]][^"\r\n]*([\r\n]|$)|[ \t]*[\r\n][ \t\r\n]*[}\]])/',
				'$1',
				$value,
				-1,
				$count
			);
			// preg_replace() yields null on a PCRE failure; never feed that to json_decode().
			if ( $fixed !== null && $count > 0 ) {
				$fixedResult = json_decode( $fixed, $assoc );
				if ( json_last_error() === JSON_ERROR_NONE ) {
					// The repaired text is valid: succeed, but warn about what was removed.
					$st = Status::newGood( $fixedResult );
					$st->warning( wfMessage( 'json-warn-trailing-comma' )->numParams( $count ) );
					return $st;
				}
			}
			// Repair failed: fall through and report the original syntax error.
		}

		$msg = self::DECODE_ERROR_MESSAGES[$code] ?? null;
		if ( $msg === null ) {
			return Status::newFatal( wfMessage( 'json-error-unknown' )->numParams( $code ) );
		}
		return Status::newFatal( $msg );
	}

	/**
	 * Remove multiline and single line comments from an otherwise valid JSON
	 * input string. This can be used as a preprocessor, to allow JSON
	 * formatted configuration files to contain comments.
	 *
	 * Comment markers inside JSON string values are left untouched. A quote is
	 * considered escaped only when preceded by an odd number of backslashes, so
	 * a string ending in an escaped backslash (e.g. "C:\\") is closed correctly.
	 * A multiline comment needs a closing star-slash distinct from its opener:
	 * slash-star-slash alone does not end the comment.
	 *
	 * @param string $json
	 * @return string JSON with comments removed
	 */
	public static function stripComments( $json ) {
		// Ensure we have a string
		$str = (string)$json;
		$buffer = '';
		$maxLen = strlen( $str );
		// Start of the not-yet-copied chunk of non-comment text
		$mark = 0;

		$inString = false;
		$inComment = false;
		$multiline = false;
		// Offset of the '/' that opened the current comment
		$commentStart = 0;

		for ( $idx = 0; $idx < $maxLen; $idx++ ) {
			switch ( $str[$idx] ) {
				case '"':
					if ( $inComment ) {
						break;
					}
					// Count the run of backslashes right before the quote: an even
					// count means they escape each other and the quote is real.
					$backslashes = 0;
					for ( $back = $idx - 1; $back >= 0 && $str[$back] === '\\'; $back-- ) {
						$backslashes++;
					}
					if ( $backslashes % 2 === 0 ) {
						// Either started or ended a string
						$inString = !$inString;
					}
					break;

				case '/':
					if ( $inString ) {
						break;
					}
					if ( !$inComment ) {
						$lookAhead = ( $idx + 1 < $maxLen ) ? $str[$idx + 1] : '';
						if ( $lookAhead === '/' || $lookAhead === '*' ) {
							// Transition into a comment
							// Add characters seen to buffer
							$buffer .= substr( $str, $mark, $idx - $mark );
							$commentStart = $idx;
							// Consume the look ahead character
							$idx++;
							// Track state
							$inComment = true;
							$multiline = $lookAhead === '*';
						}
					} elseif (
						$multiline &&
						// The closing '*' must not be the '*' of the opening marker
						$idx - $commentStart >= 3 &&
						$str[$idx - 1] === '*'
					) {
						// Found the end of the current comment
						$mark = $idx + 1;
						$inComment = false;
						$multiline = false;
					}
					break;

				case "\n":
					if ( $inComment && !$multiline ) {
						// Found the end of the current comment
						$mark = $idx + 1;
						$inComment = false;
					}
					break;
			}
		}
		if ( $inComment ) {
			// Comment ends with input
			// Technically we should check to ensure that we aren't in
			// a multiline comment that hasn't been properly ended, but this
			// is a strip filter, not a validating parser.
			$mark = $maxLen;
		}
		// Add final chunk to buffer before returning
		return $buffer . substr( $str, $mark, $maxLen - $mark );
	}
}