MediaWiki REL1_31
FormatJson.php
Go to the documentation of this file.
1<?php
/**
 * Skip escaping most characters above U+007F for readability and compactness.
 * Escaping flag for encode().
 */
34 const UTF8_OK = 1;
35
/**
 * Skip escaping the characters '<', '>', and '&', which have special meanings in
 * HTML and XML. Escaping flag for encode().
 */
46 const XMLMETA_OK = 2;
47
/**
 * Skip escaping as many characters as reasonably possible.
 * Its value is the bitwise combination of UTF8_OK and XMLMETA_OK.
 */
55 const ALL_OK = 3;
56
/**
 * If set, treat JSON objects '{...}' as associative arrays. Option flag for parse().
 */
64 const FORCE_ASSOC = 0x100;
65
/**
 * If set, attempts to fix invalid JSON. Option flag for parse().
 */
71 const TRY_FIXING = 0x200;
72
/**
 * If set, strip comments from input before parsing as JSON. Option flag for parse().
 */
78 const STRIP_COMMENTS = 0x400;
79
/**
 * Regex that matches whitespace inside empty arrays and objects.
 * Used by encode() to clean up pretty-printed output.
 */
89 const WS_CLEANUP_REGEX = '/(?<=[\[{])\n\s*+(?=[\]}])/';
90
/**
 * Characters problematic in JavaScript, as raw UTF-8 byte sequences.
 * Entries pair up by position with $badCharsEscaped.
 */
97 private static $badChars = [
98 "\xe2\x80\xa8", // U+2028 LINE SEPARATOR
99 "\xe2\x80\xa9", // U+2029 PARAGRAPH SEPARATOR
100 ];
101
/**
 * Escape sequences for the characters listed in $badChars, in the same order.
 */
105 private static $badCharsEscaped = [
106 '\u2028', // U+2028 LINE SEPARATOR
107 '\u2029', // U+2029 PARAGRAPH SEPARATOR
108 ];
109
/**
 * Returns the JSON representation of a value.
 *
 * @param mixed $value The value to encode; handed to json_encode() unchanged.
 * @param string|bool $pretty If a string, it is used as the indent for one nesting level of
 *   pretty-printed output. If true, the four-space indent produced by JSON_PRETTY_PRINT is
 *   kept. If false, the output is not pretty-printed.
 * @param int $escaping Bitfield of self::UTF8_OK and self::XMLMETA_OK (or self::ALL_OK for
 *   both) selecting which characters are left unescaped.
 * @return string|false The JSON string, or false if json_encode() failed.
 */
public static function encode( $value, $pretty = false, $escaping = 0 ) {
	if ( !is_string( $pretty ) ) {
		// Normalize booleans: true selects the four-space indent JSON_PRETTY_PRINT emits
		$pretty = $pretty ? '    ' : false;
	}

	// Detected once per process: does this PHP put whitespace inside an empty array?
	static $bug66021;
	if ( $pretty !== false && $bug66021 === null ) {
		$bug66021 = json_encode( [], JSON_PRETTY_PRINT ) !== '[]';
	}

	// PHP escapes '/' to prevent breaking out of inline script blocks using '</script>',
	// which is hardly useful when '<' and '>' are escaped (and inadequate), and such
	// escaping negatively impacts the human readability of URLs and similar strings.
	$options = JSON_UNESCAPED_SLASHES;
	$options |= $pretty !== false ? JSON_PRETTY_PRINT : 0;
	$options |= ( $escaping & self::UTF8_OK ) ? JSON_UNESCAPED_UNICODE : 0;
	$options |= ( $escaping & self::XMLMETA_OK ) ? 0 : ( JSON_HEX_TAG | JSON_HEX_AMP );
	$json = json_encode( $value, $options );
	if ( $json === false ) {
		return false;
	}

	if ( $pretty !== false ) {
		// Workaround for <https://bugs.php.net/bug.php?id=66021>
		if ( $bug66021 ) {
			$json = preg_replace( self::WS_CLEANUP_REGEX, '', $json );
		}
		if ( $pretty !== '    ' ) {
			// Change the four-space indent to a tab indent. Only whole four-space groups
			// at the start of a line are converted, one nesting level per pass; raw
			// newlines and tabs never occur inside JSON string values, so only
			// indentation is touched.
			$json = str_replace( "\n    ", "\n\t", $json );
			while ( strpos( $json, "\t    " ) !== false ) {
				$json = str_replace( "\t    ", "\t\t", $json );
			}

			if ( $pretty !== "\t" ) {
				// Change the tab indent to the provided indent
				$json = str_replace( "\t", $pretty, $json );
			}
		}
	}
	if ( $escaping & self::UTF8_OK ) {
		// With JSON_UNESCAPED_UNICODE these characters are emitted raw; re-escape the
		// ones that are problematic in JavaScript.
		$json = str_replace( self::$badChars, self::$badCharsEscaped, $json );
	}

	return $json;
}
173
/**
 * Decodes a JSON string.
 *
 * Thin wrapper: both arguments are forwarded to json_decode() unchanged and its
 * result is returned as-is.
 *
 * @param string $value The JSON text to decode.
 * @param bool $assoc Forwarded as json_decode()'s second argument.
 * @return mixed Whatever json_decode() returns for the given input.
 */
public static function decode( $value, $assoc = false ) {
	$decoded = json_decode( $value, $assoc );

	return $decoded;
}
190
/**
 * Decodes a JSON string and reports the outcome as a Status object.
 *
 * @param string $value The JSON text to decode.
 * @param int $options Bitfield of self::FORCE_ASSOC, self::TRY_FIXING and
 *   self::STRIP_COMMENTS.
 * @return Status A good Status wrapping the decoded value (carrying a warning when trailing
 *   commas had to be removed first), or a fatal Status naming the decoding error.
 */
public static function parse( $value, $options = 0 ) {
	if ( $options & self::STRIP_COMMENTS ) {
		$value = self::stripComments( $value );
	}

	$asArray = ( $options & self::FORCE_ASSOC ) !== 0;
	$decoded = json_decode( $value, $asArray );
	$errorCode = json_last_error();

	$mayRepair = ( $options & self::TRY_FIXING ) !== 0;
	if ( $mayRepair && $errorCode === JSON_ERROR_SYNTAX ) {
		// The most common mistake is a trailing comma in a list or an object.
		// Replacing /,\s*[}\]]/ blindly is not possible because the sequence could sit
		// inside a string value. JSON has no multi-line string values, though, so a
		// trailing comma can be removed safely when it is at the end of a line.
		// JSON only allows 4 whitespace characters: [ \t\r\n], hence no '\s' in the pattern.
		// The pattern matches ,]<any non-quote chars>\n or ,\n] with optional spaces/tabs.
		$removed = 0;
		$value = preg_replace(
			'/,([ \t]*[}\]][^"\r\n]*([\r\n]|$)|[ \t]*[\r\n][ \t\r\n]*[}\]])/',
			'$1',
			$value,
			-1,
			$removed
		);
		if ( $removed > 0 ) {
			$decoded = json_decode( $value, $asArray );
			if ( json_last_error() === JSON_ERROR_NONE ) {
				// The repaired text decodes; succeed, but tell the caller about the fix
				$status = Status::newGood( $decoded );
				$status->warning(
					wfMessage( 'json-warn-trailing-comma' )->numParams( $removed )
				);
				return $status;
			}
		}
	}

	// From here on the code of the first decoding attempt decides the outcome
	if ( $errorCode === JSON_ERROR_NONE ) {
		return Status::newGood( $decoded );
	}

	// Message keys for the error codes that have a dedicated message
	$messageKeys = [
		JSON_ERROR_DEPTH => 'json-error-depth',
		JSON_ERROR_STATE_MISMATCH => 'json-error-state-mismatch',
		JSON_ERROR_CTRL_CHAR => 'json-error-ctrl-char',
		JSON_ERROR_SYNTAX => 'json-error-syntax',
		JSON_ERROR_UTF8 => 'json-error-utf8',
		JSON_ERROR_RECURSION => 'json-error-recursion',
		JSON_ERROR_INF_OR_NAN => 'json-error-inf-or-nan',
		JSON_ERROR_UNSUPPORTED_TYPE => 'json-error-unsupported-type',
	];

	if ( !isset( $messageKeys[$errorCode] ) ) {
		// Any other code is reported generically, with the numeric code as parameter
		return Status::newFatal( wfMessage( 'json-error-unknown' )->numParams( $errorCode ) );
	}

	return Status::newFatal( $messageKeys[$errorCode] );
}
263
/**
 * Remove multiline and single line comments from an otherwise valid JSON input string.
 *
 * Comment markers inside JSON string values are left alone. A single-line comment is
 * removed together with the newline that ends it. A comment still open at the end of the
 * input is removed up to the end of the input.
 *
 * @param string $json The input; it is cast to string before processing.
 * @return string The input with all comments removed.
 */
public static function stripComments( $json ) {
	// Ensure we have a string
	$str = (string)$json;
	$buffer = '';
	$maxLen = strlen( $str );
	// Offset of the first byte that has not been copied to $buffer yet
	$mark = 0;

	$inString = false;
	$inComment = false;
	$multiline = false;
	// Offset of the '/' that opened the current comment
	$commentStart = 0;

	for ( $idx = 0; $idx < $maxLen; $idx++ ) {
		switch ( $str[$idx] ) {
			case '"':
				if ( $inComment ) {
					break;
				}
				// A quote is escaped only when preceded by an odd number of backslashes;
				// an even run (e.g. "\\") is made of escaped backslashes and leaves the
				// quote free to delimit the string.
				$backslashes = 0;
				for ( $back = $idx - 1; $back >= 0 && $str[$back] === '\\'; $back-- ) {
					$backslashes++;
				}
				if ( $backslashes % 2 === 0 ) {
					// Either started or ended a string
					$inString = !$inString;
				}
				break;

			case '/':
				if ( $inString ) {
					break;
				}
				$lookAhead = ( $idx + 1 < $maxLen ) ? $str[$idx + 1] : '';
				if ( !$inComment ) {
					if ( $lookAhead === '/' || $lookAhead === '*' ) {
						// Transition into a comment
						// Add characters seen to buffer
						$buffer .= substr( $str, $mark, $idx - $mark );
						$commentStart = $idx;
						// Consume the look ahead character
						$idx++;
						// Track state
						$inComment = true;
						$multiline = $lookAhead === '*';
					}
				} elseif ( $multiline && $str[$idx - 1] === '*' && $idx >= $commentStart + 3 ) {
					// Found the end of the current comment. The offset check makes sure
					// the '*' before this '/' is not the one that opened the comment,
					// so that "/*/" does not count as a complete comment.
					$mark = $idx + 1;
					$inComment = false;
					$multiline = false;
				}
				break;

			case "\n":
				if ( $inComment && !$multiline ) {
					// Found the end of the current comment
					$mark = $idx + 1;
					$inComment = false;
				}
				break;
		}
	}
	if ( $inComment ) {
		// Comment ends with input
		// Technically we should check to ensure that we aren't in
		// a multiline comment that hasn't been properly ended, but this
		// is a strip filter, not a validating parser.
		$mark = $maxLen;
	}
	// Add final chunk to buffer before returning
	return $buffer . substr( $str, $mark, $maxLen - $mark );
}
338}
JSON formatter wrapper class.
const UTF8_OK
Skip escaping most characters above U+007F for readability and compactness.
static parse( $value, $options=0)
Decodes a JSON string.
const FORCE_ASSOC
If set, treat json objects '{...}' as associative arrays.
const TRY_FIXING
If set, attempts to fix invalid json.
static encode( $value, $pretty=false, $escaping=0)
Returns the JSON representation of a value.
const STRIP_COMMENTS
If set, strip comments from input before parsing as JSON.
static decode( $value, $assoc=false)
Decodes a JSON string.
static $badChars
Characters problematic in JavaScript.
const ALL_OK
Skip escaping as many characters as reasonably possible.
const WS_CLEANUP_REGEX
Regex that matches whitespace inside empty arrays and objects.
static $badCharsEscaped
Escape sequences for characters listed in FormatJson::$badChars.
const XMLMETA_OK
Skip escaping the characters '<', '>', and '&', which have special meanings in HTML and XML.
static stripComments( $json)
Remove multiline and single line comments from an otherwise valid JSON input string.
This code would result in ircNotify being run twice when an article is and once for brion Hooks can return three possible true was required This is the default since MediaWiki *some string
Definition hooks.txt:181
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped & $options
Definition hooks.txt:2001
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable & $code
Definition hooks.txt:865
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html(usually something like "&lt;div ...>$1&lt;/div>"). - flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException':Called before an exception(or PHP error) is logged. This is meant for integration with external error aggregation services
$buffer