Code Coverage for /src/MediaWiki/Sniffs/AlternativeSyntax/UnicodeEscapeSniff.php

	Code Coverage
	Lines			Functions and Methods				Classes and Traits
Total	0.00% covered (danger)	0.00%	0 / 60	0.00% covered (danger)	0.00%	0 / 2	CRAP	0.00% covered (danger)	0.00%	0 / 1
UnicodeEscapeSniff	0.00% covered (danger)	0.00%	0 / 60	0.00% covered (danger)	0.00%	0 / 2	420	0.00% covered (danger)	0.00%	0 / 1
register	0.00% covered (danger)	0.00%	0 / 5	0.00% covered (danger)	0.00%	0 / 1	2
process	0.00% covered (danger)	0.00%	0 / 55	0.00% covered (danger)	0.00%	0 / 1	380

1	<?php
2	/**
3	* This program is free software; you can redistribute it and/or modify
4	* it under the terms of the GNU General Public License as published by
5	* the Free Software Foundation; either version 2 of the License, or
6	* (at your option) any later version.
7	*
8	* This program is distributed in the hope that it will be useful,
9	* but WITHOUT ANY WARRANTY; without even the implied warranty of
10	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11	* GNU General Public License for more details.
12	*
13	* You should have received a copy of the GNU General Public License along
14	* with this program; if not, write to the Free Software Foundation, Inc.,
15	* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16	* http://www.gnu.org/copyleft/gpl.html
17	*
18	* @file
19	*/
20
21	namespace MediaWiki\Sniffs\AlternativeSyntax;
22
23	use PHP_CodeSniffer\Files\File;
24	use PHP_CodeSniffer\Sniffs\Sniff;
25
/**
 * Warns about Unicode codepoint escape sequences (`\u{...}`) whose hex digits
 * are not written as uppercase digits zero-padded to at least four places,
 * and offers a fix that rewrites them in that form.
 *
 * Only double-quoted strings and heredocs are inspected; single-quoted
 * strings are skipped.
 */
class UnicodeEscapeSniff implements Sniff {

	/**
	 * @inheritDoc
	 */
	public function register(): array {
		return [
			T_CONSTANT_ENCAPSED_STRING,
			T_DOUBLE_QUOTED_STRING,
			T_START_HEREDOC,
		];
	}

	/**
	 * Check one string (possibly spanning several PHP_CodeSniffer tokens) for
	 * Unicode escape sequences whose digits are not normalized.
	 *
	 * @param File $phpcsFile File
	 * @param int $stackPtr Location
	 * @return int Pointer just past the string's tokens, or the file's token
	 *  count if the string runs to the end of the file
	 */
	public function process( File $phpcsFile, $stackPtr ) {
		$tokens = $phpcsFile->getTokens();

		// Find the end of the string: the first following token that is
		// neither of the starting token's type nor part of a heredoc.
		$endPtr = $phpcsFile->findNext(
			/* types */ [ $tokens[$stackPtr]['code'], T_HEREDOC, T_END_HEREDOC ],
			/* start */ $stackPtr + 1,
			/* end */ null,
			/* exclude */ true
		) ?: $phpcsFile->numTokens;

		// For a heredoc, also take in one directly following semicolon and
		// one following whitespace token.
		// NOTE(review): presumably so the retokenized snippet below ends the
		// heredoc cleanly - confirm.
		if ( $tokens[$endPtr - 1]['code'] === T_END_HEREDOC ) {
			if ( isset( $tokens[$endPtr] ) && $tokens[$endPtr]['code'] === T_SEMICOLON ) {
				++$endPtr;
			}
			if ( isset( $tokens[$endPtr] ) && $tokens[$endPtr]['code'] === T_WHITESPACE ) {
				++$endPtr;
			}
		}

		// If this is a single-quoted string, skip it.
		if ( $tokens[$stackPtr]['code'] === T_CONSTANT_ENCAPSED_STRING &&
			$tokens[$stackPtr]['content'][0] === "'"
		) {
			return $endPtr;
		}

		// If the string takes up multiple lines, PHP_CodeSniffer would
		// have split some of its tokens. Recombine the string's tokens
		// so the next step will work.
		$content = $phpcsFile->getTokensAsString( $stackPtr, $endPtr - $stackPtr );

		// If the string contains braced expressions, PHP_CodeSniffer
		// would have combined these and surrounding tokens, which could
		// lead to false matches. Avoid this by retokenizing the string.
		$origTokens = token_get_all( '<?php ' . $content );
		$warn = false;
		$content = '';
		foreach ( $origTokens as $i => $origToken ) {
			// Skip the PHP opening tag we added.
			if ( $i === 0 ) {
				continue;
			}

			// token_get_all() yields either [ id, text, line ] arrays or bare
			// one-character strings; the cast turns the latter into [ text ].
			$origToken = (array)$origToken;

			// Don't check tokens that cannot contain escape sequences.
			if ( !(
				$origToken[0] === T_ENCAPSED_AND_WHITESPACE ||
				( $origToken[0] === T_CONSTANT_ENCAPSED_STRING && $origToken[1][0] !== "'" )
			) ) {
				$content .= $origToken[1] ?? $origToken[0];
				continue;
			}

			// Check for Unicode escape sequences in the token, explicitly
			// skipping escaped backslashes to prevent false matches: the
			// second alternative consumes a "\\" pair and then discards it
			// via (*SKIP)(*FAIL), so the callback only ever sees "\u{...}".
			$normalized = preg_replace_callback(
				'/\\\\(?:u\{([0-9A-Fa-f]+)\}|\\\\(*SKIP)(*FAIL))/',
				static function ( array $m ) use ( &$warn ) {
					// Decode the codepoint-digits.
					$cp = hexdec( $m[1] );
					if ( $cp > 0x10FFFF ) {
						// This is a parse error. Don't offer to fix it.
						return $m[0];
					}

					// Check the codepoint-digits against the expected format.
					$hex = sprintf( '%04X', $cp );
					if ( $m[1] === $hex ) {
						// Keep the conforming escape sequence as-is.
						return $m[0];
					}

					// Print a warning for the token containing the nonconforming
					// escape sequence and replace it with a conforming one.
					$warn = true;
					return '\u{' . $hex . '}';
				},
				$origToken[1]
			);

			// preg_replace_callback() returns null on a PCRE error. Keep the
			// token's original text in that case so a fix can never drop
			// part of the string.
			$content .= $normalized ?? $origToken[1];
		}

		if ( $warn ) {
			$fix = $phpcsFile->addFixableWarning(
				'Unicode code points should be expressed using four to six uppercase hex ' .
					'digits, with leading zeros used only as necessary for \u{0FFF} and below',
				$stackPtr,
				'DigitsNotNormalized'
			);
			if ( $fix ) {
				// Put the whole rebuilt string into the first token and blank
				// out the remaining tokens that belonged to it.
				$phpcsFile->fixer->beginChangeset();
				$phpcsFile->fixer->replaceToken( $stackPtr, $content );
				for ( $i = $stackPtr + 1; $i < $endPtr; ++$i ) {
					$phpcsFile->fixer->replaceToken( $i, '' );
				}
				$phpcsFile->fixer->endChangeset();
			}
		}

		return $endPtr;
	}

}