Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 60 |
|
0.00% |
0 / 2 |
CRAP | |
0.00% |
0 / 1 |
UnicodeEscapeSniff | |
0.00% |
0 / 60 |
|
0.00% |
0 / 2 |
420 | |
0.00% |
0 / 1 |
register | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
process | |
0.00% |
0 / 55 |
|
0.00% |
0 / 1 |
380 |
1 | <?php |
2 | /** |
3 | * This program is free software; you can redistribute it and/or modify |
4 | * it under the terms of the GNU General Public License as published by |
5 | * the Free Software Foundation; either version 2 of the License, or |
6 | * (at your option) any later version. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * GNU General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public License along |
14 | * with this program; if not, write to the Free Software Foundation, Inc., |
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
16 | * http://www.gnu.org/copyleft/gpl.html |
17 | * |
18 | * @file |
19 | */ |
20 | |
21 | namespace MediaWiki\Sniffs\AlternativeSyntax; |
22 | |
23 | use PHP_CodeSniffer\Files\File; |
24 | use PHP_CodeSniffer\Sniffs\Sniff; |
25 | |
/**
 * Sniff that checks the digits of `\u{...}` escape sequences in double-quoted
 * strings and heredocs, and offers a fix that rewrites them as uppercase hex
 * padded to at least four digits.
 */
class UnicodeEscapeSniff implements Sniff {

	/**
	 * Token types handled by this sniff: constant strings, double-quoted
	 * strings and heredoc openers.
	 *
	 * @inheritDoc
	 */
	public function register(): array {
		$stringStarters = [];
		$stringStarters[] = T_CONSTANT_ENCAPSED_STRING;
		$stringStarters[] = T_DOUBLE_QUOTED_STRING;
		$stringStarters[] = T_START_HEREDOC;

		return $stringStarters;
	}

	/**
	 * Inspect one string literal for Unicode escapes whose digits are not
	 * in the normalized form and, if any are found, emit a fixable warning.
	 *
	 * @param File $phpcsFile File being checked
	 * @param int $stackPtr Index of the token that starts the string
	 * @return int Index of the first token after the string (including a
	 *  semicolon and whitespace token directly following a heredoc), or the
	 *  file's token count when the string runs to the end of the file
	 */
	public function process( File $phpcsFile, $stackPtr ) {
		$tokens = $phpcsFile->getTokens();
		$startCode = $tokens[$stackPtr]['code'];

		// Walk forward over every token that still belongs to this string;
		// the first token of any other type marks the end.
		$sameStringCodes = [ $startCode, T_HEREDOC, T_END_HEREDOC ];
		$endPtr = $phpcsFile->findNext( $sameStringCodes, $stackPtr + 1, null, true );
		if ( !$endPtr ) {
			$endPtr = $phpcsFile->numTokens;
		}

		// After a heredoc terminator, also take in one semicolon and then one
		// whitespace token, in that order, when they are present.
		if ( $tokens[$endPtr - 1]['code'] === T_END_HEREDOC ) {
			foreach ( [ T_SEMICOLON, T_WHITESPACE ] as $trailingCode ) {
				if ( isset( $tokens[$endPtr] ) && $tokens[$endPtr]['code'] === $trailingCode ) {
					++$endPtr;
				}
			}
		}

		// Single-quoted strings are skipped without being checked.
		$isSingleQuoted = $startCode === T_CONSTANT_ENCAPSED_STRING
			&& $tokens[$stackPtr]['content'][0] === "'";
		if ( $isSingleQuoted ) {
			return $endPtr;
		}

		// A multi-line string may have been split into several
		// PHP_CodeSniffer tokens; glue them back into one piece of source.
		$source = $phpcsFile->getTokensAsString( $stackPtr, $endPtr - $stackPtr );

		// Re-tokenize with PHP's own tokenizer so that braced expressions are
		// kept apart from the literal text and cannot cause false matches.
		$needsFix = false;
		$rebuilt = '';
		foreach ( token_get_all( '<?php ' . $source ) as $index => $rawToken ) {
			if ( $index === 0 ) {
				// This is the opening tag prepended above.
				continue;
			}

			// Single-character tokens are plain strings; normalize to array form.
			$parts = (array)$rawToken;
			$mayHoldEscapes = $parts[0] === T_ENCAPSED_AND_WHITESPACE
				|| ( $parts[0] === T_CONSTANT_ENCAPSED_STRING && $parts[1][0] !== "'" );
			if ( !$mayHoldEscapes ) {
				// Copy tokens that cannot contain escape sequences verbatim.
				$rebuilt .= $parts[1] ?? $parts[0];
				continue;
			}

			// Rewrite nonconforming \u{...} escapes. The second alternative
			// consumes escaped backslashes and fails, so "\\u{...}" is never
			// mistaken for an escape sequence.
			$rebuilt .= preg_replace_callback(
				'/\\\\(?:u\{([0-9A-Fa-f]+)\}|\\\\(*SKIP)(*FAIL))/',
				static function ( array $match ) use ( &$needsFix ) {
					$codePoint = hexdec( $match[1] );
					// Values above 0x10FFFF are a parse error; leave those alone.
					if ( $codePoint <= 0x10FFFF ) {
						$normalized = sprintf( '%04X', $codePoint );
						if ( $match[1] !== $normalized ) {
							// Flag the string and substitute the normalized form.
							$needsFix = true;
							return '\u{' . $normalized . '}';
						}
					}

					// Already conforming, or out of range: keep as written.
					return $match[0];
				},
				$parts[1]
			);
		}

		if ( !$needsFix ) {
			return $endPtr;
		}

		$message = 'Unicode code points should be expressed using four to six uppercase hex ' .
			'digits, with leading zeros used only as necessary for \u{0FFF} and below';
		if ( $phpcsFile->addFixableWarning( $message, $stackPtr, 'DigitsNotNormalized' ) ) {
			$fixer = $phpcsFile->fixer;
			$fixer->beginChangeset();
			// Put the whole rewritten string into the first token and blank
			// out every other token that was part of it.
			$fixer->replaceToken( $stackPtr, $rebuilt );
			for ( $ptr = $stackPtr + 1; $ptr < $endPtr; ++$ptr ) {
				$fixer->replaceToken( $ptr, '' );
			}
			$fixer->endChangeset();
		}

		return $endPtr;
	}

}