Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 29 |
|
0.00% |
0 / 5 |
CRAP | |
0.00% |
0 / 1 |
| Utils | |
0.00% |
0 / 29 |
|
0.00% |
0 / 5 |
156 | |
0.00% |
0 / 1 |
| codepointToUtf8 | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
30 | |||
| hexSequenceToUtf8 | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
| utf8ToHexSequence | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
| utf8ToCodepoint | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 | |||
| escapeSingleString | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
2 | |||
| 1 | <?php |
| 2 | declare( strict_types = 1 ); |
| 3 | |
| 4 | namespace UtfNormal; |
| 5 | |
| 6 | use InvalidArgumentException; |
| 7 | |
| 8 | /** |
| 9 | * Copyright © 2004 Brooke Vibber <bvibber@pobox.com> |
| 10 | * https://www.mediawiki.org/ |
| 11 | * |
| 12 | * This program is free software; you can redistribute it and/or modify |
| 13 | * it under the terms of the GNU General Public License as published by |
| 14 | * the Free Software Foundation; either version 2 of the License, or |
| 15 | * (at your option) any later version. |
| 16 | * |
| 17 | * This program is distributed in the hope that it will be useful, |
| 18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 20 | * GNU General Public License for more details. |
| 21 | * |
| 22 | * You should have received a copy of the GNU General Public License along |
| 23 | * with this program; if not, write to the Free Software Foundation, Inc., |
| 24 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
| 25 | * http://www.gnu.org/copyleft/gpl.html |
| 26 | * |
| 27 | * @file |
| 28 | */ |
| 29 | |
| 30 | /** |
| 31 | * Some of these functions are adapted from places in MediaWiki. |
| 32 | * Should probably merge them for consistency. |
| 33 | * |
| 34 | * @ingroup UtfNormal |
| 35 | */ |
class Utils {
	/**
	 * Return UTF-8 sequence for a given Unicode code point.
	 *
	 * Surrogate code points (U+D800 - U+DFFF) are encoded by hand as their
	 * three-byte form, because mb_chr() rejects them.
	 *
	 * @param int $codepoint
	 * @return string
	 * @throws InvalidArgumentException if fed out of range data.
	 */
	public static function codepointToUtf8( $codepoint ) {
		// In PHP 7.2, mb_chr is buggy when $codepoint is 0 (null byte)
		if ( $codepoint === 0 ) {
			return "\u{0000}";
		}
		// In PHP >=7.4, mb_chr fails when $codepoint is in surrogate range
		// U+D800 - U+DBFF / U+DC00 - U+DFFF
		if ( $codepoint >= 0xD800 && $codepoint <= 0xDFFF ) {
			// UTF-8 encoding of the codepoint, the hard way.
			// Every surrogate has 0b1101 as its top four bits, so the lead byte is always 0xED.
			return (
				chr( 0xED ) .
				chr( 0x80 | ( ( $codepoint >> 6 ) & 0x3F ) ) .
				chr( 0x80 | ( $codepoint & 0x3F ) )
			);
		}
		// Name the encoding explicitly so the result does not depend on the
		// configured mbstring internal encoding.
		$char = mb_chr( $codepoint, 'UTF-8' );
		if ( $char === false ) {
			throw new InvalidArgumentException( "Asked for code outside of range ($codepoint)" );
		}

		return $char;
	}

	/**
	 * Take a series of space-separated hexadecimal numbers representing
	 * Unicode code points and return a UTF-8 string composed of those
	 * characters. Used by UTF-8 data generation and testing routines.
	 *
	 * Empty tokens (from an empty input string, or from leading, trailing or
	 * doubled spaces) are ignored and contribute nothing to the result.
	 *
	 * @param string $sequence
	 * @return string
	 * @throws InvalidArgumentException if fed out of range data.
	 * @private Used in tests and data table generation
	 */
	public static function hexSequenceToUtf8( $sequence ) {
		$utf = '';
		foreach ( explode( ' ', $sequence ) as $hex ) {
			// hexdec( '' ) is 0, which would otherwise append a stray NUL character.
			if ( $hex === '' ) {
				continue;
			}
			$n = hexdec( $hex );
			$utf .= self::codepointToUtf8( $n );
		}

		return $utf;
	}

	/**
	 * Take a UTF-8 string and return a space-separated series of hex
	 * numbers representing Unicode code points. For debugging.
	 *
	 * Each code point is printed as lowercase hex, zero-padded to at least
	 * four digits. If the string cannot be split into characters, an empty
	 * string is returned.
	 *
	 * @param string $str UTF-8 string.
	 * @return string
	 * @private
	 */
	private static function utf8ToHexSequence( $str ) {
		// preg_split() returns false on failure (e.g. malformed UTF-8 with the
		// /u modifier); fall back to an empty list instead of iterating over false.
		$chars = preg_split( '//u', $str, -1, PREG_SPLIT_NO_EMPTY ) ?: [];
		$buf = '';
		foreach ( $chars as $cp ) {
			$buf .= sprintf( '%04x ', mb_ord( $cp, 'UTF-8' ) );
		}

		// Drop the separator left after the last code point.
		return rtrim( $buf );
	}

	/**
	 * Determine the Unicode codepoint of a single-character UTF-8 sequence.
	 * Does not check for invalid input data.
	 *
	 * Returns false when the input is not exactly one character long; this
	 * includes the empty string, for which mb_ord() would otherwise throw a
	 * ValueError on PHP 8.
	 *
	 * @deprecated since 2.1, use mb_ord()
	 *
	 * @param string $char
	 * @return int|false
	 */
	public static function utf8ToCodepoint( $char ) {
		if ( mb_strlen( $char, 'UTF-8' ) !== 1 ) {
			return false;
		}

		return mb_ord( $char, 'UTF-8' );
	}

	/**
	 * Escape a string for inclusion in a PHP single-quoted string literal.
	 *
	 * Backslashes and single quotes each get a backslash prepended; all
	 * other characters are left untouched.
	 *
	 * @param string $string String to be escaped.
	 * @return string Escaped string.
	 */
	public static function escapeSingleString( $string ) {
		return strtr(
			$string,
			[
				'\\' => '\\\\',
				'\'' => '\\\''
			]
		);
	}
}