All files unicodejs.js

100% Statements 74/74
100% Branches 44/44
100% Functions 7/7
100% Lines 72/72

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218              1x                             1x 16472x                 1x 12547x                     7990x                         3231x 807x   2424x 2424x 1047x   1377x                                                       680x 680x       680x 680x     680x 680x     680x 663x     17x         17x   17x     17x 7x   17x 15x   17x 15x   17x                           1x 64x 64x 64x   64x 3918x     3918x 1860x 1381x 2x   1379x 1379x     479x 1x     478x 478x     478x 478x         2058x 2058x 2058x 1x   2057x 1x     2056x 4x     2052x   1372x 680x   5x 5x     675x       2052x 927x 927x 927x 927x         55x 13x 42x 34x   55x      
/*!
 * UnicodeJS namespace
 *
 * @copyright 2013-2018 UnicodeJS team and others; see AUTHORS.txt
 * @license The MIT License (MIT); see LICENSE.txt
 */
 
( function () {
 
	/**
	 * Namespace for all UnicodeJS classes, static methods and static properties.
	 *
	 * @class unicodeJS
	 * @singleton
	 */
 
	/**
	 * Check if a code unit is the leading half of a surrogate pair
	 *
	 * Always returns a real boolean. Falsy input (empty string, null, undefined)
	 * yields false; any other value is matched against the range after string
	 * coercion, so it never throws on non-string input such as numbers.
	 *
	 * @param {string} unit Code unit
	 * @return {boolean} True if unit is exactly one code unit in the range 0xD800-0xDBFF
	 */
	unicodeJS.isLeadingSurrogate = function ( unit ) {
		// RegExp#test (unlike String#match) yields true/false rather than a match array or null
		return Boolean( unit ) && /^[\uD800-\uDBFF]$/.test( unit );
	};
 
	/**
	 * Check if a code unit is the trailing half of a surrogate pair
	 *
	 * Always returns a real boolean. Falsy input (empty string, null, undefined)
	 * yields false; any other value is matched against the range after string
	 * coercion, so it never throws on non-string input such as numbers.
	 *
	 * @param {string} unit Code unit
	 * @return {boolean} True if unit is exactly one code unit in the range 0xDC00-0xDFFF
	 */
	unicodeJS.isTrailingSurrogate = function ( unit ) {
		// RegExp#test (unlike String#match) yields true/false rather than a match array or null
		return Boolean( unit ) && /^[\uDC00-\uDFFF]$/.test( unit );
	};
 
	/**
	 * Format a UTF-16 code unit as the source text of a javascript escape sequence.
	 *
	 * @private
	 * @param {number} codeUnit integer between 0x0000 and 0xFFFF
	 * @return {string} Backslash, 'u', then exactly 4 lower-case hex digits
	 */
	function uEsc( codeUnit ) {
		// Adding 0x10000 guarantees at least five hex digits, so the last four
		// digits are the code unit left-padded with zeros.
		var paddedHex = ( codeUnit + 0x10000 ).toString( 16 );
		var lastFour = paddedHex.slice( -4 );
		return '\\u' + lastFour;
	}
 
	/**
	 * Build the regexp source text matching every code unit from min to max
	 *
	 * When min and max are equal the result is just that one escaped code
	 * unit, and the bracket flag is ignored.
	 *
	 * @private
	 * @param {number} min the minimum code unit in the range.
	 * @param {number} max the maximum code unit in the range.
	 * @param {boolean} [bracket] If true, then wrap range in [ ... ]
	 * @return {string} Regexp string which matches the range
	 */
	function codeUnitRange( min, max, bracket ) {
		var lower = uEsc( min );

		// A lone code unit needs neither a dash nor brackets
		if ( min === max ) {
			return lower;
		}

		var interval = lower + '-' + uEsc( max );
		return bracket ? '[' + interval + ']' : interval;
	}
 
	/**
	 * Split a range of non-BMP characters into rectangular "boxes" of surrogate code units
	 *
	 * Each returned box {hi: [a, b], lo: [c, d]} stands for the regex [a-b][c-d], i.e.
	 * every surrogate pair whose leading unit is in a..b and whose trailing unit is in c..d.
	 *
	 * If ch1 encodes as the pair (hi1, lo1) and ch2 as (hi2, lo2), then the range
	 * ch1..ch2 is covered by at most three boxes:
	 *
	 *     [hi1][lo1 - 0xDFFF]                 partial first row
	 *     [hi1+1 - hi2-1][0xDC00 - 0xDFFF]    complete rows in between
	 *     [hi2][0xDC00 - lo2]                 partial last row
	 *
	 * A "partial" row that is in fact complete is folded into the middle box, and
	 * when hi1 equals hi2 the single box [hi1][lo1 - lo2] is returned.
	 *
	 * @private
	 * @param {number} ch1 The min character of the range; must be over 0xFFFF
	 * @param {number} ch2 The max character of the range; must be at least ch1
	 * @return {Array} A list of boxes {hi: [x, y], lo: [z, w]}
	 */
	function getCodeUnitBoxes( ch1, ch2 ) {
		var HI_BASE = 0xD800,
			LO_FIRST = 0xDC00,
			LO_LAST = 0xDFFF,
			// positions of both characters counted from 0x10000
			start = ch1 - 0x10000,
			end = ch2 - 0x10000;

		/* eslint-disable no-bitwise */
		// surrogate pair of each endpoint
		var startHi = HI_BASE + ( start >> 10 ),
			startLo = LO_FIRST + ( start & 0x3FF ),
			endHi = HI_BASE + ( end >> 10 ),
			endLo = LO_FIRST + ( end & 0x3FF ),
			// lowest hi surrogate whose characters are all >= ch1
			firstFullHi = HI_BASE + ( ( start + 0x3FF ) >> 10 ),
			// highest hi surrogate whose characters are all <= ch2
			lastFullHi = HI_BASE + ( ( end - 0x3FF ) >> 10 );
		/* eslint-enable no-bitwise */

		// Both endpoints share a hi surrogate: one box is enough
		if ( startHi === endHi ) {
			return [ { hi: [ startHi, endHi ], lo: [ startLo, endLo ] } ];
		}

		var result = [];
		if ( startHi < firstFullHi ) {
			// first row starts part-way through
			result.push( { hi: [ startHi, startHi ], lo: [ startLo, LO_LAST ] } );
		}
		if ( firstFullHi <= lastFullHi ) {
			// rows covered entirely
			result.push( { hi: [ firstFullHi, lastFullHi ], lo: [ LO_FIRST, LO_LAST ] } );
		}
		if ( lastFullHi < endHi ) {
			// last row ends part-way through
			result.push( { hi: [ endHi, endHi ], lo: [ LO_FIRST, endLo ] } );
		}
		return result;
	}
 
	/**
	 * Make a regexp string for an array of Unicode character ranges.
	 *
	 * Each entry of ranges is either a single character code (a number) or an
	 * inclusive interval [min, max] of character codes. All characters up to
	 * 0xFFFF are gathered into one character class, which is placed first in
	 * the result. Characters above 0xFFFF are encoded as alternatives matching
	 * surrogate pairs (or surrogate pair ranges). It is an error for a range
	 * to overlap with the surrogate range 0xD800-0xDFFF (as this would only
	 * match ill-formed strings).
	 *
	 * @param {Array} ranges Array of ranges, each of which is a character or an interval
	 * @return {string} Regexp string for the disjunction of the ranges.
	 * @throws {Error} If a character is a surrogate or above 0x10FFFF, if an
	 *  interval overlaps the surrogate range, or if an interval has min > max
	 */
	unicodeJS.charRangeArrayRegexp = function ( ranges ) {
		var characterClass = [], // list of (\uXXXX code unit or interval), for BMP
			disjunction = []; // list of regex strings, to be joined with '|'

		for ( var i = 0; i < ranges.length; i++ ) {
			var range = ranges[ i ];

			// Handle single code unit
			if ( typeof range === 'number' ) {
				if ( range <= 0xFFFF ) {
					if ( range >= 0xD800 && range <= 0xDFFF ) {
						throw new Error( 'Surrogate: ' + range.toString( 16 ) );
					}
					characterClass.push( uEsc( range ) );
					continue;
				} else {
					// Handle single surrogate pair
					if ( range > 0x10FFFF ) {
						throw new Error( 'Character code too high: ' + range.toString( 16 ) );
					}
					/* eslint-disable no-bitwise */
					var hi = 0xD800 + ( ( range - 0x10000 ) >> 10 );
					var lo = 0xDC00 + ( ( range - 0x10000 ) & 0x3FF );
					/* eslint-enable no-bitwise */

					disjunction.push( uEsc( hi ) + uEsc( lo ) );
					continue;
				}
			}

			// Handle interval
			var min = range[ 0 ];
			var max = range[ 1 ];
			if ( min > max ) {
				throw new Error( min.toString( 16 ) + ' > ' + max.toString( 16 ) );
			}
			if ( max > 0x10FFFF ) {
				throw new Error( 'Character code too high: ' +
					max.toString( 16 ) );
			}
			if ( max >= 0xD800 && min <= 0xDFFF ) {
				throw new Error( 'range includes surrogates: ' +
					min.toString( 16 ) + '-' + max.toString( 16 ) );
			}

			// Start every interval with no boxes: an entirely-BMP interval
			// produces none, and boxes left over from an earlier interval
			// must not be appended to the disjunction a second time.
			var boxes = [];
			if ( max <= 0xFFFF ) {
				// interval is entirely BMP
				characterClass.push( codeUnitRange( min, max ) );
			} else if ( min <= 0xFFFF ) {
				// interval is BMP and non-BMP
				characterClass.push( codeUnitRange( min, 0xFFFF ) );
				boxes = getCodeUnitBoxes( 0x10000, max );
			} else {
				// interval is entirely non-BMP
				boxes = getCodeUnitBoxes( min, max );
			}

			// append hi-lo surrogate space boxes as code unit range pairs
			for ( var j = 0; j < boxes.length; j++ ) {
				var box = boxes[ j ];
				var hi2 = codeUnitRange( box.hi[ 0 ], box.hi[ 1 ], true );
				var lo2 = codeUnitRange( box.lo[ 0 ], box.lo[ 1 ], true );
				disjunction.push( hi2 + lo2 );
			}
		}

		// prepend BMP character class to the disjunction
		if ( characterClass.length === 1 && !characterClass[ 0 ].match( /-/ ) ) {
			disjunction.unshift( characterClass[ 0 ] ); // single character
		} else if ( characterClass.length > 0 ) {
			disjunction.unshift( '[' + characterClass.join( '' ) + ']' );
		}
		return disjunction.join( '|' );
	};
}() );