All files unicodejs.js

100% Statements 74/74
100% Branches 44/44
100% Functions 7/7
100% Lines 72/72
Press n or j to go to the next uncovered block, b, p or k for the previous block.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222  
 
 
 
 
 
 
1x
 
 
 
 
 
 
 
 
 
 
 
 
 
1x
16472x
 
 
 
 
 
 
 
 
1x
12547x
 
 
 
 
 
 
 
 
 
 
 
8724x
 
 
 
 
 
 
 
 
 
 
 
 
 
3618x
1002x
 
2616x
2616x
1244x
 
1372x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
762x
762x
 
 
 
762x
762x
 
 
762x
762x
 
 
762x
744x
 
 
18x
 
 
 
 
18x
 
18x
 
 
18x
8x
 
18x
16x
 
18x
16x
 
18x
 
 
 
 
 
 
 
 
 
 
 
 
 
1x
64x
64x
64x
 
64x
4085x
 
 
4085x
1950x
1406x
2x
 
1404x
1404x
 
 
544x
1x
 
 
543x
543x
 
 
543x
543x
 
 
 
 
2135x
2135x
2135x
1x
 
2134x
1x
 
 
2133x
4x
 
 
2129x
 
1367x
762x
 
5x
5x
 
 
757x
 
 
 
2129x
1123x
1123x
1123x
1123x
 
 
 
 
55x
13x
42x
34x
 
55x
 
 
  /*!
 * UnicodeJS namespace
 *
 * @copyright 2013-2018 UnicodeJS team and others; see AUTHORS.txt
 * @license The MIT License (MIT); see LICENSE.txt
 */
 
( function () {
 
	/**
	 * Namespace for all UnicodeJS classes, static methods and static properties.
	 *
	 * @namespace unicodeJS
	 */
 
	/**
	 * Check if a code unit is the leading half of a surrogate pair
	 *
	 * @param {string} unit Code unit
	 * @return {boolean} True if and only if unit is a string made of exactly one code unit
	 *  in the range 0xD800-0xDBFF; false for anything else (including empty or missing input)
	 */
	unicodeJS.isLeadingSurrogate = function ( unit ) {
		// RegExp#test yields a strict boolean, so callers never see a match array, null or
		// the falsy input itself; the typeof guard keeps missing/non-string input from throwing.
		return typeof unit === 'string' && /^[\uD800-\uDBFF]$/.test( unit );
	};
 
	/**
	 * Check if a code unit is the trailing half of a surrogate pair
	 *
	 * @param {string} unit Code unit
	 * @return {boolean} True if and only if unit is a string made of exactly one code unit
	 *  in the range 0xDC00-0xDFFF; false for anything else (including empty or missing input)
	 */
	unicodeJS.isTrailingSurrogate = function ( unit ) {
		// RegExp#test yields a strict boolean, so callers never see a match array, null or
		// the falsy input itself; the typeof guard keeps missing/non-string input from throwing.
		return typeof unit === 'string' && /^[\uDC00-\uDFFF]$/.test( unit );
	};
 
	/**
	 * Format a UTF-16 code unit as the text of a javascript '\uXXXX' escape sequence.
	 *
	 * @memberof unicodeJS
	 * @private
	 * @param {number} codeUnit integer between 0x0000 and 0xFFFF
	 * @return {string} Backslash, 'u', then exactly 4 lowercase hex digits
	 */
	function uEsc( codeUnit ) {
		// Adding 0x10000 guarantees a fifth leading digit, so the last four hex digits
		// are always present and zero-padded.
		var padded = ( codeUnit + 0x10000 ).toString( 16 );
		var digits = padded.slice( -4 );
		return '\\u' + digits;
	}
 
	/**
	 * Build the regexp text that matches every code unit from min to max inclusive.
	 *
	 * @memberof unicodeJS
	 * @private
	 * @param {number} min the lowest code unit in the range.
	 * @param {number} max the highest code unit in the range.
	 * @param {boolean} [bracket] If true, wrap a multi-unit range in [ ... ]
	 * @return {string} A lone escape when min equals max; otherwise 'min-max',
	 *  wrapped in brackets when requested
	 */
	function codeUnitRange( min, max, bracket ) {
		var first = uEsc( min );
		if ( min === max ) {
			// A one-unit range needs neither a dash nor brackets
			return first;
		}
		var span = first + '-' + uEsc( max );
		return bracket ? '[' + span + ']' : span;
	}
 
	/**
	 * Split a range of non-BMP characters into rectangular "boxes" of surrogate code units.
	 *
	 * A box {hi: [x, y], lo: [z, w]} stands for the regex [x-y][z-w], i.e. every surrogate
	 * pair whose high half lies in x-y and whose low half lies in z-w.
	 *
	 * If ch1 and ch2 encode as the surrogate pairs (hi1, lo1) and (hi2, lo2), the characters
	 * from ch1 to ch2 are covered by at most three boxes:
	 *
	 *     [hi1 - hi1][lo1 - 0xDFFF]          (tail of the first high surrogate)
	 *      |
	 *     [hi1+1 - hi2-1][0xDC00 - 0xDFFF]   (high surrogates used in full)
	 *      |
	 *     [hi2 - hi2][0xDC00 - lo2]          (head of the last high surrogate)
	 *
	 * A partial box is omitted when its high surrogate is in fact used in full (it is then
	 * absorbed by the middle box), and a single box is returned when hi1 == hi2.
	 *
	 * @memberof unicodeJS
	 * @private
	 * @param {number} ch1 The min character of the range; must be over 0xFFFF
	 * @param {number} ch2 The max character of the range; must be at least ch1
	 * @return {Array.<Object>} A list of boxes, each an object with two properties:
	 *  'hi', a two-number array giving the range of the high surrogate, and
	 *  'lo', a two-number array giving the range of the low surrogate.
	 */
	function getCodeUnitBoxes( ch1, ch2 ) {
		var LO_FIRST = 0xDC00,
			LO_LAST = 0xDFFF,
			// positions of both characters counted from the first non-BMP character
			offset1 = ch1 - 0x10000,
			offset2 = ch2 - 0x10000;

		/* eslint-disable no-bitwise */
		// top bits of the offset select the high surrogate, low 10 bits the low surrogate
		var startHi = 0xD800 + ( offset1 >> 10 ),
			startLo = 0xDC00 + ( offset1 & 0x3FF ),
			endHi = 0xD800 + ( offset2 >> 10 ),
			endLo = 0xDC00 + ( offset2 & 0x3FF );
		/* eslint-enable no-bitwise */

		// Both ends share one high surrogate: a single box covers the whole range
		if ( startHi === endHi ) {
			return [ { hi: [ startHi, endHi ], lo: [ startLo, endLo ] } ];
		}

		/* eslint-disable no-bitwise */
		// first high surrogate whose characters are all >= ch1
		var fullFrom = 0xD800 + ( ( offset1 + 0x3FF ) >> 10 ),
			// last high surrogate whose characters are all <= ch2
			fullTo = 0xD800 + ( ( offset2 - 0x3FF ) >> 10 );
		/* eslint-enable no-bitwise */

		var result = [];
		if ( startHi < fullFrom ) {
			// ch1 starts part-way through its high surrogate
			result.push( { hi: [ startHi, startHi ], lo: [ startLo, LO_LAST ] } );
		}
		if ( fullFrom <= fullTo ) {
			result.push( { hi: [ fullFrom, fullTo ], lo: [ LO_FIRST, LO_LAST ] } );
		}
		if ( fullTo < endHi ) {
			// ch2 ends part-way through its high surrogate
			result.push( { hi: [ endHi, endHi ], lo: [ LO_FIRST, endLo ] } );
		}
		return result;
	}
 
	/**
	 * Make a regexp string for an array of Unicode character ranges.
	 *
	 * BMP characters and intervals are gathered into one character class, which comes
	 * first in the result. A single character above 0xFFFF is encoded as a surrogate pair,
	 * and an interval reaching above 0xFFFF as one or more surrogate pair ranges. It is an
	 * error for a range to overlap with the surrogate range 0xD800-0xDFFF (as this would
	 * only match ill-formed strings).
	 *
	 * @param {Array} ranges Array of ranges, each of which is a character (a number) or an
	 *  interval (a [min, max] array, both ends inclusive)
	 * @return {string} Regexp string for the disjunction of the ranges; empty if ranges is empty.
	 * @throws {Error} If a character is a surrogate or above 0x10FFFF, or if an interval is
	 *  reversed, reaches above 0x10FFFF or overlaps the surrogate range
	 */
	unicodeJS.charRangeArrayRegexp = function ( ranges ) {
		var characterClass = [], // list of (\uXXXX code unit or interval), for BMP
			disjunction = []; // list of regex strings, to be joined with '|'

		for ( var i = 0; i < ranges.length; i++ ) {
			var range = ranges[ i ];
			// Surrogate boxes belong to the current interval only. Starting from an empty
			// list on every iteration stops a BMP-only interval from re-appending the boxes
			// of an earlier non-BMP interval as duplicate alternatives.
			var boxes = [];

			// Handle single code unit
			if ( typeof range === 'number' ) {
				if ( range <= 0xFFFF ) {
					if ( range >= 0xD800 && range <= 0xDFFF ) {
						throw new Error( 'Surrogate: ' + range.toString( 16 ) );
					}
					characterClass.push( uEsc( range ) );
					continue;
				} else {
					// Handle single surrogate pair
					if ( range > 0x10FFFF ) {
						throw new Error( 'Character code too high: ' + range.toString( 16 ) );
					}
					/* eslint-disable no-bitwise */
					var hi = 0xD800 + ( ( range - 0x10000 ) >> 10 );
					var lo = 0xDC00 + ( ( range - 0x10000 ) & 0x3FF );
					/* eslint-enable no-bitwise */

					disjunction.push( uEsc( hi ) + uEsc( lo ) );
					continue;
				}
			}

			// Handle interval
			var min = range[ 0 ];
			var max = range[ 1 ];
			if ( min > max ) {
				throw new Error( min.toString( 16 ) + ' > ' + max.toString( 16 ) );
			}
			if ( max > 0x10FFFF ) {
				throw new Error( 'Character code too high: ' +
					max.toString( 16 ) );
			}
			if ( max >= 0xD800 && min <= 0xDFFF ) {
				throw new Error( 'range includes surrogates: ' +
					min.toString( 16 ) + '-' + max.toString( 16 ) );
			}
			if ( max <= 0xFFFF ) {
				// interval is entirely BMP
				characterClass.push( codeUnitRange( min, max ) );
			} else if ( min <= 0xFFFF ) {
				// interval is BMP and non-BMP
				characterClass.push( codeUnitRange( min, 0xFFFF ) );
				boxes = getCodeUnitBoxes( 0x10000, max );
			} else {
				// interval is entirely non-BMP
				boxes = getCodeUnitBoxes( min, max );
			}

			// append hi-lo surrogate space boxes as code unit range pairs
			for ( var j = 0; j < boxes.length; j++ ) {
				var box = boxes[ j ];
				var hiRange = codeUnitRange( box.hi[ 0 ], box.hi[ 1 ], true );
				var loRange = codeUnitRange( box.lo[ 0 ], box.lo[ 1 ], true );
				disjunction.push( hiRange + loRange );
			}
		}

		// prepend BMP character class to the disjunction
		if ( characterClass.length === 1 && !characterClass[ 0 ].match( /-/ ) ) {
			disjunction.unshift( characterClass[ 0 ] ); // single character
		} else if ( characterClass.length > 0 ) {
			disjunction.unshift( '[' + characterClass.join( '' ) + ']' );
		}
		return disjunction.join( '|' );
	};
}() );