Press n or j to go to the next uncovered block, b, p or k for the previous block.
/*!
 * UnicodeJS namespace
 *
 * @copyright 2013-2018 UnicodeJS team and others; see AUTHORS.txt
 * @license The MIT License (MIT); see LICENSE.txt
 */

( function () {

	/**
	 * Namespace for all UnicodeJS classes, static methods and static properties.
	 *
	 * NOTE: the namespace object itself is not created in this function; the
	 * assignments below require `unicodeJS` to already exist in an enclosing scope.
	 *
	 * @namespace unicodeJS
	 */

	/**
	 * Check if a code unit is the leading half of a surrogate pair
	 *
	 * @param {string} unit Code unit
	 * @return {boolean} True only if unit is a single code unit in 0xD800-0xDBFF
	 */
	unicodeJS.isLeadingSurrogate = function ( unit ) {
		// RegExp#test yields a strict boolean (String#match would leak a match array / null)
		return typeof unit === 'string' && /^[\uD800-\uDBFF]$/.test( unit );
	};

	/**
	 * Check if a code unit is the trailing half of a surrogate pair
	 *
	 * @param {string} unit Code unit
	 * @return {boolean} True only if unit is a single code unit in 0xDC00-0xDFFF
	 */
	unicodeJS.isTrailingSurrogate = function ( unit ) {
		return typeof unit === 'string' && /^[\uDC00-\uDFFF]$/.test( unit );
	};

	/**
	 * Write a UTF-16 code unit as a javascript string literal.
	 *
	 * @memberof unicodeJS
	 * @private
	 * @param {number} codeUnit integer between 0x0000 and 0xFFFF
	 * @return {string} String literal ('\u' followed by 4 hex digits)
	 */
	function uEsc( codeUnit ) {
		// Adding 0x10000 guarantees at least five hex digits, so the last four
		// are the zero-padded value of codeUnit
		return '\\u' + ( codeUnit + 0x10000 ).toString( 16 ).slice( -4 );
	}

	/**
	 * Return a regexp string for the code unit range min-max
	 *
	 * @memberof unicodeJS
	 * @private
	 * @param {number} min the minimum code unit in the range.
	 * @param {number} max the maximum code unit in the range.
	 * @param {boolean} [bracket] If true, then wrap range in [ ... ]
	 * @return {string} Regexp string which matches the range
	 */
	function codeUnitRange( min, max, bracket ) {
		if ( min === max ) {
			// single code unit: never bracket
			return uEsc( min );
		}
		var value = uEsc( min ) + '-' + uEsc( max );
		if ( bracket ) {
			return '[' + value + ']';
		}
		return value;
	}

	/**
	 * Get a list of boxes in hi-lo surrogate space, corresponding to the given character range
	 *
	 * A box {hi: [x, y], lo: [z, w]} represents a regex [x-y][z-w] to match a surrogate pair
	 *
	 * Suppose ch1 and ch2 have surrogate pairs (hi1, lo1) and (hi2, lo2).
	 * Then the range of chars from ch1 to ch2 can be represented as the
	 * disjunction of three code unit ranges:
	 *
	 *     [hi1 - hi1][lo1 - 0xDFFF]
	 *      |
	 *     [hi1+1 - hi2-1][0xDC00 - 0xDFFF]
	 *      |
	 *     [hi2 - hi2][0xDC00 - lo2]
	 *
	 * Often the notation can be optimised (e.g. when hi1 == hi2).
	 *
	 * @memberof unicodeJS
	 * @private
	 * @param {number} ch1 The min character of the range; must be over 0xFFFF
	 * @param {number} ch2 The max character of the range; must be at least ch1
	 * @return {Array.<Object>} A list of boxes where each box is an object with two properties: 'hi' and 'lo'.
	 *                          'hi' is an array of two numbers representing the range of the high surrogate.
	 *                          'lo' is an array of two numbers representing the range of the low surrogate.
	 */
	function getCodeUnitBoxes( ch1, ch2 ) {
		var loMin = 0xDC00;
		var loMax = 0xDFFF;

		// hi and lo surrogates for ch1
		/* eslint-disable no-bitwise */
		var hi1 = 0xD800 + ( ( ch1 - 0x10000 ) >> 10 );
		var lo1 = 0xDC00 + ( ( ch1 - 0x10000 ) & 0x3FF );

		// hi and lo surrogates for ch2
		var hi2 = 0xD800 + ( ( ch2 - 0x10000 ) >> 10 );
		var lo2 = 0xDC00 + ( ( ch2 - 0x10000 ) & 0x3FF );
		/* eslint-enable no-bitwise */

		if ( hi1 === hi2 ) {
			// Both ends share a high surrogate: a single box suffices
			return [ { hi: [ hi1, hi2 ], lo: [ lo1, lo2 ] } ];
		}

		var boxes = [];

		/* eslint-disable no-bitwise */
		// minimum hi surrogate which only represents characters >= ch1
		var hiMinAbove = 0xD800 + ( ( ch1 - 0x10000 + 0x3FF ) >> 10 );
		// maximum hi surrogate which only represents characters <= ch2
		var hiMaxBelow = 0xD800 + ( ( ch2 - 0x10000 - 0x3FF ) >> 10 );
		/* eslint-enable no-bitwise */

		if ( hi1 < hiMinAbove ) {
			// ch1 starts part-way through its hi surrogate's block
			boxes.push( { hi: [ hi1, hi1 ], lo: [ lo1, loMax ] } );
		}
		if ( hiMinAbove <= hiMaxBelow ) {
			// hi surrogates whose whole lo range lies inside [ch1, ch2]
			boxes.push( { hi: [ hiMinAbove, hiMaxBelow ], lo: [ loMin, loMax ] } );
		}
		if ( hiMaxBelow < hi2 ) {
			// ch2 ends part-way through its hi surrogate's block
			boxes.push( { hi: [ hi2, hi2 ], lo: [ loMin, lo2 ] } );
		}
		return boxes;
	}

	/**
	 * Make a regexp string for an array of Unicode character ranges.
	 *
	 * If either character in a range is above 0xFFFF, then the range will
	 * be encoded as multiple surrogate pair ranges. It is an error for a
	 * range to overlap with the surrogate range 0xD800-0xDFFF (as this would
	 * only match ill-formed strings).
	 *
	 * @param {Array} ranges Array of ranges, each of which is a character or an interval
	 * @return {string} Regexp string for the disjunction of the ranges.
	 * @throws {Error} If a character is a surrogate or above 0x10FFFF, or if an
	 *  interval is reversed, exceeds 0x10FFFF or overlaps the surrogate range
	 */
	unicodeJS.charRangeArrayRegexp = function ( ranges ) {
		var boxes,
			characterClass = [], // list of (\uXXXX code unit or interval), for BMP
			disjunction = []; // list of regex strings, to be joined with '|'

		for ( var i = 0; i < ranges.length; i++ ) {
			var range = ranges[ i ];

			// Handle single code unit
			if ( typeof range === 'number' ) {
				if ( range <= 0xFFFF ) {
					if ( range >= 0xD800 && range <= 0xDFFF ) {
						throw new Error( 'Surrogate: ' + range.toString( 16 ) );
					}
					characterClass.push( uEsc( range ) );
					continue;
				}

				// Handle single surrogate pair
				if ( range > 0x10FFFF ) {
					throw new Error( 'Character code too high: ' + range.toString( 16 ) );
				}
				/* eslint-disable no-bitwise */
				var hi = 0xD800 + ( ( range - 0x10000 ) >> 10 );
				var lo = 0xDC00 + ( ( range - 0x10000 ) & 0x3FF );
				/* eslint-enable no-bitwise */
				disjunction.push( uEsc( hi ) + uEsc( lo ) );
				continue;
			}

			// Handle interval
			var min = range[ 0 ];
			var max = range[ 1 ];
			if ( min > max ) {
				throw new Error( min.toString( 16 ) + ' > ' + max.toString( 16 ) );
			}
			if ( max > 0x10FFFF ) {
				throw new Error( 'Character code too high: ' + max.toString( 16 ) );
			}
			if ( max >= 0xD800 && min <= 0xDFFF ) {
				throw new Error( 'range includes surrogates: ' + min.toString( 16 ) + '-' + max.toString( 16 ) );
			}

			// Start every interval with no boxes, so an all-BMP interval cannot
			// re-emit the surrogate boxes computed for an earlier interval
			boxes = [];
			if ( max <= 0xFFFF ) {
				// interval is entirely BMP
				characterClass.push( codeUnitRange( min, max ) );
			} else if ( min <= 0xFFFF ) {
				// interval is BMP and non-BMP
				characterClass.push( codeUnitRange( min, 0xFFFF ) );
				boxes = getCodeUnitBoxes( 0x10000, max );
			} else {
				// interval is entirely non-BMP
				boxes = getCodeUnitBoxes( min, max );
			}

			// append hi-lo surrogate space boxes as code unit range pairs
			for ( var j = 0; j < boxes.length; j++ ) {
				var box = boxes[ j ];
				var hiRange = codeUnitRange( box.hi[ 0 ], box.hi[ 1 ], true );
				var loRange = codeUnitRange( box.lo[ 0 ], box.lo[ 1 ], true );
				disjunction.push( hiRange + loRange );
			}
		}

		// prepend BMP character class to the disjunction
		if ( characterClass.length === 1 && !characterClass[ 0 ].match( /-/ ) ) {
			disjunction.unshift( characterClass[ 0 ] ); // single character
		} else if ( characterClass.length > 0 ) {
			disjunction.unshift( '[' + characterClass.join( '' ) + ']' );
		}
		return disjunction.join( '|' );
	};
}() );