/*!
* UnicodeJS Word Break module
*
* Implementation of Unicode 15.0.0 Default Word Boundary Specification
* http://www.unicode.org/reports/tr29/#Default_Word_Boundaries
*
* @copyright 2013– UnicodeJS team and others; see AUTHORS.txt
* @license The MIT License (MIT); see LICENSE.txt
*/
/* eslint-disable no-fallthrough */
( function () {
// Range tables that are turned into regexes below
const properties = unicodeJS.wordbreakproperties;
const emojiProperties = unicodeJS.emojiproperties;
/**
 * @namespace unicodeJS.wordbreak
 */
const wordbreak = {};
unicodeJS.wordbreak = wordbreak;
// Maps a property name to a RegExp built from that property's range table
const patterns = {};
// Matches the property names that rule WB4 skips over (Format, Extend, ZWJ)
const ZWJ_FE = /^(Format|Extend|ZWJ)$/;
// Build the regexes: word break properties are registered first, emoji
// properties second, so iterating `patterns` visits them in that order.
for ( const rangeTable of [ properties, emojiProperties ] ) {
	for ( const property in rangeTable ) {
		// eslint-disable-next-line security/detect-non-literal-regexp
		patterns[ property ] = new RegExp(
			unicodeJS.charRangeArrayRegexp( rangeTable[ property ] )
		);
	}
}
/**
 * Look up the property name whose pattern matches a codepoint.
 *
 * The patterns are tried in the order they were registered and the first
 * one that matches wins, so at most one property name is reported even if
 * several patterns would match.
 *
 * See http://www.unicode.org/reports/tr29/#Word_Boundaries
 *
 * @memberof unicodeJS.wordbreak
 * @private
 * @param {string} codepoint The codepoint
 * @return {string|null} The name of the first matching property (a key of `patterns`),
 *  or null when no pattern matches
 */
function getProperty( codepoint ) {
	let matched = null;
	for ( const name in patterns ) {
		if ( patterns[ name ].test( codepoint ) ) {
			matched = name;
			break;
		}
	}
	return matched;
}
/**
 * Find the next word break offset.
 *
 * Delegates to moveBreakOffset() with a direction of +1.
 *
 * @memberof unicodeJS.wordbreak
 * @param {unicodeJS.TextString} string TextString
 * @param {number} pos Character position
 * @param {boolean} [onlyAlphaNumeric=false] When set, ignores a break if the previous character is not alphaNumeric
 * @return {number} Returns the next offset which is a word break
 */
wordbreak.nextBreakOffset = function ( string, pos, onlyAlphaNumeric ) {
	const forwards = 1;
	return this.moveBreakOffset( forwards, string, pos, onlyAlphaNumeric );
};
/**
 * Find the previous word break offset.
 *
 * Delegates to moveBreakOffset() with a direction of -1.
 *
 * @memberof unicodeJS.wordbreak
 * @param {unicodeJS.TextString} string TextString
 * @param {number} pos Character position
 * @param {boolean} [onlyAlphaNumeric=false] When set, ignores a break if the character just
 *  stepped over (the one to the right of the candidate offset) is not alphaNumeric
 * @return {number} Returns the previous offset which is a word break
 */
wordbreak.prevBreakOffset = function ( string, pos, onlyAlphaNumeric ) {
	const backwards = -1;
	return this.moveBreakOffset( backwards, string, pos, onlyAlphaNumeric );
};
/**
 * Find the next word break offset in a specified direction.
 *
 * Walks one codepoint at a time away from pos until isBreak() reports a
 * boundary or there is no further codepoint in that direction, in which
 * case the last offset reached is returned.
 *
 * @memberof unicodeJS.wordbreak
 * @param {number} direction Direction to search in, should be plus or minus one
 * @param {unicodeJS.TextString} string TextString
 * @param {number} pos Character position
 * @param {boolean} [onlyAlphaNumeric=false] When set, a break is only accepted if the codepoint
 *  just stepped over is ALetter, Numeric, Katakana or HebrewLetter
 * @return {number} Returns the next offset which is word break in the specified direction
 */
wordbreak.moveBreakOffset = function ( direction, string, pos, onlyAlphaNumeric ) {
	const alphaNumericProperties = new Set( [ 'ALetter', 'Numeric', 'Katakana', 'HebrewLetter' ] );
	const readRight = string.nextCodepoint.bind( string );
	const readLeft = string.prevCodepoint.bind( string );
	const forwards = direction > 0;
	// "Ahead" is the side we are travelling towards, "behind" the side we came from;
	// when moving backwards that means reading the character to the left of the cursor.
	const readAhead = forwards ? readRight : readLeft;
	const readBehind = forwards ? readLeft : readRight;
	let offset = pos;
	for ( ;; ) {
		const codepoint = readAhead( offset );
		if ( codepoint === null ) {
			// Ran out of text
			break;
		}
		offset += codepoint.length * direction;
		if ( !this.isBreak( string, offset ) ) {
			continue;
		}
		// A boundary was found; accept it unless the alpha-numeric filter rejects it
		if ( !onlyAlphaNumeric || alphaNumericProperties.has( getProperty( readBehind( offset ) ) ) ) {
			break;
		}
	}
	return offset;
};
/**
 * Evaluates whether a position within some text is a word boundary.
 *
 * The rules of the Unicode default word boundary specification (WB1 to WB16,
 * then WB999) are tried in order; the first rule that applies decides the result.
 *
 * The text object elements may be codepoints or code units
 *
 * @memberof unicodeJS.wordbreak
 * @param {unicodeJS.TextString} string TextString
 * @param {number} pos Character position
 * @return {boolean} Is the position a word boundary
 */
wordbreak.isBreak = function ( string, pos ) {
	// lft[ 0 ] / rgt[ 0 ]: property of the nearest codepoint on each side of pos
	// (on the left, after WB4 has skipped Format/Extend/ZWJ).
	// lft[ 1 ] / rgt[ 1 ]: property of the next non-ignorable codepoint further out.
	const lft = [], rgt = [];
	// Number of positions already consumed to the left / right of pos
	let l = 0, r = 0;
	// True for the property names that WB4 tells us to skip over
	const isIgnorable = ( property ) => !!property && ZWJ_FE.test( property );
	// Table 3a. Word_Break Rule Macros
	// Macro Represents
	// AHLetter (ALetter | Hebrew_Letter)
	// MidNumLetQ (MidNumLet | Single_Quote)
	// Get some context
	let nextCodepoint = string.nextCodepoint( pos + r );
	let prevCodepoint = string.prevCodepoint( pos - l );
	// Break at the start and end of text (this also reports a break when
	// there is no codepoint on either side).
	// WB1: sot ÷ Any
	// WB2: Any ÷ eot
	if ( nextCodepoint === null || prevCodepoint === null ) {
		return true;
	}
	// Do not break inside surrogate pair
	if ( string.isMidSurrogate( pos ) ) {
		return false;
	}
	// Store context
	rgt.push( getProperty( nextCodepoint ) );
	lft.push( getProperty( prevCodepoint ) );
	r += nextCodepoint.length;
	l += prevCodepoint.length;
	// getProperty() reports only the first matching property, and the Word_Break
	// patterns are registered before the emoji ones. A codepoint that is both
	// Extended_Pictographic and in a Word_Break class (e.g. U+1F170, which is also
	// ALetter) would therefore never be seen as pictographic, so WB3c tests the
	// Extended_Pictographic pattern directly when it is available.
	const rgtIsPictographic = rgt[ 0 ] === 'ExtendedPictographic' || (
		!!patterns.ExtendedPictographic &&
		patterns.ExtendedPictographic.test( nextCodepoint )
	);
	switch ( true ) {
		// Do not break within CRLF.
		// WB3: CR × LF
		case lft[ 0 ] === 'CR' && rgt[ 0 ] === 'LF':
			return false;
		// Otherwise break before and after Newlines (including CR and LF)
		// WB3a: (Newline | CR | LF) ÷
		case lft[ 0 ] === 'Newline' || lft[ 0 ] === 'CR' || lft[ 0 ] === 'LF':
		// WB3b: ÷ (Newline | CR | LF)
		case rgt[ 0 ] === 'Newline' || rgt[ 0 ] === 'CR' || rgt[ 0 ] === 'LF':
			return true;
		// Do not break within emoji zwj sequences.
		// WB3c: ZWJ × \p{Extended_Pictographic}
		case lft[ 0 ] === 'ZWJ' && rgtIsPictographic:
			return false;
		// Keep horizontal whitespace together.
		// WB3d: WSegSpace × WSegSpace
		case lft[ 0 ] === 'WSegSpace' && rgt[ 0 ] === 'WSegSpace':
			return false;
	}
	// Ignore Format and Extend characters, except after sot, CR, LF, and Newline.
	// (See Section 6.2, Replacing Ignore Rules.) This also has the effect of: Any × (Format | Extend | ZWJ)
	// WB4: X (Extend | Format | ZWJ)* → X
	if ( isIgnorable( rgt[ 0 ] ) ) {
		// The Extend|Format character is to the right, so it is attached
		// to a character to the left, don't split here
		return false;
	}
	// We've reached the end of an ZWJ_FE sequence, collapse it
	while ( isIgnorable( lft[ 0 ] ) ) {
		if ( pos - l <= 0 ) {
			// start of document
			return true;
		}
		prevCodepoint = string.prevCodepoint( pos - l );
		// TODO: This is not covered by tests, see T264904
		// istanbul ignore next
		if ( prevCodepoint === null ) {
			// start of document?
			return true;
		}
		lft[ 0 ] = getProperty( prevCodepoint );
		l += prevCodepoint.length;
	}
	// Do not break between most letters.
	// WB5: AHLetter × AHLetter
	if (
		( lft[ 0 ] === 'ALetter' || lft[ 0 ] === 'HebrewLetter' ) &&
		( rgt[ 0 ] === 'ALetter' || rgt[ 0 ] === 'HebrewLetter' )
	) {
		return false;
	}
	// Some tests beyond this point require more context, as per WB4 ignore ZWJ_FE.
	// Find the next non-ignorable property to the right (null at end of text)...
	let nextProperty;
	do {
		nextCodepoint = string.nextCodepoint( pos + r );
		if ( nextCodepoint === null ) {
			nextProperty = null;
			break;
		}
		r += nextCodepoint.length;
		nextProperty = getProperty( nextCodepoint );
	} while ( isIgnorable( nextProperty ) );
	rgt.push( nextProperty );
	// ...and the next non-ignorable property to the left (null at start of text)
	let prevProperty;
	do {
		prevCodepoint = string.prevCodepoint( pos - l );
		if ( prevCodepoint === null ) {
			prevProperty = null;
			break;
		}
		l += prevCodepoint.length;
		prevProperty = getProperty( prevCodepoint );
	} while ( isIgnorable( prevProperty ) );
	lft.push( prevProperty );
	switch ( true ) {
		// Do not break letters across certain punctuation.
		// WB6: AHLetter × (MidLetter | MidNumLetQ) AHLetter
		case ( lft[ 0 ] === 'ALetter' || lft[ 0 ] === 'HebrewLetter' ) &&
			( rgt[ 1 ] === 'ALetter' || rgt[ 1 ] === 'HebrewLetter' ) &&
			( rgt[ 0 ] === 'MidLetter' || rgt[ 0 ] === 'MidNumLet' || rgt[ 0 ] === 'SingleQuote' ):
		// WB7: AHLetter (MidLetter | MidNumLetQ) × AHLetter
		case ( rgt[ 0 ] === 'ALetter' || rgt[ 0 ] === 'HebrewLetter' ) &&
			( lft[ 1 ] === 'ALetter' || lft[ 1 ] === 'HebrewLetter' ) &&
			( lft[ 0 ] === 'MidLetter' || lft[ 0 ] === 'MidNumLet' || lft[ 0 ] === 'SingleQuote' ):
		// WB7a: Hebrew_Letter × Single_Quote
		case lft[ 0 ] === 'HebrewLetter' && rgt[ 0 ] === 'SingleQuote':
		// WB7b: Hebrew_Letter × Double_Quote Hebrew_Letter
		case lft[ 0 ] === 'HebrewLetter' && rgt[ 0 ] === 'DoubleQuote' && rgt[ 1 ] === 'HebrewLetter':
		// WB7c: Hebrew_Letter Double_Quote × Hebrew_Letter
		case lft[ 1 ] === 'HebrewLetter' && lft[ 0 ] === 'DoubleQuote' && rgt[ 0 ] === 'HebrewLetter':
		// Do not break within sequences of digits, or digits adjacent to letters (“3a”, or “A3”).
		// WB8: Numeric × Numeric
		case lft[ 0 ] === 'Numeric' && rgt[ 0 ] === 'Numeric':
		// WB9: AHLetter × Numeric
		case ( lft[ 0 ] === 'ALetter' || lft[ 0 ] === 'HebrewLetter' ) && rgt[ 0 ] === 'Numeric':
		// WB10: Numeric × AHLetter
		case lft[ 0 ] === 'Numeric' && ( rgt[ 0 ] === 'ALetter' || rgt[ 0 ] === 'HebrewLetter' ):
			return false;
		// Do not break within sequences, such as “3.2” or “3,456.789”.
		// WB11: Numeric (MidNum | MidNumLetQ) × Numeric
		case rgt[ 0 ] === 'Numeric' && lft[ 1 ] === 'Numeric' &&
			( lft[ 0 ] === 'MidNum' || lft[ 0 ] === 'MidNumLet' || lft[ 0 ] === 'SingleQuote' ):
		// WB12: Numeric × (MidNum | MidNumLetQ) Numeric
		case lft[ 0 ] === 'Numeric' && rgt[ 1 ] === 'Numeric' &&
			( rgt[ 0 ] === 'MidNum' || rgt[ 0 ] === 'MidNumLet' || rgt[ 0 ] === 'SingleQuote' ):
			return false;
		// Do not break between Katakana.
		// WB13: Katakana × Katakana
		case lft[ 0 ] === 'Katakana' && rgt[ 0 ] === 'Katakana':
			return false;
		// Do not break from extenders.
		// WB13a: (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
		case rgt[ 0 ] === 'ExtendNumLet' &&
			( lft[ 0 ] === 'ALetter' || lft[ 0 ] === 'HebrewLetter' || lft[ 0 ] === 'Numeric' || lft[ 0 ] === 'Katakana' || lft[ 0 ] === 'ExtendNumLet' ):
		// WB13b: ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana)
		case lft[ 0 ] === 'ExtendNumLet' &&
			( rgt[ 0 ] === 'ALetter' || rgt[ 0 ] === 'HebrewLetter' || rgt[ 0 ] === 'Numeric' || rgt[ 0 ] === 'Katakana' ):
			return false;
	}
	// Do not break within emoji flag sequences. That is, do not break between regional indicator (RI) symbols if there is an odd number of RI characters before the break point.
	// WB15: ^ (RI RI)* RI × RI
	// WB16: [^RI] (RI RI)* RI × RI
	if ( lft[ 0 ] === 'RegionalIndicator' && rgt[ 0 ] === 'RegionalIndicator' ) {
		// Count RIs on the left, walking back from pos and skipping over
		// Format/Extend/ZWJ until some other kind of codepoint (or sot) is hit
		let regional = 0;
		let n = 0;
		let scannedProperty;
		do {
			const scannedCodepoint = string.prevCodepoint( pos - n );
			if ( scannedCodepoint === null ) {
				break;
			}
			n += scannedCodepoint.length;
			scannedProperty = getProperty( scannedCodepoint );
			if ( scannedProperty === 'RegionalIndicator' ) {
				regional++;
			}
		} while ( scannedProperty === 'RegionalIndicator' || isIgnorable( scannedProperty ) );
		if ( regional % 2 === 1 ) {
			return false;
		}
	}
	// Otherwise, break everywhere (including around ideographs).
	// WB999: Any ÷ Any
	return true;
};
}() );