All files unicodejs.graphemebreak.js

100% Statements 55/55
100% Branches 42/42
100% Functions 5/5
100% Lines 55/55
Press n or j to go to the next uncovered block, b, p or k for the previous block.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165  
 
 
 
 
 
 
 
 
1x
1x
1x
 
 
 
1x
1x
 
 
 
1x
 
13x
 
 
 
1x
 
1x
 
 
 
 
 
1955x
14496x
1747x
 
 
208x
 
 
 
 
 
 
 
 
 
1x
603x
 
1557x
 
 
 
 
1557x
603x
 
 
954x
 
954x
 
 
 
954x
3x
 
 
 
 
951x
 
 
 
200x
 
 
 
 
751x
 
 
 
7x
 
 
744x
 
 
 
6x
 
 
738x
 
 
 
3x
 
 
 
 
 
 
 
735x
385x
 
 
350x
11x
 
 
 
 
339x
339x
39x
7x
7x
7x
2x
2x
 
7x
4x
 
 
 
 
 
 
 
335x
335x
38x
38x
 
335x
8x
 
 
 
327x
 
 
1557x
1130x
 
 
 
 
 
 
427x
 
 
1557x
 
 
 
  /*!
 * UnicodeJS Grapheme Break module
 *
 * Implementation of Unicode 15.0.0 Default Grapheme Cluster Boundary Specification
 * http://www.unicode.org/reports/tr29/#Default_Grapheme_Cluster_Table
 *
 * @copyright 2013–2018 UnicodeJS team and others; see AUTHORS.txt
 * @license The MIT License (MIT); see LICENSE.txt
 */
( function () {
	// Range tables read from the unicodeJS global, keyed by property name
	const properties = unicodeJS.graphemebreakproperties;
	const emojiProperties = unicodeJS.emojiproperties;
	/**
	 * @namespace unicodeJS.graphemebreak
	 */
	const graphemebreak = unicodeJS.graphemebreak = {};
	// Property name -> RegExp compiled from that property's range table
	const patterns = {};

	// Loop variable for the table walk below; kept at closure scope so that
	// other functions in this closure can keep referring to it.
	let property;

	// Build regexes: grapheme break properties are registered first and emoji
	// properties second, which fixes the insertion order of `patterns`.
	[ properties, emojiProperties ].forEach( ( ranges ) => {
		for ( property in ranges ) {
			// eslint-disable-next-line security/detect-non-literal-regexp
			patterns[ property ] = new RegExp(
				unicodeJS.charRangeArrayRegexp( ranges[ property ] )
			);
		}
	} );
 
	/**
	 * Look up the break property of a code point
	 *
	 * The patterns are tried in their insertion order and the name of the first
	 * one that matches is returned.
	 *
	 * @private
	 * @param {string} [codepoint] Code point, as a string. Anything that is not a
	 *  string (e.g. the undefined produced by reading before the start of the
	 *  text) has no property.
	 * @return {string|null} Property name, or null if no pattern matches
	 */
	function getProperty( codepoint ) {
		// RegExp#test would coerce undefined to the string "undefined" and
		// match it against the patterns, so reject non-strings up front.
		if ( typeof codepoint !== 'string' ) {
			return null;
		}
		// Use a local loop variable so that a lookup does not overwrite state
		// shared with the rest of the closure.
		for ( const name in patterns ) {
			if ( patterns[ name ].test( codepoint ) ) {
				return name;
			}
		}
		return null;
	}
 
	/**
	 * Check whether a break property is one that GB4 and GB5 always break around
	 *
	 * @private
	 * @param {string|null} prop Property name, or null for a code point without one
	 * @return {boolean} The property is Control, CR or LF
	 */
	function isControl( prop ) {
		return prop === 'Control' || prop === 'CR' || prop === 'LF';
	}

	/**
	 * Decide whether a grapheme cluster boundary precedes a code point
	 *
	 * Applies rules GB1 and GB3 to GB999 in order; the first rule that matches
	 * decides. GB2 (Any ÷ eot) needs no check here because this is only asked
	 * about positions that are followed by a code point.
	 *
	 * @private
	 * @param {Array.<string|null>} props Break property of every code point, in text order
	 * @param {number} i Index of the code point to the right of the candidate boundary
	 * @param {number} riRun Length of the run of RegionalIndicator code points ending at i - 1
	 * @return {boolean} There is a boundary between code points i - 1 and i
	 */
	function isClusterBreak( props, i, riRun ) {
		// Break at the start of text.
		// GB1: sot ÷ Any
		if ( i === 0 ) {
			return true;
		}

		const lft = props[ i - 1 ];
		// No rules currently require us to look ahead.
		const rgt = props[ i ];

		// Do not break between a CR and LF. Otherwise, break before and after controls.
		// GB3: CR × LF
		if ( lft === 'CR' && rgt === 'LF' ) {
			return false;
		}
		// GB4: ( Control | CR | LF ) ÷
		// GB5: ÷ ( Control | CR | LF )
		if ( isControl( lft ) || isControl( rgt ) ) {
			return true;
		}

		// Do not break Hangul syllable sequences.
		// GB6: L × ( L | V | LV | LVT )
		if (
			lft === 'L' &&
			( rgt === 'L' || rgt === 'V' || rgt === 'LV' || rgt === 'LVT' )
		) {
			return false;
		}
		// GB7: ( LV | V ) × ( V | T )
		if ( ( lft === 'LV' || lft === 'V' ) && ( rgt === 'V' || rgt === 'T' ) ) {
			return false;
		}
		// GB8: ( LVT | T ) × T
		if ( ( lft === 'LVT' || lft === 'T' ) && rgt === 'T' ) {
			return false;
		}

		// Do not break before extending characters, ZWJ or SpacingMarks.
		// GB9: × ( Extend | ZWJ )
		// GB9a: × SpacingMark
		if ( rgt === 'Extend' || rgt === 'ZWJ' || rgt === 'SpacingMark' ) {
			return false;
		}
		// Do not break after Prepend characters.
		// GB9b: Prepend ×
		if ( lft === 'Prepend' ) {
			return false;
		}

		// Do not break within emoji modifier sequences or emoji zwj sequences.
		// GB11: \p{Extended_Pictographic} Extend* ZWJ × \p{Extended_Pictographic}
		if ( rgt === 'ExtendedPictographic' && lft === 'ZWJ' ) {
			// Walk back over the Extend* run in front of the ZWJ. Stepping past
			// the start of the text reads undefined, which ends the walk.
			let j = i - 2;
			while ( props[ j ] === 'Extend' ) {
				j--;
			}
			if ( props[ j ] === 'ExtendedPictographic' ) {
				return false;
			}
		}

		// Do not break within emoji flag sequences. That is, do not break between
		// regional indicator (RI) symbols if there is an odd number of RI
		// characters before the break point.
		// GB12: sot (RI RI)* RI × RI
		// GB13: [^RI] (RI RI)* RI × RI
		if ( rgt === 'RegionalIndicator' && riRun % 2 === 1 ) {
			return false;
		}

		// Otherwise, break everywhere.
		// GB999: Any ÷ Any
		return true;
	}

	/**
	 * Split text into grapheme clusters
	 *
	 * The text is first cut into units at every position that is not followed
	 * by a low surrogate (so a surrogate pair stays in one unit), each unit is
	 * classified once, and units are then joined wherever the boundary rules
	 * forbid a break.
	 *
	 * @memberof unicodeJS.graphemebreak
	 * @param {string} text Text to split
	 * @return {string[]} Split text; an empty array for empty text
	 */
	graphemebreak.splitClusters = function ( text ) {
		const codepoints = text.split( /(?![\uDC00-\uDFFF])/g );
		// Classify every unit exactly once instead of on each rule lookup.
		const props = codepoints.map( ( codepoint ) => getProperty( codepoint ) );
		const clusters = [];
		// Number of consecutive RegionalIndicator units directly before index i,
		// kept up to date as we go so that GB12/GB13 need no backward scan.
		let riRun = 0;

		for ( let i = 0; i < codepoints.length; i++ ) {
			if ( isClusterBreak( props, i, riRun ) ) {
				clusters.push( codepoints[ i ] );
			} else {
				// Index 0 always starts a cluster (GB1), so there is always a
				// previous cluster to extend here.
				clusters[ clusters.length - 1 ] += codepoints[ i ];
			}
			riRun = props[ i ] === 'RegionalIndicator' ? riRun + 1 : 0;
		}

		return clusters;
	};
}() );