All files unicodejs.graphemebreak.js

100% Statements 55/55
100% Branches 42/42
100% Functions 5/5
100% Lines 55/55

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165                  1x 1x 1x       1x 1x       1x   13x       1x   1x           1955x 14496x 1747x     208x                   1x 603x   1557x         1557x 603x     954x   954x       954x 3x         951x       200x         751x       7x     744x       6x     738x       3x               735x 385x     350x 11x         339x 339x 39x 7x 7x 7x 2x 2x   7x 4x               335x 335x 38x 38x   335x 8x       327x     1557x 1130x             427x     1557x        
/*!
 * UnicodeJS Grapheme Break module
 *
 * Implementation of Unicode 15.0.0 Default Grapheme Cluster Boundary Specification
 * http://www.unicode.org/reports/tr29/#Default_Grapheme_Cluster_Table
 *
 * @copyright 2013–2018 UnicodeJS team and others; see AUTHORS.txt
 * @license The MIT License (MIT); see LICENSE.txt
 */
( function () {
	var properties = unicodeJS.graphemebreakproperties,
		emojiProperties = unicodeJS.emojiproperties,
		/**
		 * @namespace unicodeJS.graphemebreak
		 */
		graphemebreak = unicodeJS.graphemebreak = {},
		patterns = {};
 
	var property;
	// build regexes
	for ( property in properties ) {
		// eslint-disable-next-line security/detect-non-literal-regexp
		patterns[ property ] = new RegExp(
			unicodeJS.charRangeArrayRegexp( properties[ property ] )
		);
	}
	for ( property in emojiProperties ) {
		// eslint-disable-next-line security/detect-non-literal-regexp
		patterns[ property ] = new RegExp(
			unicodeJS.charRangeArrayRegexp( emojiProperties[ property ] )
		);
	}
 
	function getProperty( codepoint ) {
		for ( property in patterns ) {
			if ( patterns[ property ].test( codepoint ) ) {
				return property;
			}
		}
		return null;
	}
 
	/**
	 * Split text into grapheme clusters
	 *
	 * @memberof unicodeJS.graphemebreak
	 * @param {string} text Text to split
	 * @return {string[]} Split text
	 */
	graphemebreak.splitClusters = function ( text ) {
		return text.split( /(?![\uDC00-\uDFFF])/g ).reduce( ( clusters, codepoint, i, codepoints ) => {
			function isBreak() {
				var lft = [];
 
				// Break at the start and end of text, unless the text is empty.
				// GB1: sot ÷ Any
				// GB2: Any ÷ eot
				if ( i === 0 || i === codepoints.length ) {
					return true;
				}
 
				lft.push( getProperty( codepoints[ i - 1 ] ) );
				// No rules currently require us to look ahead.
				var rgt = getProperty( codepoint );
 
				// Do not break between a CR and LF. Otherwise, break before and after controls.
				// GB3: CR × LF
				if ( lft[ 0 ] === 'CR' && rgt === 'LF' ) {
					return false;
				}
 
				// GB4: ( Control | CR | LF ) ÷
				// GB5: ÷ ( Control | CR | LF )
				if (
					[ 'Control', 'CR', 'LF' ].indexOf( lft[ 0 ] ) !== -1 ||
					[ 'Control', 'CR', 'LF' ].indexOf( rgt ) !== -1
				) {
					return true;
				}
 
				// Do not break Hangul syllable sequences.
				// GB6: L × ( L | V | LV | LVT )
				if (
					lft[ 0 ] === 'L' &&
					[ 'L', 'V', 'LV', 'LVT' ].indexOf( rgt ) !== -1
				) {
					return false;
				}
				// GB7: ( LV | V ) × ( V | T )
				if (
					[ 'LV', 'V' ].indexOf( lft[ 0 ] ) !== -1 &&
					[ 'V', 'T' ].indexOf( rgt ) !== -1
				) {
					return false;
				}
				// GB8: ( LVT | T ) × T
				if (
					[ 'LVT', 'T' ].indexOf( lft[ 0 ] ) !== -1 &&
					rgt === 'T'
				) {
					return false;
				}
 
				// Do not break before extending characters or ZWJ.
				// GB9 × ( Extend | ZWJ )
				// The GB9a and GB9b rules only apply to extended grapheme clusters:
				// Do not break before SpacingMarks, or after Prepend characters.
				// GB9a: × SpacingMark
				if ( [ 'Extend', 'ZWJ', 'SpacingMark' ].indexOf( rgt ) !== -1 ) {
					return false;
				}
				// GB9b: Prepend ×
				if ( lft[ 0 ] === 'Prepend' ) {
					return false;
				}
 
				// Do not break within emoji modifier sequences or emoji zwj sequences.
				// GB11: \p{Extended_Pictographic} Extend* ZWJ × \p{Extended_Pictographic}
				var l = 0;
				if ( rgt === 'ExtendedPictographic' ) {
					if ( lft[ l ] === 'ZWJ' ) {
						l++;
						lft[ l ] = getProperty( codepoints[ i - 1 - l ] );
						while ( lft[ l ] === 'Extend' ) {
							l++;
							lft[ l ] = getProperty( codepoints[ i - 1 - l ] );
						}
						if ( lft[ l ] === 'ExtendedPictographic' ) {
							return false;
						}
					}
				}
 
				// Do not break within emoji flag sequences. That is, do not break between regional indicator (RI) symbols if there is an odd number of RI characters before the break point.
				// GB12: sot (RI RI)* RI × RI
				// GB13: [^RI] (RI RI)* RI × RI
				l = 0;
				while ( lft[ l ] === 'RegionalIndicator' ) {
					l++;
					lft[ l ] = getProperty( codepoints[ i - 1 - l ] );
				}
				if ( rgt === 'RegionalIndicator' && l % 2 === 1 ) {
					return false;
				}
				// Otherwise, break everywhere.
				// GB999: Any ÷ Any
				return true;
			}
 
			if ( isBreak() ) {
				clusters.push( codepoint );
			} else {
				// TODO: This is not covered by tests, is it needed?
				// istanbul ignore next
				if ( !clusters.length ) {
					clusters.push( '' );
				}
				clusters[ clusters.length - 1 ] += codepoint;
			}
 
			return clusters;
		}, [] );
	};
}() );