All files / src/dm ve.dm.TextFinder.js

96.55% Statements 112/116
86.66% Branches 39/45
100% Functions 14/14
96.36% Lines 106/110

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272              1x 44x     1x           1x 200x 200x 12x 12x     200x                 1x                         1x 20x 20x 20x 20x     20x 11x   9x     20x     1x         1x           96x 96x     96x 783x 783x 164x 164x 30x 30x 30x       96x                     1x 5x   5x 5x 5x 1x   30x   5x 5x 5x 32x 32x       1x                     1x     64x   64x 64x     61x     3x   3x 3x     3x   1x 1x       3x           1x 44x       44x 44x 38x     44x   44x 44x 246x 246x 34x 34x 34x 32x 32x   34x 34x           44x                 1x 19x 19x     19x 19x     1x         1x 60x   60x 60x 58x     58x         8x 8x     50x 50x     19x     60x                   1x 1x 1x 1x     1x         1x 20x 20x 10x     10x   20x 20x       20x    
/**
 * Finder of text content within lines of text
 *
 * @constructor
 * @param {Object} [options] Search options
 * @param {boolean} [options.wholeWord] Only match whole-word occurrences
 */
ve.dm.TextFinder = function VeDmTextFinder( options ) {
	this.options = options;
};
 
OO.initClass( ve.dm.TextFinder );
 
/**
 * @param {string} line The line of text to match
 * @return {Array[]} Match ranges [ [ start1, end1 ], ... ]
 */
ve.dm.TextFinder.prototype.find = function ( line ) {
	let ranges = this.findRanges( line );
	if ( this.options.wholeWord ) {
		const dataString = new ve.dm.DataString( [ ...line ] );
		ranges = ranges.filter( ( range ) => unicodeJS.wordbreak.isBreak( dataString, range[ 0 ] ) &&
			unicodeJS.wordbreak.isBreak( dataString, range[ 1 ] ) );
	}
	return ranges;
};
 
/**
 * Find the matching ranges, without checking for word boundaries
 *
 * @param {string} line The line of text to match
 * @return {Array[]} Match ranges [ [ start1, end1 ], ... ]
 */
ve.dm.TextFinder.prototype.findRanges = null;
 
/**
 * @constructor
 * @param {string} query Text to find
 * @param {Object} [options] Search options
 * @param {boolean} [options.lang='unk'] Language for Intl.Collator#compare
 * @param {boolean} [options.caseSensitiveString] Case sensitive search for a string query
 * @param {boolean} [options.diacriticInsensitiveString] Diacritic insensitive search for a string query
 *  Only works in browsers which support the Internationalization API
 * @param {boolean} [options.noOverlaps] Avoid overlapping matches
 * @return {ve.Range[]} List of ranges where the string was found
 */
ve.dm.StringTextFinder = function VeDmStringTextFinder( query, options ) {
	ve.dm.StringTextFinder.super.call( this );
	this.query = query;
	this.options = options;
	const lang = options.lang || 'unk';
 
	let sensitivity;
	if ( this.options.diacriticInsensitiveString ) {
		sensitivity = this.options.caseSensitiveString ? 'case' : 'base';
	} else {
		sensitivity = this.options.caseSensitiveString ? 'variant' : 'accent';
	}
	// Intl is only used browser clients
	this.compare = new Intl.Collator( lang, { sensitivity } ).compare;
};
 
OO.inheritClass( ve.dm.StringTextFinder, ve.dm.TextFinder );
 
/**
 * @inheritdoc
 */
ve.dm.StringTextFinder.prototype.findRanges = function ( line ) {
	// FIXME: For case-insensitive search, character-wise comparison is not i18n-safe. and
	// it cannot be assumed that the match will have the same string length as the search
	// query. E.g. the following compares equal:
	// collator = new Intl.Collator( 'en', { sensitivity: 'base' } );
	// collator.compare( '\u0130', '\u0069\u0307' );
	const qLen = this.query.length;
	const ranges = [];
	// Iterate up to (and including) offset textLength - queryLength. Beyond that point
	// there is not enough room for the query to exist
	for ( let offset = 0, l = line.length - qLen; offset <= l; offset++ ) {
		let j = 0;
		while ( this.compare( line[ offset + j ], this.query[ j ] ) === 0 ) {
			j++;
			if ( j === qLen ) {
				ranges.push( [ offset, offset + qLen ] );
				offset += this.options.noOverlaps ? qLen - 1 : 0;
				break;
			}
		}
	}
	return ranges;
};
 
/**
 * @constructor
 * @param {Set} query The set of words to match
 * @param {Object} [options] Search options
 * @param {boolean} [options.caseSensitiveString] Should the search be case sensitive.
 * @param {boolean} [options.lang] Language for case folding. Required if !caseSensitiveString.
 * @param {boolean} [options.noOverlaps] Avoid overlapping matches
 */
ve.dm.SetTextFinder = function VeDmSetTextFinder( query, options = {} ) {
	ve.dm.SetTextFinder.super.call( this );
 
	this.options = options;
	this.lang = options.lang;
	if ( options.caseSensitiveString ) {
		this.normalizedQuery = query;
	} else {
		this.normalizedQuery = new Set( Array.from( query ).map( ( s ) => s.toLocaleLowerCase( this.lang ) ) );
	}
	this.minLen = Infinity;
	this.maxLen = 0;
	this.normalizedQuery.forEach( ( s ) => {
		this.minLen = Math.min( this.minLen, s.length );
		this.maxLen = Math.max( this.maxLen, s.length );
	} );
};
 
OO.inheritClass( ve.dm.SetTextFinder, ve.dm.TextFinder );
 
/**
 * Map from offset in case-folded string to offset in original string
 * In some cases, case-folding can change string length
 * For example, if s = '\u0130', then s.length === 1 but s.toLocaleLowerCase( 'en' ).length === 2
 *
 * @param {string} s
 * @param {number} offsetLower in lowercased string
 * @return {number} corresponding offset in original string
 */
ve.dm.SetTextFinder.static.fixOffset = function ( s, offsetLower ) {
	// Start by guessing that lowercasing didn't change the offset,
	// except when the offset is out of bounds in the original string
	let guess = Math.min( offsetLower, s.length );
 
	let diff = s.slice( 0, guess ).toLocaleLowerCase( this.lang ).length - offsetLower;
	if ( diff === 0 ) {
		// Optimization note: this will almost always be true
		// Only rare characters change length of substr when case folding
		return guess;
	}
 
	while ( diff > 0 ) {
		// The lowercase substr is longer than original
		guess--;
		diff = s.slice( 0, guess ).toLocaleLowerCase( this.lang ).length - offsetLower;
	}
 
	while ( diff < 0 ) {
		// The lowercase substr is shorter than original
		guess++;
		diff = s.slice( 0, guess ).toLocaleLowerCase( this.lang ).length - offsetLower;
	}
	// In some rare situations the diff might be positive now
	// (which would correspond to no offset in the original string mapping to the desired offset)
	return guess;
};
 
/**
 * @inheritdoc
 */
ve.dm.SetTextFinder.prototype.findRanges = function ( line ) {
	Iif ( this.normalizedQuery.size === 0 ) {
		return [];
	}
 
	let normalizedLine = line;
	if ( !this.options.caseSensitiveString ) {
		normalizedLine = line.toLocaleLowerCase( this.lang );
	}
 
	const ranges = [];
	// For each possible length, do a sliding window search on the normalized line
	for ( let len = this.minLen; len <= this.maxLen; len++ ) {
		for ( let i = 0; i <= normalizedLine.length - len; i++ ) {
			const substr = normalizedLine.slice( i, i + len );
			if ( this.normalizedQuery.has( substr ) ) {
				let start = i;
				let end = i + len;
				if ( !this.options.caseSensitiveString ) {
					start = this.constructor.static.fixOffset( line, start );
					end = this.constructor.static.fixOffset( line, end );
				}
				ranges.push( [ start, end ] );
				Iif ( this.options.noOverlaps ) {
					i += len - 1;
				}
			}
		}
	}
	return ranges;
};
 
/**
 * @constructor
 * @param {RegExp} query RegExp object with the /g flag
 * @param {Object} [options] Search options
 * @param {boolean} [options.noOverlaps] Avoid overlapping matches
 */
ve.dm.RegExpTextFinder = function VeDmRegExpTextFinder( query, options = {} ) {
	ve.dm.RegExpTextFinder.super.call( this );
	Iif ( !query.global ) {
		throw new Error( 'The /g flag must be set on the query RegExp' );
	}
	this.query = query;
	this.options = options;
};
 
OO.inheritClass( ve.dm.RegExpTextFinder, ve.dm.TextFinder );
 
/**
 * @inheritdoc
 */
ve.dm.RegExpTextFinder.prototype.findRanges = function ( line ) {
	this.query.lastIndex = 0;
	let match;
	const ranges = [];
	while ( ( match = this.query.exec( line ) ) !== null ) {
		const matchText = match[ 0 ];
 
		// Skip empty string matches (e.g. with .*)
		if ( matchText.length === 0 ) {
			// Set lastIndex to the next character to avoid an infinite
			// loop. Browsers differ in whether they do this for you
			// for empty matches; see
			// http://blog.stevenlevithan.com/archives/exec-bugs
			this.query.lastIndex = match.index + 1;
			continue;
		}
 
		ranges.push( [ match.index, match.index + matchText.length ] );
		if ( !this.options.noOverlaps || matchText[ 0 ] === '\uFFFC' ) {
			// We want overlaps, or the match starts with OBJECT REPLACEMENT CHARACTER,
			// so jump back to the next character after the start of the match
			this.query.lastIndex = match.index + 1;
		}
	}
	return ranges;
};
 
/**
 * Wrap a finder in a memoizing layer
 *
 * @constructor
 * @param {ve.dm.TextFinder} finder The finder to wrap
 * @param {number} [capacity=1000] Max number of results to remember
 */
ve.dm.MemoizedTextFinder = function VeDmMemoizedTextFinder( finder, capacity = 1000 ) {
	this.finder = finder;
	this.memory = new Map();
	this.capacity = capacity;
};
 
OO.inheritClass( ve.dm.MemoizedTextFinder, ve.dm.TextFinder );
 
/**
 * @inheritdoc
 */
ve.dm.MemoizedTextFinder.prototype.find = function ( line ) {
	let result = this.memory.get( line );
	if ( result === undefined ) {
		result = this.finder.find( line );
	} else {
		// We'll refresh the key for LRU purposes
		this.memory.delete( result );
	}
	this.memory.set( line, result );
	Iif ( this.memory.size > this.capacity ) {
		// Purge stalest key
		this.memory.delete( this.memory.keys().next().value );
	}
	return result;
};