Source: src/ve.DiffMatchPatch.js, VisualEditor, JSDoc

/*!
 * VisualEditor DiffMatchPatch implementation for linear model
 *
 * @copyright See AUTHORS.txt
 */

/* global diff_match_patch */

/**
 * DiffMatchPatch implementation
 *
 * @class
 * @extends diff_match_patch
 * @constructor
 * @param {ve.dm.HashValueStore} oldStore
 * @param {ve.dm.HashValueStore} newStore
 */
ve.DiffMatchPatch = function VeDiffMatchPatch( oldStore, newStore ) {
	// Parent constructor
	ve.DiffMatchPatch.super.call( this );

	this.store = oldStore.slice();
	this.store.merge( newStore );
};

/* Inheritance */

OO.inheritClass( ve.DiffMatchPatch, diff_match_patch );

/* Static properties */

ve.DiffMatchPatch.static.DIFF_DELETE = -1;
ve.DiffMatchPatch.static.DIFF_INSERT = 1;
ve.DiffMatchPatch.static.DIFF_EQUAL = 0;
ve.DiffMatchPatch.static.DIFF_CHANGE_DELETE = -2;
ve.DiffMatchPatch.static.DIFF_CHANGE_INSERT = 2;

/* Methods */

ve.DiffMatchPatch.prototype.isEqualChar = function ( a, b ) {
	return a === b || ve.dm.ElementLinearData.static.compareElements( a, b, this.store, this.store );
};

ve.DiffMatchPatch.prototype.isEqualString = function ( a, b ) {
	if ( a === b ) {
		return true;
	}
	if ( a === null || b === null ) {
		return false;
	}
	if ( a.length !== b.length ) {
		return false;
	}

	for ( let i = 0, l = a.length; i < l; i++ ) {
		if ( !this.isEqualChar( a[ i ], b[ i ] ) ) {
			return false;
		}
	}
	return true;
};

ve.DiffMatchPatch.prototype.charsToString = function ( chars ) {
	return chars.slice();
};

ve.DiffMatchPatch.prototype.getEmptyString = function () {
	return [];
};

ve.DiffMatchPatch.prototype.indexOf = function indexOf( text, searchValue, fromIndex ) {
	// fromIndex defaults to 0 and is bounded by 0 and text.length
	// Note that indexOf( 'foo', '', 99 ) is supposed to return 3 (text.length), which is why we allow
	// starting the search beyond the end of the string
	// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/indexOf
	for (
		let i = fromIndex === undefined ? 0 : Math.min( Math.max( fromIndex, 0 ), text.length );
		i <= text.length - searchValue.length;
		i++
	) {
		let found = true;
		for ( let j = 0; j < searchValue.length; j++ ) {
			if ( !this.isEqualChar( text[ i + j ], searchValue[ j ] ) ) {
				found = false;
				break;
			}
		}
		if ( found ) {
			return i;
		}
	}
	return -1;
};

ve.DiffMatchPatch.prototype.lastIndexOf = function lastIndexOf( text, searchValue, fromIndex ) {
	const iLen = text.length - searchValue.length;
	// fromIndex defaults to the end, and must be greater than 0
	// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/lastIndexOf
	for (
		let i = fromIndex === undefined ? iLen : Math.min( Math.max( fromIndex, 0 ), iLen );
		i >= 0;
		i--
	) {
		let found = true;
		for ( let j = 0; j < searchValue.length; j++ ) {
			if ( !this.isEqualChar( text[ i + j ], searchValue[ j ] ) ) {
				found = false;
				break;
			}
		}
		if ( found ) {
			return i;
		}
	}
	return -1;
};

ve.DiffMatchPatch.prototype.getCleanDiff = function ( oldData, newData, options ) {
	const store = this.store,
		DIFF_DELETE = this.constructor.static.DIFF_DELETE,
		DIFF_INSERT = this.constructor.static.DIFF_INSERT,
		DIFF_EQUAL = this.constructor.static.DIFF_EQUAL,
		DIFF_CHANGE_DELETE = this.constructor.static.DIFF_CHANGE_DELETE,
		DIFF_CHANGE_INSERT = this.constructor.static.DIFF_CHANGE_INSERT;

	/**
	 * Remove the close elements from the linear data, to force a balanced diff.
	 *
	 * Otherwise, for example, removing the second of two consecutive comment
	 * nodes could result in a diff where [{type: '/comment'}, {type: 'comment'}]
	 * is removed, which is unbalanced.
	 *
	 * Warning: this step assumes that, within a content branch node, an element
	 * is always immediately followed by its close element.
	 *
	 * @param {Array} data Linear data
	 * @return {Array} Linear data without close elements
	 */
	function removeCloseElements( data ) {
		for ( let i = 0, ilen = data.length; i < ilen; i++ ) {
			if ( data[ i ].type && data[ i ].type[ 0 ] === '/' ) {
				data.splice( i, 1 );
				ilen--;
				i--;
			}
		}
		return data;
	}

	/**
	 * Get the index of the first or last wordbreak in a data array
	 *
	 * @param {Array} data Linear data
	 * @param {boolean} reversed Get the index of the last wordbreak
	 * @return {number|null} Index of the first or last wordbreak, or null if no
	 *  wordbreak was found
	 */
	function findWordbreaks( data, reversed ) {
		const dataString = new ve.dm.DataString( data );

		const offset = unicodeJS.wordbreak.moveBreakOffset(
			reversed ? -1 : 1,
			dataString,
			reversed ? data.length : 0
		);

		if ( ( reversed && offset === 0 ) || ( !reversed && offset === data.length ) ) {
			return null;
		} else {
			return offset;
		}
	}

	/**
	 * Determine whether there is a wordbreak at an offset
	 *
	 * @param {Array} data Linear data
	 * @param {number} offset
	 * @return {boolean} There is a wordbreak at the offset
	 */
	function isBreak( data, offset ) {
		return !!( unicodeJS.wordbreak.isBreak( new ve.dm.DataString( data ), offset ) );
	}

	/**
	 * The perfect diff is not always human-friendly, so clean it up.
	 * Make sure retained content spans whole words (no wordbreaks),
	 * and "de-stripe" any sequences of alternating removes and inserts
	 * (with no retains) to look like one continuous removal and one continuous
	 * insert.
	 *
	 * Additionally clean up mistakes made by the linear differ, such as removing
	 * and inserting identical content (insetead of retaining it) and removing,
	 * inserting or retaining an empty content array.
	 *
	 * @param {Array} diff Linear diff, as arrays of inserted, removed and retained
	 * content
	 * @return {Array} A human-friendlier linear diff
	 */
	function getCleanDiff( diff ) {
		let previousData = null,
			previousAction = null,
			remove = [],
			insert = [];
		const cleanDiff = [];

		function equalUnannotated( other, element, index ) {
			return ve.dm.ElementLinearData.static.compareElementsUnannotated( element, other[ index ] );
		}

		function equalElements( other, element, index ) {
			// Non-elements can't be equal to other elements
			if ( !element.type || !other[ index ].type ) {
				return false;
			}
			if ( ve.dm.LinearData.static.isOpenElementData( element ) ) {
				return ve.dm.modelRegistry.lookup( element.type ).static.isDiffComparable( element, other[ index ], store, store );
			} else {
				// Ignore close elements
				return true;
			}
		}

		function isWhitespace( element ) {
			const value = Array.isArray( element ) ? element[ 0 ] : element;
			return typeof value === 'string' && /^\s+$/.test( value );
		}

		// Where the same data is removed and inserted, replace it with a retain
		for ( let i = 0; i < diff.length; i++ ) {
			const action = diff[ i ][ 0 ];
			const data = diff[ i ][ 1 ];
			// Should improve on JSON.stringify
			if ( ( action > 0 || previousAction > 0 ) && action + previousAction === 0 && JSON.stringify( data ) === JSON.stringify( previousData ) ) {
				diff.splice( i - 1, 2, [ DIFF_EQUAL, data ] );
				i++;
			}
			previousAction = action;
			previousData = data;
		}
		previousData = null;
		previousAction = null;

		// Join any consecutive actions that are the same
		for ( let i = 0; i < diff.length; i++ ) {
			const action = diff[ i ][ 0 ];
			if ( action === previousAction ) {
				ve.batchPush( diff[ i - 1 ][ 1 ], diff[ i ][ 1 ] );
				diff.splice( i, 1 );
				i--;
			} else if ( diff[ i ][ 1 ].length === 0 ) {
				diff.splice( i, 1 );
				i--;
				continue;
			}
			previousAction = action;
		}

		// Convert any retains that do not end and start with spaces into remove-
		// inserts
		for ( let i = 0; i < diff.length; i++ ) {
			const action = diff[ i ][ 0 ];
			const data = diff[ i ][ 1 ];
			if ( action === DIFF_EQUAL ) {
				let start = [];
				let end = [];
				const firstWordbreak = findWordbreaks( data, false );
				const lastWordbreak = firstWordbreak === null ? null : findWordbreaks( data, true );
				const notNextStartsWithWordbreak = i !== diff.length - 1 && !isBreak( data.concat( diff[ i + 1 ][ 1 ] ), data.length );
				const notPreviousEndsWithWordBreak = i !== 0 && !isBreak( previousData.concat( data ), previousData.length );

				if ( firstWordbreak === null && ( notNextStartsWithWordbreak || notPreviousEndsWithWordBreak ) ) {
					// If there was no wordbreak, and there are no wordbreaks either side,
					// the retain should be replaced with a remove-insert
					diff.splice( i, 1, [ DIFF_DELETE, data ], [ DIFF_INSERT, data ] );
					i++;
				} else {
					if ( notNextStartsWithWordbreak ) {
						// Unless the next item starts with a wordbreak, replace the portion
						// after the last wordbreak.
						end = data.splice( lastWordbreak );
					}
					if ( notPreviousEndsWithWordBreak ) {
						// Unless the previous item ends with a word break,replace the portion
						// before the first wordbreak.
						start = data.splice( 0, firstWordbreak );
					} else {
						// Skip over close tags to ensure a balanced remove/insert
						// Word break logic should ensure that there aren't unbalanced
						// tags on the left of the remove/insert
						let j = 0;
						while ( ve.dm.LinearData.static.isCloseElementData( data[ j ] ) ) {
							j++;
						}
						start = data.splice( 0, j );
					}

					// At this point the only portion we want to retain is what's left of
					// data (if anything; if firstWordbreak === lastWordbreak !== null, then
					// data has been spliced away completely).
					if ( start.length > 0 ) {
						diff.splice( i, 0, [ DIFF_DELETE, start ], [ DIFF_INSERT, start ] );
						i += 2;
					}
					if ( end.length > 0 ) {
						diff.splice( i + 1, 0, [ DIFF_DELETE, end ], [ DIFF_INSERT, end ] );
						i += 2;
					}
				}
			}
			previousData = data;
		}

		// In a sequence of -remove-insert-remove-insert- make the removes into a
		// single action and the inserts into a single action
		for ( let i = 0, ilen = diff.length; i < ilen; i++ ) {
			const action = diff[ i ][ 0 ];
			const data = diff[ i ][ 1 ];
			if ( action === DIFF_DELETE ) {
				ve.batchPush( remove, data );
			} else if ( action === DIFF_INSERT ) {
				ve.batchPush( insert, data );
			} else if ( action === DIFF_EQUAL && data.length > 0 ) {
				if ( data.every( isWhitespace ) ) {
					ve.batchPush( remove, data );
					ve.batchPush( insert, data );
				} else {
					if ( remove.length > 0 ) {
						cleanDiff.push( [ DIFF_DELETE, remove ] );
					}
					remove = [];
					if ( insert.length > 0 ) {
						cleanDiff.push( [ DIFF_INSERT, insert ] );
					}
					insert = [];
					cleanDiff.push( diff[ i ] );
				}
			}
		}

		if ( remove.length > 0 ) {
			cleanDiff.push( [ DIFF_DELETE, remove ] );
		}
		if ( insert.length > 0 ) {
			cleanDiff.push( [ DIFF_INSERT, insert ] );
		}

		// Now go over any consecutive remove-inserts (also insert-removes?) and
		// if they have the same character data, or are modified content nodes,
		// make them changes instead
		for ( let i = 0, ilen = cleanDiff.length - 1; i < ilen; i++ ) {
			const aItem = cleanDiff[ i ];
			const bItem = cleanDiff[ i + 1 ];
			const aData = aItem[ 1 ];
			const bData = bItem[ 1 ];
			const aAction = aItem[ 0 ];
			const bAction = bItem[ 0 ];
			// If they have the same length content and they are a consecutive
			// remove and insert, and they have the same content then mark the
			// old one as a change-remove (-2) and the new one as a change-insert
			// (2)
			if (
				aData.length === bData.length &&
				( ( aAction === DIFF_DELETE && bAction === DIFF_INSERT ) || ( aAction === DIFF_INSERT && bAction === DIFF_DELETE ) )
			) {
				if ( aData.every( equalUnannotated.bind( this, bData ) ) ) {
					const aAnnotations = new ve.dm.ElementLinearData( store, aData ).getAnnotationsFromRange( new ve.Range( 0, aData.length ), true );
					const bAnnotations = new ve.dm.ElementLinearData( store, bData ).getAnnotationsFromRange( new ve.Range( 0, bData.length ), true );

					const annotationChanges = [];
					bAnnotations.get().forEach( ( b ) => {
						const sameName = aAnnotations.getAnnotationsByName( b.name );
						if ( !aAnnotations.containsComparable( b ) ) {
							if ( sameName.getLength() ) {
								// Annotations which have the same type, but are non-comparable, e.g. link with a different href
								annotationChanges.push( { oldAnnotation: sameName.get( 0 ), newAnnotation: b } );
							} else {
								annotationChanges.push( { newAnnotation: b } );
							}
						}
					} );
					aAnnotations.get().forEach( ( a ) => {
						if ( !(
							// Check the old annotation hasn't already been described as a insertion...
							bAnnotations.containsComparable( a ) ||
							// ...or a change
							bAnnotations.getAnnotationsByName( a.name ).getLength()
						) ) {
							annotationChanges.push( { oldAnnotation: a } );
						}
					} );

					if ( annotationChanges.length ) {
						cleanDiff[ i + 1 ].annotationChanges = annotationChanges;
						cleanDiff[ i ][ 0 ] = aAction === DIFF_DELETE ? DIFF_CHANGE_DELETE : DIFF_CHANGE_INSERT;
						cleanDiff[ i + 1 ][ 0 ] = bAction === DIFF_DELETE ? DIFF_CHANGE_DELETE : DIFF_CHANGE_INSERT;
					}
				}
				if ( aData.every( equalElements.bind( this, bData ) ) ) {
					const attributeChanges = [];
					bData.forEach( ( element, n ) => {
						if ( ve.dm.LinearData.static.isOpenElementData( element ) ) {
							attributeChanges.push( { oldAttributes: aData[ n ].attributes, newAttributes: element.attributes, index: n } );
						}
					} );
					if ( attributeChanges.length ) {
						cleanDiff[ i + 1 ].attributeChanges = attributeChanges;
						cleanDiff[ i ][ 0 ] = aAction === DIFF_DELETE ? DIFF_CHANGE_DELETE : DIFF_CHANGE_INSERT;
						cleanDiff[ i + 1 ][ 0 ] = bAction === DIFF_DELETE ? DIFF_CHANGE_DELETE : DIFF_CHANGE_INSERT;
					}
				}

				// No need to check bItem against the following item
				i += 1;
			}
		}

		return cleanDiff;
	}

	// Remove the close elements
	oldData = removeCloseElements( oldData );
	newData = removeCloseElements( newData );

	// Get the diff
	const finalDiff = getCleanDiff( this.diff_main( oldData, newData, options ) );

	// Re-insert the close elements
	for ( let k = 0, klen = finalDiff.length; k < klen; k++ ) {
		for ( let m = 0; m < finalDiff[ k ][ 1 ].length; m++ ) {
			if ( finalDiff[ k ][ 1 ][ m ].type ) {
				finalDiff[ k ][ 1 ].splice( m + 1, 0, {
					type: '/' + finalDiff[ k ][ 1 ][ m ].type
				} );
				m++;
			}
		}
	}

	finalDiff.timedOut = this.lastDiffTimedOut;

	return finalDiff;
};