/**
 * A bidirectional Language Converter, capable of round-tripping variant
 * conversion.
 *
 * Language conversion is a DOMPostProcessor pass, run over the
 * Parsoid-format HTML output, which may have embedded language converter
 * rules.  We first assign a (guessed) source variant to each DOM node,
 * which will be used when round-tripping the result back to the original
 * source variant.  Then for each applicable text node in the DOM, we
 * first "bracket" the text, splitting it into cleanly round-trippable
 * segments and lossy/unclean segments.  For the lossy segments we add
 * additional metadata to the output to record the original source variant
 * text to allow round-tripping (and variant-aware editing).
 *
 * Like in the PHP implementation, each individual language has a
 * dynamically-loaded subclass of `Language`, which may also have a
 * `LanguageConverter` subclass to load appropriate `ReplacementMachine`s
 * and do other language-specific customizations.
 *
 * @module
 */

'use strict';

require('../../core-upgrade.js');

const { DOMTraverser } = require('../utils/DOMTraverser.js');
const { DOMPostOrder } = require('../utils/DOMPostOrder.js');
const { DOMDataUtils } = require('../utils/DOMDataUtils.js');
const { DOMUtils } = require('../utils/DOMUtils.js');
const { Language } = require('./Language.js');
const { Util } = require('../utils/Util.js');

/**
 * An oracle that gives you a predicted "source language" for every node
 * in a DOM, which is used when converting the result back to the source
 * language during round-tripping.
 *
 * This code is unique to Parsoid; the PHP implementation does not
 * round-trip. Do not confuse this with PHP's (soon to be deprecated) method
 * `LanguageConverter::guessVariant()`, which is a heuristic used to
 * *disable* language conversion when the text is guessed to be already
 * in the desired variant.
 */
class LanguageGuesser {
	/**
	 * Predict the "source language" of a DOM node.  The base
	 * implementation always throws; concrete guessers override it.
	 * @param {Node} node
	 * @return {string}
	 */
	guessLang(node) {
		throw new Error("abstract class");
	}
}

/**
 * A simple {@link LanguageGuesser} that returns the same "source language"
 * for every node.  Appropriate for wikis which by convention are written
 * in a single variant.
 */
class ConstantLanguageGuesser extends LanguageGuesser {
	/**
	 * @param {string} langCode The language code reported for every node.
	 */
	constructor(langCode) {
		super();
		this.langCode = langCode;
	}

	/**
	 * Return the fixed language code given at construction time; the
	 * node itself is never inspected.
	 * @param {Node} node
	 * @return {string}
	 */
	guessLang(node) {
		return this.langCode;
	}
}

/**
 * Use a {@link ReplacementMachine} to predict the best "source language"
 * for every node in a DOM.  Appropriate for wikis which are written
 * in a mix of variants.
 */
class MachineLanguageGuesser extends LanguageGuesser {
	/**
	 * Walk the DOM under `root` once, in post-order, and precompute a
	 * guess for every node visited.  For each candidate source code the
	 * `safe` count reported by `machine.countBrackets()` is summed over
	 * the subtree; the candidate with the largest sum wins, ties going to
	 * the candidate listed first in `machine.codes`.
	 *
	 * @param {ReplacementMachine} machine Supplies `codes`,
	 *   `validCodePair()` and `countBrackets()`.
	 * @param {Node} root Root of the DOM subtree to analyze.
	 * @param {string} destCode The variant the DOM will be converted to.
	 * @throws {Error} If no code in `machine.codes` forms a valid pair
	 *   with `destCode`.
	 */
	constructor(machine, root, destCode) {
		super();
		const codes = machine.codes.filter(
			invertCode => machine.validCodePair(destCode, invertCode)
		);
		if (codes.length === 0) {
			// Without any candidate there is nothing to guess; fail with
			// a clear message instead of a TypeError further down.
			throw new Error(
				`No valid source variant to pair with: ${destCode}`
			);
		}
		// Maps each node to a Map of (candidate code -> safe count).
		const countMap = new Map();
		// Fold one child's counts (`map`) into the running total of `node`.
		const merge = (node, map) => {
			if (!countMap.has(node)) {
				// First child: borrow the child's map rather than
				// copying it, and tag it so it is cloned before any
				// later mutation.
				countMap.set(node, map);
				map.set('$shared$', true);
				return;
			}
			let m = countMap.get(node);
			if (m.has('$shared$')) {
				// Clone the map (and mark the clone not-shared)
				m = new Map(
					Array.from(m.entries()).filter(e => e[0] !== '$shared$')
				);
				countMap.set(node, m);
			}
			for (const c of codes) {
				m.set(c, m.get(c) + map.get(c));
			}
		};
		DOMPostOrder(root, (node) => {
			// XXX look at `lang` attribute and use it to inform guess?
			if (DOMUtils.isText(node)) {
				countMap.set(
					node,
					new Map(
						codes.map(invertCode => [
							invertCode,
							machine.countBrackets(
								node.textContent, destCode, invertCode
							).safe
						])
					)
				);
			} else if (!node.firstChild) {
				// Childless non-text node: contributes nothing.
				countMap.set(node, new Map(codes.map(ic => [ic,0])));
			} else {
				// Accumulate counts from children
				for (let child = node.firstChild;
					child;
					child = child.nextSibling) {
					merge(node, countMap.get(child));
				}
			}
		});
		// Post-process the counts to yield a guess for each node.
		this.nodeMap = new Map();
		for (const [node, counts] of countMap.entries()) {
			// Pick the code with the maximum safe count.  The strict `>`
			// keeps the earliest candidate on ties.
			let best = codes[0];
			for (const code of codes) {
				if (counts.get(code) > counts.get(best)) {
					best = code;
				}
			}
			this.nodeMap.set(node, best);
		}
	}

	/**
	 * Look up the precomputed guess for `node`.
	 * @param {Node} node
	 * @return {string|undefined} `undefined` for a node that was not
	 *   visited by the constructor's traversal.
	 */
	guessLang(node) { return this.nodeMap.get(node); }
}

/**
 * Flatten a document fragment to its text content.
 *
 * @param {Node} docFrag Fragment (or any node) whose direct children are
 *   inspected.
 * @param {boolean} [force] When truthy, skip the safety check and always
 *   return the text content.
 * @return {string|null} The fragment's `textContent`, or `null` when
 *   `force` is falsy and some direct child is not a text node.
 */
function docFragToString(docFrag, force) {
	if (force) {
		return docFrag.textContent;
	}
	let cursor = docFrag.firstChild;
	while (cursor) {
		if (!DOMUtils.isText(cursor)) {
			// A non-text child means the flattening would be unsafe.
			return null;
		}
		cursor = cursor.nextSibling;
	}
	return docFrag.textContent;
}

class ConversionTraverser extends DOMTraverser {
	/**
	 * @param {string} toLang
	 * @param {LanguageGuesser} guesser
	 * @param {ReplacementMachine} machine
	 */
	constructor(toLang, guesser, machine) {
		super();
		/** Target language for conversion. */
		this.toLang = toLang;
		/** Oracle to determine "original language" for round-tripping. */
		this.guesser = guesser;
		/** ReplacementMachine to do actual conversion. */
		this.machine = machine;
		/** The currently-active "original language" */
		this.fromLang = null; // will be set by BODY and P handlers

		// Handlers are applied in order they are registered.

		// No conversion inside <code>, <script>, <pre>, <cite>
		// (See adhoc regexps inside LanguageConverter.php::autoConvert)
		// XXX: <cite> ought to probably be handled more generically
		// as extension output, not special-cased as a HTML tag.
		for (const el of ['code','script','pre','cite']) {
			this.addHandler(el, (...args) => this.noConvertHandler(...args));
		}
		// Setting/saving the language context
		this.addHandler(null, (...args) => this.anyHandler(...args));
		this.addHandler('p', (...args) => this.langContextHandler(...args));
		this.addHandler('body', (...args) => this.langContextHandler(...args));
		// Converting #text, <a> nodes, and title/alt attributes
		this.addHandler('#text', (...args) => this.textHandler(...args));
		this.addHandler('a', (...args) => this.aHandler(...args));
		this.addHandler(null, (...args) => this.attrHandler(...args));
		// LanguageConverter markup
		for (const el of ['meta','div','span']) {
			this.addHandler(el, (...args) => this.lcHandler(...args));
		}
	}

	/**
	 * Handler for elements whose content must never be converted.
	 * @param {Node} node
	 * @return {Node|null} The node's next sibling, so the node's
	 *   interior is not touched.
	 */
	noConvertHandler(node, env, atTopLevel, tplInfo) {
		// Don't touch the inside of this node!
		return node.nextSibling;
	}

	/**
	 * Placeholder hook for `lang` attributes; currently has no effect.
	 * @param {Node} node
	 * @return {boolean} Always `true`.
	 */
	anyHandler(node, env, atTopLevel, tplInfo) {
		/* Look for `lang` attributes */
		if (DOMUtils.isElt(node) && node.hasAttribute('lang')) {
			// XXX validate lang! override fromLang?
			// this.fromLang = node.getAttribute('lang');
		}
		// Continue with other handlers.
		return true;
	}

	/**
	 * Make the guesser's prediction for `node` the active source
	 * language, and record it on the node in `data-mw-variant-lang`.
	 * @param {Element} node
	 * @return {boolean} Always `true`.
	 */
	langContextHandler(node, env, atTopLevel, tplInfo) {
		this.fromLang = this.guesser.guessLang(node);
		node.setAttribute('data-mw-variant-lang', this.fromLang);
		return true; // Continue with other handlers
	}

	/**
	 * Convert a text node by delegating to `machine.replace()`.
	 * @param {Node} node
	 * @return {*} Whatever `machine.replace()` returns.
	 */
	textHandler(node, env, atTopLevel, tplInfo) {
		console.assert(this.fromLang !== null, "Text w/o a context");
		return this.machine.replace(node, this.toLang, this.fromLang);
	}

	/**
	 * Handle `<a>` elements according to their `rel` attribute: convert
	 * the target of wikilinks, and skip the content of interwiki links,
	 * syntactic free links, and external links whose text looks like a
	 * URL.
	 * @param {Element} node
	 * @param {MWParserEnvironment} env
	 * @return {boolean|Node|null} `true` to continue with other
	 *   handlers, or the next sibling when the link is left unconverted.
	 */
	aHandler(node, env, atTopLevel, tplInfo) {
		// Is this a wikilink?  If so, extract title & convert it
		const rel = node.getAttribute('rel') || '';
		if (rel === 'mw:WikiLink') {
			const rawHref = node.getAttribute('href');
			if (typeof rawHref !== 'string') {
				// Malformed wikilink without a target: there is nothing
				// to convert here, but don't abort the whole pass.
				return true;
			}
			// Strip any leading `./` or `../` path prefixes.
			const href = rawHref.replace(/^(\.\.?\/)+/, '');
			const fromPage = Util.decodeURI(href);
			const toPageFrag = this.machine.convert(
				node.ownerDocument, fromPage, this.toLang, this.fromLang
			);
			let toPage = docFragToString(toPageFrag);
			if (toPage === null) {
				// Non-reversible transform (sigh); mark this for rt.
				node.setAttribute('data-mw-variant-orig', fromPage);
				toPage = docFragToString(toPageFrag, true /* force */);
			}
			if (node.hasAttribute('title')) {
				node.setAttribute('title', toPage.replace(/_/g, ' '));
			}
			node.setAttribute('href', `./${toPage}`);
		} else if (rel === 'mw:WikiLink/Interwiki') {
			// Don't convert title or children of interwiki links
			return node.nextSibling;
		} else if (rel === 'mw:ExtLink') {
			// WTUtils.usesURLLinkSyntax uses data-parsoid, so don't use it,
			// but syntactic free links should also have class="external free"
			if (node.classList.contains('free')) {
				// Don't convert children of syntactic "free links"
				return node.nextSibling;
			}
			// Other external link text is protected from conversion iff
			// (a) it doesn't start with -{ ... }- markup, and
			if (node.firstChild && DOMDataUtils.hasTypeOf(node.firstChild, 'mw:LanguageVariant')) {
				return true;
			}
			// (b) it looks like a URL (protocol-relative links excluded)
			const linkText = node.textContent; // XXX: this could be expensive
			if (Util.isProtocolValid(linkText, env) &&
				!linkText.startsWith('//')) {
				return node.nextSibling;
			}
		}
		return true;
	}

	/**
	 * Convert the `title` and `alt` attributes of elements.  When the
	 * conversion is not cleanly reversible, the original value is kept
	 * in `data-mw-variant-title` / `data-mw-variant-alt`.
	 * @param {Node} node
	 * @return {boolean} Always `true`.
	 */
	attrHandler(node, env, atTopLevel, tplInfo) {
		// Convert `alt` and `title` attributes on elements
		// (Registered after aHandler, which has already converted the
		// `title` of wikilinks; those are skipped below.)
		if (!DOMUtils.isElt(node)) { return true; }
		for (const attr of ['title','alt']) {
			if (!node.hasAttribute(attr)) { continue; }
			if (attr === 'title' && node.getAttribute('rel') === 'mw:WikiLink') {
				// We've already converted the title in aHandler above.
				continue;
			}
			const orig = node.getAttribute(attr);
			if (/:\/\//.test(orig)) { continue; /* Don't convert URLs */ }
			const toFrag = this.machine.convert(
				node.ownerDocument, orig, this.toLang, this.fromLang
			);
			let to = docFragToString(toFrag);
			if (to === null) {
				// Non-reversible transform (sigh); mark for rt.
				node.setAttribute(`data-mw-variant-${attr}`, orig);
				to = docFragToString(toFrag, true /* force */);
			}
			node.setAttribute(attr, to);
		}
		return true;
	}

	/**
	 * Handle LanguageConverter markup (`typeof="mw:LanguageVariant"`).
	 * Only the `disabled` rule is implemented: its text replaces the
	 * node's content and the node is then skipped.
	 * @param {Element} node
	 * @return {boolean|Node|null}
	 */
	lcHandler(node, env, atTopLevel, tplInfo) {
		if (!DOMDataUtils.hasTypeOf(node, 'mw:LanguageVariant')) {
			return true; /* not language converter markup */
		}
		const dmv = DOMDataUtils.getJSONAttribute(node, 'data-mw-variant', {});
		if (dmv.disabled) {
			node.innerHTML = dmv.disabled.t;
			// XXX check handling of embedded data-parsoid
			// XXX check handling of nested constructs
			return node.nextSibling;
		} else if (dmv.twoway) {
			// FIXME
		} else if (dmv.oneway) {
			// FIXME
		} else if (dmv.name) {
			// FIXME
		} else if (dmv.filter) {
			// FIXME
		} else if (dmv.describe) {
			// FIXME
		}
		return true;
	}
}

/**
 * Base class for language variant conversion.
 */
class LanguageConverter {
	/**
	 * @param {Language} langobj
	 * @param {string} maincode The main language code of this language
	 * @param {string[]} variants The supported variants of this language
	 * @param {Map} variantfallbacks The fallback language of each variant
	 * @param {Map} flags Defining the custom strings that maps to the flags
	 *   (accepted but not stored)
	 * @param {Map} manualLevel Limit for supported variants
	 *   (accepted but not stored)
	 */
	constructor(langobj, maincode, variants, variantfallbacks, flags, manualLevel) {
		Object.assign(this, {
			mLangObj: langobj,
			mMainLanguageCode: maincode,
			// XXX subtract disabled variants
			mVariants: variants,
			mVariantFallbacks: variantfallbacks,
			// mVariantNames: Language.// XXX
		});

		// Conversion tables are loaded eagerly.
		// XXX we could defer loading in the future.
		this.loadDefaultTables();
	}

	/**
	 * Hook for loading the conversion tables; a no-op in the base class.
	 * Lazy loading isn't really supported, but the load is kept in its
	 * own method for consistency with PHP's code.
	 */
	loadDefaultTables() {
	}

	/**
	 * Return the {@link ReplacementMachine} powering this conversion.
	 * Parsoid-specific.
	 * @return {ReplacementMachine}
	 */
	getMachine() {
		// The field is named after the one PHP uses for its
		// ReplacementArray, for rough consistency.
		return this.mTables;
	}

	/**
	 * Try to return a classname from a given code.
	 * @param {string} code
	 * @param {boolean} fallback Whether we're going through language fallback
	 * @return {string} Name of the language class (if one were to exist)
	 */
	static classFromCode(code, fallback) {
		if (fallback && code === 'en') {
			return 'Language';
		}
		const capitalized = code.replace(/^\w/, first => first.toUpperCase());
		const underscored = capitalized.replace(/-/g, '_');
		// avoid path attacks
		const sanitized = underscored.replace(/\/|^\.+/g, '');
		return `Language${sanitized}`;
	}

	/**
	 * Load the module implementing a language.
	 * @param {MWParserEnvironment} env Used for logging.
	 * @param {string} lang Language code.
	 * @param {boolean} [fallback] Whether we're going through language
	 *   fallback.
	 * @return {*} The result of `require()`-ing the language module, or
	 *   the base `Language` class when the code is invalid or loading
	 *   throws.
	 */
	static loadLanguage(env, lang, fallback) {
		try {
			if (Language.isValidCode(lang)) {
				const className = this.classFromCode(lang, fallback);
				return require(`./${className}.js`);
			}
		} catch (e) { /* fall through */ }
		const message = `Couldn\'t load language: ${lang} fallback=${!!fallback}`;
		env.log("info", message);
		return Language;
	}

	/** XXX unimplemented: echoes back its first two arguments. */
	findVariantLink(link, nt, ignoreOtherCond) {
		return { nt, link };
	}

	/** XXX unimplemented: returns nothing. */
	translate(fromVariant, text, toVariant) {
		return undefined;
	}

	/** Always reports `false`. */
	guessVariant(text, variant) {
		return false;
	}

	/**
	 * Convert `doc` to `targetVariant` if conversion applies, otherwise
	 * do nothing.
	 * @param {MWParserEnvironment} env
	 * @param {Document} doc
	 * @param {string} targetVariant
	 * @param {string} [sourceVariant]
	 */
	static maybeConvert(env, doc, targetVariant, sourceVariant) {
		// language converter must be enabled for the pagelanguage
		if (!env.langConverterEnabled()) {
			return;
		}
		// targetVariant must be specified, and a language-with-variants
		if (!targetVariant || !env.conf.wiki.variants.has(targetVariant)) {
			return;
		}
		// targetVariant must not be a base language code
		const variantInfo = env.conf.wiki.variants.get(targetVariant);
		if (variantInfo.base === targetVariant) {
			// XXX in the future we probably want to go ahead and expand
			// empty <span>s left by -{...}- constructs, etc.
			return;
		}

		// Record the fact that we've done conversion to targetVariant
		env.page.setVariant(targetVariant);
		// But don't actually do the conversion if __NOCONTENTCONVERT__
		const noConvertMarker = doc.querySelector(
			'meta[property="mw:PageProp/nocontentconvert"]'
		);
		if (noConvertMarker) {
			return;
		}
		// OK, convert!
		this.baseToVariant(env, doc.body, targetVariant, sourceVariant);
	}

	/**
	 * Convert a text in the "base variant" to a specific variant, given
	 * by `targetVariant`.  If `sourceVariant` is given, assume that the
	 * input wikitext is in `sourceVariant` to construct round-trip
	 * metadata, instead of using a heuristic to guess the best variant
	 * for each DOM subtree of wikitext.
	 *
	 * @param {MWParserEnvironment} env
	 * @param {Node} rootNode The root node of a fragment to convert.
	 * @param {string} targetVariant The variant to be used for the output
	 *   DOM.
	 * @param {string} [sourceVariant] An optional variant assumed for
	 *   the input DOM in order to create roundtrip metadata.
	 */
	static baseToVariant(env, rootNode, targetVariant, sourceVariant) {
		const pageLangCode = env.page.pagelanguage || env.conf.wiki.lang || 'en';

		const LangClass = this.loadLanguage(env, pageLangCode);
		const langconv = new LangClass().getConverter();
		// XXX we might want to lazily-load conversion tables here.

		// Check that the target variant is valid (and implemented!)
		const validTarget = langconv && langconv.getMachine() &&
			langconv.getMachine().codes.includes(targetVariant);
		if (!validTarget) {
			// XXX create a warning header? (T197949)
			env.log('info', `Unimplemented variant: ${targetVariant}`);
			return; /* no conversion */
		}

		const metrics = env.conf.parsoid.metrics;
		let startTime;
		if (metrics) {
			startTime = Date.now();
			metrics.increment('langconv.count');
			metrics.increment(`langconv.${targetVariant}.count`);
		}

		// XXX Eventually we'll want to consult some wiki configuration to
		// decide whether a ConstantLanguageGuesser is more appropriate.
		const guesser = sourceVariant ?
			new ConstantLanguageGuesser(sourceVariant) :
			new MachineLanguageGuesser(
				langconv.getMachine(), rootNode, targetVariant
			);
		const traverser = new ConversionTraverser(
			targetVariant, guesser, langconv.getMachine()
		);
		traverser.traverse(rootNode, env, null, true);

		if (metrics) {
			metrics.endTiming('langconv.total', startTime);
			metrics.endTiming(`langconv.${targetVariant}.total`, startTime);
		}
	}
}

// Public entry point of this module: the converter base class, whose static
// `maybeConvert()` / `baseToVariant()` methods drive the conversion.
module.exports.LanguageConverter = LanguageConverter;