/**
 * Perform post-processing steps on an already-built HTML DOM.
 * @module
 */

'use strict';

require('../../core-upgrade');

var events = require('events');
var url = require('url');
var util = require('util');

var ContentUtils = require('../utils/ContentUtils.js').ContentUtils;
var DOMDataUtils = require('../utils/DOMDataUtils.js').DOMDataUtils;
var Util = require('../utils/Util.js').Util;
var DOMTraverser = require('../utils/DOMTraverser.js').DOMTraverser;
var Promise = require('../utils/promise.js');
var JSUtils = require('../utils/jsutils.js').JSUtils;

// processors
/**
 * Load a DOM pass class from `./pp/processors/`.
 *
 * The module file name and the property read off its exports are both `p`.
 *
 * @param {string} p Processor name, e.g. 'ComputeDSR'.
 * @return {*} The export named `p` from `./pp/processors/<p>.js`.
 */
const requireProcessor = function(p) {
	const processorModule = require('./pp/processors/' + p + '.js');
	return processorModule[p];
};
var AddExtLinkClasses = requireProcessor('AddExtLinkClasses');
var AddMediaInfo = requireProcessor('AddMediaInfo');
var AddRedLinks = requireProcessor('AddRedLinks');
var ComputeDSR = requireProcessor('ComputeDSR');
var HandlePres = requireProcessor('HandlePres');
var LangConverter = requireProcessor('LangConverter');
var Linter = requireProcessor('Linter');
var MarkFosteredContent = requireProcessor('MarkFosteredContent');
var MigrateTemplateMarkerMetas = requireProcessor('MigrateTemplateMarkerMetas');
var MigrateTrailingNLs = requireProcessor('MigrateTrailingNLs');
var Normalize = requireProcessor('Normalize');
var ProcessTreeBuilderFixups = requireProcessor('ProcessTreeBuilderFixups');
var PWrap = requireProcessor('PWrap');
var WrapSections = requireProcessor('WrapSections');
var WrapTemplates = requireProcessor('WrapTemplates');

// handlers
/**
 * Load a traversal-handler collection from `./pp/handlers/`.
 *
 * The module file name and the property read off its exports are both `h`.
 *
 * @param {string} h Handler module name, e.g. 'CleanUp'.
 * @return {*} The export named `h` from `./pp/handlers/<h>.js`.
 */
const requireHandlers = function(h) {
	const handlerModule = require('./pp/handlers/' + h + '.js');
	return handlerModule[h];
};
var CleanUp = requireHandlers('CleanUp');
var DedupeStyles = requireHandlers('DedupeStyles');
var HandleLinkNeighbours = requireHandlers('HandleLinkNeighbours');
var Headings = requireHandlers('Headings');
var LiFixups = requireHandlers('LiFixups');
var TableFixups = requireHandlers('TableFixups');
var UnpackDOMFragments = requireHandlers('UnpackDOMFragments');

/**
 * Assembles the ordered list of DOM passes that {@link #doPostProcess} runs
 * over an already-built document.
 *
 * Each pass is either a processor class (instantiated once here and invoked
 * through its `run` method) or a DOMTraverser configured with a list of
 * per-node handlers. The resulting list is stored in `this.processors`;
 * entries whose `omit` flag is truthy are dropped at construction time,
 * while entries flagged `skipNested` are skipped at run time for
 * non-top-level documents.
 *
 * @class
 * @extends EventEmitter
 * @param {MWParserEnvironment} env
 * @param {Object} [options]
 * @param {boolean} [options.inTemplate] When truthy, the ComputeDSR and
 *   WrapTemplates passes are omitted.
 * @param {Object} [options.frame] Frame passed to the table fixups;
 *   defaults to `env.topFrame`.
 */
function DOMPostProcessor(env, options) {
	events.EventEmitter.call(this);

	this.env = env;
	this.options = Object.assign({ frame: env.topFrame }, options);
	this.seenIds = new Set();

	// `options` may be omitted (Object.assign above tolerates that), so the
	// flag is read defensively instead of dereferencing `options` directly.
	const inTemplate = options && options.inTemplate;

	const tableFixer = new TableFixups(env);

	/* ---------------------------------------------------------------------------
	 * FIXME:
	 * 1. PipelineFactory caches pipelines per env
	 * 2. PipelineFactory.parse uses a default cache key
	 * 3. ParserTests uses a shared/global env object for all tests.
	 * 4. ParserTests also uses PipelineFactory.parse (via env.getContentHandler())
	 *    => the pipeline constructed for the first test that runs wt2html
	 *       is used for all subsequent wt2html tests
	 * 5. If we are selectively turning on/off options on a per-test basis
	 *    in parser tests, those options won't work if those options are
	 *    also used to configure pipeline construction (including which DOM passes
	 *    are enabled).
	 *
	 *    Ex: if (env.wrapSections) { addPP('wrapSections', wrapSections); }
	 *
	 *    This won't do what you expect it to do. This is primarily a
	 *    parser tests script issue -- but given the abstraction layers that
	 *    are on top of the parser pipeline construction, fixing that is
	 *    not straightforward right now. So, this note is a warning to future
	 *    developers to pay attention to how they construct pipelines.
	 * --------------------------------------------------------------------------- */

	let processors = [
		// Common post processing
		{
			Processor: MarkFosteredContent,
			shortcut: 'fostered',
		},
		{
			Processor: ProcessTreeBuilderFixups,
			shortcut: 'process-fixups',
		},
		{
			Processor: Normalize,
		},
		{
			Processor: PWrap,
			shortcut: 'pwrap',
			skipNested: true,
		},
		// Run this after 'ProcessTreeBuilderFixups' because the mw:StartTag
		// and mw:EndTag metas would otherwise interfere with the
		// firstChild/lastChild check that this pass does.
		{
			Processor: MigrateTemplateMarkerMetas,
			shortcut: 'migrate-metas',
		},
		{
			Processor: HandlePres,
			shortcut: 'pres',
		},
		{
			Processor: MigrateTrailingNLs,
			shortcut: 'migrate-nls',
		},
		// dsr computation and tpl encap are only relevant for top-level content
		{
			Processor: ComputeDSR,
			shortcut: 'dsr',
			omit: inTemplate,
		},
		{
			Processor: WrapTemplates,
			shortcut: 'tplwrap',
			omit: inTemplate,
		},
		// A single traversal that does two things:
		// 1. Link prefixes and suffixes
		// 2. Unpack DOM fragments
		{
			name: 'HandleLinkNeighbours,UnpackDOMFragments',
			shortcut: 'dom-unpack',
			isTraverser: true,
			handlers: [
				{
					nodeName: 'a',
					action: HandleLinkNeighbours.handleLinkNeighbours,
				},
				{
					nodeName: null,
					action: UnpackDOMFragments.unpackDOMFragments,
				},
			],
		},
	];

	// FIXME: There are two potential ordering problems here.
	//
	// 1. unpackDOMFragment should always run immediately
	//    before these extensionPostProcessors, which we do currently.
	//    This ensures packed content get processed correctly by extensions
	//    before additional transformations are run on the DOM.
	//
	// This ordering issue is handled through documentation.
	//
	// 2. This has existed all along (in the PHP parser as well as Parsoid
	//    which is probably how the ref-in-ref hack works - because of how
	//    parser functions and extension tags are processed, #tag:ref doesn't
	//    see a nested ref anymore) and this patch only exposes that problem
	//    more clearly with the sealFragment property.
	//
	// * Consider the set of extensions that
	//   (a) process wikitext
	//   (b) provide an extensionPostProcessor
	//   (c) run the extensionPostProcessor only on the top-level
	//   As of today, there is exactly one extension (Cite) that has all
	//   these properties, so the problem below is a speculative problem
	//   for today. But, this could potentially be a problem in the future.
	//
	// * Let us say there are at least two of them, E1 and E2 that
	//   support extension tags <e1> and <e2> respectively.
	//
	// * Let us say in an instance of <e1> on the page, <e2> is present
	//   and in another instance of <e2> on the page, <e1> is present.
	//
	// * In what order should E1's and E2's extensionPostProcessors be
	//   run on the top-level? Depending on what these handlers do, you
	//   could get potentially different results. You can see this quite
	//   starkly with the sealFragment flag.
	//
	// * The ideal solution to this problem is to require that every extension's
	//   extensionPostProcessor be idempotent which lets us run these
	//   post processors repeatedly till the DOM stabilizes. But, this
	//   still doesn't necessarily guarantee that ordering doesn't matter.
	//   It just guarantees that with the sealFragment flag set on
	//   multiple extensions, all sealed fragments get fully processed.
	//   So, we still need to worry about that problem.
	//
	//   But, idempotence *could* potentially be a sufficient property in most cases.
	//   To see this, consider that there is a Footnotes extension which is similar
	//   to the Cite extension in that they both extract inline content in the
	//   page source to a separate section of output and leave behind pointers to
	//   the global section in the output DOM. Given this, the Cite and Footnote
	//   extension post processors would essentially walk the dom and
	//   move any existing inline content into that global section till it is
	//   done. So, even if a <footnote> has a <ref> and a <ref> has a <footnote>,
	//   we ultimately end up with all footnote content in the footnotes section
	//   and all ref content in the references section and the DOM stabilizes.
	//   Ordering is irrelevant here.
	//
	//   So, perhaps one way of catching these problems would be in code review
	//   by analyzing what the DOM postprocessor does and see if it introduces
	//   potential ordering issues.
	env.conf.wiki.extConfig.domProcessors.forEach((extProcs) => {
		processors.push({
			name: 'tag:' + extProcs.extName,
			Processor: extProcs.procs.wt2htmlPostProcessor,
		});
	});

	processors = processors.concat([
		{
			name: 'LiFixups,TableFixups,DedupeStyles',
			shortcut: 'fixups',
			isTraverser: true,
			skipNested: true,
			handlers: [
				// 1. Deal with <li>-hack and move trailing categories in <li>s out of the list
				{
					nodeName: 'li',
					action: LiFixups.handleLIHack,
				},
				{
					nodeName: 'li',
					action: LiFixups.migrateTrailingCategories,
				},
				{
					nodeName: 'dt',
					action: LiFixups.migrateTrailingCategories,
				},
				{
					nodeName: 'dd',
					action: LiFixups.migrateTrailingCategories,
				},
				// 2. Fix up issues from templated table cells and table cell attributes.
				//    The frame is looked up at call time because setFrame()
				//    may replace this.options.frame after construction.
				{
					nodeName: 'td',
					action: node => tableFixer.stripDoubleTDs(node, this.options.frame),
				},
				{
					nodeName: 'td',
					action: node => tableFixer.handleTableCellTemplates(node, this.options.frame),
				},
				{
					nodeName: 'th',
					action: node => tableFixer.handleTableCellTemplates(node, this.options.frame),
				},
				// 3. Deduplicate template styles
				//   (should run after dom-fragment expansion + after extension post-processors)
				{
					nodeName: 'style',
					action: DedupeStyles.dedupe,
				},
			],
		},
		// This is run at all levels since, for now, we don't have a generic
		// solution to running top level passes on HTML stashed in data-mw.
		// See T214994 for that.
		//
		// Also, the gallery extension's "packed" mode would otherwise need a
		// post-processing pass to scale media after it has been fetched.  That
		// introduces an ordering dependency that may or may not complicate things.
		{
			Processor: AddMediaInfo,
			shortcut: 'media',
		},
		// Benefits from running after determining which media are redlinks
		{
			name: 'Headings-genAnchors',
			shortcut: 'headings',
			isTraverser: true,
			skipNested: true,
			handlers: [
				{
					nodeName: null,
					action: Headings.genAnchors,
				},
			],
		},
		// Add <section> wrappers around sections
		{
			Processor: WrapSections,
			shortcut: 'sections',
			skipNested: true,
		},
		// Make heading IDs unique
		{
			name: 'Headings-dedupeHeadingIds',
			shortcut: 'heading-ids',
			isTraverser: true,
			skipNested: true,
			handlers: [
				{
					nodeName: null,
					action: (node, env) => Headings.dedupeHeadingIds(this.seenIds, node, env),
				},
			],
		},
		{
			Processor: Linter,
			omit: !env.conf.parsoid.linting,
			skipNested: true,
		},
		// Strip marker metas -- removes left over marker metas (ex: metas
		// nested in expanded tpl/extension output).
		{
			name: 'CleanUp-stripMarkerMetas',
			shortcut: 'strip-metas',
			isTraverser: true,
			handlers: [
				{
					nodeName: 'meta',
					action: CleanUp.stripMarkerMetas,
				},
			],
		},
		{
			Processor: AddExtLinkClasses,
			shortcut: 'linkclasses',
			skipNested: true,
		},
		{
			name: 'CleanUp-handleEmptyElts,CleanUp-cleanupAndSaveDataParsoid',
			shortcut: 'cleanup',
			isTraverser: true,
			handlers: [
				// Strip empty elements from template content
				{
					nodeName: null,
					action: CleanUp.handleEmptyElements,
				},
				// Save data.parsoid into data-parsoid html attribute.
				// Make this its own thing so that any changes to the DOM
				// don't affect other handlers that run alongside it.
				{
					nodeName: null,
					action: CleanUp.cleanupAndSaveDataParsoid,
				},
			],
		},
		// Language conversion
		{
			Processor: LangConverter,
			shortcut: 'lang-converter',
			skipNested: true,
		},
		// (Optional) red links
		{
			Processor: AddRedLinks,
			shortcut: 'redlinks',
			omit: !env.conf.parsoid.useBatchAPI,
			skipNested: true,
		},
	]);

	// Drop the passes that are disabled for this pipeline ...
	const enabled = processors.filter(p => !p.omit);

	// ... then give every remaining pass a name, a shortcut (used in the
	// 'dom:pre-<shortcut>' / 'dom:post-<shortcut>' dump flags) and a
	// uniform `proc(...)` entry point.
	enabled.forEach((p) => {
		if (!p.name) { p.name = p.Processor.name; }
		if (!p.shortcut) { p.shortcut = p.name; }
		if (p.isTraverser) {
			const t = new DOMTraverser();
			p.handlers.forEach(h => t.addHandler(h.nodeName, h.action));
			p.proc = (...args) => t.traverse(...args);
		} else {
			const c = new p.Processor();
			p.proc = (...args) => c.run(...args);
		}
	});

	this.processors = enabled;
}

// Inherit from EventEmitter
util.inherits(DOMPostProcessor, events.EventEmitter);

/**
 * Debugging aid: remember the id of the pipeline this post-processor
 * belongs to. The value is stored as-is on `this.pipelineId`.
 *
 * @param {*} id
 */
DOMPostProcessor.prototype.setPipelineId = function(id) {
	const pipelineId = id;
	this.pipelineId = pipelineId;
};

/**
 * Record a source range on the options object as a two-element array
 * `[start, end]` under `options.sourceOffsets`.
 *
 * @param {*} start
 * @param {*} end
 */
DOMPostProcessor.prototype.setSourceOffsets = function(start, end) {
	const opts = this.options;
	opts.sourceOffsets = [start, end];
};

/**
 * Replace `options.frame` with a fresh child frame.
 *
 * - Without a parent frame, the child is created from `env.topFrame`.
 * - With a parent frame and a `null` title, the child reuses the parent's
 *   own title and args.
 * - Otherwise the child is created from the parent with the given
 *   title and args.
 *
 * @param {Object|null} parentFrame
 * @param {*} title
 * @param {*} args
 * @param {*} srcText
 */
DOMPostProcessor.prototype.setFrame = function(parentFrame, title, args, srcText) {
	if (!parentFrame) {
		this.options.frame = this.env.topFrame.newChild(title, args, srcText);
		return;
	}

	this.options.frame = (title === null) ?
		parentFrame.newChild(parentFrame.title, parentFrame.args, srcText) :
		parentFrame.newChild(title, args, srcText);
};

/**
 * Reset per-run state: record whether this run is for the top-level
 * document, clear the page's display title, and forget the heading ids
 * collected so far.
 *
 * @param {Object} [opts]
 * @param {boolean} [opts.toplevel]
 */
DOMPostProcessor.prototype.resetState = function(opts) {
	// When `opts` is falsy, it is stored as-is (matching `opts && opts.toplevel`).
	this.atTopLevel = opts ? opts.toplevel : opts;

	const pageMeta = this.env.page.meta;
	pageMeta.displayTitle = null;

	this.seenIds.clear();
};

// Lookup table from MediaWiki metadata names to the RDFa attributes of the
// <meta>/<link> element generated for them. Each attribute value is either
// a plain string, a printf-style template containing '%', or a function
// that receives the Map of collected metadata.
const metadataMap = {
	ns: { property: 'mw:pageNamespace', content: '%d' },
	id: { property: 'mw:pageId', content: '%d' },

	// DO NOT ADD rev_user, rev_userid, and rev_comment (See T125266)

	// 'rev_revid' is used to set the overall subject of the document, we don't
	// need to add a specific <meta> or <link> element for it.

	rev_parentid: { rel: 'dc:replaces', resource: 'mwr:revision/%d' },
	rev_timestamp: {
		property: 'dc:modified',
		// Re-serialize the stored timestamp in ISO 8601 form.
		content: (m) => {
			const timestamp = m.get('rev_timestamp');
			return new Date(timestamp).toISOString();
		},
	},
	rev_sha1: { property: 'mw:revisionSHA1', content: '%s' },
};

/**
 * Create a `tagName` element, apply `attrs` to it via
 * DOMDataUtils.addAttributes, and append it as the last child of
 * `document.head`.
 *
 * @param {Document} document
 * @param {string} tagName
 * @param {Object} [attrs] Attribute map; a prototype-less empty object is
 *   used when this is falsy.
 */
function appendToHead(document, tagName, attrs) {
	const node = document.createElement(tagName);
	const attributes = attrs ? attrs : Object.create(null);
	DOMDataUtils.addAttributes(node, attributes);
	document.head.appendChild(node);
}

// FIXME: consider moving to DOMUtils or MWParserEnvironment.
/**
 * Populate the document's <head> and decorate <html>/<body> with page-level
 * metadata taken from `env`. The document is modified in place.
 *
 * A <head> is created if missing. Elements are then appended to it in this
 * order: charset <meta>; one <meta>/<link> per collected metadata key that
 * has an entry in `metadataMap`; an `isMainPage` <meta> (main page only);
 * the `mw:html:version` <meta>; the `dc:isVersionOf` <link>; <base>; the
 * stylesheet <link>; the conditional-comment html5shiv; and the
 * `content-language` and `vary` <meta>s.
 *
 * Besides that, RDFa `prefix`/`about` attributes are set on <html> and
 * <head>, the document title is set, and language, direction and styling
 * classes are set on <body>.
 *
 * @param {MWParserEnvironment} env
 * @param {Document} document
 */
DOMPostProcessor.addMetaData = function(env, document) {
	// add <head> element if it was missing
	if (!document.head) {
		document.documentElement
			.insertBefore(document.createElement('head'), document.body);
	}

	// add RDFa prefixes: dc: and mw: on <html>, mwr: on <head>
	var prefixes = [
		'dc: http://purl.org/dc/terms/',
		'mw: http://mediawiki.org/rdf/',
	];
	// add 'https://' to baseURI if it was missing
	var mwrPrefix = url.resolve('https://',
		env.conf.wiki.baseURI + 'Special:Redirect/');
	document.documentElement.setAttribute('prefix', prefixes.join(' '));
	document.head.setAttribute('prefix', 'mwr: ' + mwrPrefix);

	// add <head> content based on page meta data:

	// Set the charset first.
	appendToHead(document, 'meta', { charset: 'utf-8' });

	// collect all the page meta data (including revision metadata) in 1 object
	var m = new Map();
	Object.keys(env.page.meta || {}).forEach(function(k) {
		m.set(k, env.page.meta[k]);
	});
	// include some other page properties
	["ns", "id"].forEach(function(p) {
		m.set(p, env.page[p]);
	});
	// flatten the 'revision' object into 'rev_'-prefixed keys
	var rev = m.get('revision');
	Object.keys(rev || {}).forEach(function(k) {
		m.set('rev_' + k, rev[k]);
	});
	// use the metadataMap to turn collected data into <meta> and <link> tags.
	// (Map#forEach passes the value first: g is the value, f is the key.)
	m.forEach(function(g, f) {
		var mdm = metadataMap[f];
		// skip keys without a value or without a metadataMap entry
		if (!m.has(f) || m.get(f) === null || m.get(f) === undefined || !mdm) {
			return;
		}
		// generate proper attributes for the <meta> or <link> tag
		var attrs = Object.create(null);
		Object.keys(mdm).forEach(function(k) {
			// evaluate a function, or perform sprintf-style formatting, or
			// use string directly, depending on value in metadataMap
			var v = (typeof (mdm[k]) === 'function') ? mdm[k](m) :
				mdm[k].indexOf('%') >= 0 ? util.format(mdm[k], m.get(f)) :
				mdm[k];
			attrs[k] = v;
		});
		// <link> is used if there's a resource or href attribute.
		appendToHead(document,
			(attrs.resource || attrs.href) ? 'link' : 'meta',
			attrs);
	});
	// the revision id becomes the RDFa subject of the whole document
	if (m.has('rev_revid')) {
		document.documentElement.setAttribute(
			'about', mwrPrefix + 'revision/' + m.get('rev_revid'));
	}

	// Normalize before comparison (underscores and spaces are treated alike)
	if (env.conf.wiki.mainpage.replace(/_/g, ' ') === env.page.name.replace(/_/g, ' ')) {
		appendToHead(document, 'meta', {
			'property': 'isMainPage',
			'content': true,
		});
	}

	// Set the parsoid content-type strings
	// FIXME: Should we be using http-equiv for this?
	appendToHead(document, 'meta', {
		'property': 'mw:html:version',
		'content': env.outputContentVersion,
	});
	// Percent-encode each path segment of the page name but keep the
	// '/' separators between them.
	var wikiPageUrl = env.conf.wiki.baseURI +
		env.page.name.split('/').map(encodeURIComponent).join('/');
	appendToHead(document, 'link',
		{ rel: 'dc:isVersionOf', href: wikiPageUrl });

	// Prefer the display title, then the page title, then an empty title.
	document.title = env.page.meta.displayTitle || env.page.meta.title || '';

	// Add base href pointing to the wiki root
	appendToHead(document, 'base', { href: env.conf.wiki.baseURI });

	// Hack: link styles
	// (a Set, so that a module listed by several sources is requested once)
	var modules = new Set([
		'mediawiki.legacy.commonPrint,shared',
		'mediawiki.skinning.content.parsoid',
		'mediawiki.skinning.interface',
		'skins.vector.styles',
		'site.styles',
	]);
	// Styles from native extensions
	env.conf.wiki.extConfig.styles.forEach(function(mo) {
		modules.add(mo);
	});
	// Styles from modules returned from preprocessor / parse requests
	if (env.page.extensionModuleStyles) {
		env.page.extensionModuleStyles.forEach(function(mo) {
			modules.add(mo);
		});
	}
	// All modules are requested through a single '|'-separated load URL.
	var styleURI = env.getModulesLoadURI() +
		'?modules=' + encodeURIComponent(Array.from(modules).join('|')) + '&only=styles&skin=vector';
	appendToHead(document, 'link', { rel: 'stylesheet', href: styleURI });

	// Stick data attributes in the head
	if (env.pageBundle) {
		DOMDataUtils.injectPageBundle(document, DOMDataUtils.getPageBundle(document));
	}

	// html5shiv
	// The two <script> elements are never attached to the document; only
	// their serialized HTML is embedded in an IE conditional comment.
	var shiv = document.createElement('script');
	var src =  env.getModulesLoadURI() + '?modules=html5shiv&only=scripts&skin=vector&sync=1';
	shiv.setAttribute('src', src);
	var fi = document.createElement('script');
	fi.appendChild(document.createTextNode('html5.addElements(\'figure-inline\');'));
	var comment = document.createComment(
		'[if lt IE 9]>' + shiv.outerHTML + fi.outerHTML + '<![endif]'
	);
	document.head.appendChild(comment);

	// Page-level language/direction win over the wiki-level settings;
	// the final fallbacks are 'en' and 'ltr'.
	var lang = env.page.pagelanguage || env.conf.wiki.lang || 'en';
	var dir = env.page.pagelanguagedir || (env.conf.wiki.rtl ? "rtl" : "ltr");

	// Indicate whether LanguageConverter is enabled, so that downstream
	// caches can split on variant (if necessary)
	appendToHead(document, 'meta', {
		'http-equiv': 'content-language',
		'content': env.htmlContentLanguage(),
	});
	appendToHead(document, 'meta', {
		'http-equiv': 'vary',
		'content': env.htmlVary(),
	});

	// Indicate language & directionality on body
	document.body.setAttribute('lang', Util.bcp47(lang));
	document.body.classList.add('mw-content-' + dir);
	document.body.classList.add('sitedir-' + dir);
	document.body.classList.add(dir);
	document.body.setAttribute('dir', dir);

	// Set 'mw-body-content' directly on the body.
	// This is the designated successor for #bodyContent in core skins.
	document.body.classList.add('mw-body-content');
	// Set 'parsoid-body' to add the desired layout styling from Vector.
	document.body.classList.add('parsoid-body');
	// Also, add the 'mediawiki' class.
	// Some Mediawiki:Common.css seem to target this selector.
	document.body.classList.add('mediawiki');
	// Set 'mw-parser-output' directly on the body.
	// Templates target this class as part of the TemplateStyles RFC
	document.body.classList.add('mw-parser-output');
};

/**
 * Dump the DOM for one pass if the matching dump flag is set.
 *
 * The flag checked is `dom:<preOrPost>-<shortcut>`, e.g. 'dom:pre-dsr'.
 *
 * @param {boolean} atTopLevel Not used by this function.
 * @param {Object} psd Object whose optional `dumpFlags` is queried with `has()`.
 * @param {Node} body Node handed to ContentUtils.dumpDOM.
 * @param {string} shortcut Short name of the pass.
 * @param {Object} opts Options forwarded to ContentUtils.dumpDOM.
 * @param {string} preOrPost 'pre' or 'post'.
 */
function processDumpFlag(atTopLevel, psd, body, shortcut, opts, preOrPost) {
	const flags = psd.dumpFlags;
	if (!flags) {
		return;
	}
	const stage = preOrPost + '-' + shortcut;
	if (!flags.has('dom:' + stage)) {
		return;
	}
	ContentUtils.dumpDOM(body, 'DOM: ' + stage, opts);
}

/**
 * Run every configured pass over `document.body`, in order, and then emit
 * the document on the 'document' event.
 *
 * Passes flagged `skipNested` are skipped unless `this.atTopLevel` is set.
 * A pass may return a truthy value (a promise), which is waited on before
 * the next pass starts. If a pass throws or rejects, the error is logged at
 * the 'fatal' level and processing stops without emitting the document.
 *
 * For the top-level document, <head> metadata is added after the passes
 * and the optional time profile / resource usage reports are printed.
 *
 * @param {Document} document
 * @return {Promise}
 */
DOMPostProcessor.prototype.doPostProcess = Promise.async(function *(document) {
	const env = this.env;
	const psd = env.conf.parsoid;

	if (psd.dumpFlags && psd.dumpFlags.has("dom:post-builder")) {
		ContentUtils.dumpDOM(document.body, 'DOM: after tree builder');
	}

	const timingEnabled = psd.traceFlags &&
		(psd.traceFlags.has("time/dompp") || psd.traceFlags.has("time"));
	const dumpingEnabled = psd.dumpFlags;

	// Timing context; only populated when timing is enabled.
	let pipelineStart, prefix, logLevel, resourceCategory;
	if (timingEnabled) {
		// Nested documents are logged on a 'debug/...' channel and billed
		// to a separate resource category.
		const top = this.atTopLevel;
		prefix = top ? "TOP" : "---";
		logLevel = top ? "trace/time/dompp" : "debug/time/dompp";
		resourceCategory = top ? "DOMPasses:TOP" : "DOMPasses:NESTED";
		pipelineStart = JSUtils.startTime();
		env.log(logLevel, prefix + "; start=" + pipelineStart);
	}

	for (const pass of this.processors) {
		if (pass.skipNested && !this.atTopLevel) {
			continue;
		}
		try {
			let passStart, paddedName;
			if (timingEnabled) {
				// Right-pad the name to 30 columns so log lines align.
				paddedName = pass.name + ' '.repeat(Math.max(0, 30 - pass.name.length));
				passStart = JSUtils.startTime();
				env.log(logLevel, prefix + "; " + paddedName + " start");
			}

			let dumpOpts;
			if (dumpingEnabled) {
				dumpOpts = {
					env: env,
					dumpFragmentMap: this.atTopLevel,
					keepTmp: true,
				};
				processDumpFlag(this.atTopLevel, psd, document.body, pass.shortcut, dumpOpts, 'pre');
			}

			const pending = pass.proc(document.body, env, this.options, this.atTopLevel);
			if (pending) {
				// Processors can return a Promise iff they need to be async.
				yield pending;
			}

			if (dumpingEnabled) {
				processDumpFlag(this.atTopLevel, psd, document.body, pass.shortcut, dumpOpts, 'post');
			}
			if (timingEnabled) {
				const passElapsed = JSUtils.elapsedTime(passStart);
				env.log(logLevel, prefix + "; " + paddedName + " end; time = " + passElapsed.toFixed(5));
				env.bumpTimeUse(resourceCategory, passElapsed, 'DOM');
			}
		} catch (e) {
			env.log('fatal', e);
			return;
		}
	}

	if (timingEnabled) {
		const pipelineEnd = JSUtils.startTime();
		env.log(logLevel, prefix + "; end=" + pipelineEnd.toFixed(5) + "; time = " + JSUtils.elapsedTime(pipelineStart).toFixed(5));
	}

	// Sub-pipeline documents are done at this point; only the top-level
	// document gets a generated <head> and the optional reports.
	if (this.atTopLevel) {
		DOMPostProcessor.addMetaData(env, document);
		if (psd.traceFlags && psd.traceFlags.has('time')) {
			env.printTimeProfile();
		}
		if (psd.dumpFlags && psd.dumpFlags.has('wt2html:limits')) {
			env.printWt2HtmlResourceUsage({ 'HTML Size': document.outerHTML.length });
		}
	}

	this.emit('document', document);
});

/**
 * Subscribe to the 'document' event of `emitter` (normally the HTML5 tree
 * builder). Every document received is run through doPostProcess(), and
 * `.done()` is called on the promise it returns.
 *
 * @param {EventEmitter} emitter
 */
DOMPostProcessor.prototype.addListenersOn = function(emitter) {
	const onDocument = (document) => {
		const pending = this.doPostProcess(document);
		pending.done();
	};
	emitter.addListener('document', onDocument);
};

// Publish the constructor when a CommonJS `module` object is available.
if (typeof module === "object") {
	const exported = module.exports;
	exported.DOMPostProcessor = DOMPostProcessor;
}