// Source: config/MWParserEnvironment.js (JSDoc source listing)

/**
 * Main parser environment object.  Holds configuration data that isn't
 * modified at runtime, debugging objects, a page object that represents
 * the article we're parsing, and more.
 *
 * @module
 */

'use strict';

require('../../core-upgrade.js');

var semver = require('semver');
var Title = require('mediawiki-title').Title;
var Promise = require('../utils/promise.js');
var WikiConfig = require('./WikiConfig.js').WikiConfig;
var ConfigRequest = require('../mw/ApiRequest.js').ConfigRequest;
var Batcher = require('../mw/Batcher.js').Batcher;
var ContentUtils = require('../utils/ContentUtils.js').ContentUtils;
var DOMUtils = require('../utils/DOMUtils.js').DOMUtils;
var DOMDataUtils = require('../utils/DOMDataUtils.js').DOMDataUtils;
var Util = require('../utils/Util.js').Util;
var JSUtils = require('../utils/jsutils.js').JSUtils;
var TokenUtils = require('../utils/TokenUtils.js').TokenUtils;
var PipelineUtils = require('../utils/PipelineUtils.js').PipelineUtils;
var ParserPipelineFactory = require('../wt2html/parser.js').ParserPipelineFactory;
var LintLogger = require('../logger/LintLogger.js').LintLogger;
var ParsoidLogger = require('../logger/ParsoidLogger.js').ParsoidLogger;
var Sanitizer = require('../wt2html/tt/Sanitizer.js').Sanitizer;

const { Frame } = require('../wt2html/Frame.js');

/**
 * Describes the article currently being processed: its title, language,
 * revision metadata and related properties.
 *
 * @class
 */
function Page() {
	this.reset();
}

/**
 * The "true" url-decoded title, i.e. free of any url-encoding that
 * would be needed if the title were referenced in wikitext.
 *
 * @property {string} name
 */
Page.prototype.name = '';

/**
 * Any leading ..?/ strings that are required when building links.
 *
 * @property {string} relativeLinkPrefix
 */
Page.prototype.relativeLinkPrefix = '';

/**
 * The page's ID (not to be confused with `meta.revision.revid`).
 * At present, it's only used in diff marking.
 *
 * @property {Number} id
 */
Page.prototype.id = -1;

/**
 * Page source.  Starts out as null so that "no source yet" can be told
 * apart from the empty string.
 *
 * @property {string|null} src
 */
Page.prototype.src = null;

/**
 * @property {Node|null} dom
 */
Page.prototype.dom = null;

/**
 * Namespace id of the page.
 *
 * @property {Number} ns
 */
Page.prototype.ns = 0;

/**
 * A full Title object.
 * @property {Object|null} title
 */
Page.prototype.title = null;

/**
 * The page language code, in mediawiki format.
 * Use `DOMUtils.BCP47()` to turn this into a proper BCP47 code
 * suitable for inclusion in HTML5.
 * @property {string|null} pagelanguage
 */
Page.prototype.pagelanguage = null;

/**
 * The page directionality.  Either `ltr` or `rtl`.
 * @property {string|null} pagelanguagedir
 */
Page.prototype.pagelanguagedir = null;

/**
 * Replace the metadata with an empty revision record and clear the
 * HTML variant.
 */
Page.prototype.reset = function() {
	this.meta = { revision: {} };
	this.setVariant(null);
};

/**
 * @return {string} The revision's content model; 'wikitext' when the
 *   revision metadata does not name one.
 */
Page.prototype.getContentModel = function() {
	var declared = this.meta.revision.contentmodel;
	return declared ? declared : 'wikitext';
};

/**
 * Does this page's content model have content that is lintable?
 *
 * @return {boolean} True for wikitext, or for models that use wikitext
 *   for their content blobs.
 */
Page.prototype.hasLintableContentModel = function() {
	switch (this.getContentModel()) {
		case 'wikitext':
		case 'proofread-page':
			return true;
		default:
			return false;
	}
};

/**
 * Record the variant code in `htmlVariant`.
 *
 * @param {string|null} code
 */
Page.prototype.setVariant = function(code) {
	this.htmlVariant = code;
};

/**
 * Holds configuration data that isn't modified at runtime, debugging objects,
 * a page object that represents the page we're parsing, and more.
 * The title of the page is held in `this.page.name` and is stored
 * as a "true" url-decoded title, ie without any url-encoding which
 * might be necessary if the title were referenced in wikitext.
 *
 * Should probably be constructed with {@link MWParserEnvironment.getParserEnv},
 * which also loads the real wiki configuration; the constructor alone only
 * installs a temporary `WikiConfig` built without a result config.
 *
 * @class
 * @param {ParsoidConfig} parsoidConfig
 * @param {Object} [options] Copied verbatim onto the new environment.
 * @param {string} [options.prefix] Wiki prefix; asserted to be present in
 *   `parsoidConfig.mwApiMap`.
 */
var MWParserEnvironment = function(parsoidConfig, options) {
	options = options || {};

	// page information
	this.page = new Page();
	this.topFrame = new Frame(null, this, [], '<uninitialized frame>');
	// XXX create `this.currentFrame` from TokenTransformManager#frame
	// once we've removed all async parsing.

	// Every option becomes an own property of the environment.  This runs
	// before the assignments below, so an option named like one of them
	// (e.g. `conf`, `counts`) is overwritten.
	Object.assign(this, options);

	// Record time spent in various passes
	this.timeProfile = {};
	this.ioProfile = {};
	this.mwProfile = {};
	this.timeCategories = {};
	this.counts = {};

	// execution state
	this.setCaches({});

	// Configuration
	this.conf = {
		parsoid: parsoidConfig,
		wiki: null,
	};

	// FIXME: This is temporary and will be replaced after the call to
	// `switchToConfig`.  However, it may somehow be used in the
	// `ConfigRequest` along the way. Perhaps worth seeing if that can be
	// eliminated so `WikiConfig` can't be instantiated without a `resultConf`.
	console.assert(parsoidConfig.mwApiMap.has(options.prefix));
	this.conf.wiki = new WikiConfig(parsoidConfig, null, options.prefix);

	// Needs `this.conf.parsoid` (set above) and honours `this.logLevels`
	// if it was supplied through `options`.
	this.configureLogging();

	// FIXME: Continuing with the line above, we can't initialize a specific
	// page until we have the correct wiki config, since things like
	// namespace aliases may not be same as the baseconfig, which has an
	// effect on what are considered valid titles.  At present, the only
	// consequence should be that failed config requests would all be
	// attributed to the mainpage, which doesn't seem so bad.
	this.initializeForPageName(this.conf.wiki.mainpage);

	this.pipelineFactory = new ParserPipelineFactory(this);

	// Outstanding page requests (for templates etc)
	this.requestQueue = {};

	this.batcher = new Batcher(this);

	// Copies the configured limits into `this.limits`.
	this.setResourceLimits();

	// Fragments have had `storeDataAttribs` called on them
	this.fragmentMap = new Map();
	// Counter used by `setFragment` to build 'mwf<N>' ids.
	this.fid = 1;
};

/**
 * Attach a data bag to a document by delegating to
 * `DOMDataUtils.setDocBag`.
 *
 * NOTE: Here's the spot to stuff references to $doc in the PHP port.
 *
 * @param {Document} doc
 * @param {Object} [bag]
 */
MWParserEnvironment.prototype.referenceDataObject = function(doc, bag) {
	DOMDataUtils.setDocBag(doc, bag);
};

/**
 * Parse an HTML string into a document and register it with this
 * environment through `referenceDataObject` (no bag is passed).
 *
 * @param {string} html
 * @return {Document}
 */
MWParserEnvironment.prototype.createDocument = function(html) {
	const parsed = DOMUtils.parseHTML(html);
	this.referenceDataObject(parsed);
	return parsed;
};

/**
 * Store `nodes` in the fragment map under a freshly generated id.
 *
 * @param {*} nodes
 * @return {string} The new id: 'mwf' followed by the current value of
 *   the `fid` counter, which is then incremented.
 */
MWParserEnvironment.prototype.setFragment = function(nodes) {
	const fragmentId = `mwf${this.fid++}`;
	this.fragmentMap.set(fragmentId, nodes);
	return fragmentId;
};

/**
 * Initialise `this.limits` (the parser's remaining resource budget) from
 * the limits configured in `this.conf.parsoid.limits`.
 */
MWParserEnvironment.prototype.setResourceLimits = function() {
	const configured = this.conf.parsoid.limits;
	const wt2html = configured.wt2html;
	const html2wt = configured.html2wt;

	// The resource keys conveniently follow the
	// ('max' + capitalize(key) + 's') naming of the configured limits,
	// with 'wikitextSize' as the one exception.  It's unclear whether
	// that convention will hold in the future, but
	// printWt2HtmlResourceUsage relies on it.
	this.limits = {
		wt2html: {
			token: wt2html.maxTokens,
			listItem: wt2html.maxListItems,
			tableCell: wt2html.maxTableCells,
			transclusion: wt2html.maxTransclusions,
			image: wt2html.maxImages,
			wikitextSize: wt2html.maxWikitextSize,
		},
		html2wt: {
			htmlSize: html2wt.maxHTMLSize,
		},
	};
};

/**
 * Add `time` to the `resource` entry of `profile` and, when a category
 * is given, to that category's running total in `this.timeCategories`.
 * Entries that don't exist yet start from zero.
 *
 * @param {Object} profile
 * @param {string} resource
 * @param {number} time
 * @param {string} [cat]
 */
MWParserEnvironment.prototype.bumpProfileTimeUse = function(profile, resource, time, cat) {
	profile[resource] = (profile[resource] || 0) + time;

	if (!cat) {
		return;
	}
	var categories = this.timeCategories;
	categories[cat] = (categories[cat] || 0) + time;
};

/**
 * Record `time` against `resource` in `this.timeProfile`.
 *
 * @param {string} resource
 * @param {number} time
 * @param {string} [cat]
 */
MWParserEnvironment.prototype.bumpTimeUse = function(resource, time, cat) {
	var profile = this.timeProfile;
	this.bumpProfileTimeUse(profile, resource, time, cat);
};

/**
 * Record `time` against `resource` in `this.mwProfile`.
 *
 * @param {string} resource
 * @param {number} time
 * @param {string} [cat]
 */
MWParserEnvironment.prototype.bumpMWTime = function(resource, time, cat) {
	var profile = this.mwProfile;
	this.bumpProfileTimeUse(profile, resource, time, cat);
};

/**
 * Record `time` against `resource` in `this.ioProfile`.
 *
 * @param {string} resource
 * @param {number} time
 * @param {string} [cat]
 */
MWParserEnvironment.prototype.bumpIOTime = function(resource, time, cat) {
	var profile = this.ioProfile;
	this.bumpProfileTimeUse(profile, resource, time, cat);
};

/**
 * Increase the counter for `resource` in `this.counts`.
 *
 * @param {string} resource
 * @param {number} [n] Increment; any falsy value (including 0) counts
 *   as 1.
 */
MWParserEnvironment.prototype.bumpCount = function(resource, n) {
	var step = n || 1; // DEFAULT
	this.counts[resource] = (this.counts[resource] || 0) + step;
};

/**
 * Format one `key: value` line of a profile report.
 *
 * The key is right-aligned in 40 columns and the value in 14.  Integral
 * numbers are printed as-is, other numbers with five decimals.
 *
 * A missing value (`undefined` or `null`, e.g. a counter that was never
 * bumped) is reported as 0 rather than throwing; any other non-numeric
 * value is printed through `String()`.
 *
 * @param {string} k Label.
 * @param {number} [v] Value.
 * @param {string} [comment] Appended in parentheses when non-empty.
 * @return {string}
 */
function formatLine(k, v, comment) {
	var value = (v === undefined || v === null) ? 0 : v;
	var text;
	if (typeof value !== 'number') {
		text = String(value);
	} else if (value === Math.round(value)) {
		text = JSON.stringify(value);
	} else {
		text = value.toFixed(5);
	}
	return k.padStart(40) + ': ' + text.padStart(14) + (comment ? ' (' + comment + ')' : '');
}

/**
 * Render a profile (a map from name to accumulated value) as report
 * lines, largest value first.
 *
 * When `this.counts` holds a non-zero count for a profile key, the count
 * and the per-instance value are appended to that line.
 *
 * @param {Object} profile
 * @param {Object} [options]
 * @param {boolean} [options.printPercentage] Annotate every line with
 *   its share of the total, rounded to one decimal.
 * @return {Object} `{ buf, total }`: the joined lines and the sum of all
 *   values.
 * @private
 */
MWParserEnvironment.prototype._formatProfile = function(profile, options) {
	var opts = options || {};

	var total = 0;
	var entries = [];
	for (var name in profile) {
		var amount = profile[name];
		total += amount;
		entries.push([name, amount]);
	}

	// Descending by value
	entries.sort((left, right) => right[1] - left[1]);

	var lines = entries.map((entry) => {
		var key = entry[0];
		var value = entry[1];
		var lineComment = '';
		if (opts.printPercentage) {
			lineComment = Math.round(value * 1000 / total) / 10 + '%';
		}
		var line = formatLine(key, value, lineComment);
		if (this.counts[key]) {
			line += '; count: ' + JSON.stringify(this.counts[key]).padStart(6);
			line += '; per-instance: ' +
				(value / this.counts[key]).toFixed(5).padEnd(10);
		}
		return line;
	});

	return { buf: lines.join('\n'), total: total };
};

/**
 * Print the recorded timing report (component times, I/O times, API
 * request counters and per-category totals) with `console.warn`.
 *
 * Counters in `this.counts` only exist once they have been bumped, so
 * the request counters default to 0 here.  Without that, a run with no
 * API requests would hand `undefined` to `formatLine`.
 */
MWParserEnvironment.prototype.printTimeProfile = function() {
	var endTime = JSUtils.startTime();
	var mwOut = this._formatProfile(this.mwProfile);
	var ioOut = this._formatProfile(this.ioProfile);
	var cpuOut = this._formatProfile(this.timeProfile);
	this.log('trace/time', 'Finished parse at ', endTime);

	// Never-bumped counters are absent, not zero.
	var ioRequests = this.counts["io.requests"] || 0;
	var batches = this.counts.batches || 0;
	var batchRequests = this.counts["batch.requests"] || 0;

	var outLines = [];
	outLines.push("-".repeat(85));
	outLines.push("Recorded times (in ms) for various parse components");
	outLines.push("");
	outLines.push(cpuOut.buf);
	outLines.push("-".repeat(85));
	outLines.push(ioOut.buf);
	outLines.push("");
	outLines.push(formatLine('Total API requests', ioRequests));
	if (batches) {
		outLines.push(formatLine('# non-batched API requests', ioRequests - batches));
		outLines.push(formatLine('# batches', batches));
		outLines.push(formatLine('# API requests in batches', batchRequests));
	}
	outLines.push("-".repeat(85));
	outLines.push(formatLine('TOTAL PARSE TIME (1)', endTime - this.startTime));
	outLines.push(formatLine('TOTAL PARSOID CPU TIME (2)', cpuOut.total));
	outLines.push(formatLine('Un/over-accounted parse time: (1) - (2)', endTime - this.startTime - cpuOut.total));
	outLines.push("");
	var catOut = this._formatProfile(this.timeCategories, { printPercentage: true });
	outLines.push(catOut.buf);
	outLines.push("");
	outLines.push(formatLine('TOTAL M/W API (I/O, CPU, QUEUE) TIME', ioOut.total, 'Total time across concurrent MW API requests'));
	if (mwOut.total > 0) {
		outLines.push(formatLine('TOTAL M/W CPU TIME', mwOut.total, 'Total CPU time across concurrent MW API requests'));
	}
	outLines.push("-".repeat(85));

	console.warn(outLines.join("\n"));
};

/**
 * Error thrown when a request exceeds a configured resource limit.
 *
 * Instances carry `httpStatus` 413 and `suppressLoggingStack` set to
 * true.
 *
 * The prototype is a fresh object inheriting from `Error.prototype`.
 * Assigning `Error.prototype` itself would make every `Error` an
 * `instanceof PayloadTooLargeError`.
 *
 * @class
 * @extends Error
 * @param {string} [message] Overrides the default explanation.
 */
function PayloadTooLargeError(message) {
	this.name = "PayloadTooLargeError";
	this.message = message ||
		"Refusing to process the request because the payload is " +
		"larger than the server is willing or able to handle.";
	this.httpStatus = 413;
	this.suppressLoggingStack = true;
	// Captured last so that name and message are already in place when
	// the trace is recorded.
	Error.captureStackTrace(this, PayloadTooLargeError);
}
PayloadTooLargeError.prototype = Object.create(Error.prototype, {
	constructor: {
		value: PayloadTooLargeError,
		writable: true,
		configurable: true,
	},
});

/**
 * Consume part of the remaining wt2html budget for `resource`.
 *
 * @param {string} resource A key of `this.limits.wt2html`.
 * @param {number} [count] Amount to consume; a falsy value counts as 1.
 * @throws {PayloadTooLargeError} If the budget would drop below zero;
 *   the stored budget is left untouched in that case.
 */
MWParserEnvironment.prototype.bumpWt2HtmlResourceUse = function(resource, count) {
	var budget = this.limits.wt2html;
	var remaining = budget[resource] - (count || 1);
	if (remaining < 0) {
		throw new PayloadTooLargeError(
			'wt2html: Exceeded max resource use: ' + resource + '. Aborting!');
	}
	budget[resource] = remaining;
};

/**
 * Consume part of the remaining html2wt budget for `resource`.
 *
 * @param {string} resource A key of `this.limits.html2wt`.
 * @param {number} [count] Amount to consume; a falsy value counts as 1.
 * @throws {PayloadTooLargeError} If the budget would drop below zero;
 *   the stored budget is left untouched in that case.
 */
MWParserEnvironment.prototype.bumpHtml2WtResourceUse = function(resource, count) {
	var budget = this.limits.html2wt;
	var remaining = budget[resource] - (count || 1);
	if (remaining < 0) {
		throw new PayloadTooLargeError(
			'html2wt: Exceeded max resource use: ' + resource + '. Aborting!');
	}
	budget[resource] = remaining;
};

/**
 * Print, via `console.warn`, how much of each wt2html resource limit has
 * been used, followed by any additional caller-supplied figures.
 *
 * @param {Object} [otherResources] Extra `name: value` pairs to print
 *   after the limits.
 */
MWParserEnvironment.prototype.printWt2HtmlResourceUsage = function(otherResources) {
	console.warn('-------------------- Used resources -------------------');
	var k, limit;
	for (k in this.limits.wt2html) {
		if (k === 'wikitextSize') {
			limit = this.conf.parsoid.limits.wt2html.maxWikitextSize;
			// `page.src` is null until a source has been set; report
			// that as size 0 rather than throwing.
			var srcLength = (this.page.src || '').length;
			console.warn('wikitextSize'.padStart(30) + ': ' +
				srcLength + ' / ' + limit);
		} else {
			// Configured limits are named 'max' + Capitalized(key) + 's'.
			var maxK = 'max' + k[0].toUpperCase() + k.slice(1) + 's';
			limit = this.conf.parsoid.limits.wt2html[maxK];
			// `this.limits` holds the remaining budget, so used = limit - remaining.
			console.warn(('# ' + k + 's').padStart(30) + ': ' +
				(limit - this.limits.wt2html[k]) + " / " + limit);
		}
	}
	for (k in otherResources) {
		console.warn(k.padStart(30) + ': ' + otherResources[k]);
	}
	console.warn('-'.repeat(55));
};

/**
 * Install `logger` and expose `this.log` as a shorthand that forwards
 * all of its arguments to the current `this.logger.log`.
 *
 * @param {Object} logger
 */
MWParserEnvironment.prototype.setLogger = function(logger) {
	this.logger = logger;
	this.log = (...logArgs) => this.logger.log(...logArgs);
};

/**
 * Create the lint logger and the main logger, register the logging
 * backends and install the main logger on this environment.
 *
 * The enabled levels come from `this.logLevels` when set; otherwise
 * fatal, error, warn and info are used.
 */
MWParserEnvironment.prototype.configureLogging = function() {
	this.lintLogger = new LintLogger(this);
	var parsoidLogger = new ParsoidLogger(this);

	var defaultLevels = [
		"fatal", "error", "warn", "info",
	];
	var enabledLevels = this.logLevels || defaultLevels;

	parsoidLogger.registerLoggingBackends(enabledLevels, this.conf.parsoid, this.lintLogger);
	this.setLogger(parsoidLogger);
};

/**
 * Replace the wiki configuration cache with an empty one.  The cache
 * lives on the prototype, so it is shared between environments (and
 * therefore between requests).
 *
 * @static
 */
MWParserEnvironment.resetConfCache = function() {
	var proto = MWParserEnvironment.prototype;
	proto.confCache = {};
};
// Start out with an empty cache.
MWParserEnvironment.resetConfCache();

/**
 * Install the expansion caches, using an empty object for any cache
 * that isn't supplied.
 *
 * @param {Object} caches
 * @param {Object} [caches.pages]
 * @param {Object} [caches.transclusions] Keyed by full transclusion source.
 * @param {Object} [caches.extensions] Keyed by full extension source
 *   (including tags).
 * @param {Object} [caches.media] Keyed by full image source.
 */
MWParserEnvironment.prototype.setCaches = function(caches) {
	const { pages, transclusions, extensions, media } = caches;

	// TODO gwicke: probably not that useful any more as this is per-request
	// and the PHP preprocessor eliminates template source hits
	this.pageCache = pages || {};

	// Transclusion expansions (templates, parser functions etc)
	this.transclusionCache = transclusions || {};

	// Extension tag expansions
	this.extensionCache = extensions || {};

	// Image expansions
	this.mediaCache = media || {};
};

/**
 * See if we can reuse transclusion or extension expansions: extract the
 * expansions found in previously generated HTML and install them as
 * this environment's caches.
 *
 * @param {Object} obj
 * @param {string} obj.html
 * @param {Object} obj.updates Update modes.  Only the keys
 *   `transclusions` and `media` are accepted; a truthy value means that
 *   kind of expansion needs updating and so must not be reused.
 * @throws {Error} On any other key in `obj.updates`.
 */
MWParserEnvironment.prototype.cacheReusableExpansions = function(obj) {
	const body = ContentUtils.ppToDOM(this, obj.html);
	const reusable = PipelineUtils.extractExpansions(this, body);
	const updates = Object.assign({}, obj.updates);

	for (const mode of Object.keys(updates)) {
		if (mode !== 'transclusions' && mode !== 'media') {
			throw new Error('Received an unexpected update mode.');
		}
		if (updates[mode]) {
			// Needs updating => drop whatever was extracted.
			reusable[mode] = {};
		}
	}

	this.setCaches(reusable);
};

/**
 * Set the src and optionally meta information for the page we're parsing.
 *
 * Given a plain string (or any falsy value), the page metadata is reset
 * and only `this.page.src` is set.  Otherwise, the metadata object
 * should have fields corresponding to the JSON output given by
 * `action=query&prop=revisions` on the MW API.  That is:
 * ```
 *     metadata = {
 *       title: // normalized title (ie, spaces not underscores)
 *       ns:    // namespace
 *       id:    // page id
 *       revision: {
 *         revid:    // revision id
 *         parentid: // revision parent
 *         timestamp:
 *         user:     // contributor username
 *         userid:   // contributor user id
 *         sha1:
 *         size:     // in bytes
 *         comment:
 *         slots: {
 *           main: {
 *             contentmodel:
 *             contentformat:
 *             "*":      // actual source text --> copied to this.page.src
 *           }
 *         }
 *       }
 *     }
 * ```
 * In both cases the top frame's `srcText` is updated to the new source.
 *
 * @param {string|Object} srcOrMetadata page source or metadata
 */
MWParserEnvironment.prototype.setPageSrcInfo = function(srcOrMetadata) {
	const page = this.page;

	if (!srcOrMetadata || typeof srcOrMetadata === 'string') {
		page.reset();
		page.src = srcOrMetadata || '';
		this.topFrame.srcText = page.src;
		return;
	}

	// page.meta is filled in field by field (rather than by cloning the
	// provided object) in part to document/enforce the expected
	// structure and fields.
	const metadata = srcOrMetadata;
	if (!page.meta) {
		page.meta = {};
	}
	const meta = page.meta;
	meta.title = metadata.title;
	if (!meta.revision) {
		meta.revision = {};
	}
	const rev = meta.revision;

	const srcRevision = metadata.revision;
	const content = Util.getStar(srcRevision);
	if (srcRevision) {
		rev.revid = srcRevision.revid;
		rev.parentid = srcRevision.parentid;
		rev.timestamp = srcRevision.timestamp;
		rev.sha1 = srcRevision.sha1;
		rev.size = srcRevision.size;
		rev.contentmodel = content && content.contentmodel;
		rev.contentformat = content && content.contentformat;
	}

	// Remaining page-level properties
	page.id = metadata.id || -1;
	page.ns = metadata.ns;
	page.latest = metadata.latest;
	page.pagelanguage = metadata.pagelanguage;
	page.pagelanguagedir = metadata.pagelanguagedir;
	page.src = (content && content['*']) || '';
	page.setVariant(null);

	this.topFrame.srcText = page.src;
};

/**
 * Initialize the environment for the page: derive the title, namespace,
 * relative link prefix and title URI from the page name.
 *
 * @param {string} pageName
 *   The "true" url-decoded pagename, i.e. without any url-encoding.
 * @param {boolean} [dontReset] When truthy, the UID counter is left
 *   alone; otherwise it is reinitialized.
 */
MWParserEnvironment.prototype.initializeForPageName = function(pageName, dontReset) {
	const page = this.page;

	// Title creation consults the current page's namespace, so clear the
	// previous page's namespace first.
	page.ns = 0;
	const title = this.makeTitleFromURLDecodedStr(pageName);
	page.ns = title.getNamespace()._id;
	page.title = title;
	page.name = pageName;
	this.topFrame.title = title;

	// Always prefix a ./ so that we don't have to escape colons. Those
	// would otherwise fool browsers into treating namespaces (like File:)
	// as protocols.
	page.relativeLinkPrefix = "./";

	// makeLink reads the relative link prefix, so this has to come
	// after the assignment above.
	page.titleURI = this.makeLink(title);

	if (dontReset) {
		return;
	}
	this.initUID();
};

/**
 * Read a property of the environment by name.
 *
 * XXX what was the original author's intention?  Possibly something
 * like `return this.options[varname];`.
 *
 * @param {string} varname
 * @param {Object} [options] Unused.
 * @return {*}
 */
MWParserEnvironment.prototype.getVariable = function(varname, options) {
	var value = this[varname];
	return value;
};

/**
 * Set a property of the environment by name.
 *
 * @param {string} varname
 * @param {*} value
 * @param {Object} [options] Unused.
 */
MWParserEnvironment.prototype.setVariable = function(varname, value, options) {
	var env = this;
	env[varname] = value;
};

/**
 * Alternate constructor for MWParserEnvironments: builds the
 * environment, loads the wiki configuration for its prefix and then
 * initializes it for the requested page.
 *
 * Note that when `options.domain` is given, `options.prefix` is
 * overwritten on the passed-in object.
 *
 * @method
 * @param {ParsoidConfig} parsoidConfig
 * @param {Object} [options] Environment options.
 * @param {string} [options.domain] Takes precedence over `prefix`.
 * @param {string} [options.prefix]
 * @param {string} [options.pageName] the true url-decoded title of the
 *   page; defaults to the wiki's main page.
 * @return {Promise<MWParserEnvironment>} The finished environment object
 * @static
 */
MWParserEnvironment.getParserEnv = Promise.async(function *(parsoidConfig, options) {
	options = options || {};

	// Domain takes precedence over prefix; this call also allows for dynamic
	// configuration.
	if (options.domain) {
		options.prefix = parsoidConfig.getPrefixFor(options.domain);
	}

	const hasApi = Boolean(options.prefix) && parsoidConfig.mwApiMap.has(options.prefix);
	if (!hasApi) {
		throw new Error('No API URI available for prefix: ' + options.prefix + '; domain: ' + options.domain);
	}

	const env = new MWParserEnvironment(parsoidConfig, options);
	// Get that wiki's config
	yield env.switchToConfig(options.prefix, false);

	// With the real config in place the page has to be initialized
	// again since, for example, not all wikis share the same namespace
	// aliases as enwiki.
	const pageName = options.pageName || env.conf.wiki.mainpage;
	env.initializeForPageName(pageName, true);
	return env;
}, 2);

/**
 * Switch this environment to the configuration of another wiki.
 * Each config is stored in the shared cache once loaded, so it only
 * needs to be fetched once.
 *
 * The config is taken from the cache when allowed, fetched through a
 * `ConfigRequest` when `parsoidConfig.fetchConfig` is set, and otherwise
 * read from the `baseconfig/<prefix>.json` file.
 *
 * @method
 * @param {string} prefix The interwiki prefix that corresponds to the wiki we should use
 * @param {boolean} noCache Don't use cached configs; mainly for testing.
 * @return {Promise}
 */
MWParserEnvironment.prototype.switchToConfig = Promise.async(function *(prefix, noCache) {
	const self = this;
	const parsoidConfig = self.conf.parsoid;

	if (!prefix || !parsoidConfig.mwApiMap.has(prefix)) {
		throw new Error('No API URI available for prefix: ' + prefix);
	}

	if (!noCache && self.confCache[prefix]) {
		self.conf.wiki = self.confCache[prefix];
		return; // done!
	}

	let resultConf;
	if (parsoidConfig.fetchConfig) {
		resultConf = yield ConfigRequest.promise(self);
	} else {
		// Load the config from cached config on disk
		const localConfigFile = '../../baseconfig/' + prefix + '.json';
		const localConfig = require(localConfigFile);
		if (!localConfig || !localConfig.query) {
			throw new Error(
				'Could not read valid config from file: ' + localConfigFile
			);
		}
		resultConf = localConfig.query;
	}

	self.conf.wiki = new WikiConfig(parsoidConfig, resultConf, prefix);
	self.confCache[prefix] = self.conf.wiki;
	if (parsoidConfig.fetchConfig) {
		yield self.conf.wiki.detectFeatures(self);
	}
});

/**
 * TODO: Handle namespaces relative links like [[User:../../]] correctly, they
 * shouldn't be treated like links at all.
 *
 * Resolves strings that are page fragments or subpage references
 * against the current page name, so that title-handling code elsewhere
 * only has to deal with non-relative title strings.
 *
 * @param {string} urlDecodedStr
 * @param {boolean} [resolveOnly] When truthy, subpage references are not
 *   normalized and a leading ':' is not stripped.
 * @return {string} The resolved title key; the untouched input when a
 *   '../' reference climbs above the top of the current page name.
 */
MWParserEnvironment.prototype.resolveTitle = function(urlDecodedStr, resolveOnly) {
	const original = urlDecodedStr;
	let target = urlDecodedStr.trim();

	if (/^#/.test(target)) {
		// A lonely fragment refers to the current page (important if
		// this.page is a subpage, otherwise the relative link will be
		// wrong).
		target = this.page.name + target;
	}

	// Returned as-is unless a subpage reference gets normalized below.
	let titleKey = target;

	if (this.conf.wiki.namespacesWithSubpages[this.page.ns]) {
		const upMatch = target.match(/^(\.\.\/)+/);
		let needsNormalizing = false;

		if (upMatch) {
			const upPrefix = upMatch[0];
			const levels = upPrefix.length / 3;  // One level per '../'.
			const nameParts = this.page.name.split(/\//);
			if (nameParts.length <= levels) {
				// Too many levels -- invalid relative link
				return original;
			}
			const resolvedParts = nameParts.slice(0, nameParts.length - levels);
			if (target !== upPrefix) {
				resolvedParts.push(target.substr(levels * 3));
			}
			target = resolvedParts.join('/');
			needsNormalizing = true;
		} else if (target.length && target[0] === '/') {
			// Subpage link relative to the current page
			target = this.page.name + target;
			needsNormalizing = true;
		}

		if (needsNormalizing && !resolveOnly) {
			// Remove final slashes if present.
			// See https://gerrit.wikimedia.org/r/173431
			target = target.replace(/\/+$/, '');
			titleKey = this.normalizedTitleKey(target);
		}
	}

	// Strip leading ':'
	if (titleKey[0] === ':' && !resolveOnly) {
		titleKey = titleKey.substr(1);
	}
	return titleKey;
};

/**
 * Serialize a title as its prefixed DB key, followed by '#' and the
 * fragment when one exists and isn't being ignored.
 *
 * @param {Object} title
 * @param {boolean} [ignoreFragment]
 * @return {string}
 * @private
 */
MWParserEnvironment.prototype._titleToString = function(title, ignoreFragment) {
	var suffix = '';
	if (!ignoreFragment) {
		var fragment = title.getFragment() || '';
		if (fragment) {
			suffix = '#' + fragment;
		}
	}
	return title.getPrefixedDBKey() + suffix;
};

/**
 * Get normalized title key for a title string.
 *
 * @param {string} [urlDecodedStr] Should be in url-decoded format.
 * @param {boolean} [noExceptions] Return null instead of throwing exceptions.
 * @param {boolean} [ignoreFragment] Ignore the fragment, if any.
 * @return {string|null} Normalized title key for a title string (or null for invalid titles).
 */
MWParserEnvironment.prototype.normalizedTitleKey = function(urlDecodedStr, noExceptions, ignoreFragment) {
	var parsedTitle = this.makeTitleFromURLDecodedStr(urlDecodedStr, undefined, noExceptions);
	return parsedTitle ? this._titleToString(parsedTitle, ignoreFragment) : null;
};

/**
 * @return {string} The current page's title serialized with
 *   `_titleToString` (fragment included).
 */
MWParserEnvironment.prototype.normalizeAndResolvePageTitle = function() {
	var pageTitle = this.page.title;
	return this._titleToString(pageTitle);
};

/**
 * Build a Title from text that is already in url-decoded form.
 *
 * Text starting with '#', '/' or '../' is relative to the current page,
 * so the current page's namespace is used as the default namespace.
 *
 * @param {string} urlDecodedText
 * @param {number} [defaultNS]
 * @param {boolean} [noExceptions] Return null instead of rethrowing.
 * @return {Object|null}
 * @private
 */
MWParserEnvironment.prototype._makeTitle = function(urlDecodedText, defaultNS, noExceptions) {
	try {
		var isRelative = /^(\#|\/|\.\.\/)/.test(urlDecodedText);
		if (this.page && isRelative) {
			defaultNS = this.page.ns;
		}
		var resolved = this.resolveTitle(urlDecodedText);
		return Title.newFromText(resolved, this.conf.wiki.siteInfo, defaultNS);
	} catch (e) {
		if (!noExceptions) {
			throw e;
		}
		return null;
	}
};

/**
 * Build a Title from text that might have url-encoded entities; the
 * text is url-decoded first.
 * See: Title::newFromURL in mediawiki.
 *
 * @param {string} str
 * @param {number} [defaultNS]
 * @param {boolean} [noExceptions]
 * @return {Object|null}
 */
MWParserEnvironment.prototype.makeTitleFromText = function(str, defaultNS, noExceptions) {
	var decoded = Util.decodeURIComponent(str);
	return this._makeTitle(decoded, defaultNS, noExceptions);
};

/**
 * Build a Title from text that is already url-decoded.
 * See: Title::newFromText in mediawiki.
 *
 * @param {string} str
 * @param {number} [defaultNS]
 * @param {boolean} [noExceptions]
 * @return {Object|null}
 */
MWParserEnvironment.prototype.makeTitleFromURLDecodedStr = function(str, defaultNS, noExceptions) {
	var title = this._makeTitle(str, defaultNS, noExceptions);
	return title;
};

/**
 * Build the sanitized link URI for a title: the page's relative link
 * prefix followed by the serialized title.
 *
 * @param {Object} title
 * @return {string}
 */
MWParserEnvironment.prototype.makeLink = function(title) {
	var rawLink = this.page.relativeLinkPrefix + this._titleToString(title);
	return Sanitizer.sanitizeTitleURI(rawLink, false);
};

/**
 * Check whether `href` resolves to a valid title.
 *
 * @param {Array|string} href Tokens or string making up the target.
 * @return {boolean}
 */
MWParserEnvironment.prototype.isValidLinkTarget = function(href) {
	// Percent-encoding is decoded first so that bad page title
	// characters can be detected reliably.
	var decodedHref = Util.decodeURIComponent(TokenUtils.tokensToString(href));
	var resolved = this.resolveTitle(decodedHref, true);
	var key = this.normalizedTitleKey(resolved, true);
	return key !== null;
};

/**
 * Restart the UID counter at 1.
 */
MWParserEnvironment.prototype.initUID = function() {
	var firstUid = 1;
	this.uid = firstUid;
};

/**
 * Generate a UID: hands out the current counter value and advances the
 * counter by one.
 *
 * @return {number}
 * @private
 */
MWParserEnvironment.prototype.generateUID = function() {
	var issued = this.uid++;
	return issued;
};

/**
 * @return {string} A new object id: 'mwt' followed by a fresh UID.
 */
MWParserEnvironment.prototype.newObjectId = function() {
	return `mwt${this.generateUID()}`;
};

/**
 * @return {string} A new about id: '#' followed by a new object id.
 */
MWParserEnvironment.prototype.newAboutId = function() {
	return `#${this.newObjectId()}`;
};

/**
 * A passed-in cookie, if any
 *
 * @property {string|null} cookie
 */
MWParserEnvironment.prototype.cookie = null;

/**
 * A passed-in request id, if any
 *
 * @property {string|null} reqId
 */
MWParserEnvironment.prototype.reqId = null;

/**
 * A passed-in user agent, if any
 *
 * @property {string|null} userAgent
 */
MWParserEnvironment.prototype.userAgent = null;

/**
 * Apply extra normalizations before serializing DOM.
 *
 * @property {boolean} scrubWikitext
 */
MWParserEnvironment.prototype.scrubWikitext = false;

/**
 * Sets ids on nodes and stores data-* attributes in a JSON blob
 *
 * @property {boolean} pageBundle
 */
MWParserEnvironment.prototype.pageBundle = false;

/**
 * @property {string} wikitextVersion
 */
MWParserEnvironment.prototype.wikitextVersion = '1.0.0';

/**
 * The content versions Parsoid knows how to produce.
 * Ordered by desirability.
 *
 * @property {Array} availableVersions
 */
MWParserEnvironment.prototype.availableVersions = ['2.1.0', '999.0.0'];

/**
 * The default content version that Parsoid will generate.
 * Initialized to the first (most desirable) available version.
 *
 * @property {string} outputContentVersion
 */
MWParserEnvironment.prototype.outputContentVersion = MWParserEnvironment.prototype.availableVersions[0];

/**
 * The default content version that Parsoid assumes it's serializing or updating
 * in the pb2pb endpoints.
 * Initialized to the first (most desirable) available version.
 *
 * @property {string} inputContentVersion
 */
MWParserEnvironment.prototype.inputContentVersion = MWParserEnvironment.prototype.availableVersions[0];

/**
 * Whether Parsoid should add HTML section wrappers around logical sections.
 * Defaults to true.
 *
 * @property {boolean} wrapSections
 */
MWParserEnvironment.prototype.wrapSections = true;

/**
 * If non-null, the language variant used for Parsoid HTML; we convert
 * to this if wt2html, or from this (if html2wt).
 *
 * @property {string|null} htmlVariantLanguage
 */
MWParserEnvironment.prototype.htmlVariantLanguage = null;

/**
 * If non-null, the language variant to be used for wikitext.  If null,
 * heuristics will be used to identify the original wikitext variant
 * in wt2html mode, and in html2wt mode new or edited HTML will be left
 * unconverted.
 *
 * @property {string|null} wtVariantLanguage
 */
MWParserEnvironment.prototype.wtVariantLanguage = null;

/**
 * See if any content version Parsoid knows how to produce satisfies
 * the supplied version, when interpreted with semver caret semantics.
 * This will allow us to make backwards compatible changes, without the need
 * for clients to bump the version in their headers all the time.
 *
 * Versions below 1.6.0 are never matched.
 *
 * @param {string} v
 * @return {string|null} The first (most desirable) matching available
 *   version, or null if there is none.
 */
MWParserEnvironment.prototype.resolveContentVersion = function(v) {
	for (const candidate of this.availableVersions) {
		if (!semver.satisfies(candidate, '^' + v)) {
			continue;
		}
		// The section wrapping in 1.6.x should have induced a major
		// version bump, since it requires upgrading clients to
		// handle it.  We therefore hardcode this in so that we can
		// fail hard.
		if (semver.gte(v, '1.6.0')) {
			return candidate;
		}
	}
	return null;
};

/**
 * Select the content version to generate.
 *
 * @param {string} v Must be one of `this.availableVersions`.
 * @throws {Error} If `v` is not an available version.
 */
MWParserEnvironment.prototype.setOutputContentVersion = function(v) {
	var isAvailable = this.availableVersions.includes(v);
	if (!isAvailable) {
		throw new Error('Not an available content version.');
	}
	this.outputContentVersion = v;
};

/**
 * @return {string} The wiki's server with everything before its first
 *   '//' dropped (making it protocol-relative), followed by the wiki's
 *   script path, if any.
 */
MWParserEnvironment.prototype.scriptPath = function() {
	var wiki = this.conf.wiki;
	var protocolRelativeServer = wiki.server.replace(/^[^\/]*\/\//, '//');
	return protocolRelativeServer + (wiki.scriptpath || '');
};

/**
 * @return {string} The configured `modulesLoadURI`, or
 *   `<scriptPath>/load.php` when none is configured.  The legacy value
 *   `true` is logged as unsupported and treated as unconfigured.
 */
MWParserEnvironment.prototype.getModulesLoadURI = function() {
	var configured = this.conf.parsoid.modulesLoadURI;
	if (configured === true) {
		this.log('warn',
			'Setting `modulesLoadURI` to `true` is no longer supported.');
		configured = undefined;
	}
	return configured === undefined ?
		this.scriptPath() + '/load.php' :
		configured;
};

/**
 * Add every entry of `src` to the Set stored at `this.page[property]`,
 * creating the Set when necessary.  Anything other than a non-empty
 * array is ignored.
 *
 * @param {Array} src
 * @param {string} property
 */
MWParserEnvironment.prototype.setPageProperty = function(src, property) {
	console.assert(this.page);
	if (!Array.isArray(src) || src.length === 0) {
		return;
	}

	// This info comes back from the MW API when extension tags are parsed.
	// A page can have multiple extension tags, so this can run several
	// times and find the set already initialized.
	if (!this.page[property]) {
		this.page[property] = new Set();
	}
	src.forEach((entry) => {
		this.page[property].add(entry);
	});
};

/**
 * Content model whitelist
 *
 * Content models listed here fall back to the wikitext handler in
 * `getContentHandler` without an "Unknown contentmodel" warning being
 * logged (when they have no registered handler of their own).
 * @private
 */
var whitelist = new Set([
	'css',
	'javascript',
	'wikibase-item',
	'wikibase-lexeme',
	'wikibase-property',
	'proofread-page',
	'proofread-index',
	'Scribunto',
	'flow-board',
]);

/**
 * Get an appropriate content handler, given a contentmodel.
 *
 * Content models without a registered handler fall back to the
 * 'wikitext' handler; a warning is logged unless the model is on the
 * whitelist.
 *
 * @param {string} [forceContentModel] An optional content model
 *   which will override whatever the source specifies.
 * @return {Object} An appropriate content handler with `toHTML` and `fromHTML`
 *   methods.
 */
MWParserEnvironment.prototype.getContentHandler = function(forceContentModel) {
	var model = forceContentModel || this.page.getContentModel();
	var handlers = this.conf.wiki.extConfig.contentModels;
	if (handlers.has(model)) {
		return handlers.get(model);
	}
	if (!whitelist.has(model)) {
		this.log('warn', 'Unknown contentmodel', model);
	}
	return handlers.get('wikitext');
};

/**
 * Determine if LanguageConverter markup should be parsed on this page,
 * based on the wiki configuration and the current page language.
 * The language used is the page language, else the wiki language, else
 * 'en'.
 *
 * @return {boolean}
 */
MWParserEnvironment.prototype.langConverterEnabled = function() {
	var wiki = this.conf.wiki;
	var effectiveLang = this.page.pagelanguage || wiki.lang || 'en';
	return wiki.langConverterEnabled.has(effectiveLang);
};

/**
 * Determine an appropriate content-language for the HTML form of this page.
 *
 * @return {string} The HTML variant, else the page language, else the
 *   wiki language, else 'en'.
 */
MWParserEnvironment.prototype.htmlContentLanguage = function() {
	var page = this.page;
	// page.htmlVariant is set iff we do variant conversion on the HTML
	if (page.htmlVariant) {
		return page.htmlVariant;
	}
	return page.pagelanguage || this.conf.wiki.lang || 'en';
};

/**
 * Determine appropriate vary headers for the HTML form of this page.
 *
 * @return {string} Comma-separated, sorted header names: always
 *   'Accept', plus 'Accept-Language' when the language converter is
 *   enabled for this page.
 */
MWParserEnvironment.prototype.htmlVary = function() {
	// 'Accept': varies on Content-Type
	const headers = this.langConverterEnabled() ?
		[ 'Accept', 'Accept-Language' ] :
		[ 'Accept' ];
	return headers.sort().join(', ');
};


// Export the environment class when a CommonJS-style `module` object is
// available.
if (typeof module === "object") {
	module.exports.MWParserEnvironment = MWParserEnvironment;
}