Source: wt2html/tt/ExternalLinkHandler.js

'use strict';

const { PegTokenizer } = require('../tokenizer.js');
const { Sanitizer } = require('./Sanitizer.js');
const { PipelineUtils } = require('../../utils/PipelineUtils.js');
const { TokenUtils } = require('../../utils/TokenUtils.js');
const { Util } = require('../../utils/Util.js');
const TokenHandler = require('./TokenHandler.js');
const { JSUtils } = require('../../utils/jsutils.js');
const { KV, TagTk, SelfclosingTagTk, EndTagTk } = require('../../tokens/TokenTypes.js');
const { WikiLinkHandler } = require('./WikiLinkHandler.js');

// Local alias for the JSUtils helper used throughout this file.
const { lastItem } = JSUtils;

/**
 * Token handler for external links. It registers transforms for
 * `urllink` tokens (see {@link ExternalLinkHandler#onUrlLink}) and
 * `extlink` tokens (see {@link ExternalLinkHandler#onExtLink}) and rewrites
 * them into `<a>` / `<img>` token sequences.
 *
 * @class
 * @extends module:wt2html/tt/TokenHandler
 */
class ExternalLinkHandler extends TokenHandler {
	/**
	 * @param {Object} manager Transform manager. This class uses
	 *   `manager.addTransform(...)` and `manager.env`.
	 * @param {Object} options Handler options, forwarded to the base class.
	 *   The link handlers read `this.options.inTemplate`.
	 */
	constructor(manager, options) {
		super(manager, options);
		this.manager.addTransform(
			(token, cb) => this.onUrlLink(token, cb),
			'ExternalLinkHandler:onUrlLink',
			ExternalLinkHandler.rank(), 'tag', 'urllink');
		// NOTE(review): extlink is registered at a marginally lower rank
		// than urllink -- presumably to order it ahead of other handlers
		// at the same rank; confirm against the transform manager.
		this.manager.addTransform(
			(token, cb) => this.onExtLink(token, cb),
			'ExternalLinkHandler:onExtLink',
			ExternalLinkHandler.rank() - 0.001, 'tag', 'extlink');
		this.manager.addTransform(
			(token, cb) => this.onEnd(token, cb),
			'ExternalLinkHandler:onEnd',
			ExternalLinkHandler.rank(), 'end');

		// Create the URL tokenizer once and share it between all instances
		// through the prototype.
		if (!this.urlParser) {
			// Actually the regular tokenizer, but we'll call it with the
			// url rule only.  Use manager.env, the same accessor every other
			// method of this class uses, rather than relying on the base
			// class exposing `this.env`.
			ExternalLinkHandler.prototype.urlParser = new PegTokenizer(this.manager.env);
		}

		this._reset();
	}

	/**
	 * @return {number} Rank at which this handler's transforms are registered.
	 */
	static rank() { return 1.15; }

	/**
	 * Restore per-document state. Called from the constructor and from
	 * {@link ExternalLinkHandler#onEnd}.
	 */
	_reset() {
		this.linkCount = 1;
	}

	/**
	 * @param {string} str File extension without the dot (compared
	 *   case-sensitively).
	 * @return {boolean} Whether `str` is one of `jpg`, `png`, `gif`, `svg`.
	 */
	static _imageExtensions(str) {
		switch (str) {
			case 'jpg': // fall through
			case 'png': // fall through
			case 'gif': // fall through
			case 'svg':
				return true;
			default:
				return false;
		}
	}

	/**
	 * Decide whether `href` should be rendered as an inline external image.
	 *
	 * That is the case when the wiki config provides a list of allowed
	 * prefixes, `href` is an http(s) URL whose text after the last `.` is a
	 * known image extension, and `href` starts with one of the allowed
	 * prefixes.
	 *
	 * @param {string} href
	 * @return {boolean}
	 */
	_hasImageLink(href) {
		// Typical settings for mediawiki configuration variables
		// $wgAllowExternalImages and $wgAllowExternalImagesFrom will
		// result in values like these:
		//  allowedPrefixes = undefined; // no external images
		//  allowedPrefixes = [''];      // allow all external images
		//  allowedPrefixes = ['http://127.0.0.1/', 'http://example.com'];
		// Note that the values include the http:// or https:// protocol.
		// See https://phabricator.wikimedia.org/T53092
		const allowedPrefixes = this.manager.env.conf.wiki.allowExternalImages;
		if (!Array.isArray(allowedPrefixes)) {
			return false;
		}
		if (!/^https?:\/\//i.test(href)) {
			return false;
		}
		const bits = href.split('.');
		if (bits.length < 2 || !ExternalLinkHandler._imageExtensions(lastItem(bits))) {
			return false;
		}
		// true iff some prefix in the list matches the start of href
		return allowedPrefixes.some(prefix => href.startsWith(prefix));
	}

	/**
	 * Handle a `urllink` token.
	 *
	 * Emits either a self-closing `img` token (when the href qualifies as an
	 * external image) or an `a` start tag, the cleaned URL text and an `a`
	 * end tag.
	 *
	 * @param {Object} token The `urllink` token.
	 * @param {Function} cb Receives `{ tokens: [...] }`.
	 */
	onUrlLink(token, cb) {
		const env = this.manager.env;
		const href = TokenUtils.tokensToString(token.getAttribute('href'));
		const dataAttribs = Util.clone(token.dataAttribs);

		if (this._hasImageLink(href)) {
			const imgAttrs = [
				new KV('src', href),
				new KV('alt', lastItem(href.split('/'))),
				new KV('rel', 'mw:externalImage'),
			];
			// combine with existing rdfa attrs
			const mergedImgAttrs =
				WikiLinkHandler.buildLinkAttrs(token.attribs, false, null, imgAttrs).attribs;
			cb({ tokens: [ new SelfclosingTagTk('img', mergedImgAttrs, dataAttribs) ] });
			return;
		}

		// href is set explicitly below; combine rel with existing rdfa attrs
		const linkAttrs = WikiLinkHandler.buildLinkAttrs(
			token.attribs, false, null, [ new KV('rel', 'mw:ExtLink') ]
		).attribs;
		const builtTag = new TagTk('a', linkAttrs, dataAttribs);
		dataAttribs.stx = 'url';

		if (!this.options.inTemplate) {
			// Since we messed with the text of the link, we need
			// to preserve the original in the RT data. Or else.
			builtTag.addNormalizedAttribute('href', href, token.getWTSource(env));
		} else {
			builtTag.addAttribute('href', href);
		}

		// The end tag gets a zero-width tsr located at the end of the link.
		const endOffset = dataAttribs.tsr[1];
		cb({
			tokens: [
				builtTag,
				// Make sure there are no IDN-ignored characters in the text so
				// the user doesn't accidentally copy any.
				Sanitizer.cleanUrl(env, href),
				new EndTagTk('a', [], { tsr: [endOffset, endOffset] }),
			],
		});
	}

	/**
	 * Handle an `extlink` token (bracketed external link).
	 *
	 * Three outcomes are possible:
	 *  - the token's `typeof` marks it as an ISBN/RFC/PMID magic link: emit
	 *    an `a` element around the content (ISBN links get
	 *    `rel="mw:WikiLink"` and a page-relative href);
	 *  - the href is a plain string without expanded attrs, or tokenizes as
	 *    a URL: emit an `a` element with `rel="mw:ExtLink"` whose content is
	 *    wrapped in a DOM fragment token;
	 *  - otherwise: bail out and emit the tokens returned by
	 *    `WikiLinkHandler.bailTokens`.
	 *
	 * @param {Object} token The `extlink` token.
	 * @param {Function} cb Receives `{ tokens: [...] }`.
	 */
	onExtLink(token, cb) {
		const env = this.manager.env;
		const origHref = token.getAttribute('href');
		const rdfaType = token.getAttribute('typeof');
		const hasExpandedAttrs = /mw:ExpandedAttrs/.test(rdfaType);
		const href = TokenUtils.tokensToString(origHref);
		const hrefWithEntities = TokenUtils.tokensToString(origHref, false, {
			includeEntities: true,
		});
		let content = token.getAttribute('mw:content');
		const dataAttribs = Util.clone(token.dataAttribs);
		const magLinkRe = /(?:^|\s)(mw:(?:Ext|Wiki)Link\/(?:ISBN|RFC|PMID))(?=$|\s)/;

		if (rdfaType && magLinkRe.test(rdfaType)) {
			// ISBNs use mw:WikiLink instead of mw:ExtLink
			const isISBN = /(?:^|\s)mw:(Ext|Wiki)Link\/ISBN/.test(rdfaType);
			const magicAttrs = [
				new KV('href', isISBN ? env.page.relativeLinkPrefix + href : href),
				new KV('rel', isISBN ? 'mw:WikiLink' : 'mw:ExtLink'),
			];
			token.removeAttribute('typeof');

			// SSS FIXME: Right now, Parsoid does not support templating
			// of ISBN attributes.  So, "ISBN {{echo|1234567890}}" will not
			// parse as you might expect it to.  As a result, this code below
			// that attempts to combine rdf attrs from earlier is unnecessary
			// right now.  But, it will become necessary if Parsoid starts
			// supporting templating of ISBN attributes.
			//
			// combine with existing rdfa attrs
			const mergedMagicAttrs =
				WikiLinkHandler.buildLinkAttrs(token.attribs, false, null, magicAttrs).attribs;
			const magicStart = new TagTk('a', mergedMagicAttrs, dataAttribs);
			cb({
				tokens: [magicStart].concat(content, [new EndTagTk('a')]),
			});
			return;
		}

		const isLink = (!hasExpandedAttrs && typeof origHref === 'string') ||
			this.urlParser.tokenizesAsURL(hrefWithEntities);
		if (!isLink) {
			// Not a link, convert href to plain text.
			cb({ tokens: WikiLinkHandler.bailTokens(env, token, true) });
			return;
		}

		// Link text that is itself a single allowed image URL is rendered
		// as an image inside the link.
		if (
			content.length === 1 &&
			typeof content[0] === 'string' &&
			env.conf.wiki.hasValidProtocol(content[0]) &&
			this.urlParser.tokenizesAsURL(content[0]) &&
			this._hasImageLink(content[0])
		) {
			const src = content[0];
			content = [
				new SelfclosingTagTk('img', [
					new KV('src', src),
					new KV('alt', lastItem(src.split('/'))),
				], { type: 'extlink' }),
			];
		}

		// href is set explicitly below; combine rel with existing rdfa attrs
		const linkAttrs = WikiLinkHandler.buildLinkAttrs(
			token.attribs, false, null, [ new KV('rel', 'mw:ExtLink') ]
		).attribs;
		const aStart = new TagTk('a', linkAttrs, dataAttribs);

		if (!this.options.inTemplate) {
			// If we are from a top-level page, add normalized attr info for
			// accurate roundtripping of original content.
			//
			// targetOff covers all spaces before content
			// and we need src without those spaces.
			const srcStart = dataAttribs.tsr[0] + 1;
			const srcEnd = dataAttribs.targetOff - (token.getAttribute('spaces') || '').length;
			aStart.addNormalizedAttribute('href', href, env.page.src.substring(srcStart, srcEnd));
		} else {
			aStart.addAttribute('href', href);
		}

		content = PipelineUtils.getDOMFragmentToken(
			content,
			dataAttribs.tsr ? dataAttribs.contentOffsets : null,
			{ inlineContext: true, token: token }
		);

		cb({
			tokens: [aStart].concat(content, [new EndTagTk('a')]),
		});
	}

	/**
	 * Handle the `end` transform: reset per-document state and pass the
	 * token through unchanged.
	 *
	 * @param {Object} token
	 * @param {Function} cb Receives `{ tokens: [ token ] }`.
	 */
	onEnd(token, cb) {
		this._reset();
		cb({ tokens: [ token ] });
	}
}

// Export the handler only when a CommonJS-style `module` object is present.
if (typeof module === 'object') {
	module.exports.ExternalLinkHandler = ExternalLinkHandler;
}