/** @module */
'use strict';
const { DOMDataUtils } = require('../../../utils/DOMDataUtils.js');
const { DOMUtils } = require('../../../utils/DOMUtils.js');
const { JSUtils } = require('../../../utils/jsutils.js');
const { Util } = require('../../../utils/Util.js');
const { WTUtils } = require('../../../utils/WTUtils.js');
const { PegTokenizer } = require('../../tokenizer.js');
const { Sanitizer } = require('../../tt/Sanitizer.js');
/**
 * TableFixups class.
 *
 * Provides DOMTraverser visitors that implement the two parts of
 * https://phabricator.wikimedia.org/T52603 :
 * - stripDoubleTDs
 * - reparseTemplatedAttributes (invoked through handleTableCellTemplates).
 * @class
 */
class TableFixups {
	/**
	 * @param {Object} env Environment object; it is only handed to the tokenizer here.
	 */
	constructor(env) {
		/**
		 * Helper for reparseTemplatedAttributes.
		 *
		 * Actually the regular tokenizer, but we'll use
		 * tokenizeTableCellAttributes only.
		 */
		this.tokenizer = new PegTokenizer(env);
	}

	/**
	 * DOM visitor that strips the double td for this test case:
	 * ```
	 * |{{echo|{{!}} Foo}}
	 * ```
	 *
	 * @see https://phabricator.wikimedia.org/T52603
	 *
	 * @param {Node} node Candidate (essentially empty) cell.
	 * @param {Object} frame Passed to WTUtils.getWTSource to recover the source of `node`.
	 * @return {Node|boolean} The following cell when `node` was removed, `true` otherwise.
	 */
	stripDoubleTDs(node, frame) {
		const nextNode = node.nextSibling;
		if (!WTUtils.isLiteralHTMLNode(node) &&
			nextNode !== null &&
			nextNode.nodeName === 'TD' &&
			!WTUtils.isLiteralHTMLNode(nextNode) &&
			DOMUtils.nodeEssentiallyEmpty(node) &&
			(
				// FIXME: will not be set for nested templates
				DOMUtils.hasTypeOf(nextNode, 'mw:Transclusion') ||
				// Hacky work-around for nested templates
				/^{{.*?}}$/.test(DOMDataUtils.getDataParsoid(nextNode).src)
			)
		) {
			// Update the dsr. Since we are coalescing the first node with
			// the second (or, more precisely, deleting the first node), the
			// second node's DSR has to start where the first one started.
			// Only the start offset is rewritten here.
			const nodeDSR = DOMDataUtils.getDataParsoid(node).dsr;
			const nextNodeDSR = DOMDataUtils.getDataParsoid(nextNode).dsr;
			if (nodeDSR && nextNodeDSR) {
				nextNodeDSR[0] = nodeDSR[0];
			}

			// Prepend the source of the dropped cell to the data-mw parts
			// of the surviving cell.
			const dataMW = DOMDataUtils.getDataMw(nextNode);
			const nodeSrc = WTUtils.getWTSource(frame, node);
			if (!dataMW.parts) {
				dataMW.parts = [];
			}
			dataMW.parts.unshift(nodeSrc);

			// Delete the duplicated <td> node.
			node.parentNode.removeChild(node);

			// This node was deleted, so don't continue processing on it.
			return nextNode;
		}
		return true;
	}

	/**
	 * @param {Node} node
	 * @return {boolean} Whether `node` is a SPAN typed `mw:Transclusion`
	 *   whose children are all text or comment nodes.
	 */
	isSimpleTemplatedSpan(node) {
		return node.nodeName === 'SPAN' &&
			DOMDataUtils.hasTypeOf(node, 'mw:Transclusion') &&
			DOMUtils.allChildrenAreTextOrComments(node);
	}

	/**
	 * Move the transclusion encapsulation (typeof, about, data-mw, pi)
	 * from `child` up to the enclosing cell and strip it from the
	 * about-group starting at `child`.
	 *
	 * @param {Object} frame Its `srcText` supplies the wikitext surrounding the transclusion.
	 * @param {Element} child Transclusion wrapper inside the cell.
	 * @param {Element} tdNode Cell that becomes the new wrapper.
	 */
	hoistTransclusionInfo(frame, child, tdNode) {
		const aboutId = child.getAttribute('about') || '';

		// Hoist all transclusion information from the child
		// to the parent tdNode.
		tdNode.setAttribute('typeof', child.getAttribute('typeof'));
		tdNode.setAttribute('about', aboutId);
		const dataMW = DOMDataUtils.getDataMw(child);
		const parts = dataMW.parts;
		const dp = DOMDataUtils.getDataParsoid(tdNode);
		const childDP = DOMDataUtils.getDataParsoid(child);

		// In `handleTableCellTemplates`, we're creating a cell w/o dsr info.
		if (!Util.isValidDSR(dp.dsr)) {
			dp.dsr = Util.clone(childDP.dsr);
		}

		// Get the td and content source up to the transclusion start
		if (dp.dsr[0] < childDP.dsr[0]) {
			parts.unshift(frame.srcText.substring(dp.dsr[0], childDP.dsr[0]));
		}

		// Add wikitext for the table cell content following the
		// transclusion. This is safe as we are currently only
		// handling a single transclusion in the content, which is
		// guaranteed to have a dsr that covers the transclusion
		// itself.
		if (childDP.dsr[1] < dp.dsr[1]) {
			parts.push(frame.srcText.substring(childDP.dsr[1], dp.dsr[1]));
		}

		// Save the new data-mw on the tdNode
		DOMDataUtils.setDataMw(tdNode, { parts: parts });
		dp.pi = childDP.pi;
		DOMDataUtils.setDataMw(child, undefined);

		// tdNode wraps everything now.
		// Remove template encapsulation from here on.
		// This simplifies the problem of analyzing the <td>
		// for additional fixups (|| Boo || Baz) by potentially
		// invoking 'reparseTemplatedAttributes' on split cells
		// with some modifications.
		while (child) {
			if (child.nodeName === 'SPAN' && child.getAttribute('about') === aboutId) {
				// Remove the encapsulation attributes. If there are no more attributes left,
				// the span wrapper is useless and can be removed.
				child.removeAttribute('about');
				child.removeAttribute('typeof');
				if (DOMDataUtils.noAttrs(child)) {
					// Pick the continuation point before the span is unwrapped.
					const next = child.firstChild || child.nextSibling;
					DOMUtils.migrateChildren(child, tdNode, child);
					child.parentNode.removeChild(child);
					child = next;
				} else {
					child = child.nextSibling;
				}
			} else {
				child = child.nextSibling;
			}
		}
	}

	/**
	 * Collect potential attribute content.
	 *
	 * We expect this to be text nodes without a pipe character followed by one or
	 * more nowiki spans, followed by a template encapsulation with pure-text and
	 * nowiki content. Collection stops when encountering other nodes or a pipe
	 * character.
	 *
	 * @param {Object} env Used for logging the unsupported multi-transclusion case.
	 * @param {Node} node Cell whose children are scanned.
	 * @param {Node|null} templateWrapper Already known transclusion wrapper, if any.
	 * @return {Object} `{ txt, nowikis, transclusionNode }`. `txt` is the
	 *   collected text with every nowiki span replaced by a `<nowiki>` marker,
	 *   `nowikis` holds the replaced contents in order. When a second
	 *   transclusion is met, `txt` is empty and `transclusionNode` is null.
	 */
	collectAttributishContent(env, node, templateWrapper) {
		const buf = [];
		const nowikis = [];
		let transclusionNode = templateWrapper ||
			(DOMDataUtils.hasTypeOf(node, 'mw:Transclusion') ? node : null);

		// Build the result.
		const buildRes = () => ({
			txt: buf.join(''),
			nowikis: nowikis,
			transclusionNode: transclusionNode,
		});

		let child = node.firstChild;

		/*
		 * In this loop below, where we are trying to collect text content,
		 * it is safe to use child.textContent on elements since an element's
		 * textContent skips over comment children. See this transcript of a
		 * node session:
		 *
		 *   > d.body.childNodes[0].outerHTML
		 *   '<span><!--foo-->bar</span>'
		 *   > d.body.childNodes[0].textContent
		 *   'bar'
		 *
		 * PHP parser strips comments during parsing, i.e. they don't impact
		 * how other wikitext constructs are parsed. So, in this code below,
		 * we have to skip over comments.
		 */
		while (child) {
			if (DOMUtils.isComment(child)) {
				// <!--foo--> are not comments in CSS and PHP parser strips them
			} else if (DOMUtils.isText(child)) {
				buf.push(child.nodeValue);
			} else if (child.nodeName !== 'SPAN') {
				// The idea here is that style attributes can only
				// be text/comment nodes, and nowiki-spans at best.
				// So, if we hit anything else, there is nothing more
				// to do here!
				return buildRes();
			} else {
				const typeOf = child.getAttribute('typeof') || '';
				if (/^mw:Entity$/.test(typeOf)) {
					buf.push(child.textContent);
				} else if (/^mw:Nowiki$/.test(typeOf)) {
					// Nowiki span were added to protect otherwise
					// meaningful wikitext chars used in attributes.
					// Save the content.
					nowikis.push(child.textContent);
					// And add in a marker to splice out later.
					buf.push('<nowiki>');
				} else if (this.isSimpleTemplatedSpan(child)) {
					// And only handle a single nested transclusion for now.
					// TODO: Handle data-mw construction for multi-transclusion content
					// as well, then relax this restriction.
					//
					// If we already had a transclusion node, we return
					// without attempting to fix this up.
					if (transclusionNode) {
						env.log("error/dom/tdfixup", "Unhandled TD-fixup scenario.",
							"Encountered multiple transclusion children of a <td>");
						// Keep the full result shape so that callers never
						// have to cope with a missing `txt` / `nowikis`.
						return { txt: '', nowikis: [], transclusionNode: null };
					}
					// We encountered a transclusion wrapper
					buf.push(child.textContent);
					transclusionNode = child;
				} else if (transclusionNode && (!child.hasAttribute('typeof')) &&
					child.getAttribute('about') === transclusionNode.getAttribute('about') &&
					DOMUtils.allChildrenAreTextOrComments(child)) {
					// Continue accumulating only if we hit grouped template content
					buf.push(child.textContent);
				} else {
					return buildRes();
				}
			}

			// Are we done accumulating? A lone pipe (not part of `||`) in
			// the most recent chunk ends the attribute section.
			if (buf.length > 0 && /(?:^|[^|])\|(?:[^|]|$)/.test(JSUtils.lastItem(buf))) {
				return buildRes();
			}
			child = child.nextSibling;
		}
		return buildRes();
	}

	/**
	 * T46498, second part of T52603
	 *
	 * Handle wikitext like
	 * ```
	 * {|
	 * |{{nom|Bar}}
	 * |}
	 * ```
	 * where nom expands to `style="foo" class="bar"|Bar`. The attributes are
	 * tokenized and stripped from the table contents.
	 *
	 * This method works well for the templates documented in
	 * https://en.wikipedia.org/wiki/Template:Table_cell_templates/doc
	 *
	 * Nevertheless, there are some limitations:
	 * - We assume that attributes don't contain wiki markup (apart from <nowiki>)
	 *   and end up in text or nowiki nodes.
	 * - Only a single table cell is produced / opened by the template that
	 *   contains the attributes. This limitation could be lifted with more
	 *   aggressive re-parsing if really needed in practice.
	 * - There is only a single transclusion in the table cell content. This
	 *   limitation can be lifted with more advanced data-mw construction.
	 *
	 * @param {Object} frame Supplies `env` and is forwarded to hoistTransclusionInfo.
	 * @param {Element} node Cell to fix up.
	 * @param {Node|null} templateWrapper Transclusion wrapper when the cell itself is one.
	 */
	reparseTemplatedAttributes(frame, node, templateWrapper) {
		const env = frame.env;

		// Collect attribute content and examine it
		const attributishContent = this.collectAttributishContent(env, node, templateWrapper);

		// Check for the pipe character in the attributish text.
		if (!/^[^|]+\|([^|].*)?$/.test(attributishContent.txt)) {
			return;
		}

		// Try to re-parse the attributish text content
		let attributishPrefix = attributishContent.txt.match(/^[^|]+\|/)[0];

		// Splice in nowiki content. We added in <nowiki> markers to prevent the
		// above regexps from matching on nowiki-protected chars.
		if (/<nowiki>/.test(attributishPrefix)) {
			attributishPrefix = attributishPrefix.replace(/<nowiki>/g, function() {
				// This is a little tricky. We want to use the content from the
				// nowikis to reparse the string to key/val pairs but the rule,
				// single_cell_table_args, will invariably get tripped up on
				// newlines which, to this point, were shuttled through in the
				// nowiki. php's sanitizer will do this replace in attr vals so
				// it's probably a safe assumption ...
				return attributishContent.nowikis.shift().replace(/\s+/g, ' ');
			});
		}

		// re-parse the attributish prefix
		const attributeTokens = this.tokenizer
			.tokenizeTableCellAttributes(attributishPrefix, false);

		// No attributes => nothing more to do!
		if (attributeTokens instanceof Error) {
			return;
		}

		// Note that `row_syntax_table_args` (the rule used for tokenizing above)
		// returns an array consisting of [table_attributes, spaces, pipe]
		const attrs = attributeTokens[0];

		// Found attributes; sanitize them
		// and transfer the sanitized attributes to the td node
		Sanitizer.applySanitizedArgs(env, node, attrs);

		// If the transclusion node was embedded within the td node,
		// lift up the about group to the td node.
		const transclusionNode = attributishContent.transclusionNode;
		if (transclusionNode !== null && node !== transclusionNode) {
			this.hoistTransclusionInfo(frame, transclusionNode, node);
		}

		// Drop nodes that have been consumed by the reparsed attribute content.
		let n = node.firstChild;
		while (n) {
			// Comments and nowiki spans never contribute the terminating
			// pipe: collectAttributishContent skips comments and replaces
			// nowiki content by a marker. A comment node's textContent is
			// its own data, so testing it blindly would mistake
			// `<!--a|b-->` for the end of the attributes.
			// Nowiki spans are only recognised while they still carry
			// their typeof; a span stripped by hoistTransclusionInfo is not.
			const isFiller = DOMUtils.isComment(n) ||
				(n.nodeName === 'SPAN' &&
					/^mw:Nowiki$/.test(n.getAttribute('typeof') || ''));
			if (!isFiller && /[|]/.test(n.textContent)) {
				// Remove the consumed prefix from the text node
				const nValue = n.nodeName === '#text' ? n.nodeValue : n.textContent;
				// and convert it into a simple text node
				node.replaceChild(node.ownerDocument.createTextNode(nValue.replace(/^[^|]*[|]/, '')), n);
				break;
			} else {
				const next = n.nextSibling;
				// content was consumed by attributes, so just drop it from the cell
				node.removeChild(n);
				n = next;
			}
		}
	}

	/**
	 * Cheap pre-check for handleTableCellTemplates.
	 *
	 * @param {Element} node Table cell.
	 * @return {boolean} True when a text child, or a SPAN child for which
	 *   WTUtils.hasParsoidAboutId holds, contains `|` (or also `!` when the
	 *   cell is not a TD).
	 */
	needsReparsing(node) {
		const testRE = node.nodeName === 'TD' ? /[|]/ : /[!|]/;
		let child = node.firstChild;
		while (child) {
			if (DOMUtils.isText(child) && testRE.test(child.textContent)) {
				return true;
			} else if (child.nodeName === 'SPAN') {
				if (WTUtils.hasParsoidAboutId(child) && testRE.test(child.textContent)) {
					return true;
				}
			}
			child = child.nextSibling;
		}
		return false;
	}

	/**
	 * DOM visitor for table cells: reparses templated attributes when the
	 * cell had none of its own, then splits the cell at the first `||`
	 * (or `!!` for non-TD cells) found in a text child or simple
	 * templated span.
	 *
	 * @param {Element} node Table cell.
	 * @param {Object} frame Forwarded to the reparsing / hoisting helpers.
	 * @return {boolean} Always `true`.
	 */
	handleTableCellTemplates(node, frame) {
		// Don't bother with literal HTML nodes or nodes that don't need reparsing.
		if (WTUtils.isLiteralHTMLNode(node) || !this.needsReparsing(node)) {
			return true;
		}

		// If the cell didn't have attrs, extract and reparse templated attrs
		const dp = DOMDataUtils.getDataParsoid(node);
		const hasAttrs = !(dp.tmp && dp.tmp.noAttrs);
		if (!hasAttrs) {
			const templateWrapper = DOMUtils.hasTypeOf(node, 'mw:Transclusion') ?
				node : null;
			this.reparseTemplatedAttributes(frame, node, templateWrapper);
		}

		// Now, examine the <td> to see if it hides additional <td>s
		// and split it up if required.
		//
		// DOMTraverser will process the new cell and invoke
		// handleTableCellTemplates on it which ensures that
		// if any addition attribute fixup or splits are required,
		// they will get done.
		let newCell;
		const ownerDoc = node.ownerDocument;
		let child = node.firstChild;
		while (child) {
			const next = child.nextSibling;
			if (newCell) {
				// Everything after the split point belongs to the new cell.
				newCell.appendChild(child);
			} else if (DOMUtils.isText(child) || this.isSimpleTemplatedSpan(child)) {
				const cellName = node.nodeName.toLowerCase();
				const hasSpanWrapper = !DOMUtils.isText(child);
				let match;
				if (cellName === 'td') {
					match = child.textContent.match(/^(.*?[^|])?\|\|([^|].*)?$/);
				} else { /* cellName === 'th' */
					// Find the first match of || or !!
					const match1 = child.textContent.match(/^(.*?[^|])?\|\|([^|].*)?$/);
					const match2 = child.textContent.match(/^(.*?[^!])?\!\!([^!].*)?$/);
					if (match1 && match2) {
						match = (match1[1] || '').length < (match2[1] || '').length ? match1 : match2;
					} else {
						match = match1 || match2;
					}
				}

				if (match) {
					child.textContent = match[1] || '';
					newCell = ownerDoc.createElement(cellName);

					let about;
					if (hasSpanWrapper) {
						// Fix up transclusion wrapping
						about = child.getAttribute('about');
						this.hoistTransclusionInfo(frame, child, node);
					} else {
						// Fetch the about attribute only now since
						// 'reparseTemplatedAttributes' might have added one.
						about = node.getAttribute('about');
					}

					// about may not be present if the cell was inside
					// wrapped template content rather than being part
					// of the outermost wrapper.
					if (about) {
						newCell.setAttribute('about', about);
					}
					newCell.appendChild(ownerDoc.createTextNode(match[2] || ''));
					node.parentNode.insertBefore(newCell, node.nextSibling);

					// Set data-parsoid noAttrs flag
					const newCellDP = DOMDataUtils.getDataParsoid(newCell);
					newCellDP.tmp.noAttrs = true;
				}
			}
			child = next;
		}
		return true;
	}
}
// CommonJS export. The typeof guard keeps the file loadable in
// environments where no `module` binding exists.
if (typeof module === "object") {
	Object.assign(module.exports, { TableFixups });
}