Parsoid
A bidirectional parser between wikitext and HTML5
Parsoid Namespace Reference

Namespaces

 Html2Wt
 Serializes language variant markup, like `-{ ... }-`.
 
 Utils
 This file contains general utilities for: (a) querying token properties and token types (b) manipulating tokens, individually and as collections.
 
 Wt2Html
 Front-end/Wrapper for a particular tree builder, in this case the parser/tree builder from RemexHtml.
 

Classes

class  ClientError
 Exception thrown on invalid client requests. More...
 
class  ConstantLanguageGuesser
 A simple LanguageGuesser that returns the same "source language" for every node. More...
 
class  ContentModelHandler
 
class  ConversionTraverser
 
class  CrhConverter
 
class  DataParsoid
 Parsoid data for a DOM node. More...
 
class  EnConverter
 
class  Gallery
 Native Parsoid implementation of the Gallery extension. More...
 
class  InternalException
 Parsoid internal error that we don't know how to recover from. More...
 
class  KuConverter
 
class  Language
 
class  LanguageConverter
 Base class for language variant conversion. More...
 
class  LanguageCrh
 
class  LanguageEn
 
class  LanguageGuesser
 An oracle that gives you a predicted "source language" for every node in a DOM, which is used when converting the result back to the source language during round-tripping. More...
 
class  LanguageKu
 
class  LanguageSr
 
class  LanguageZh
 
class  MachineLanguageGuesser
 Use a ReplacementMachine to predict the best "source language" for every node in a DOM. More...
 
class  NoLines
 
class  Opts
 
class  Packed
 
class  PackedHover
 
class  PackedOverlay
 
class  PageBundle
 PORT-FIXME: This is just a placeholder for data that was previously passed to entrypoint in JavaScript. More...
 
class  Parsoid
 
class  ResourceLimitExceededException
 Parsoid resource limit exception. More...
 
class  Selser
 PORT-FIXME: This is just a placeholder for data that was previously passed to entrypoint in JavaScript. More...
 
class  Slideshow
 
class  SrConverter
 
class  Traditional
 
class  WikitextContentModelHandler
 
class  ZhConverter
 
class  ZhReplacementMachine
 

Functions

 docFragToString ( $docFrag, $force)
 
 LocationData ( $wiki, $title, $meta, $reqId, $userAgent)
 
 ParsoidLogData ( $logType, $logObject, $locationData)
 
 ParsoidLogger ( $env)
 
 ParserFunctions ( $env)
 
 buildAsyncOutputBufferCB ( $cb)
 

Variables

 $ContentUtils = require '../utils/ContentUtils.js'::ContentUtils
 
 $DOMDataUtils = require '../utils/DOMDataUtils.js'::DOMDataUtils
 
 $DOMUtils = require '../utils/DOMUtils.js'::DOMUtils
 
 $Promise = require '../utils/promise.js'
 
 $Util = require '../utils/Util.js'::Util
 
 $PegTokenizer = require '../wt2html/tokenizer.js'::PegTokenizer
 
 $PHPParseRequest = require '../mw/ApiRequest.js'::PHPParseRequest
 
 $apiUtils = $module->exports = []
 module:api/apiUtils
 
$apiUtils relativeRedirect
 Send a redirect response with optional code and a relative URL. More...
 
$apiUtils setHeader
 Set header, but only if response hasn't been sent. More...
 
$apiUtils htmlResponse
 Send an html response, but only if response hasn't been sent. More...
 
$apiUtils plainResponse
 Send a plaintext response, but only if response hasn't been sent. More...
 
$apiUtils jsonResponse
 Send a JSON response, but only if response hasn't been sent. More...
 
$apiUtils renderResponse
 Render response, but only if response hasn't been sent. More...
 
$apiUtils errorResponse
 Error response. More...
 
$apiUtils errorHandler
 Generic error response handler. More...
 
$apiUtils errorWrapper
 Wrap a promised value with a catch that invokes errorHandler. More...
 
$apiUtils substTopLevelTemplates
 To support the 'subst' API parameter, we need to prefix each top-level template with 'subst'. More...
 
$apiUtils wikitextContentType
 Return the appropriate content-type string for wikitext. More...
 
$apiUtils htmlContentType
 Return the appropriate content-type string for Parsoid HTML. More...
 
$apiUtils pagebundleContentType
 Return the appropriate content-type string for a Parsoid page bundle. More...
 
$apiUtils dataParsoidContentType
 Return the appropriate content-type string for a data-parsoid JSON blob. More...
 
$apiUtils dataMwContentType
 Return the appropriate content-type string for a data-mw JSON blob. More...
 
$apiUtils extractPageBundle
 Extracts a pagebundle from a revision. More...
 
$apiUtils validatePageBundle
 Validates the pagebundle was provided in the expected format. More...
 
$apiUtils fatalRequest
 Log a fatal/request. More...
 
$apiUtils versionFromType
 Determine the content version from the html's content type. More...
 
 $oldSpec = '/^mediawiki.org\/specs\/(html)\/(\d+\.\d+\.\d+)$/'
 
 $newSpec = '/^https:\/\/www.mediawiki.org\/wiki\/Specs\/(HTML|pagebundle)\/(\d+\.\d+\.\d+)$/'
 
$apiUtils parseProfile
 Used to extract the format and content version from a profile. More...
 
$apiUtils validateAndSetOutputContentVersion
 Set the content version to an acceptable version. More...
 
$apiUtils redirectToOldid
 Generate an HTTP redirect to a specific revision. More...
 
 $downgrade999to2
 Downgrade content from 999.x to 2.x. More...
 
$apiUtils findDowngrade
 Is this a transition we know how to handle? More...
 
$apiUtils doDowngrade
 Downgrade content. More...
 
$apiUtils returnDowngrade
 Downgrade and return content. More...
 
$apiUtils wt2htmlRes
 Send an appropriate response with the right content types for wt2html. More...
 
$apiUtils shouldScrub
 
 $Diff = require '../utils/Diff.js'::Diff
 
 $JSUtils = require '../utils/jsutils.js'::JSUtils
 
 $TemplateRequest = require '../mw/ApiRequest.js'::TemplateRequest
 
 $roundTripDiff
 
 $rtResponse
 
$module exports
 
 $ParsoidConfig = require '../config/ParsoidConfig.js'::ParsoidConfig
 
 $parseJsPath = $require->resolve( '../parse.js' )
 
 $ParsoidService = $module->exports = []
 ParsoidService. More...
 
 $MWParserEnv = require '../config/MWParserEnvironment.js'::MWParserEnvironment
 
 $LogData = require '../logger/LogData.js'::LogData
 
 $ParsoidExtApi = $module->parent->require( './extapi.js' )->versionCheck( '^0.10.0' )
 
 $temp0
 
 $parseWikitextToDOM
 
 $Sanitizer
 
 $TokenUtils
 
 $modes = require './modes.js'
 
 $addMetaData
 
 $JSONExt
 
 $PARSE_ERROR_HTML
 
 $validityCache = new Map()
 
 $languageNameCache = new Map()
 
$module exports Language = $Language
 
$module exports LanguageConverter = $LanguageConverter
 
 $LintRequest = require '../mw/ApiRequest.js'::LintRequest
 
 $LintLogger
 
 $Logger
 
 $prettyLogTypeMap
 
 $diffTokens
 
 $KV = $temp0::KV
 
 $TagTk = $temp0::TagTk
 
 $EndTagTk = $temp0::EndTagTk
 
 $SelfclosingTagTk = $temp0::SelfclosingTagTk
 
 $ParsoidDate = null
 
 $getJan1
 

Detailed Description

Some parser functions, and quite a bunch of stubs of parser functions.

Diff tools.

Chinese conversion code.

Serbian (Српски / Srpski) specific code.

Kurdish conversion code.

English / Pig Latin conversion code.

Crimean Tatar (Qırımtatarca) conversion code.

A bidirectional Language Converter, capable of round-tripping variant conversion.

Base class for Language objects.

This is a demonstration of content model handling in extensions for Parsoid.

Implements the php parser's renderImageGallery natively.

Simple Parsoid web service.

Params to support (on the extension tag):

  • showfilename
  • caption
  • mode
  • widths
  • heights
  • perrow

A proposed spec is at: https://phabricator.wikimedia.org/P2506 ext/Gallery

It implements the "json" content model, to allow editing JSON data structures using Visual Editor. It represents the JSON structure as a nested table. ext/JSON

Language conversion is a DOMPostProcessor pass, run over the Parsoid-format HTML output, which may have embedded language converter rules. We first assign a (guessed) source variant to each DOM node, which will be used when round-tripping the result back to the original source variant. Then for each applicable text node in the DOM, we first "bracket" the text, splitting it into cleanly round-trippable segments and lossy/unclean segments. For the lossy segments we add additional metadata to the output to record the original source variant text to allow round-tripping (and variant-aware editing).

Like in the PHP implementation, each individual language has a dynamically-loaded subclass of Language, which may also have a LanguageConverter subclass to load appropriate ReplacementMachines and do other language-specific customizations.

Logger backend for linter. This backend filters out logging messages with Logtype "lint/*" and logs them (console, external service).

IMPORTANT NOTE: These parser functions are only used by the Parsoid-native template expansion pipeline, which is not the default or used in production. Normally we use API calls into a MediaWiki installation to implement parser functions and other preprocessor functionality. The only use of this code is currently in parserTests, but those tests should probably be marked as PHP-only and any mixed testing moved into separate tests. This means that there is not much point in spending time on implementing more parser functions here.

There are still quite a few missing, see http://www.mediawiki.org/wiki/Help:Magic_words and http://www.mediawiki.org/wiki/Help:Extension:ParserFunctions. Instantiated and called by the TemplateHandler extension. Any pf_<prefix> matching a lower-cased template name prefix up to the first colon will override that template.

Variable Documentation

◆ $addMetaData

Parsoid\$addMetaData
Initial value:
= $temp0->
addMetaData

◆ $diffTokens

Parsoid\$diffTokens
Initial value:
= function ( $oldString, $newString, $tokenize ) use ( &$simpleDiff ) {
if ( $oldString === $newString ) {
return [ [ '=', [ $newString ] ] ];
} else {
return simpleDiff::diff( $tokenize( $oldString ), $tokenize( $newString ) );
}
}

◆ $downgrade999to2

Parsoid\$downgrade999to2
Initial value:
= function ( $doc, $pb ) use ( &$DOMDataUtils ) {
DOMDataUtils::applyPageBundle( $doc, [ 'parsoid' => [ 'ids' => [] ], 'mw' => $pb->mw ] );
$pb->mw = [ 'ids' => [] ];
}

Downgrade content from 999.x to 2.x.

Parameters
{Document}doc
{Object}pb

◆ $getJan1

Parsoid\$getJan1
Initial value:
= function ( $d ) {
$d = new Date( $d->getTime() );
$d->setUTCMonth( 0 );
$d->setUTCDate( 1 );
$d->setUTCHours( 0 );
$d->setUTCMinutes( 0 );
$d->setUTCSeconds( 0 );
$d->setUTCMilliseconds( 0 );
return $d;
}

◆ $JSONExt

Parsoid\$JSONExt
Initial value:
= function () {
$this->config = [
'contentmodels' => [
'json' => $this
]
];
}

◆ $LintLogger

Parsoid\$LintLogger
Initial value:
= function ( $env ) {
$this->_env = $env;
$this->buffer = [];
}

◆ $Logger

Parsoid\$Logger
Initial value:
= function ( $opts ) {
if ( !$opts ) { $opts = []; }
$this->_opts = $opts;
$this->_logRequestQueue = [];
$this->_backends = new Map();
$this->_testAllRE = new RegExp( '/^$/' );
$this->_samplers = [];
$this->_samplersRE = new RegExp( '/^$/' );
$this->_samplersCache = new Map();
}

◆ $PARSE_ERROR_HTML

Parsoid\$PARSE_ERROR_HTML
Initial value:
=
'<!DOCTYPE html><html>'
. '<body>'
. "<table data-mw='{\"errors\":[{\"key\":\"bad-json\"}]}' typeof=\"mw:Error\">"
. '</body>'

◆ $parseWikitextToDOM

Parsoid\$parseWikitextToDOM
Initial value:
= $temp0->
parseWikitextToDOM

◆ $ParsoidService

Parsoid\$ParsoidService = $module->exports = []

ParsoidService.

For more details on the HTTP API, see the "apiuse" guide. module:api/ParsoidService

◆ $prettyLogTypeMap

Parsoid\$prettyLogTypeMap
Initial value:
= [
'debug' => '[DEBUG]'

◆ $rtResponse

Parsoid\$rtResponse
Initial value:
= function ( $env, $req, $res, $data ) use ( &$apiUtils, &$JSUtils ) {
apiUtils::renderResponse( $res, 'roundtrip', $data );
$env->log( 'info', 'completed in ' . JSUtils::elapsedTime( $res->locals->start ) . 'ms' );
}

◆ $Sanitizer

Parsoid\$Sanitizer
Initial value:
= $temp0::
Sanitizer

◆ $temp0

Parsoid\$temp0
Initial value:
=
$ParsoidExtApi

◆ $TokenUtils

Parsoid\$TokenUtils
Initial value:
= $temp0::
TokenUtils

◆ dataMwContentType

$apiUtils Parsoid\dataMwContentType
Initial value:
= function ( $outputContentVersion ) {
return 'application/json; charset=utf-8; profile="https://www.mediawiki.org/wiki/Specs/data-mw/' . $outputContentVersion . '"';
}

Return the appropriate content-type string for a data-mw JSON blob.

Parameters
{string}outputContentVersion

◆ dataParsoidContentType

$apiUtils Parsoid\dataParsoidContentType
Initial value:
= function ( $outputContentVersion ) {
return 'application/json; charset=utf-8; profile="https://www.mediawiki.org/wiki/Specs/data-parsoid/' . $outputContentVersion . '"';
}

Return the appropriate content-type string for a data-parsoid JSON blob.

Parameters
{string}outputContentVersion

◆ doDowngrade

$apiUtils Parsoid\doDowngrade
Initial value:
= function ( $downgrade, $metrics, $env, $revision, $version ) use ( &$apiUtils ) {
if ( $metrics ) { $metrics->increment( "downgrade.from.{$downgrade->from}.to.{$downgrade->to}" );
}
$doc = $env->createDocument( $revision->html->body );
$pb = $apiUtils->extractPageBundle( $revision );
$apiUtils->validatePageBundle( $pb, $version );
$start = time();
$downgrade->func( $doc, $pb );
if ( $metrics ) { $metrics->endTiming( 'downgrade.time', $start );
}
return [ 'doc' => $doc, 'pb' => $pb ];
}
$apiUtils
module:api/apiUtils
Definition: apiUtils.php:23

Downgrade content.

Parameters
{Object}downgrade
{Object}[metrics]
{MWParserEnvironment}env
{Object}revision
{string}version
Returns
{Object}

◆ errorHandler

$apiUtils Parsoid\errorHandler
Initial value:
= function ( $env, $err ) {
if ( $err->type === 'MaxConcurrentCallsError' ) {
$err->suppressLoggingStack = true;
$err->httpStatus = 503;
} elseif ( $err->type === 'TimeoutError' ) {
$err->suppressLoggingStack = true;
$err->httpStatus = 504;
}
$env->log( 'fatal/request', $err );
}

Generic error response handler.

Parameters
{MWParserEnvironment}env
{Error}err

◆ errorResponse

$apiUtils Parsoid\errorResponse
Initial value:
= function ( $res, $text, $status ) use ( &$apiUtils ) {
if ( gettype( $status ) !== 'number' ) {
$status = 500;
}
switch ( $res->locals->errorEnc ) {
case 'html':
$apiUtils->htmlResponse( $res, $text, $status );
break;
case 'json':
$text = [ 'error' => $text ];
$apiUtils->jsonResponse( $res, $text, $status );
break;
case 'plain':
$apiUtils->plainResponse( $res, $text, $status );
break;
default:
throw new Error( 'Unknown response type: ' . $res->locals->errorEnc );
}
}
$apiUtils
module:api/apiUtils
Definition: apiUtils.php:23

Error response.

Parameters
{Response}res The response object from our routing function.
{string}text
{number}[status]

◆ errorWrapper

$apiUtils Parsoid\errorWrapper
Initial value:
= function ( $env, $promiseOrValue ) use ( &$Promise, &$apiUtils ) {
return Promise::resolve( $promiseOrValue )->catch( function ( $err ) use ( &$apiUtils, &$env ) {
$apiUtils->errorHandler( $env, $err );
}
);
}
$apiUtils
module:api/apiUtils
Definition: apiUtils.php:23

Wrap a promised value with a catch that invokes errorHandler.

Parameters
{MWParserEnvironment}env
{Promise|any}promiseOrValue

◆ exports

$module Parsoid::exports

Create the API routes.

Parameters
{ParsoidConfig}parsoidConfig
{Logger}processLogger
{ParsoidConfig}parsoidConfig
{Logger}processLogger
{Object}parsoidOptions
{Function}parse

◆ extractPageBundle

$apiUtils Parsoid\extractPageBundle
Initial value:
= function ( $revision ) {
return [
'parsoid' => $revision[ 'data-parsoid' ] && $revision[ 'data-parsoid' ]->body,
'mw' => $revision[ 'data-mw' ] && $revision[ 'data-mw' ]->body
];
}

Extracts a pagebundle from a revision.

Parameters
{Object}revision
Returns
{Object}

◆ fatalRequest

$apiUtils Parsoid\fatalRequest
Initial value:
= function ( $env, $text, $httpStatus ) {
$err = new Error( $text );
$err->httpStatus = $httpStatus || 404;
$err->suppressLoggingStack = true;
$env->log( 'fatal/request', $err );
}

Log a fatal/request.

Parameters
{MWParserEnvironment}env
{string}text
{number}[httpStatus]

◆ findDowngrade

$apiUtils Parsoid\findDowngrade
Initial value:
= function ( $from, $to ) use ( &$downgrade999to2, &$semver ) {
return [
[ 'from' => '999.0.0', 'to' => '2.0.0', 'func' => $downgrade999to2 ]
]->find( function ( $a ) use ( &$semver, &$from, &$to ) {return semver::satisfies( $from, '^' . $a->from )
&& semver::satisfies( $to, '^' . $a->to );
}
);
}
$downgrade999to2
Downgrade content from 999.x to 2.x.
Definition: apiUtils.php:510

Is this a transition we know how to handle?

Parameters
{string}from
{string}to
Returns
{Object|undefined}

◆ htmlContentType

$apiUtils Parsoid\htmlContentType
Initial value:
= function ( $outputContentVersion ) {
return 'text/html; charset=utf-8; profile="https://www.mediawiki.org/wiki/Specs/HTML/' . $outputContentVersion . '"';
}

Return the appropriate content-type string for Parsoid HTML.

Parameters
{string}outputContentVersion

◆ htmlResponse

$apiUtils Parsoid\htmlResponse
Initial value:
= function ( $res, $body, $status, $headers, $omitEscape ) use ( &$apiUtils, &$Util ) {
if ( $res->headersSent ) { return;
}
if ( gettype( $status ) === 'number' ) {
$res->status( $status );
}
if ( !$headers ) { $headers = [ 'content-language' => 'en', 'vary' => 'Accept' ];
}
$contentType = $headers[ 'content-type' ] || 'text/html; charset=utf-8';
Assert::invariant( preg_match( '/^text\/html;/', $contentType ) );
$apiUtils->setHeader( $res, 'Content-Type', $contentType );
$apiUtils->setHeader( $res, 'Content-Language', $headers[ 'content-language' ] );
$apiUtils->setHeader( $res, 'Vary', $headers->vary );
$body = String( $body );
if ( !$omitEscape ) {
$body = Util::escapeHtml( $body );
}
$res->send( $body );
}
$apiUtils
module:api/apiUtils
Definition: apiUtils.php:23

Send an html response, but only if response hasn't been sent.

Parameters
{Response}res The response object from our routing function.
{string}body
{number}[status] HTTP status code.
{Object}[headers] HTTP headers to include.
{string}[headers.content-type] A more specific type to use.
{string}[headers.content-language] Content language of response.
{string}[headers.vary] Vary header contents.
{boolean}[omitEscape] Be explicit about omitting escaping.

◆ jsonResponse

$apiUtils Parsoid\jsonResponse
Initial value:
= function ( $res, $json, $status, $contentType ) use ( &$apiUtils ) {
if ( $res->headersSent ) { return;
}
if ( gettype( $status ) === 'number' ) {
$res->status( $status );
}
$contentType = $contentType || 'application/json; charset=utf-8';
Assert::invariant( preg_match( '/^application\/json;/', $contentType ) );
$apiUtils->setHeader( $res, 'Content-Type', $contentType );
$res->json( $json );
}
$apiUtils
module:api/apiUtils
Definition: apiUtils.php:23

Send a JSON response, but only if response hasn't been sent.

Parameters
{Response}res The response object from our routing function.
{Object}json
{number}[status] HTTP status code.
{string}[contentType] A more specific type to use.

◆ pagebundleContentType

$apiUtils Parsoid\pagebundleContentType
Initial value:
= function ( $outputContentVersion ) {
return 'application/json; charset=utf-8; profile="https://www.mediawiki.org/wiki/Specs/pagebundle/' . $outputContentVersion . '"';
}

Return the appropriate content-type string for a Parsoid page bundle.

Parameters
{string}outputContentVersion

◆ parseProfile

$apiUtils Parsoid\parseProfile
Initial value:
= function ( $profile, $format ) use ( &$newSpec, &$oldSpec ) {
$match = $newSpec->exec( $profile );
if ( !$match ) {
$match = $oldSpec->exec( $profile );
if ( $match ) { $match[ 1 ] = $format;
}
}
if ( $match ) {
return [
'format' => strtolower( $match[ 1 ] ),
'version' => $match[ 2 ]
];
} else {
return null;
}
}

Used to extract the format and content version from a profile.

Parameters
{string}profile
{string}format Just used for backwards compatibility w/ <= 1.2.0 where the pagebundle didn't have a spec.
Returns
{Object|null}

◆ plainResponse

$apiUtils Parsoid\plainResponse
Initial value:
= function ( $res, $text, $status, $contentType ) use ( &$apiUtils ) {
if ( $res->headersSent ) { return;
}
if ( gettype( $status ) === 'number' ) {
$res->status( $status );
}
$contentType = $contentType || 'text/plain; charset=utf-8';
Assert::invariant( preg_match( '/^text\/plain;/', $contentType ) );
$apiUtils->setHeader( $res, 'Content-Type', $contentType );
$res->send( String( $text ) );
}
$apiUtils
module:api/apiUtils
Definition: apiUtils.php:23

Send a plaintext response, but only if response hasn't been sent.

Parameters
{Response}res The response object from our routing function.
{string}text
{number}[status] HTTP status code.
{string}[contentType] A more specific type to use.

◆ redirectToOldid

$apiUtils Parsoid\redirectToOldid
Initial value:
= function ( $req, $res ) use ( &$qs ) {
$env = $res->locals->env;
$target = $env->normalizeAndResolvePageTitle();
$httpStatus = ( $req->method === 'GET' ) ? 302 : 307;
return $this->_redirect( $req, $res, $target, $httpStatus, function ( $redirPath ) use ( &$env, &$qs, &$req, &$res ) {
$revid = $env->page->meta->revision->revid;
$redirPath += '/' . $revid;
if ( count( Object::keys( $req->query ) ) > 0 ) {
$redirPath += '?' . qs::stringify( $req->query );
}
$format = $res->locals->opts->format;
$env->log( 'info', 'redirecting to revision', $revid, 'for', $format );
$metrics = $env->conf->parsoid->metrics;
if ( $metrics ) {
$metrics->increment( 'redirectToOldid.' . strtolower( $format ) );
}
return $redirPath;
}
);
}

Generate an HTTP redirect to a specific revision.

Parameters
{Request}req
{Response}res

◆ relativeRedirect

$apiUtils Parsoid\relativeRedirect
Initial value:
= function ( $res, $path, $httpStatus ) {
if ( $res->headersSent ) { return;
}
$args = [ $path ];
if ( gettype( $httpStatus ) === 'number' ) {
array_unshift( $args, $httpStatus );
}
call_user_func_array( [ $res, 'redirect' ], $args );
}

Send a redirect response with optional code and a relative URL.

Parameters
{Response}res The response object from our routing function.
{string}path
{number}[httpStatus]

◆ renderResponse

$apiUtils Parsoid\renderResponse
Initial value:
= function ( $res, $view, $locals ) {
if ( $res->headersSent ) { return;
}
$res->render( $view, $locals );
}

Render response, but only if response hasn't been sent.

Parameters
{Response}res The response object from our routing function.
{string}view
{Object}locals

◆ returnDowngrade

$apiUtils Parsoid\returnDowngrade
Initial value:
= function ( $downgrade, $metrics, $env, $revision, $res, $contentmodel ) use ( &$apiUtils, &$ContentUtils, &$DOMUtils ) {
$temp0 = $apiUtils->doDowngrade( $downgrade, $metrics, $env, $revision, $env->inputContentVersion );
$doc = $temp0->doc;
$pb = $temp0->pb;
$meta = $doc->querySelector( 'meta[property="mw:html:version"]' );
if ( $meta ) { $meta->setAttribute( 'content', $env->outputContentVersion );
}
$html = ContentUtils::toXML( ( $res->locals->body_only ) ? $doc->body : $doc, [
'innerXML' => $res->locals->body_only
]
);
$apiUtils->wt2htmlRes( $res, $html, $pb, $contentmodel, DOMUtils::findHttpEquivHeaders( $doc ), $env->outputContentVersion );
}
$apiUtils
module:api/apiUtils
Definition: apiUtils.php:23

Downgrade and return content.

Parameters
{Object}downgrade
{Object}[metrics]
{MWParserEnvironment}env
{Object}revision
{Response}res
{string}[contentmodel]

◆ setHeader

$apiUtils Parsoid\setHeader
Initial value:
= function ( $res, $field, $value ) {
Assert::invariant( $value !== null );
if ( $res->headersSent ) { return;
}
$res->set( $field, $value );
}

Set header, but only if response hasn't been sent.

Parameters
{Response}res The response object from our routing function.
{string}field
{string}value

◆ shouldScrub

$apiUtils Parsoid\shouldScrub
Initial value:
= function ( $req, $def ) {
if ( $req->body->hasOwnProperty( 'scrub_wikitext' ) ) {
return !( !$req->body->scrub_wikitext || $req->body->scrub_wikitext === 'false' );
} elseif ( $req->query->hasOwnProperty( 'scrub_wikitext' ) ) {
return !( !$req->query->scrub_wikitext || $req->query->scrub_wikitext === 'false' );
} elseif ( $req->body->hasOwnProperty( 'scrubWikitext' ) ) {
return !( !$req->body->scrubWikitext || $req->body->scrubWikitext === 'false' );
} elseif ( $req->query->hasOwnProperty( 'scrubWikitext' ) ) {
return !( !$req->query->scrubWikitext || $req->query->scrubWikitext === 'false' );
} else {
return $def;
}
}
Returns
{boolean}

◆ substTopLevelTemplates

$apiUtils Parsoid\substTopLevelTemplates
Initial value:
= function ( $env, $target, $wt ) use ( &$PegTokenizer, &$PHPParseRequest ) {
$tokenizer = new PegTokenizer( $env );
$tokens = $tokenizer->tokenizeSync( $wt );
$tsrIncr = 0;
for ( $i = 0; $i < count( $tokens ); $i++ ) {
if ( $tokens[ $i ]->name === 'template' ) {
$tsr = $tokens[ $i ]->dataAttribs->tsr;
$wt = substr( $wt, 0, $tsr[ 0 ] + $tsrIncr )
. '{{subst:'
. substr( $wt, $tsr[ 0 ] + $tsrIncr + 2 );
$tsrIncr += 6;
}
}
return PHPParseRequest::promise( $env, $target, $wt, true );
}

To support the 'subst' API parameter, we need to prefix each top-level template with 'subst'.

To make sure we do this for the correct templates, tokenize the starting wikitext and use that to detect top-level templates. Then, replace each starting '{{' with '{{subst:' using the template token's tsr (source range) offsets.

Parameters
{MWParserEnvironment}env
{string}target
{string}wt

◆ validateAndSetOutputContentVersion

$apiUtils Parsoid\validateAndSetOutputContentVersion

Set the content version to an acceptable version.

Returns false if Parsoid is unable to supply one.

Parameters
{Response}res
{Array}acceptableTypes
Returns
{boolean}

◆ validatePageBundle

$apiUtils Parsoid\validatePageBundle
Initial value:
= function ( $pb, $originalVersion ) use ( &$semver ) {
$err = null;
if ( !$pb->parsoid || $pb->parsoid->constructor !== $Object || !$pb->parsoid->ids ) {
$err = new Error( 'Invalid data-parsoid was provided.' );
$err->httpStatus = 400;
$err->suppressLoggingStack = true;
throw $err;
}
if ( semver::satisfies( $originalVersion, '^999.0.0' )
&& ( !$pb->mw || $pb->mw->constructor !== $Object || !$pb->mw->ids )
) {
$err = new Error( 'Invalid data-mw was provided.' );
$err->httpStatus = 400;
$err->suppressLoggingStack = true;
throw $err;
}
}

Validates the pagebundle was provided in the expected format.

Parameters
{Object}pb
{string}originalVersion

◆ versionFromType

$apiUtils Parsoid\versionFromType
Initial value:
= function ( $html ) use ( &$cType, &$apiUtils ) {
$ct = $html->headers && $html->headers[ 'content-type' ];
if ( $ct ) {
try {
$t = cType::parse( $ct );
$profile = $t->parameters && $t->parameters->profile;
if ( $profile ) {
$p = $apiUtils->parseProfile( $profile, 'html' );
return $p && $p->version;
} else {
return null;
}
} catch ( Exception $e ) {
return null;
}
} else {
return null;
}
}
$apiUtils
module:api/apiUtils
Definition: apiUtils.php:23

Determine the content version from the html's content type.

Parameters
{Object}html
Returns
{string|null}

◆ wikitextContentType

$apiUtils Parsoid\wikitextContentType
Initial value:
= function ( $env ) {
return 'text/plain; charset=utf-8; profile="https://www.mediawiki.org/wiki/Specs/wikitext/' . $env->wikitextVersion . '"';
}

Return the appropriate content-type string for wikitext.

Parameters
{MWParserEnvironment}env

◆ wt2htmlRes

$apiUtils Parsoid\wt2htmlRes

Send an appropriate response with the right content types for wt2html.

Parameters
{Object}res
{string}html
{Object}pb
{string}[contentmodel]
{Object}headers
{string}outputContentVersion