MediaWiki master
ParsoidOutputAccess.php
Go to the documentation of this file.
1<?php
21
37use Wikimedia\Parsoid\Config\SiteConfig;
38use Wikimedia\Parsoid\Core\ClientError;
39use Wikimedia\Parsoid\Core\ResourceLimitExceededException;
40
51 public const PARSOID_PARSER_CACHE_NAME = 'parsoid';
52
53 public const CONSTRUCTOR_OPTIONS = [
55 'ParsoidWikiID'
56 ];
57
/** Options this service was constructed with. */
private ServiceOptions $options;
/** Value of the 'ParsoidWikiID' option, read once in the constructor. */
private string $parsoidWikiId;

/** Creates the Parsoid parser used for uncacheable parses. */
private ParsoidParserFactory $parsoidParserFactory;
/** Performs the (cached) rendering once Parsoid has been selected. */
private ParserOutputAccess $parserOutputAccess;

/** Resolves page references and page IDs to page records. */
private PageLookup $pageLookup;
/** Resolves revision IDs to revision records. */
private RevisionLookup $revisionLookup;

/** Queried for content model handlers known to Parsoid. */
private SiteConfig $siteConfig;
/** Queried for the default serialization format of a content model. */
private IContentHandlerFactory $contentHandlerFactory;
66
/**
 * Store the injected collaborators and read the Parsoid wiki ID from the options.
 *
 * @param ServiceOptions $options Must provide the keys in self::CONSTRUCTOR_OPTIONS
 * @param ParsoidParserFactory $parsoidParserFactory
 * @param ParserOutputAccess $parserOutputAccess
 * @param PageLookup $pageLookup
 * @param RevisionLookup $revisionLookup
 * @param SiteConfig $siteConfig
 * @param IContentHandlerFactory $contentHandlerFactory
 */
public function __construct(
	ServiceOptions $options,
	ParsoidParserFactory $parsoidParserFactory,
	ParserOutputAccess $parserOutputAccess,
	PageLookup $pageLookup,
	RevisionLookup $revisionLookup,
	SiteConfig $siteConfig,
	IContentHandlerFactory $contentHandlerFactory
) {
	// Validate the options before touching any state.
	$options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
	$this->options = $options;

	// Injected services.
	$this->contentHandlerFactory = $contentHandlerFactory;
	$this->siteConfig = $siteConfig;
	$this->revisionLookup = $revisionLookup;
	$this->pageLookup = $pageLookup;
	$this->parserOutputAccess = $parserOutputAccess;
	$this->parsoidParserFactory = $parsoidParserFactory;

	// NOTE: Parsoid receives this value as its "prefix" option and uses it
	//       to find wiki specific configuration in the baseconfig directory.
	//       SiteConfig should probably own this, in which case it would
	//       no longer be needed here.
	$this->parsoidWikiId = $options->get( 'ParsoidWikiID' );
}
100
/**
 * Tell whether Parsoid output can be produced for the given content model.
 *
 * A model is supported when it is wikitext, when its content handler
 * serializes to wikitext by default, or when the Parsoid site config
 * has a handler for it.
 *
 * @param string $model Content model ID
 * @return bool False for unknown or unsupported models
 */
public function supportsContentModel( string $model ): bool {
	if ( $model === CONTENT_MODEL_WIKITEXT ) {
		return true;
	}

	// Find out what the content model serializes to.
	// NOTE: isSupportedFormat( CONTENT_FORMAT_WIKITEXT ) would be an option if
	//       PageContent::getContent() specified the format when calling serialize().
	try {
		$defaultFormat = $this->contentHandlerFactory
			->getContentHandler( $model )
			->getDefaultFormat();
	} catch ( MWUnknownContentModelException $unknownModel ) {
		// An unknown content model can't be supported.
		return false;
	}

	// Either the model serializes to wikitext, or Parsoid has a dedicated handler.
	return $defaultFormat === CONTENT_FORMAT_WIKITEXT
		|| $this->siteConfig->getContentModelHandler( $model ) !== null;
}
126
/**
 * Produce a stand-in result when the main slot's content model is not supported.
 *
 * @param RevisionRecord $revision
 * @return Status|null Null if the content model is supported and normal
 *   processing should continue, otherwise a Status wrapping dummy parser output.
 */
private function handleUnsupportedContentModel( RevisionRecord $revision ): ?Status {
	$model = $revision->getSlot( SlotRecord::MAIN )->getModel();

	// Returning dummy output is a messy fix for T324711. The real solution is T311648.
	// TODO: Go back to throwing once RESTbase no longer expects to get a parsoid
	//       rendering for any kind of content (T324711). That should be an internal
	//       exception which HtmlOutputRendererHelper converts to an HTTP 400 error
	//       saying that Parsoid does not support the content model.
	return $this->supportsContentModel( $model )
		? null
		: $this->makeDummyParserOutput( $model );
}
145
/**
 * Get Parsoid output for a page revision by way of ParserOutputAccess.
 *
 * Note that $parserOpts is modified: setUseParsoid() is called on it.
 *
 * @param PageIdentity $page
 * @param ParserOptions $parserOpts
 * @param RevisionRecord|int|null $revision Null selects the page's latest revision
 * @param int $options Bitfield handed on to ParserOutputAccess::getParserOutput()
 * @param bool $lenientRevHandling If true, a revision belonging to a different
 *   page is rendered for its own page, without updating the cache.
 * @return Status Fatal if Parsoid reported a client error or an exceeded resource limit
 * @throws RevisionAccessException If the page or the revision cannot be resolved
 */
public function getParserOutput(
	PageIdentity $page,
	ParserOptions $parserOpts,
	$revision = null,
	int $options = 0,
	bool $lenientRevHandling = false
): Status {
	[ $page, $revision, $uncacheable ] = $this->resolveRevision( $page, $revision, $lenientRevHandling );

	$dummyStatus = $this->handleUnsupportedContentModel( $revision );
	if ( $dummyStatus !== null ) {
		return $dummyStatus;
	}

	if ( $uncacheable ) {
		$options |= ParserOutputAccess::OPT_NO_UPDATE_CACHE;
	}

	try {
		// We know that Parsoid output is being generated, so call
		// ParserOptions::setUseParsoid explicitly. That way the right cache
		// key is computed when the parser-cache is queried.
		// This is an optional transition step to using ParserOutputAccess.
		$parserOpts->setUseParsoid();

		return $this->parserOutputAccess->getParserOutput(
			$page, $parserOpts, $revision, $options
		);
	} catch ( ClientError $clientError ) {
		return Status::newFatal( 'parsoid-client-error', $clientError->getMessage() );
	} catch ( ResourceLimitExceededException $limitExceeded ) {
		return Status::newFatal( 'parsoid-resource-limit-exceeded', $limitExceeded->getMessage() );
	}
}
187
/**
 * Look up parser output for a page revision in the cache, via ParserOutputAccess.
 *
 * $parserOpts is switched to Parsoid mode only when the main slot's
 * content model is supported.
 *
 * @param PageIdentity $page
 * @param ParserOptions $parserOpts
 * @param RevisionRecord|int|null $revision Null selects the page's latest revision
 * @param bool $lenientRevHandling See resolveRevision()
 * @return ParserOutput|null
 * @throws RevisionAccessException If the page or the revision cannot be resolved
 */
public function getCachedParserOutput(
	PageIdentity $page,
	ParserOptions $parserOpts,
	$revision = null,
	bool $lenientRevHandling = false
): ?ParserOutput {
	// The "uncacheable" flag (third element) is of no interest for a pure cache lookup.
	[ $page, $revision ] = $this->resolveRevision( $page, $revision, $lenientRevHandling );

	$model = $revision->getSlot( SlotRecord::MAIN )->getModel();
	if ( $this->supportsContentModel( $model ) ) {
		// Parsoid supports this content model, so call
		// ParserOptions::setUseParsoid explicitly. That way the right cache
		// key is used when the parser-cache is queried.
		// This is an optional transition step to using ParserOutputAccess.
		$parserOpts->setUseParsoid();
	}

	return $this->parserOutputAccess->getCachedParserOutput( $page, $parserOpts, $revision );
}
214
/**
 * Build placeholder output for a content model that Parsoid does not support.
 *
 * @param string $contentModel Content model ID, mentioned in the placeholder text
 * @return Status Good status wrapping the placeholder ParserOutput
 */
private function makeDummyParserOutput( string $contentModel ): Status {
	$dummy = new ParserOutput(
		"Dummy output. Parsoid does not support content model {$contentModel}. See T324711."
	);

	// Generating this is fast, so there is no point in writing it to the parser cache.
	$dummy->updateCacheExpiry( 0 );

	// Rendering of dummy output requires the render ID: T311728.
	$now = wfTimestampNow();
	$dummy->setCacheTime( $now );
	$dummy->setRenderId( 'dummy-output' );
	$dummy->setCacheRevisionId( 0 );
	$dummy->setRevisionTimestamp( $now );

	// HtmlOutputRendererHelper::putHeaders needs this when $forHtml
	$pageBundleData = [
		'headers' => [ 'content-language' => 'en' ],
	];
	$dummy->setExtensionData(
		PageBundleParserOutputConverter::PARSOID_PAGE_BUNDLE_KEY,
		$pageBundleData
	);

	return Status::newGood( $dummy );
}
237
/**
 * Parse a fake revision with Parsoid, bypassing the parser cache.
 *
 * This is to be called only for parsing posted wikitext that is actually
 * not part of any real revision.
 *
 * @param PageIdentity $page
 * @param ParserOptions $parserOpts
 * @param RevisionRecord|int|null $revision Anything other than a RevisionRecord
 *   is resolved through resolveRevision() first.
 * @param bool $lenientRevHandling See resolveRevision()
 * @return Status Good status wrapping the ParserOutput. Fatal if the revision
 *   has a non-zero ID, if the revision could not be accessed while parsing, or
 *   if Parsoid reported a client error or an exceeded resource limit.
 * @throws RevisionAccessException If $revision had to be resolved and could not be
 */
public function parseUncacheable(
	PageIdentity $page,
	ParserOptions $parserOpts,
	$revision,
	bool $lenientRevHandling = false
): Status {
	// NOTE: If we have a RevisionRecord already, just use it, there is no need to resolve $page to
	//       a PageRecord (and it may not be possible if the page doesn't exist).
	if ( !$revision instanceof RevisionRecord ) {
		[ $page, $revision ] = $this->resolveRevision( $page, $revision, $lenientRevHandling );
	}

	// Enforce caller expectation
	$revId = $revision->getId();
	if ( $revId !== 0 && $revId !== null ) {
		return Status::newFatal( 'parsoid-revision-access',
			"parseUncacheable should not be called for a real revision" );
	}

	$status = $this->handleUnsupportedContentModel( $revision );
	if ( $status ) {
		return $status;
	}

	try {
		// Since we aren't caching this output, there is no need to
		// call setUseParsoid() here.
		// Create the parser once and reuse it; a second create() call is wasted work.
		$parser = $this->parsoidParserFactory->create();
		$parserOutput = $parser->parseFakeRevision( $revision, $page, $parserOpts );
		$parserOutput->updateCacheExpiry( 0 ); // Ensure this isn't accidentally cached
		$status = Status::newGood( $parserOutput );
	} catch ( RevisionAccessException $e ) {
		$status = Status::newFatal( 'parsoid-revision-access', $e->getMessage() );
	} catch ( ClientError $e ) {
		$status = Status::newFatal( 'parsoid-client-error', $e->getMessage() );
	} catch ( ResourceLimitExceededException $e ) {
		$status = Status::newFatal( 'parsoid-resource-limit-exceeded', $e->getMessage() );
	}

	return $status;
}
290
/**
 * Turn the page and revision arguments into concrete records and check
 * that they belong together.
 *
 * @param PageIdentity $page Looked up via PageLookup unless it already is a PageRecord
 * @param RevisionRecord|int|null $revision Null selects the page's latest
 *   revision; an integer is looked up as a revision ID.
 * @param bool $lenientRevHandling If true, a revision belonging to another page
 *   switches to that page and marks the result uncacheable instead of failing.
 * @return array [ page, revision, uncacheable flag ]
 * @throws RevisionAccessException If the page or the revision is not found, or
 *   the revision belongs to another page and $lenientRevHandling is false
 * @throws \RuntimeException If the page owning the revision cannot be loaded
 */
private function resolveRevision( PageIdentity $page, $revision, bool $lenientRevHandling = false ): array {
	if ( !$page instanceof PageRecord ) {
		// Capture the name first, $page is about to be overwritten.
		$pageName = "$page";
		$page = $this->pageLookup->getPageByReference( $page );
		if ( !$page ) {
			throw new RevisionAccessException(
				'Page {name} not found',
				[ 'name' => $pageName ]
			);
		}
	}

	if ( $revision === null ) {
		$revision = $page->getLatest();
	}

	if ( is_int( $revision ) ) {
		$requestedRevId = $revision;
		$revision = $this->revisionLookup->getRevisionById( $requestedRevId );
		if ( !$revision ) {
			throw new RevisionAccessException(
				'Revision {revId} not found',
				[ 'revId' => $requestedRevId ]
			);
		}
	}

	// The common case: the revision belongs to the page, and caching is fine.
	if ( $page->getId() === $revision->getPageId() ) {
		return [ $page, $revision, false ];
	}

	if ( !$lenientRevHandling ) {
		throw new RevisionAccessException(
			'Revision {revId} does not belong to page {name}',
			[ 'name' => $page->getDBkey(), 'revId' => $revision->getId() ]
		);
	}

	// Lenient mode: carry on with the page that actually owns the revision.
	$page = $this->pageLookup->getPageById( $revision->getPageId() );
	if ( !$page ) {
		// This should ideally never trigger!
		throw new \RuntimeException(
			"Unexpected NULL page for pageid {$revision->getPageId()} from revision {$revision->getId()}"
		);
	}

	// Don't cache this!
	return [ $page, $revision, true ];
}
349}
const CONTENT_MODEL_WIKITEXT
Definition Defines.php:218
const CONTENT_FORMAT_WIKITEXT
Wikitext.
Definition Defines.php:234
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
if(!defined('MW_SETUP_CALLBACK'))
Definition WebStart.php:81
Exception thrown when an unregistered content model is requested.
A class for passing options to services.
assertRequiredOptions(array $expectedKeys)
Assert that the list of options provided in this instance exactly matches $expectedKeys,...
A class containing constants representing the names of configuration variables.
const ParsoidCacheConfig
Name constant for the ParsoidCacheConfig setting, for use with Config::get()
Service for getting rendered output of a given page.
Rendered output of a wiki page, as parsed from wikitext.
MediaWiki service for getting Parsoid Output objects.
getCachedParserOutput(PageIdentity $page, ParserOptions $parserOpts, $revision=null, bool $lenientRevHandling=false)
parseUncacheable(PageIdentity $page, ParserOptions $parserOpts, $revision, bool $lenientRevHandling=false)
This is to be called only for parsing posted wikitext that is actually not part of any real revision.
__construct(ServiceOptions $options, ParsoidParserFactory $parsoidParserFactory, ParserOutputAccess $parserOutputAccess, PageLookup $pageLookup, RevisionLookup $revisionLookup, SiteConfig $siteConfig, IContentHandlerFactory $contentHandlerFactory)
getParserOutput(PageIdentity $page, ParserOptions $parserOpts, $revision=null, int $options=0, bool $lenientRevHandling=false)
Exception representing a failure to look up a revision.
Page revision base class.
Value object representing a content slot associated with a page revision.
Generic operation result class. Has warning/error list, boolean status and arbitrary value.
Definition Status.php:54
Set options of the Parser.
setUseParsoid()
Request Parsoid-format HTML output.
Interface for objects (potentially) representing an editable wiki page.
Service for looking up information about wiki pages.
Data record representing a page that is (or used to be, or could be) an editable page on a wiki.
Service for looking up page revisions.
Copyright (C) 2011-2022 Wikimedia Foundation and others.