MediaWiki  master
ParsoidHTMLHelper.php
Go to the documentation of this file.
1 <?php
2 
22 namespace MediaWiki\Rest\Handler;
23 
use MediaWiki\Logger\LoggerFactory;
use MediaWiki\MediaWikiServices;
use MediaWiki\Page\PageRecord;
use MediaWiki\Parser\RevisionOutputCache;
use MediaWiki\Rest\LocalizedHttpException;
use MediaWiki\Revision\RevisionRecord;
use ParserCache;
use ParserOptions;
use ParserOutput;
use TitleValue;
use Wikimedia\Message\MessageValue;
use Wikimedia\Parsoid\Config\PageConfig;
use Wikimedia\Parsoid\Core\ClientError;
use Wikimedia\Parsoid\Core\ResourceLimitExceededException;
use Wikimedia\Parsoid\Parsoid;
use Wikimedia\UUID\GlobalIdGenerator;
40 
50 
/** Extension-data key under which getHtml() stores the render ID on a ParserOutput. */
private const RENDER_ID_KEY = 'parsoid-render-id';

/** @var ParserCache Cache consulted when the requested revision is the page's latest one. */
private $parserCache;

/** @var RevisionOutputCache Cache consulted when the requested revision is not the latest one. */
private $revisionOutputCache;

/** @var GlobalIdGenerator Used to mint the UUIDv1 render ID of freshly parsed output. */
private $globalIdGenerator;

/** @var PageRecord|null Page to render; assigned by init(). */
private $page = null;

/** @var Parsoid|null Parsoid instance, created on first use by createParsoid(). */
private $parsoid = null;

/** @var RevisionRecord|null Revision to render; assigned by init(), may stay null. */
private $revision = null;
70 
/**
 * @param ParserCache $parserCache Cache used for renderings of the latest revision of a page
 * @param RevisionOutputCache $revisionOutputCache Cache used for renderings of non-latest revisions
 * @param GlobalIdGenerator $globalIdGenerator Generator for the render ID attached to new output
 */
public function __construct(
	ParserCache $parserCache,
	RevisionOutputCache $revisionOutputCache,
	GlobalIdGenerator $globalIdGenerator
) {
	$this->parserCache = $parserCache;
	$this->globalIdGenerator = $globalIdGenerator;
	$this->revisionOutputCache = $revisionOutputCache;
}
85 
/**
 * Select the page, and optionally one specific revision of it, that the
 * other methods of this helper operate on.
 *
 * @param PageRecord $page
 * @param RevisionRecord|null $revision When null, getHtml() works with the
 *  ID of the page's latest revision instead.
 */
public function init( PageRecord $page, ?RevisionRecord $revision = null ) {
	[ $this->page, $this->revision ] = [ $page, $revision ];
}
94 
/**
 * Render the configured page/revision through Parsoid and wrap the
 * resulting HTML in a ParserOutput object.
 *
 * Renders that take longer than three seconds are reported on the
 * 'slow-parsoid' log channel.
 *
 * @return ParserOutput
 * @throws LocalizedHttpException With status 400 when Parsoid raises a
 *  ClientError, or 413 when it raises ResourceLimitExceededException.
 */
private function parse(): ParserOutput {
	$parsoid = $this->createParsoid();
	$pageConfig = $this->createPageConfig();
	$renderOptions = [
		'discardDataParsoid' => true,
		'pageBundle' => true,
	];

	try {
		$began = microtime( true );
		$bundle = $parsoid->wikitext2html( $pageConfig, $renderOptions );
		$output = new ParserOutput( $bundle->html );
		$elapsed = microtime( true ) - $began;

		if ( $elapsed > 3 ) {
			$logger = LoggerFactory::getInstance( 'slow-parsoid' );
			$logger->info( 'Parsing {title} was slow, took {time} seconds', [
				'time' => number_format( $elapsed, 2 ),
				'title' => (string)$this->page,
			] );
		}

		return $output;
	} catch ( ClientError | ResourceLimitExceededException $e ) {
		// Both failure modes are reported the same way; only the message
		// key and the HTTP status differ.
		$isClientError = $e instanceof ClientError;
		throw new LocalizedHttpException(
			MessageValue::new(
				$isClientError ? 'rest-html-backend-error' : 'rest-resource-limit-exceeded'
			),
			$isClientError ? 400 : 413,
			[ 'reason' => $e->getMessage() ]
		);
	}
}
132 
/**
 * Assert that the Parsoid services this helper relies on are available.
 *
 * @throws LocalizedHttpException With status 501 if any of the services
 *  is not registered in the service container.
 */
private function assertParsoidInstalled() {
	$services = MediaWikiServices::getInstance();
	$requiredServices = [
		'ParsoidSiteConfig',
		'ParsoidPageConfigFactory',
		'ParsoidDataAccess',
	];

	// Checked in order; the first missing service aborts the request.
	foreach ( $requiredServices as $serviceName ) {
		if ( !$services->has( $serviceName ) ) {
			throw new LocalizedHttpException(
				MessageValue::new( 'rest-html-backend-error' ),
				501
			);
		}
	}
}
153 
/**
 * Return the Parsoid instance for this helper, building it on first use.
 *
 * @return Parsoid
 * @throws LocalizedHttpException If the Parsoid services are not available
 */
private function createParsoid(): Parsoid {
	$this->assertParsoidInstalled();

	if ( $this->parsoid !== null ) {
		return $this->parsoid;
	}

	// TODO: Once parsoid glue services are in core, this will need to use
	// normal DI. At that point, we may want to extract a more high level
	// service for rendering a revision, and inject that into this class.
	// See T265518
	$services = MediaWikiServices::getInstance();
	$siteConfig = $services->get( 'ParsoidSiteConfig' );
	$dataAccess = $services->get( 'ParsoidDataAccess' );
	$this->parsoid = new Parsoid( $siteConfig, $dataAccess );

	return $this->parsoid;
}
174 
/**
 * Build the Parsoid PageConfig for the configured page and revision.
 *
 * @return PageConfig
 * @throws LocalizedHttpException If the Parsoid services are not available
 */
private function createPageConfig(): PageConfig {
	$this->assertParsoidInstalled();
	// Currently everything is parsed as anon since Parsoid
	// can't report the used options.
	// Already checked that title/revision exist and accessible.
	// TODO: make ParsoidPageConfigFactory take a RevisionRecord
	// TODO: make ParsoidPageConfigFactory take PageReference as well
	return MediaWikiServices::getInstance()
		->get( 'ParsoidPageConfigFactory' )
		->create(
			TitleValue::newFromPage( $this->page ),
			// NOTE(review): presumably the user argument, left null to match
			// the "parsed as anon" note above; confirm against the factory.
			null,
			// No revision ID is passed when no revision was given to init().
			$this->revision ? $this->revision->getId() : null
		);
}
194 
/**
 * Return the Parsoid rendering of the configured page/revision, taking it
 * from cache when possible and parsing and caching it otherwise.
 *
 * The latest revision of a page goes through the ParserCache; any other
 * revision goes through the RevisionOutputCache.
 *
 * @return ParserOutput
 * @throws LocalizedHttpException If rendering through Parsoid fails
 */
public function getHtml(): ParserOutput {
	$parserOptions = ParserOptions::newCanonical( 'canonical' );

	if ( $this->revision ) {
		$revisionId = $this->revision->getId();
	} else {
		$revisionId = $this->page->getLatest();
	}
	$useRevisionCache = $revisionId !== $this->page->getLatest();

	$cachedOutput = $useRevisionCache
		? $this->revisionOutputCache->get( $this->revision, $parserOptions )
		: $this->parserCache->get( $this->page, $parserOptions );
	if ( $cachedOutput ) {
		return $cachedOutput;
	}

	$freshOutput = $this->parse();

	// XXX: ParserOutput should just always record the revision ID and timestamp
	$timestamp = wfTimestampNow();
	$freshOutput->setCacheRevisionId( $revisionId );
	$freshOutput->setCacheTime( $timestamp );

	// TODO: when we make tighter integration with Parsoid, render ID should become
	// a standard ParserOutput property. Nothing else needs it now, so don't generate
	// it in ParserCache just yet.
	$renderId = $this->globalIdGenerator->newUUIDv1();
	$freshOutput->setExtensionData( self::RENDER_ID_KEY, $renderId );

	if ( $useRevisionCache ) {
		$this->revisionOutputCache->save( $freshOutput, $this->revision, $parserOptions, $timestamp );
	} else {
		$this->parserCache->save( $freshOutput, $this->page, $parserOptions, $timestamp );
	}

	return $freshOutput;
}
234 
/**
 * Returns an ETag uniquely identifying the HTML output.
 *
 * The tag has the form "<revision ID>/<render ID>", including the
 * surrounding double quotes.
 *
 * @return string|null
 * @throws LocalizedHttpException If rendering through Parsoid fails
 */
public function getETag(): ?string {
	$parserOutput = $this->getHtml();
	$renderId = $parserOutput->getExtensionData( self::RENDER_ID_KEY );
	// Fallback for backwards compatibility with older cached entries.
	if ( !$renderId ) {
		// Take the render time from the output already in hand instead of
		// going through getLastModified(): that would run getHtml() a second
		// time, and a cache miss there could pair this output's revision ID
		// with the timestamp of a different render.
		$renderId = $parserOutput->getCacheTime();
	}
	return "\"{$parserOutput->getCacheRevisionId()}/{$renderId}\"";
}
248 
/**
 * Returns the time at which the HTML was rendered.
 *
 * @return string|null Cache time recorded on the ParserOutput from getHtml()
 * @throws LocalizedHttpException If rendering through Parsoid fails
 */
public function getLastModified(): ?string {
	$parserOutput = $this->getHtml();
	return $parserOutput->getCacheTime();
}
257 
258 }
MediaWiki\Rest\Handler\ParsoidHTMLHelper
Helper for getting output of a given wikitext page rendered by parsoid.
Definition: ParsoidHTMLHelper.php:49
ParserOptions
Set options of the Parser.
Definition: ParserOptions.php:45
MediaWiki\Rest\Handler
Definition: AbstractContributionHandler.php:3
Page\PageRecord
Data record representing a page that is (or used to be, or could be) an editable page on a wiki.
Definition: PageRecord.php:25
MediaWiki\Revision\RevisionRecord
Page revision base class.
Definition: RevisionRecord.php:47
ParserOutput
Definition: ParserOutput.php:36
Wikimedia\UUID\GlobalIdGenerator
Class for getting statistically unique IDs without a central coordinator.
Definition: GlobalIdGenerator.php:34
MediaWiki\Rest\Handler\ParsoidHTMLHelper\$revision
RevisionRecord null $revision
Definition: ParsoidHTMLHelper.php:69
MediaWiki\Rest\Handler\ParsoidHTMLHelper\parse
parse()
Definition: ParsoidHTMLHelper.php:99
MediaWiki\MediaWikiServices
MediaWikiServices is the service locator for the application scope of MediaWiki.
Definition: MediaWikiServices.php:200
MediaWiki\Rest\Handler\ParsoidHTMLHelper\createPageConfig
createPageConfig()
Definition: ParsoidHTMLHelper.php:179
MediaWiki\Rest\Handler\ParsoidHTMLHelper\getETag
getETag()
Returns an ETag uniquely identifying the HTML output.
Definition: ParsoidHTMLHelper.php:239
MediaWiki\Logger\LoggerFactory\getInstance
static getInstance( $channel)
Get a named logger instance from the currently configured logger factory.
Definition: LoggerFactory.php:92
MediaWiki\Rest\Handler\ParsoidHTMLHelper\$revisionOutputCache
RevisionOutputCache $revisionOutputCache
Definition: ParsoidHTMLHelper.php:57
Wikimedia\Message\MessageValue
Value object representing a message for i18n.
Definition: MessageValue.php:16
MediaWiki\Rest\Handler\ParsoidHTMLHelper\init
init(PageRecord $page, ?RevisionRecord $revision=null)
Definition: ParsoidHTMLHelper.php:90
MediaWiki\Rest\Handler\ParsoidHTMLHelper\$page
PageRecord null $page
Definition: ParsoidHTMLHelper.php:63
MediaWiki\MediaWikiServices\getInstance
static getInstance()
Returns the global default instance of the top level service locator.
Definition: MediaWikiServices.php:261
TitleValue\newFromPage
static newFromPage(PageReference $page)
Constructs a TitleValue from a local PageReference.
Definition: TitleValue.php:119
MediaWiki\Logger\LoggerFactory
PSR-3 logger instance factory.
Definition: LoggerFactory.php:45
MediaWiki\Rest\Handler\ParsoidHTMLHelper\$parserCache
ParserCache $parserCache
Definition: ParsoidHTMLHelper.php:54
ParserOptions\newCanonical
static newCanonical( $context, $userLang=null)
Creates a "canonical" ParserOptions object.
Definition: ParserOptions.php:1091
wfTimestampNow
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
Definition: GlobalFunctions.php:1686
MediaWiki\Rest\Handler\ParsoidHTMLHelper\getLastModified
getLastModified()
Returns the time at which the HTML was rendered.
Definition: ParsoidHTMLHelper.php:254
MediaWiki\Rest\Handler\ParsoidHTMLHelper\createParsoid
createParsoid()
Definition: ParsoidHTMLHelper.php:158
MediaWiki\Rest\Handler\ParsoidHTMLHelper\assertParsoidInstalled
assertParsoidInstalled()
Assert that Parsoid services are available.
Definition: ParsoidHTMLHelper.php:140
MediaWiki\Rest\Handler\ParsoidHTMLHelper\$parsoid
Parsoid null $parsoid
Definition: ParsoidHTMLHelper.php:66
Parser\RevisionOutputCache
Cache for ParserOutput objects.
Definition: RevisionOutputCache.php:44
MediaWiki\Rest\Handler\ParsoidHTMLHelper\RENDER_ID_KEY
const RENDER_ID_KEY
Definition: ParsoidHTMLHelper.php:51
MediaWiki\Rest\Handler\ParsoidHTMLHelper\__construct
__construct(ParserCache $parserCache, RevisionOutputCache $revisionOutputCache, GlobalIdGenerator $globalIdGenerator)
Definition: ParsoidHTMLHelper.php:76
ParserCache
Cache for ParserOutput objects corresponding to the latest page revisions.
Definition: ParserCache.php:63
Wikimedia\Message\MessageValue\new
static new( $key, $params=[])
Static constructor for easier chaining of ->params() methods.
Definition: MessageValue.php:42
MediaWiki\Rest\Handler\ParsoidHTMLHelper\getHtml
getHtml()
Definition: ParsoidHTMLHelper.php:199
MediaWiki\Rest\Handler\ParsoidHTMLHelper\$globalIdGenerator
GlobalIdGenerator $globalIdGenerator
Definition: ParsoidHTMLHelper.php:60
MediaWiki\Rest\LocalizedHttpException
@newable
Definition: LocalizedHttpException.php:10
TitleValue
Represents a page (or page fragment) title within MediaWiki.
Definition: TitleValue.php:40