Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
66.67% |
56 / 84 |
|
57.14% |
4 / 7 |
CRAP | |
0.00% |
0 / 1 |
ParsoidOutputAccess | |
66.67% |
56 / 84 |
|
57.14% |
4 / 7 |
57.04 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
supportsContentModel | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
4 | |||
getParserOutput | |
58.33% |
7 / 12 |
|
0.00% |
0 / 1 |
5.16 | |||
getCachedParserOutput | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
parseUncacheable | |
61.11% |
11 / 18 |
|
0.00% |
0 / 1 |
9.88 | |||
resolveRevision | |
51.52% |
17 / 33 |
|
0.00% |
0 / 1 |
18.23 | |||
adjustParserOptions | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 |
1 | <?php |
2 | /** |
3 | * Copyright (C) 2011-2022 Wikimedia Foundation and others. |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License along |
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
18 | */ |
19 | |
20 | namespace MediaWiki\Parser\Parsoid; |
21 | |
22 | use MediaWiki\Content\IContentHandlerFactory; |
23 | use MediaWiki\Page\PageIdentity; |
24 | use MediaWiki\Page\PageLookup; |
25 | use MediaWiki\Page\PageRecord; |
26 | use MediaWiki\Page\ParserOutputAccess; |
27 | use MediaWiki\Parser\ParserOutput; |
28 | use MediaWiki\Revision\RevisionAccessException; |
29 | use MediaWiki\Revision\RevisionLookup; |
30 | use MediaWiki\Revision\RevisionRecord; |
31 | use MediaWiki\Revision\SlotRecord; |
32 | use MediaWiki\Status\Status; |
33 | use MWUnknownContentModelException; |
34 | use ParserOptions; |
35 | use Wikimedia\Parsoid\Config\SiteConfig; |
36 | use Wikimedia\Parsoid\Core\ClientError; |
37 | use Wikimedia\Parsoid\Core\ResourceLimitExceededException; |
38 | |
39 | /** |
40 | * MediaWiki service for getting rendered page content. |
41 | * |
42 | * This is very similar to ParserOutputAccess and only exists as a |
43 | * separate class as an interim solution and should be removed soon. |
44 | * |
45 | * It is different from ParserOutputAccess in two aspects: |
46 | * - it forces Parsoid to be used when possible |
47 | * - it supports on-the-fly parsing through parseUncacheable() |
48 | * |
49 | * @since 1.39 |
50 | * @unstable |
51 | */ |
class ParsoidOutputAccess {
	/** @var ParsoidParserFactory Creates the Parsoid parser used by parseUncacheable() */
	private ParsoidParserFactory $parsoidParserFactory;

	/** @var PageLookup Used to turn a PageIdentity (or a page ID) into a PageRecord */
	private PageLookup $pageLookup;

	/** @var RevisionLookup Used to turn a revision ID into a RevisionRecord */
	private RevisionLookup $revisionLookup;

	/** @var ParserOutputAccess The service that cached and cacheable lookups are delegated to */
	private ParserOutputAccess $parserOutputAccess;

	/** @var SiteConfig Queried for content model handlers in supportsContentModel() */
	private SiteConfig $siteConfig;

	/** @var IContentHandlerFactory Used to look up the content handler of a content model */
	private IContentHandlerFactory $contentHandlerFactory;

	/**
	 * @param ParsoidParserFactory $parsoidParserFactory
	 * @param ParserOutputAccess $parserOutputAccess
	 * @param PageLookup $pageLookup
	 * @param RevisionLookup $revisionLookup
	 * @param SiteConfig $siteConfig
	 * @param IContentHandlerFactory $contentHandlerFactory
	 */
	public function __construct(
		ParsoidParserFactory $parsoidParserFactory,
		ParserOutputAccess $parserOutputAccess,
		PageLookup $pageLookup,
		RevisionLookup $revisionLookup,
		SiteConfig $siteConfig,
		IContentHandlerFactory $contentHandlerFactory
	) {
		$this->parsoidParserFactory = $parsoidParserFactory;
		$this->parserOutputAccess = $parserOutputAccess;
		$this->pageLookup = $pageLookup;
		$this->revisionLookup = $revisionLookup;
		$this->siteConfig = $siteConfig;
		$this->contentHandlerFactory = $contentHandlerFactory;
	}

	/**
	 * Whether this service treats the given content model as supported.
	 *
	 * A model is supported if it is wikitext, if its content handler's
	 * default format is wikitext, or if the site config provides a
	 * content model handler for it.
	 *
	 * @param string $model Content model ID
	 *
	 * @return bool False if the content model is unknown or none of the
	 *   above conditions hold.
	 */
	public function supportsContentModel( string $model ): bool {
		if ( $model === CONTENT_MODEL_WIKITEXT ) {
			return true;
		}

		// Check if the content model serializes to wikitext.
		// NOTE: We could use isSupportedFormat( CONTENT_FORMAT_WIKITEXT ) if PageContent::getContent()
		//       would specify the format when calling serialize().
		try {
			$handler = $this->contentHandlerFactory->getContentHandler( $model );
			if ( $handler->getDefaultFormat() === CONTENT_FORMAT_WIKITEXT ) {
				return true;
			}
		} catch ( MWUnknownContentModelException $ex ) {
			// If the content model is not known, it can't be supported.
			return false;
		}

		return $this->siteConfig->getContentModelHandler( $model ) !== null;
	}

	/**
	 * Get the parser output for a page revision via ParserOutputAccess.
	 *
	 * The page and revision are resolved first; that step runs outside the
	 * try block, so exceptions thrown by it propagate to the caller instead
	 * of being turned into a fatal Status.
	 *
	 * @param PageIdentity $page
	 * @param ParserOptions $parserOpts Modified in place: setUseParsoid() is
	 *   called on it when the revision's main slot has a supported content model.
	 * @param RevisionRecord|int|null $revision Revision object, revision ID,
	 *   or null for the page's latest revision
	 * @param int $options See the OPT_XXX constants
	 * @param bool $lenientRevHandling If true, a revision belonging to a
	 *   different page is accepted; the revision's own page is used instead
	 *   and OPT_NO_UPDATE_CACHE is added to $options.
	 *
	 * @return Status<ParserOutput> Fatal if a Parsoid ClientError or
	 *   ResourceLimitExceededException was caught.
	 * @throws RevisionAccessException If the page or revision cannot be resolved
	 */
	public function getParserOutput(
		PageIdentity $page,
		ParserOptions $parserOpts,
		$revision = null,
		int $options = 0,
		bool $lenientRevHandling = false
	): Status {
		[ $page, $revision, $uncacheable ] = $this->resolveRevision( $page, $revision, $lenientRevHandling );

		try {
			if ( $uncacheable ) {
				$options |= ParserOutputAccess::OPT_NO_UPDATE_CACHE;
			}

			$this->adjustParserOptions( $revision, $parserOpts );
			$status = $this->parserOutputAccess->getParserOutput(
				$page, $parserOpts, $revision, $options
			);
		} catch ( ClientError $e ) {
			$status = Status::newFatal( 'parsoid-client-error', $e->getMessage() );
		} catch ( ResourceLimitExceededException $e ) {
			$status = Status::newFatal( 'parsoid-resource-limit-exceeded', $e->getMessage() );
		}
		return $status;
	}

	/**
	 * Ask ParserOutputAccess for already cached output of a page revision.
	 *
	 * @param PageIdentity $page
	 * @param ParserOptions $parserOpts Modified in place: setUseParsoid() is
	 *   called on it when the revision's main slot has a supported content model.
	 * @param RevisionRecord|int|null $revision Revision object, revision ID,
	 *   or null for the page's latest revision
	 * @param bool $lenientRevHandling If true, a revision belonging to a
	 *   different page is accepted and the revision's own page is used instead.
	 *
	 * @return ?ParserOutput Whatever ParserOutputAccess::getCachedParserOutput() returns
	 * @throws RevisionAccessException If the page or revision cannot be resolved
	 */
	public function getCachedParserOutput(
		PageIdentity $page,
		ParserOptions $parserOpts,
		$revision = null,
		bool $lenientRevHandling = false
	): ?ParserOutput {
		// The third tuple element (the "uncacheable" flag) is irrelevant for a pure cache read.
		[ $page, $revision ] = $this->resolveRevision( $page, $revision, $lenientRevHandling );

		$this->adjustParserOptions( $revision, $parserOpts );
		return $this->parserOutputAccess->getCachedParserOutput( $page, $parserOpts, $revision );
	}

	/**
	 * This is to be called only for parsing posted wikitext that is actually
	 * not part of any real revision.
	 *
	 * @param PageIdentity $page
	 * @param ParserOptions $parserOpts
	 * @param RevisionRecord|int|null $revision If a RevisionRecord is given it
	 *   is used as is and $page is not resolved to a PageRecord. Otherwise the
	 *   page and revision are resolved first; that happens outside the try
	 *   block, so exceptions from it propagate to the caller.
	 * @param bool $lenientRevHandling Only relevant when $revision is not a
	 *   RevisionRecord; see getParserOutput().
	 *
	 * @return Status Good with a ParserOutput whose cache expiry was set to 0,
	 *   or fatal if the revision has a real (non-zero, non-null) ID or if a
	 *   RevisionAccessException, ClientError or ResourceLimitExceededException
	 *   was caught while parsing.
	 */
	public function parseUncacheable(
		PageIdentity $page,
		ParserOptions $parserOpts,
		$revision,
		bool $lenientRevHandling = false
	): Status {
		// NOTE: If we have a RevisionRecord already, just use it, there is no need to resolve $page to
		//       a PageRecord (and it may not be possible if the page doesn't exist).
		if ( !$revision instanceof RevisionRecord ) {
			[ $page, $revision ] = $this->resolveRevision( $page, $revision, $lenientRevHandling );
		}

		// Enforce caller expectation
		$revId = $revision->getId();
		if ( $revId !== 0 && $revId !== null ) {
			return Status::newFatal( 'parsoid-revision-access',
				"parseUncacheable should not be called for a real revision" );
		}

		try {
			// Since we aren't caching this output, there is no need to
			// call setUseParsoid() here.
			// Build the parser once and use that instance; previously the
			// factory was invoked twice and the first parser was discarded.
			$parser = $this->parsoidParserFactory->create();
			$parserOutput = $parser->parseFakeRevision( $revision, $page, $parserOpts );
			$parserOutput->updateCacheExpiry( 0 ); // Ensure this isn't accidentally cached
			$status = Status::newGood( $parserOutput );
		} catch ( RevisionAccessException $e ) {
			return Status::newFatal( 'parsoid-revision-access', $e->getMessage() );
		} catch ( ClientError $e ) {
			$status = Status::newFatal( 'parsoid-client-error', $e->getMessage() );
		} catch ( ResourceLimitExceededException $e ) {
			$status = Status::newFatal( 'parsoid-resource-limit-exceeded', $e->getMessage() );
		}
		return $status;
	}

	/**
	 * Resolve the given page and revision to a PageRecord and a RevisionRecord
	 * and check that they belong together.
	 *
	 * @param PageIdentity $page Looked up via PageLookup unless it already is a PageRecord
	 * @param RevisionRecord|int|null $revision Revision object, revision ID,
	 *   or null for the page's latest revision
	 * @param bool $lenientRevHandling If true and the revision belongs to a
	 *   different page, that page is loaded and returned instead of failing.
	 *
	 * @return array [ PageRecord $page, RevisionRecord $revision, bool $uncacheable ]
	 *   $uncacheable is true only if the page was replaced by the revision's
	 *   own page because of $lenientRevHandling.
	 * @throws RevisionAccessException If the page or the revision is not found,
	 *   or if the revision belongs to another page and $lenientRevHandling is false
	 * @throws \RuntimeException If, under lenient handling, the page the
	 *   revision refers to cannot be loaded
	 */
	private function resolveRevision( PageIdentity $page, $revision, bool $lenientRevHandling = false ): array {
		$uncacheable = false;
		if ( !$page instanceof PageRecord ) {
			// Capture the string form before $page is overwritten by the lookup result.
			$name = "$page";
			$page = $this->pageLookup->getPageByReference( $page );
			if ( !$page ) {
				throw new RevisionAccessException(
					'Page {name} not found',
					[ 'name' => $name ]
				);
			}
		}

		if ( $revision === null ) {
			$revision = $page->getLatest();
		}

		if ( is_int( $revision ) ) {
			$revId = $revision;
			$revision = $this->revisionLookup->getRevisionById( $revId );

			if ( !$revision ) {
				throw new RevisionAccessException(
					'Revision {revId} not found',
					[ 'revId' => $revId ]
				);
			}
		}

		if ( $page->getId() !== $revision->getPageId() ) {
			if ( $lenientRevHandling ) {
				$page = $this->pageLookup->getPageById( $revision->getPageId() );
				if ( !$page ) {
					// This should ideally never trigger!
					throw new \RuntimeException(
						"Unexpected NULL page for pageid " . $revision->getPageId() .
						" from revision " . $revision->getId()
					);
				}
				// Don't cache this!
				$uncacheable = true;
			} else {
				throw new RevisionAccessException(
					'Revision {revId} does not belong to page {name}',
					[ 'name' => $page->getDBkey(), 'revId' => $revision->getId() ]
				);
			}
		}

		return [ $page, $revision, $uncacheable ];
	}

	/**
	 * Call setUseParsoid() on the parser options if the content model of the
	 * revision's main slot is supported (see supportsContentModel()).
	 *
	 * @param RevisionRecord $revision
	 * @param ParserOptions $parserOpts Modified in place
	 */
	private function adjustParserOptions( RevisionRecord $revision, ParserOptions $parserOpts ): void {
		$mainSlot = $revision->getSlot( SlotRecord::MAIN );
		$contentModel = $mainSlot->getModel();
		if ( $this->supportsContentModel( $contentModel ) ) {
			// Since we know Parsoid supports this content model, explicitly
			// call ParserOptions::setUseParsoid. This ensures that when
			// we query the parser-cache, the right cache key is called.
			// This is an optional transition step to using ParserOutputAccess.
			$parserOpts->setUseParsoid();
		}
	}
}