Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
80.99% |
115 / 142 |
|
40.00% |
6 / 15 |
CRAP | |
0.00% |
0 / 1 |
WikitextContentHandler | |
81.56% |
115 / 141 |
|
40.00% |
6 / 15 |
38.42 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
1 | |||
getContentClass | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
makeRedirectContent | |
93.75% |
15 / 16 |
|
0.00% |
0 / 1 |
5.01 | |||
supportsRedirects | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
supportsSections | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isParserCacheSupported | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
supportsPreloadContent | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getFileHandler | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
1 | |||
getFieldsForSearchIndex | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
1 | |||
getDataForSearchIndex | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
2 | |||
serializeContent | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
preSaveTransform | |
92.86% |
13 / 14 |
|
0.00% |
0 / 1 |
2.00 | |||
preloadTransform | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
2 | |||
extractRedirectTargetAndText | |
78.57% |
11 / 14 |
|
0.00% |
0 / 1 |
6.35 | |||
fillParserOutput | |
78.38% |
29 / 37 |
|
0.00% |
0 / 1 |
7.50 |
1 | <?php |
2 | /** |
3 | * This program is free software; you can redistribute it and/or modify |
4 | * it under the terms of the GNU General Public License as published by |
5 | * the Free Software Foundation; either version 2 of the License, or |
6 | * (at your option) any later version. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * GNU General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public License along |
14 | * with this program; if not, write to the Free Software Foundation, Inc., |
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
16 | * http://www.gnu.org/copyleft/gpl.html |
17 | * |
18 | * @file |
19 | */ |
20 | |
21 | namespace MediaWiki\Content; |
22 | |
23 | use MediaWiki\Content\Renderer\ContentParseParams; |
24 | use MediaWiki\Content\Transform\PreloadTransformParams; |
25 | use MediaWiki\Content\Transform\PreSaveTransformParams; |
26 | use MediaWiki\Languages\LanguageNameUtils; |
27 | use MediaWiki\Linker\LinkRenderer; |
28 | use MediaWiki\Logger\LoggerFactory; |
29 | use MediaWiki\Page\WikiPage; |
30 | use MediaWiki\Parser\MagicWordFactory; |
31 | use MediaWiki\Parser\ParserFactory; |
32 | use MediaWiki\Parser\ParserOutput; |
33 | use MediaWiki\Parser\ParserOutputFlags; |
34 | use MediaWiki\Parser\Parsoid\ParsoidParserFactory; |
35 | use MediaWiki\Revision\RevisionRecord; |
36 | use MediaWiki\Title\Title; |
37 | use MediaWiki\Title\TitleFactory; |
38 | use SearchEngine; |
39 | use SearchIndexField; |
40 | use Wikimedia\UUID\GlobalIdGenerator; |
41 | |
42 | /** |
43 | * Content handler for wiki text pages. |
44 | * |
45 | * @since 1.21 |
46 | * @ingroup Content |
47 | */ |
48 | class WikitextContentHandler extends TextContentHandler { |
49 | |
/** Used in this class to build Title objects from link text and page references. */
private TitleFactory $titleFactory;

/** Provides the legacy parser instance. */
private ParserFactory $parserFactory;

/** Within this class, only forwarded to the FileContentHandler built by getFileHandler(). */
private GlobalIdGenerator $globalIdGenerator;

/** Used to check whether an interwiki prefix is a defined language name. */
private LanguageNameUtils $languageNameUtils;

/** Renders the redirect header placed on redirect pages. */
private LinkRenderer $linkRenderer;

/** Source of the 'redirect' magic word. */
private MagicWordFactory $magicWordFactory;

/** Creates the Parsoid-based parser when parser options request it. */
private ParsoidParserFactory $parsoidParserFactory;

/**
 * Registers CONTENT_FORMAT_WIKITEXT as the only supported format with the
 * parent handler and stores the injected services.
 *
 * @param string $modelId Expected to always be CONTENT_MODEL_WIKITEXT.
 * @param TitleFactory $titleFactory
 * @param ParserFactory $parserFactory
 * @param GlobalIdGenerator $globalIdGenerator
 * @param LanguageNameUtils $languageNameUtils
 * @param LinkRenderer $linkRenderer
 * @param MagicWordFactory $magicWordFactory
 * @param ParsoidParserFactory $parsoidParserFactory
 */
public function __construct(
	string $modelId,
	TitleFactory $titleFactory,
	ParserFactory $parserFactory,
	GlobalIdGenerator $globalIdGenerator,
	LanguageNameUtils $languageNameUtils,
	LinkRenderer $linkRenderer,
	MagicWordFactory $magicWordFactory,
	ParsoidParserFactory $parsoidParserFactory
) {
	parent::__construct( $modelId, [ CONTENT_FORMAT_WIKITEXT ] );

	// Parsing services
	$this->parserFactory = $parserFactory;
	$this->parsoidParserFactory = $parsoidParserFactory;
	$this->magicWordFactory = $magicWordFactory;

	// Title, language and link services
	$this->titleFactory = $titleFactory;
	$this->languageNameUtils = $languageNameUtils;
	$this->linkRenderer = $linkRenderer;

	// Pass-through dependency for the file handler
	$this->globalIdGenerator = $globalIdGenerator;
}
78 | |
/**
 * Names the Content class this handler instantiates (see the
 * `new $class( ... )` calls elsewhere in this class).
 *
 * @return class-string<WikitextContent>
 */
protected function getContentClass() {
	return WikitextContent::class;
}
85 | |
/**
 * Builds the content of a redirect page pointing at the given destination.
 *
 * The result is the first synonym of the 'redirect' magic word followed by
 * a wiki link to the destination and, when $text is non-empty, a newline
 * and that text.
 *
 * @param Title $destination The page to redirect to.
 * @param string $text Text to include in the redirect, if possible.
 *
 * @return Content
 *
 * @see ContentHandler::makeRedirectContent
 */
public function makeRedirectContent( Title $destination, $text = '' ) {
	// Category targets, and interwiki prefixes that are also defined
	// language names, get a leading colon inside the link.
	$needsColon = $destination->getNamespace() === NS_CATEGORY;
	if ( !$needsColon ) {
		$interwikiPrefix = $destination->getInterwiki();
		$needsColon = $interwikiPrefix && $this->languageNameUtils->getLanguageName(
			$interwikiPrefix,
			LanguageNameUtils::AUTONYMS,
			LanguageNameUtils::DEFINED
		);
	}
	$linkPrefix = $needsColon ? ':' : '';

	$redirectMagicWord = $this->magicWordFactory->get( 'redirect' );
	$wikitext = $redirectMagicWord->getSynonym( 0 ) .
		' [[' . $linkPrefix . $destination->getFullText() . ']]';

	if ( $text != '' ) {
		$wikitext .= "\n" . $text;
	}

	$contentClass = $this->getContentClass();

	return new $contentClass( $wikitext );
}
123 | |
/**
 * Wikitext pages can be redirects, so this handler reports redirect support.
 *
 * @return bool Always true.
 *
 * @see ContentHandler::supportsRedirects
 */
public function supportsRedirects() {
	return true;
}
134 | |
/**
 * Wikitext pages can be split into sections, so this handler reports
 * section support.
 *
 * @return bool Always true.
 *
 * @see ContentHandler::supportsSections
 */
public function supportsSections() {
	return true;
}
145 | |
/**
 * Reports that output produced for wikitext may be cached through the
 * ParserCache mechanism.
 *
 * @since 1.21
 *
 * @return bool Always true.
 *
 * @see ContentHandler::isParserCacheSupported
 */
public function isParserCacheSupported() {
	return true;
}
159 | |
/**
 * @inheritDoc
 *
 * Always true for wikitext; the matching transformation is implemented by
 * preloadTransform() in this class.
 */
public function supportsPreloadContent(): bool {
	return true;
}
164 | |
/**
 * Creates a FileContentHandler that shares this handler's model ID and
 * injected services. A new instance is built on every call.
 *
 * @return FileContentHandler
 */
protected function getFileHandler() {
	$fileHandler = new FileContentHandler(
		$this->getModelID(),
		$this->titleFactory,
		$this->parserFactory,
		$this->globalIdGenerator,
		$this->languageNameUtils,
		$this->linkRenderer,
		$this->magicWordFactory,
		$this->parsoidParserFactory
	);

	return $fileHandler;
}
180 | |
/**
 * Extends the parent's search field definitions with the wikitext-specific
 * text fields 'heading', 'auxiliary_text' and 'opening_text', then merges
 * in the field definitions of the file handler.
 *
 * @param SearchEngine $engine Engine used to create the field mappings.
 *
 * @return array Field definitions keyed by field name.
 */
public function getFieldsForSearchIndex( SearchEngine $engine ) {
	$fields = parent::getFieldsForSearchIndex( $engine );

	$headingField =
		$engine->makeSearchFieldMapping( 'heading', SearchIndexField::INDEX_TYPE_TEXT );
	$headingField->setFlag( SearchIndexField::FLAG_SCORING );
	$fields['heading'] = $headingField;

	$fields['auxiliary_text'] =
		$engine->makeSearchFieldMapping( 'auxiliary_text', SearchIndexField::INDEX_TYPE_TEXT );

	$openingTextField =
		$engine->makeSearchFieldMapping( 'opening_text', SearchIndexField::INDEX_TYPE_TEXT );
	$openingTextField->setFlag(
		SearchIndexField::FLAG_SCORING | SearchIndexField::FLAG_NO_HIGHLIGHT
	);
	$fields['opening_text'] = $openingTextField;

	// Until we have the full first-class content handler for files, we invoke it explicitly here
	$fileFields = $this->getFileHandler()->getFieldsForSearchIndex( $engine );

	return array_merge( $fields, $fileFields );
}
200 | |
/**
 * Collects the search index data for a page: the parent's data plus
 * headings, opening/main/auxiliary text and the default sort key taken
 * from a WikiTextStructure built over the parser output. For pages in
 * NS_FILE the file handler's data is merged on top.
 *
 * @param WikiPage $page
 * @param ParserOutput $parserOutput
 * @param SearchEngine $engine
 * @param RevisionRecord|null $revision
 *
 * @return array Index data keyed by field name.
 */
public function getDataForSearchIndex(
	WikiPage $page,
	ParserOutput $parserOutput,
	SearchEngine $engine,
	?RevisionRecord $revision = null
) {
	$data = parent::getDataForSearchIndex( $page, $parserOutput, $engine, $revision );

	$wikitextStructure = new WikiTextStructure( $parserOutput );
	$data['heading'] = $wikitextStructure->headings();
	// text fields
	$data['opening_text'] = $wikitextStructure->getOpeningText();
	// 'text' replaces the value supplied by the parent handler
	$data['text'] = $wikitextStructure->getMainText();
	$data['auxiliary_text'] = $wikitextStructure->getAuxiliaryText();
	$data['defaultsort'] = $wikitextStructure->getDefaultSort();
	$data['file_text'] = null;

	if ( $page->getTitle()->getNamespace() !== NS_FILE ) {
		return $data;
	}

	// Until we have the full first-class content handler for files, we invoke it explicitly here
	$fileData = $this->getFileHandler()
		->getDataForSearchIndex( $page, $parserOutput, $engine, $revision );

	return array_merge( $data, $fileData );
}
228 | |
/**
 * Serializes the content by delegating to the parent handler, after first
 * passing the requested format through checkFormat().
 *
 * @param Content $content
 * @param string|null $format The serialization format to check
 *
 * @return mixed
 */
public function serializeContent( Content $content, $format = null ) {
	$this->checkFormat( $format );

	$serialized = parent::serializeContent( $content, $format );

	return $serialized;
}
241 | |
/**
 * Runs the legacy parser's pre-save transform over the content's text.
 *
 * When the transform leaves the text unchanged, the given content object
 * is returned as-is. Otherwise a new content object holding the
 * transformed text is returned, carrying all flags of the parser's output
 * as its pre-save-transform flags.
 *
 * @param Content $content
 * @param PreSaveTransformParams $pstParams Supplies page, user and parser options.
 *
 * @return Content
 */
public function preSaveTransform(
	Content $content,
	PreSaveTransformParams $pstParams
): Content {
	'@phan-var WikitextContent $content';
	$originalText = $content->getText();

	$parser = $this->parserFactory->getInstance();
	$transformedText = $parser->preSaveTransform(
		$originalText,
		$pstParams->getPage(),
		$pstParams->getUser(),
		$pstParams->getParserOptions()
	);

	if ( $originalText !== $transformedText ) {
		$contentClass = $this->getContentClass();
		$transformed = new $contentClass( $transformedText );
		$transformed->setPreSaveTransformFlags( $parser->getOutput()->getAllFlags() );

		return $transformed;
	}

	return $content;
}
267 | |
/**
 * Applies the parser's preload transformation to the content's text and
 * wraps the result in a new content object of this handler's content class.
 *
 * @param Content $content
 * @param PreloadTransformParams $pltParams Supplies page, parser options and params.
 *
 * @return Content
 */
public function preloadTransform(
	Content $content,
	PreloadTransformParams $pltParams
): Content {
	'@phan-var WikitextContent $content';
	$sourceText = $content->getText();

	$parser = $this->parserFactory->getInstance();
	$preloadedText = $parser->getPreloadText(
		$sourceText,
		$pltParams->getPage(),
		$pltParams->getParserOptions(),
		$pltParams->getParams()
	);

	$contentClass = $this->getContentClass();
	$preloaded = new $contentClass( $preloadedText );

	return $preloaded;
}
295 | |
/**
 * Splits redirect content into its target and the text that follows it.
 *
 * When the text does not start with the 'redirect' magic word, is not
 * directly followed by a wiki link, or the link does not yield a valid
 * redirect target, the result is [ null, $content ] with the content
 * object that was passed in.
 *
 * @since 1.41 (used to be a method on WikitextContent since 1.23)
 *
 * @param WikitextContent $content
 *
 * @return array List of two elements: LinkTarget|null and WikitextContent object.
 */
public function extractRedirectTargetAndText( WikitextContent $content ): array {
	$notARedirect = [ null, $content ];

	$redirectMagicWord = $this->magicWordFactory->get( 'redirect' );
	$remaining = ltrim( $content->getText() );

	if ( !$redirectMagicWord->matchStartAndRemove( $remaining ) ) {
		return $notARedirect;
	}

	// The first link must come directly after #REDIRECT. Some older
	// redirects put a colon in between, which is tolerated.
	$match = [];
	if ( !preg_match( '!^\s*:?\s*\[{2}(.*?)(?:\|.*?)?\]{2}\s*!', $remaining, $match ) ) {
		return $notARedirect;
	}

	$targetText = $match[1];
	if ( strpos( $targetText, '%' ) !== false ) {
		// Match behavior of inline link parsing here: drop leading colons
		// and URL-decode the link text.
		$targetText = rawurldecode( ltrim( $targetText, ':' ) );
	}

	// TODO: Move isValidRedirectTarget() out Title, so we can use a TitleValue here.
	$target = $this->titleFactory->newFromText( $targetText );

	// Invalid titles and titles that are not valid redirect targets are rejected
	if ( !( $target instanceof Title && $target->isValidRedirectTarget() ) ) {
		return $notARedirect;
	}

	$textAfterLink = substr( $remaining, strlen( $match[0] ) );

	return [ $target, new WikitextContent( $textAfterLink ) ];
}
337 | |
/**
 * Parses the content's wikitext and stores the result in $parserOutput.
 *
 * The Parsoid parser is used when the parser options request it, otherwise
 * the legacy parser. The referenced $parserOutput variable is replaced by
 * the ParserOutput returned from the parse and is then amended with
 * redirect information and the user-signature flag where applicable.
 * Parses taking longer than 3 seconds are logged.
 *
 * @since 1.38
 *
 * @param Content $content
 * @param ContentParseParams $cpoParams
 * @param ParserOutput &$parserOutput The output object to fill (reference).
 */
protected function fillParserOutput(
	Content $content,
	ContentParseParams $cpoParams,
	ParserOutput &$parserOutput
) {
	'@phan-var WikitextContent $content';
	$title = $this->titleFactory->newFromPageReference( $cpoParams->getPage() );
	$parserOptions = $cpoParams->getParserOptions();
	$revId = $cpoParams->getRevId();

	[ $redir, $contentWithoutRedirect ] = $this->extractRedirectTargetAndText( $content );
	if ( $parserOptions->getUseParsoid() ) {
		$parser = $this->parsoidParserFactory->create();
		// Parsoid renders the #REDIRECT magic word as an invisible
		// <link> tag and doesn't require it to be stripped.
		// T349087: ...and in fact, RESTBase relies on getting
		// redirect information from this <link> tag, so it needs
		// to be present.
		// Further, Parsoid can accept a Content in place of a string.
		$text = $content;
		$extraArgs = [ $cpoParams->getPreviousOutput() ];
	} else {
		// The legacy parser requires the #REDIRECT magic word to
		// be stripped from the content before parsing.
		$parser = $this->parserFactory->getInstance();
		$text = $contentWithoutRedirect->getText();
		$extraArgs = [];
	}

	// Measure the wall-clock duration of the parse only
	$startTime = microtime( true );
	$parserOutput = $parser
		->parse( $text, $title, $parserOptions, true, true, $revId, ...$extraArgs );
	$elapsed = microtime( true ) - $startTime;

	// Timing hack
	if ( $elapsed > 3 ) {
		// TODO: Use Parser's logger (once it has one)
		$channel = $parserOptions->getUseParsoid() ? 'slow-parsoid' : 'slow-parse';
		$logger = LoggerFactory::getInstance( $channel );
		$logger->info( 'Parsing {title} was slow, took {time} seconds', [
			'time' => number_format( $elapsed, 2 ),
			'title' => (string)$title,
			'trigger' => $parserOptions->getRenderReason(),
		] );
	}

	// T330667: Record the fact that we used the value of
	// 'useParsoid' to influence this parse. Note that
	// ::getUseParsoid() has a side-effect on $parserOutput here
	// which didn't occur when we called ::getUseParsoid() earlier
	// because $parserOutput didn't exist at that time.
	$parserOptions->getUseParsoid();

	// Add redirect indicator at the top
	if ( $redir ) {
		// Make sure to include the redirect link in pagelinks
		$parserOutput->addLink( $redir );
		if ( $cpoParams->getGenerateHtml() ) {
			$parserOutput->setRedirectHeader(
				$this->linkRenderer->makeRedirectHeader(
					$title->getPageLanguage(), $redir, false,
					// Add link tag only if we're not using parsoid,
					// since Parsoid adds one itself.
					!$parserOptions->getUseParsoid()
				)
			);
			$parserOutput->addModuleStyles( [ 'mediawiki.action.view.redirectPage' ] );
		} else {
			$parserOutput->setRawText( null );
		}
	}

	// Pass along user-signature flag. The comparison is strict so that a
	// non-string flag value cannot loosely match the flag name.
	if ( in_array( 'user-signature', $content->getPreSaveTransformFlags(), true ) ) {
		$parserOutput->setOutputFlag( ParserOutputFlags::USER_SIGNATURE );
	}
}
426 | } |
427 | |
428 | /** @deprecated class alias since 1.43; registers the un-namespaced global name 'WikitextContentHandler' as an alias of this namespaced class */ |
429 | class_alias( WikitextContentHandler::class, 'WikitextContentHandler' ); |