Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
80.85% |
114 / 141 |
|
40.00% |
6 / 15 |
CRAP | |
0.00% |
0 / 1 |
WikitextContentHandler | |
81.43% |
114 / 140 |
|
40.00% |
6 / 15 |
38.56 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
1 | |||
getContentClass | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
makeRedirectContent | |
93.75% |
15 / 16 |
|
0.00% |
0 / 1 |
5.01 | |||
supportsRedirects | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
supportsSections | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isParserCacheSupported | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
supportsPreloadContent | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getFileHandler | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
1 | |||
getFieldsForSearchIndex | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
1 | |||
getDataForSearchIndex | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
2 | |||
serializeContent | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
preSaveTransform | |
92.86% |
13 / 14 |
|
0.00% |
0 / 1 |
2.00 | |||
preloadTransform | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
2 | |||
extractRedirectTargetAndText | |
78.57% |
11 / 14 |
|
0.00% |
0 / 1 |
6.35 | |||
fillParserOutput | |
77.78% |
28 / 36 |
|
0.00% |
0 / 1 |
7.54 |
1 | <?php |
2 | /** |
3 | * Content handler for wiki text pages. |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License along |
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
18 | * http://www.gnu.org/copyleft/gpl.html |
19 | * |
20 | * @since 1.21 |
21 | * |
22 | * @file |
23 | * @ingroup Content |
24 | */ |
25 | |
26 | namespace MediaWiki\Content; |
27 | |
28 | use MediaWiki\Content\Renderer\ContentParseParams; |
29 | use MediaWiki\Content\Transform\PreloadTransformParams; |
30 | use MediaWiki\Content\Transform\PreSaveTransformParams; |
31 | use MediaWiki\Languages\LanguageNameUtils; |
32 | use MediaWiki\Linker\LinkRenderer; |
33 | use MediaWiki\Logger\LoggerFactory; |
34 | use MediaWiki\Parser\MagicWordFactory; |
35 | use MediaWiki\Parser\ParserFactory; |
36 | use MediaWiki\Parser\ParserOutput; |
37 | use MediaWiki\Parser\ParserOutputFlags; |
38 | use MediaWiki\Parser\Parsoid\ParsoidParserFactory; |
39 | use MediaWiki\Revision\RevisionRecord; |
40 | use MediaWiki\Title\Title; |
41 | use MediaWiki\Title\TitleFactory; |
42 | use SearchEngine; |
43 | use SearchIndexField; |
44 | use Wikimedia\UUID\GlobalIdGenerator; |
45 | use WikiPage; |
46 | |
/**
 * Content handler for wiki text pages.
 *
 * Besides the plain-text behaviour inherited from TextContentHandler, this
 * handler knows how to build and recognise redirects, runs the pre-save and
 * preload transformations through the parser, contributes wikitext-specific
 * search index fields, and fills a ParserOutput using either the legacy
 * parser or Parsoid, depending on the parser options.
 *
 * @ingroup Content
 */
class WikitextContentHandler extends TextContentHandler {

	/** Creates Title objects from redirect link text and from page references. */
	private TitleFactory $titleFactory;

	/** Provides the legacy parser used for PST, preload and non-Parsoid parsing. */
	private ParserFactory $parserFactory;

	/** Not used directly in this class; handed on to the FileContentHandler. */
	private GlobalIdGenerator $globalIdGenerator;

	/** Used to check whether an interwiki prefix is a defined language code. */
	private LanguageNameUtils $languageNameUtils;

	/** Renders the redirect header shown at the top of redirect pages. */
	private LinkRenderer $linkRenderer;

	/** Source of the 'redirect' magic word. */
	private MagicWordFactory $magicWordFactory;

	/** Provides the Parsoid-backed parser used when the options request Parsoid. */
	private ParsoidParserFactory $parsoidParserFactory;

	/**
	 * @param string $modelId Content model ID; should always be CONTENT_MODEL_WIKITEXT
	 * @param TitleFactory $titleFactory
	 * @param ParserFactory $parserFactory
	 * @param GlobalIdGenerator $globalIdGenerator
	 * @param LanguageNameUtils $languageNameUtils
	 * @param LinkRenderer $linkRenderer
	 * @param MagicWordFactory $magicWordFactory
	 * @param ParsoidParserFactory $parsoidParserFactory
	 */
	public function __construct(
		string $modelId,
		TitleFactory $titleFactory,
		ParserFactory $parserFactory,
		GlobalIdGenerator $globalIdGenerator,
		LanguageNameUtils $languageNameUtils,
		LinkRenderer $linkRenderer,
		MagicWordFactory $magicWordFactory,
		ParsoidParserFactory $parsoidParserFactory
	) {
		// The only serialization format registered for this handler is wikitext.
		parent::__construct( $modelId, [ CONTENT_FORMAT_WIKITEXT ] );

		$this->titleFactory = $titleFactory;
		$this->parserFactory = $parserFactory;
		$this->globalIdGenerator = $globalIdGenerator;
		$this->languageNameUtils = $languageNameUtils;
		$this->linkRenderer = $linkRenderer;
		$this->magicWordFactory = $magicWordFactory;
		$this->parsoidParserFactory = $parsoidParserFactory;
	}

	/**
	 * Name of the Content class instantiated by this handler.
	 *
	 * @return class-string<WikitextContent>
	 */
	protected function getContentClass() {
		return WikitextContent::class;
	}

	/**
	 * Returns a WikitextContent object representing a redirect to the given destination page.
	 *
	 * The generated text is the first synonym of the 'redirect' magic word,
	 * followed by a link to the destination and, if given, the extra text on
	 * a new line.
	 *
	 * @param Title $destination The page to redirect to.
	 * @param string $text Text to include in the redirect, if possible.
	 *
	 * @return Content
	 *
	 * @see ContentHandler::makeRedirectContent
	 */
	public function makeRedirectContent( Title $destination, $text = '' ) {
		// The link gets a leading colon when the target is a category page or
		// when its interwiki prefix is a defined language code.
		$needsColon = $destination->getNamespace() === NS_CATEGORY;
		if ( !$needsColon ) {
			$iw = $destination->getInterwiki();
			$needsColon = $iw && $this->languageNameUtils->getLanguageName(
				$iw,
				LanguageNameUtils::AUTONYMS,
				LanguageNameUtils::DEFINED
			);
		}

		$mwRedir = $this->magicWordFactory->get( 'redirect' );
		$redirectText = $mwRedir->getSynonym( 0 ) .
			' [[' . ( $needsColon ? ':' : '' ) . $destination->getFullText() . ']]';

		// Strict check on the string form: null and '' both mean "no extra text".
		if ( (string)$text !== '' ) {
			$redirectText .= "\n" . $text;
		}

		$class = $this->getContentClass();

		return new $class( $redirectText );
	}

	/**
	 * Returns true because wikitext supports redirects.
	 *
	 * @return bool Always true.
	 *
	 * @see ContentHandler::supportsRedirects
	 */
	public function supportsRedirects() {
		return true;
	}

	/**
	 * Returns true because wikitext supports sections.
	 *
	 * @return bool Always true.
	 *
	 * @see ContentHandler::supportsSections
	 */
	public function supportsSections() {
		return true;
	}

	/**
	 * Returns true, because wikitext supports caching using the
	 * ParserCache mechanism.
	 *
	 * @since 1.21
	 *
	 * @return bool Always true.
	 *
	 * @see ContentHandler::isParserCacheSupported
	 */
	public function isParserCacheSupported() {
		return true;
	}

	/** @inheritDoc */
	public function supportsPreloadContent(): bool {
		return true;
	}

	/**
	 * Builds a FileContentHandler that shares this handler's model ID and
	 * injected services. A new instance is created on every call.
	 *
	 * @return FileContentHandler
	 */
	protected function getFileHandler() {
		return new FileContentHandler(
			$this->getModelID(),
			$this->titleFactory,
			$this->parserFactory,
			$this->globalIdGenerator,
			$this->languageNameUtils,
			$this->linkRenderer,
			$this->magicWordFactory,
			$this->parsoidParserFactory
		);
	}

	/**
	 * Adds the 'heading', 'auxiliary_text' and 'opening_text' text fields to
	 * the parent's search index fields, then merges in the fields declared
	 * by the file handler.
	 *
	 * @param SearchEngine $engine
	 *
	 * @return SearchIndexField[] Field mappings keyed by field name
	 */
	public function getFieldsForSearchIndex( SearchEngine $engine ) {
		$fields = parent::getFieldsForSearchIndex( $engine );

		$heading = $engine->makeSearchFieldMapping( 'heading', SearchIndexField::INDEX_TYPE_TEXT );
		$heading->setFlag( SearchIndexField::FLAG_SCORING );
		$fields['heading'] = $heading;

		$fields['auxiliary_text'] =
			$engine->makeSearchFieldMapping( 'auxiliary_text', SearchIndexField::INDEX_TYPE_TEXT );

		$openingText = $engine->makeSearchFieldMapping( 'opening_text', SearchIndexField::INDEX_TYPE_TEXT );
		$openingText->setFlag(
			SearchIndexField::FLAG_SCORING | SearchIndexField::FLAG_NO_HIGHLIGHT
		);
		$fields['opening_text'] = $openingText;

		// Until we have the full first-class content handler for files, we invoke it explicitly here
		return array_merge( $fields, $this->getFileHandler()->getFieldsForSearchIndex( $engine ) );
	}

	/**
	 * Collects the search index data for a page: the parent's data, plus
	 * headings, opening/main/auxiliary text and default sort key taken from
	 * the parser output, plus the file handler's data for pages in NS_FILE.
	 *
	 * @param WikiPage $page
	 * @param ParserOutput $parserOutput
	 * @param SearchEngine $engine
	 * @param RevisionRecord|null $revision
	 *
	 * @return array Field values keyed by field name
	 */
	public function getDataForSearchIndex(
		WikiPage $page,
		ParserOutput $parserOutput,
		SearchEngine $engine,
		?RevisionRecord $revision = null
	) {
		$fields = parent::getDataForSearchIndex( $page, $parserOutput, $engine, $revision );

		$structure = new WikiTextStructure( $parserOutput );
		$fields['heading'] = $structure->headings();
		// text fields
		$fields['opening_text'] = $structure->getOpeningText();
		$fields['text'] = $structure->getMainText(); // overwrites one from ContentHandler
		$fields['auxiliary_text'] = $structure->getAuxiliaryText();
		$fields['defaultsort'] = $structure->getDefaultSort();
		// Stays null unless the file handler below supplies a value.
		$fields['file_text'] = null;

		// Until we have the full first-class content handler for files, we invoke it explicitly here
		if ( $page->getTitle()->getNamespace() === NS_FILE ) {
			$fields = array_merge(
				$fields,
				$this->getFileHandler()->getDataForSearchIndex( $page, $parserOutput, $engine, $revision )
			);
		}

		return $fields;
	}

	/**
	 * Serializes the content by delegating to the parent implementation,
	 * after checking the requested format with checkFormat().
	 *
	 * @param Content $content
	 * @param string|null $format The serialization format to check
	 *
	 * @return mixed
	 */
	public function serializeContent( Content $content, $format = null ) {
		$this->checkFormat( $format );
		return parent::serializeContent( $content, $format );
	}

	/**
	 * Returns a Content object with the parser's pre-save transformation
	 * applied, or the given object itself if the transformation leaves the
	 * text unchanged.
	 *
	 * A newly created object carries the flags of the parser's output as its
	 * pre-save-transform flags.
	 *
	 * @param Content $content
	 * @param PreSaveTransformParams $pstParams
	 *
	 * @return Content
	 */
	public function preSaveTransform(
		Content $content,
		PreSaveTransformParams $pstParams
	): Content {
		'@phan-var WikitextContent $content';
		$text = $content->getText();

		$parser = $this->parserFactory->getInstance();
		$pst = $parser->preSaveTransform(
			$text,
			$pstParams->getPage(),
			$pstParams->getUser(),
			$pstParams->getParserOptions()
		);

		// Nothing changed: hand back the very same object.
		if ( $text === $pst ) {
			return $content;
		}

		$contentClass = $this->getContentClass();
		$ret = new $contentClass( $pst );
		$ret->setPreSaveTransformFlags( $parser->getOutput()->getAllFlags() );

		return $ret;
	}

	/**
	 * Returns a new Content object holding the content's text with the
	 * parser's preload transformation applied.
	 *
	 * @param Content $content
	 * @param PreloadTransformParams $pltParams
	 *
	 * @return Content
	 */
	public function preloadTransform(
		Content $content,
		PreloadTransformParams $pltParams
	): Content {
		'@phan-var WikitextContent $content';
		$text = $content->getText();

		$plt = $this->parserFactory->getInstance()->getPreloadText(
			$text,
			$pltParams->getPage(),
			$pltParams->getParserOptions(),
			$pltParams->getParams()
		);

		$contentClass = $this->getContentClass();

		return new $contentClass( $plt );
	}

	/**
	 * Extract the redirect target and the remaining text on the page.
	 *
	 * The content counts as a redirect only if its left-trimmed text starts
	 * with the 'redirect' magic word, a [[link]] follows directly (optionally
	 * preceded by a colon), and that link resolves to a Title that is a valid
	 * redirect target. In every other case the target is null and the given
	 * content object is returned unchanged.
	 *
	 * @since 1.41 (used to be a method on WikitextContent since 1.23)
	 *
	 * @param WikitextContent $content
	 *
	 * @return array List of two elements: Title|null and WikitextContent object.
	 */
	public function extractRedirectTargetAndText( WikitextContent $content ): array {
		$redir = $this->magicWordFactory->get( 'redirect' );
		$text = ltrim( $content->getText() );

		// On a match, the magic word is removed from $text.
		if ( !$redir->matchStartAndRemove( $text ) ) {
			return [ null, $content ];
		}

		// Extract the first link and see if it's usable
		// Ensure that it really does come directly after #REDIRECT
		// Some older redirects included a colon, so don't freak about that!
		$m = [];
		if ( !preg_match( '!^\s*:?\s*\[{2}(.*?)(?:\|.*?)?\]{2}\s*!', $text, $m ) ) {
			return [ null, $content ];
		}

		// Only links containing '%' are rewritten here: a leading colon is
		// stripped and the link is URL-decoded. Any other link text is passed
		// to the title factory untouched.
		if ( strpos( $m[1], '%' ) !== false ) {
			// Match behavior of inline link parsing here;
			$m[1] = rawurldecode( ltrim( $m[1], ':' ) );
		}

		// TODO: Move isValidRedirectTarget() out Title, so we can use a TitleValue here.
		$title = $this->titleFactory->newFromText( $m[1] );

		// If the title is a redirect to bad special pages or is invalid, return null
		if ( !$title instanceof Title || !$title->isValidRedirectTarget() ) {
			return [ null, $content ];
		}

		// Everything after the matched link (and its trailing whitespace).
		$remainingContent = new WikitextContent( substr( $text, strlen( $m[0] ) ) );

		return [ $title, $remainingContent ];
	}

	/**
	 * Returns a ParserOutput object resulting from parsing the content's text
	 * using the global Parser service.
	 *
	 * Depending on the parser options, the parse is done by Parsoid or by the
	 * legacy parser. Parses taking longer than three seconds are logged. For
	 * redirects, the target is added to the output's links, and either a
	 * redirect header is set (when HTML is generated) or the raw text is
	 * reset to null.
	 *
	 * @since 1.38
	 *
	 * @param Content $content
	 * @param ContentParseParams $cpoParams
	 * @param ParserOutput &$parserOutput The output object to fill (reference).
	 */
	protected function fillParserOutput(
		Content $content,
		ContentParseParams $cpoParams,
		ParserOutput &$parserOutput
	) {
		'@phan-var WikitextContent $content';
		$title = $this->titleFactory->newFromPageReference( $cpoParams->getPage() );
		$parserOptions = $cpoParams->getParserOptions();
		$revId = $cpoParams->getRevId();

		[ $redir, $contentWithoutRedirect ] = $this->extractRedirectTargetAndText( $content );

		$useParsoid = $parserOptions->getUseParsoid();
		if ( $useParsoid ) {
			$parser = $this->parsoidParserFactory->create();
			// Parsoid renders the #REDIRECT magic word as an invisible
			// <link> tag and doesn't require it to be stripped.
			// T349087: ...and in fact, RESTBase relies on getting
			// redirect information from this <link> tag, so it needs
			// to be present.
			// Further, Parsoid can accept a Content in place of a string.
			$text = $content;
			$extraArgs = [ $cpoParams->getPreviousOutput() ];
		} else {
			// The legacy parser requires the #REDIRECT magic word to
			// be stripped from the content before parsing.
			$parser = $this->parserFactory->getInstance();
			$text = $contentWithoutRedirect->getText();
			$extraArgs = [];
		}

		// Start from minus "now" and add "now" afterwards: the sum is the
		// elapsed wall-clock time in seconds.
		$time = -microtime( true );

		$parserOutput = $parser
			->parse( $text, $title, $parserOptions, true, true, $revId, ...$extraArgs );
		$time += microtime( true );

		// Timing hack
		if ( $time > 3 ) {
			// TODO: Use Parser's logger (once it has one)
			$channel = $useParsoid ? 'slow-parsoid' : 'slow-parse';
			$logger = LoggerFactory::getInstance( $channel );
			$logger->info( 'Parsing {title} was slow, took {time} seconds', [
				'time' => number_format( $time, 2 ),
				'title' => (string)$title,
				'trigger' => $parserOptions->getRenderReason(),
			] );
		}

		// T330667: Record the fact that we used the value of
		// 'useParsoid' to influence this parse. Note that
		// ::getUseParsoid() has a side-effect on $parserOutput here
		// which didn't occur when we called ::getUseParsoid() earlier
		// because $parserOutput didn't exist at that time.
		// This call must therefore stay, even though its result is unused.
		$parserOptions->getUseParsoid();

		// Add redirect indicator at the top
		if ( $redir ) {
			// Make sure to include the redirect link in pagelinks
			$parserOutput->addLink( $redir );
			if ( $cpoParams->getGenerateHtml() ) {
				$parserOutput->setRedirectHeader(
					$this->linkRenderer->makeRedirectHeader(
						$title->getPageLanguage(), $redir, false
					)
				);
				$parserOutput->addModuleStyles( [ 'mediawiki.action.view.redirectPage' ] );
			} else {
				$parserOutput->setRawText( null );
			}
		}

		// Pass along user-signature flag
		if ( in_array( 'user-signature', $content->getPreSaveTransformFlags(), true ) ) {
			$parserOutput->setOutputFlag( ParserOutputFlags::USER_SIGNATURE );
		}
	}
}
428 | |
429 | /** @deprecated class alias since 1.43; use the namespaced MediaWiki\Content\WikitextContentHandler instead */ |
430 | class_alias( WikitextContentHandler::class, 'WikitextContentHandler' ); |