Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
41.43% |
58 / 140 |
|
6.67% |
1 / 15 |
CRAP | |
0.00% |
0 / 1 |
WikitextContentHandler | |
41.43% |
58 / 140 |
|
6.67% |
1 / 15 |
251.82 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
2 | |||
getContentClass | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
makeRedirectContent | |
93.75% |
15 / 16 |
|
0.00% |
0 / 1 |
5.01 | |||
supportsRedirects | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
supportsSections | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isParserCacheSupported | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
supportsPreloadContent | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getFileHandler | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
2 | |||
getFieldsForSearchIndex | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
2 | |||
getDataForSearchIndex | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
2 | |||
serializeContent | |
75.00% |
3 / 4 |
|
0.00% |
0 / 1 |
2.06 | |||
preSaveTransform | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
6 | |||
preloadTransform | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
2 | |||
extractRedirectTargetAndText | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
42 | |||
fillParserOutput | |
76.47% |
26 / 34 |
|
0.00% |
0 / 1 |
7.64 |
1 | <?php |
2 | /** |
3 | * Content handler for wiki text pages. |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License along |
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
18 | * http://www.gnu.org/copyleft/gpl.html |
19 | * |
20 | * @since 1.21 |
21 | * |
22 | * @file |
23 | * @ingroup Content |
24 | */ |
25 | |
26 | use MediaWiki\Content\Renderer\ContentParseParams; |
27 | use MediaWiki\Content\Transform\PreloadTransformParams; |
28 | use MediaWiki\Content\Transform\PreSaveTransformParams; |
29 | use MediaWiki\Languages\LanguageNameUtils; |
30 | use MediaWiki\Linker\LinkRenderer; |
31 | use MediaWiki\Logger\LoggerFactory; |
32 | use MediaWiki\Parser\MagicWordFactory; |
33 | use MediaWiki\Parser\ParserOutput; |
34 | use MediaWiki\Parser\ParserOutputFlags; |
35 | use MediaWiki\Parser\Parsoid\ParsoidParserFactory; |
36 | use MediaWiki\Revision\RevisionRecord; |
37 | use MediaWiki\Title\Title; |
38 | use MediaWiki\Title\TitleFactory; |
39 | use Wikimedia\UUID\GlobalIdGenerator; |
40 | |
/**
 * Content handler for wiki text pages.
 *
 * Besides the generic text handling inherited from TextContentHandler, this
 * handler knows how to build and recognise wikitext redirects, how to run the
 * pre-save and preload transforms, how to render wikitext through either the
 * legacy parser or Parsoid, and which extra fields wikitext pages contribute
 * to the search index.
 *
 * @ingroup Content
 */
class WikitextContentHandler extends TextContentHandler {

	private TitleFactory $titleFactory;
	private ParserFactory $parserFactory;
	private GlobalIdGenerator $globalIdGenerator;
	private LanguageNameUtils $languageNameUtils;
	private LinkRenderer $linkRenderer;
	private MagicWordFactory $magicWordFactory;
	private ParsoidParserFactory $parsoidParserFactory;

	/**
	 * @param string $modelId Content model ID; should always be CONTENT_MODEL_WIKITEXT.
	 * @param TitleFactory $titleFactory Used to build Title objects for redirect targets
	 *  and for the page being parsed.
	 * @param ParserFactory $parserFactory Source of the legacy parser instance.
	 * @param GlobalIdGenerator $globalIdGenerator Not used by this class directly; it is
	 *  stored so it can be handed on to the FileContentHandler.
	 * @param LanguageNameUtils $languageNameUtils Used to detect interlanguage prefixes
	 *  when creating redirects.
	 * @param LinkRenderer $linkRenderer Used to build the redirect header.
	 * @param MagicWordFactory $magicWordFactory Source of the 'redirect' magic word.
	 * @param ParsoidParserFactory $parsoidParserFactory Source of the Parsoid parser.
	 */
	public function __construct(
		string $modelId,
		TitleFactory $titleFactory,
		ParserFactory $parserFactory,
		GlobalIdGenerator $globalIdGenerator,
		LanguageNameUtils $languageNameUtils,
		LinkRenderer $linkRenderer,
		MagicWordFactory $magicWordFactory,
		ParsoidParserFactory $parsoidParserFactory
	) {
		// $modelId should always be CONTENT_MODEL_WIKITEXT
		parent::__construct( $modelId, [ CONTENT_FORMAT_WIKITEXT ] );
		$this->titleFactory = $titleFactory;
		$this->parserFactory = $parserFactory;
		$this->globalIdGenerator = $globalIdGenerator;
		$this->languageNameUtils = $languageNameUtils;
		$this->linkRenderer = $linkRenderer;
		$this->magicWordFactory = $magicWordFactory;
		$this->parsoidParserFactory = $parsoidParserFactory;
	}

	/**
	 * Returns the name of the class this handler instantiates when it has to
	 * create new content objects (redirects, transformed content).
	 *
	 * @return class-string<WikitextContent>
	 */
	protected function getContentClass() {
		return WikitextContent::class;
	}

	/**
	 * Returns a WikitextContent object representing a redirect to the given destination page.
	 *
	 * The generated text is the first synonym of the 'redirect' magic word followed by a
	 * link to the destination. The link target is prefixed with a colon when the destination
	 * is in the category namespace or carries an interwiki prefix that is a defined language
	 * name.
	 *
	 * @param Title $destination The page to redirect to.
	 * @param string $text Text to include in the redirect, if possible. It is appended
	 *  on a new line after the redirect link.
	 *
	 * @return Content
	 *
	 * @see ContentHandler::makeRedirectContent
	 */
	public function makeRedirectContent( Title $destination, $text = '' ) {
		$optionalColon = '';

		if ( $destination->getNamespace() === NS_CATEGORY ) {
			$optionalColon = ':';
		} else {
			$iw = $destination->getInterwiki();
			if ( $iw && $this->languageNameUtils->getLanguageName( $iw,
				LanguageNameUtils::AUTONYMS,
				LanguageNameUtils::DEFINED
			) ) {
				$optionalColon = ':';
			}
		}

		$mwRedir = $this->magicWordFactory->get( 'redirect' );
		$redirectText = $mwRedir->getSynonym( 0 ) .
			' [[' . $optionalColon . $destination->getFullText() . ']]';

		// Loose comparison: null is treated like the empty string, i.e. as "no extra text".
		if ( $text != '' ) {
			$redirectText .= "\n" . $text;
		}

		$class = $this->getContentClass();
		return new $class( $redirectText );
	}

	/**
	 * Returns true because wikitext supports redirects.
	 *
	 * @return bool Always true.
	 *
	 * @see ContentHandler::supportsRedirects
	 */
	public function supportsRedirects() {
		return true;
	}

	/**
	 * Returns true because wikitext supports sections.
	 *
	 * @return bool Always true.
	 *
	 * @see ContentHandler::supportsSections
	 */
	public function supportsSections() {
		return true;
	}

	/**
	 * Returns true, because wikitext supports caching using the
	 * ParserCache mechanism.
	 *
	 * @since 1.21
	 *
	 * @return bool Always true.
	 *
	 * @see ContentHandler::isParserCacheSupported
	 */
	public function isParserCacheSupported() {
		return true;
	}

	/** @inheritDoc */
	public function supportsPreloadContent(): bool {
		return true;
	}

	/**
	 * Creates a FileContentHandler for this handler's content model, passing on
	 * every service this handler was constructed with.
	 *
	 * @return FileContentHandler
	 */
	protected function getFileHandler() {
		return new FileContentHandler(
			$this->getModelID(),
			$this->titleFactory,
			$this->parserFactory,
			$this->globalIdGenerator,
			$this->languageNameUtils,
			$this->linkRenderer,
			$this->magicWordFactory,
			$this->parsoidParserFactory
		);
	}

	/**
	 * Returns the search index field mappings for wikitext pages.
	 *
	 * Extends the parent's mappings with the text fields 'heading', 'auxiliary_text'
	 * and 'opening_text', then merges in the mappings of the file handler.
	 *
	 * @param SearchEngine $engine Engine used to create the field mappings.
	 *
	 * @return array Field mappings keyed by field name.
	 */
	public function getFieldsForSearchIndex( SearchEngine $engine ) {
		$fields = parent::getFieldsForSearchIndex( $engine );

		$fields['heading'] =
			$engine->makeSearchFieldMapping( 'heading', SearchIndexField::INDEX_TYPE_TEXT );
		$fields['heading']->setFlag( SearchIndexField::FLAG_SCORING );

		$fields['auxiliary_text'] =
			$engine->makeSearchFieldMapping( 'auxiliary_text', SearchIndexField::INDEX_TYPE_TEXT );

		$fields['opening_text'] =
			$engine->makeSearchFieldMapping( 'opening_text', SearchIndexField::INDEX_TYPE_TEXT );
		$fields['opening_text']->setFlag(
			SearchIndexField::FLAG_SCORING | SearchIndexField::FLAG_NO_HIGHLIGHT
		);
		// Until we have the full first-class content handler for files, we invoke it explicitly here
		return array_merge( $fields, $this->getFileHandler()->getFieldsForSearchIndex( $engine ) );
	}

	/**
	 * Returns the data to be stored in the search index for a wikitext page.
	 *
	 * Starts from the parent's data and fills the wikitext-specific fields from a
	 * WikiTextStructure built on the parser output. For pages in the file namespace
	 * the file handler's data is merged on top.
	 *
	 * @param WikiPage $page The page being indexed.
	 * @param ParserOutput $parserOutput Rendered output the field values are extracted from.
	 * @param SearchEngine $engine Passed through to the parent and the file handler.
	 * @param RevisionRecord|null $revision Passed through to the parent and the file handler.
	 *
	 * @return array Field values keyed by field name.
	 */
	public function getDataForSearchIndex(
		WikiPage $page,
		ParserOutput $parserOutput,
		SearchEngine $engine,
		?RevisionRecord $revision = null
	) {
		$fields = parent::getDataForSearchIndex( $page, $parserOutput, $engine, $revision );

		$structure = new WikiTextStructure( $parserOutput );
		$fields['heading'] = $structure->headings();
		// text fields
		$fields['opening_text'] = $structure->getOpeningText();
		$fields['text'] = $structure->getMainText(); // overwrites one from ContentHandler
		$fields['auxiliary_text'] = $structure->getAuxiliaryText();
		$fields['defaultsort'] = $structure->getDefaultSort();
		// Default for non-file pages; the file handler data merged below may replace it.
		$fields['file_text'] = null;

		// Until we have the full first-class content handler for files, we invoke it explicitly here
		if ( $page->getTitle()->getNamespace() === NS_FILE ) {
			$fields = array_merge(
				$fields,
				$this->getFileHandler()->getDataForSearchIndex( $page, $parserOutput, $engine, $revision )
			);
		}
		return $fields;
	}

	/**
	 * Returns the content's text as-is.
	 *
	 * MessageContent objects are a special case: they are serialized to the plain
	 * text of the message they wrap instead of going through the parent implementation.
	 *
	 * @param Content $content
	 * @param string|null $format The serialization format to check
	 *
	 * @return mixed
	 */
	public function serializeContent( Content $content, $format = null ) {
		$this->checkFormat( $format );

		// NOTE: MessageContent also uses CONTENT_MODEL_WIKITEXT, but it's not a TextContent!
		//       Perhaps MessageContent should use a separate ContentHandler instead.
		if ( $content instanceof MessageContent ) {
			return $content->getMessage()->plain();
		}

		return parent::serializeContent( $content, $format );
	}

	/**
	 * Returns a Content object with the parser's pre-save transformations applied.
	 *
	 * If the transform leaves the text unchanged, the given $content object itself is
	 * returned. Otherwise a new content object is created from the transformed text and
	 * the flags of the parser's output are recorded on it as pre-save transform flags.
	 *
	 * @param Content $content
	 * @param PreSaveTransformParams $pstParams Supplies the page, user and parser options.
	 *
	 * @return Content
	 */
	public function preSaveTransform(
		Content $content,
		PreSaveTransformParams $pstParams
	): Content {
		'@phan-var WikitextContent $content';
		$text = $content->getText();

		$parser = $this->parserFactory->getInstance();
		$pst = $parser->preSaveTransform(
			$text,
			$pstParams->getPage(),
			$pstParams->getUser(),
			$pstParams->getParserOptions()
		);

		if ( $text === $pst ) {
			return $content;
		}

		$contentClass = $this->getContentClass();
		$ret = new $contentClass( $pst );
		$ret->setPreSaveTransformFlags( $parser->getOutput()->getAllFlags() );
		return $ret;
	}

	/**
	 * Returns a Content object with preload transformations applied.
	 *
	 * A new content object is always created from the parser's preload text, even
	 * when that text equals the input text.
	 *
	 * @param Content $content
	 * @param PreloadTransformParams $pltParams Supplies the page, parser options and
	 *  preload parameters.
	 *
	 * @return Content
	 */
	public function preloadTransform(
		Content $content,
		PreloadTransformParams $pltParams
	): Content {
		'@phan-var WikitextContent $content';
		$text = $content->getText();

		$plt = $this->parserFactory->getInstance()->getPreloadText(
			$text,
			$pltParams->getPage(),
			$pltParams->getParserOptions(),
			$pltParams->getParams()
		);

		$contentClass = $this->getContentClass();
		return new $contentClass( $plt );
	}

	/**
	 * Extract the redirect target and the remaining text on the page.
	 *
	 * If the text does not start with the redirect magic word, is not directly followed
	 * by a link, or the link does not resolve to a valid redirect target, the result is
	 * [ null, $content ] with the original content object.
	 *
	 * @since 1.41 (used to be a method on WikitextContent since 1.23)
	 *
	 * @param WikitextContent $content
	 *
	 * @return array List of two elements: LinkTarget|null and WikitextContent object.
	 */
	public function extractRedirectTargetAndText( WikitextContent $content ): array {
		$redir = $this->magicWordFactory->get( 'redirect' );
		$text = ltrim( $content->getText() );

		// On a match, $text is used below as the text with the magic word removed.
		if ( !$redir->matchStartAndRemove( $text ) ) {
			return [ null, $content ];
		}

		// Extract the first link and see if it's usable
		// Ensure that it really does come directly after #REDIRECT
		// Some older redirects included a colon, so don't freak about that!
		$m = [];
		if ( preg_match( '!^\s*:?\s*\[{2}(.*?)(?:\|.*?)?\]{2}\s*!', $text, $m ) ) {
			// Strip preceding colon used to "escape" categories, etc.
			// and URL-decode links. Note that both steps are only applied
			// when the link target contains a percent sign.
			if ( strpos( $m[1], '%' ) !== false ) {
				// Match behavior of inline link parsing here;
				$m[1] = rawurldecode( ltrim( $m[1], ':' ) );
			}

			// TODO: Move isValidRedirectTarget() out Title, so we can use a TitleValue here.
			$title = $this->titleFactory->newFromText( $m[1] );

			// If the title is a redirect to bad special pages or is invalid, return null
			if ( !$title instanceof Title || !$title->isValidRedirectTarget() ) {
				return [ null, $content ];
			}

			// Everything after the matched link (and its trailing whitespace) is the page body.
			$remainingContent = new WikitextContent( substr( $text, strlen( $m[0] ) ) );
			return [ $title, $remainingContent ];
		}

		return [ null, $content ];
	}

	/**
	 * Fills $parserOutput with the result of parsing the content's text.
	 *
	 * The parser is chosen by ParserOptions::getUseParsoid(): Parsoid receives the full
	 * text, while the legacy parser receives the text with the redirect directive
	 * stripped. Parses taking longer than three seconds are logged. For redirects, the
	 * target is added to the output's links and, when HTML is requested, a redirect
	 * header is set; otherwise the raw text is cleared.
	 *
	 * @since 1.38
	 * @param Content $content
	 * @param ContentParseParams $cpoParams
	 * @param ParserOutput &$parserOutput The output object to fill (reference). It is
	 *  replaced by the object returned from the parser.
	 */
	protected function fillParserOutput(
		Content $content,
		ContentParseParams $cpoParams,
		ParserOutput &$parserOutput
	) {
		'@phan-var WikitextContent $content';
		$title = $this->titleFactory->newFromPageReference( $cpoParams->getPage() );
		$parserOptions = $cpoParams->getParserOptions();
		$revId = $cpoParams->getRevId();

		[ $redir, $contentWithoutRedirect ] = $this->extractRedirectTargetAndText( $content );
		if ( $parserOptions->getUseParsoid() ) {
			$parser = $this->parsoidParserFactory->create();
			// Parsoid renders the #REDIRECT magic word as an invisible
			// <link> tag and doesn't require it to be stripped.
			// T349087: ...and in fact, RESTBase relies on getting
			// redirect information from this <link> tag, so it needs
			// to be present.
			$text = $content->getText();
		} else {
			// The legacy parser requires the #REDIRECT magic word to
			// be stripped from the content before parsing.
			$parser = $this->parserFactory->getInstance();
			$text = $contentWithoutRedirect->getText();
		}

		// Start with the negated start time; adding the end time below yields the duration.
		$time = -microtime( true );

		$parserOutput = $parser
			->parse( $text, $title, $parserOptions, true, true, $revId );
		$time += microtime( true );

		// Timing hack
		if ( $time > 3 ) {
			// TODO: Use Parser's logger (once it has one)
			$channel = $parserOptions->getUseParsoid() ? 'slow-parsoid' : 'slow-parse';
			$logger = LoggerFactory::getInstance( $channel );
			$logger->info( 'Parsing {title} was slow, took {time} seconds', [
				'time' => number_format( $time, 2 ),
				'title' => (string)$title,
				'trigger' => $parserOptions->getRenderReason(),
			] );
		}

		// T330667: Record the fact that we used the value of
		// 'useParsoid' to influence this parse.  Note that
		// ::getUseParsoid() has a side-effect on $parserOutput here
		// which didn't occur when we called ::getUseParsoid() earlier
		// because $parserOutput didn't exist at that time.
		$parserOptions->getUseParsoid();

		// Add redirect indicator at the top
		if ( $redir ) {
			// Make sure to include the redirect link in pagelinks
			$parserOutput->addLink( $redir );
			if ( $cpoParams->getGenerateHtml() ) {
				$parserOutput->setRedirectHeader(
					$this->linkRenderer->makeRedirectHeader(
						$title->getPageLanguage(), $redir, false
					)
				);
				$parserOutput->addModuleStyles( [ 'mediawiki.action.view.redirectPage' ] );
			} else {
				$parserOutput->setRawText( null );
			}
		}

		// Pass along user-signature flag.
		// Strict comparison: with loose comparison a non-string flag value such as
		// true would compare equal to 'user-signature' and set the flag wrongly.
		if ( in_array( 'user-signature', $content->getPreSaveTransformFlags(), true ) ) {
			$parserOutput->setOutputFlag( ParserOutputFlags::USER_SIGNATURE );
		}
	}
}