MediaWiki REL1_34
SyntaxHighlight.php
Go to the documentation of this file.
1<?php
21
23
/** Cap on the number of line numbers collected by parseHighlightLines(), and on the span of one range. */
const HIGHLIGHT_MAX_LINES = 1000;

/** Code longer than this many bytes (as measured by strlen()) is output without highlighting. */
const HIGHLIGHT_MAX_BYTES = 102400;

/** CSS class used for the wrapper around highlighted code and passed to Pygments as 'cssclass'. */
const HIGHLIGHT_CSS_CLASS = 'mw-highlight';

/** Version number mixed into the hash produced by makeCacheKeyHash(). */
const CACHE_VERSION = 2;

/** @var array Mapping of MIME-types to lexer names; consulted by onApiFormatHighlight(). */
private static $mimeLexers = [
	'text/javascript' => 'javascript',
	'application/json' => 'javascript',
	'text/xml' => 'xml',
];
42
/**
 * Get the Pygments lexer name for a particular language.
 *
 * The name is lower-cased and looked up among the keys of the array returned
 * by SyntaxHighlight.lexers.php. If it is not found there, it is translated
 * through the GeSHi-to-Pygments map and looked up again.
 *
 * @param string|null $lang Language name as given by the caller
 * @return string|null Lexer name, or null if $lang is null or no lexer matches
 */
private static function getLexer( $lang ) {
	static $lexers = null;

	if ( $lang === null ) {
		return null;
	}

	if ( !$lexers ) {
		// Loaded lazily and kept in a static variable for subsequent calls.
		$lexers = require __DIR__ . '/../SyntaxHighlight.lexers.php';
	}

	$lexer = strtolower( $lang );

	if ( isset( $lexers[$lexer] ) ) {
		return $lexer;
	}

	// NOTE(review): $geshi2pygments is not assigned anywhere in the visible
	// part of this method; the statement that populates it appears to be
	// missing from this view. Confirm against the complete file.

	// Check if this is a GeSHi lexer name for which there exists
	// a compatible Pygments lexer with a different name.
	if ( isset( $geshi2pygments[$lexer] ) ) {
		$lexer = $geshi2pygments[$lexer];
		// Look the translated name up by key, exactly like the direct lookup
		// above. in_array() searches the array's values rather than its keys,
		// and with loose comparison it could match entries unrelated to the name.
		if ( isset( $lexers[$lexer] ) ) {
			return $lexer;
		}
	}

	return null;
}
79
/**
 * Register parser hook.
 *
 * Both the 'source' and the 'syntaxhighlight' tags are routed to parserHook().
 *
 * @param Parser &$parser
 */
public static function onParserFirstCallInit( Parser &$parser ) {
	$handler = [ 'SyntaxHighlight', 'parserHook' ];

	$parser->setHook( 'source', $handler );
	$parser->setHook( 'syntaxhighlight', $handler );
}
90
/**
 * Parser hook.
 *
 * Highlights the tag body via highlight() and wraps the result either in an
 * inline code element or in a block-level div whose content is protected by
 * a 'nowiki' strip marker.
 *
 * @param string $text Raw contents of the tag
 * @param array $args Tag attributes
 * @param Parser $parser
 * @return string HTML
 * @throws MWException If block-mode output is not wrapped in the expected div
 */
public static function parserHook( $text, $args, $parser ) {
	// Expand strip markers first (for e.g. {{#tag:syntaxhighlight|<nowiki>...}}).
	$code = $parser->mStripState->unstripNoWiki( $text );

	// Trailing whitespace goes away entirely; at the start only linefeeds are
	// removed so that leading spaces survive.
	$code = preg_replace( '/^\n+/', '', rtrim( $code ) );

	// Deprecated attribute: enclose="none" is translated to 'inline', and the
	// attribute itself is dropped whatever its value.
	$enclose = $args['enclose'] ?? null;
	if ( $enclose !== null ) {
		if ( $enclose === 'none' ) {
			$args['inline'] = true;
		}
		unset( $args['enclose'] );
	}

	$status = self::highlight( $code, $args['lang'] ?? '', $args );
	if ( !$status->isGood() ) {
		$parser->addTrackingCategory( 'syntaxhighlight-error-category' );
	}
	$html = $status->getValue();

	// Only a small set of HTML attributes is passed through to the wrapper.
	$attribs = Sanitizer::validateAttributes( $args, [ 'style', 'class', 'id', 'dir' ] );
	$attribs['class'] = isset( $attribs['class'] )
		? $attribs['class'] . ' ' . self::HIGHLIGHT_CSS_CLASS
		: self::HIGHLIGHT_CSS_CLASS;
	// Anything other than an explicit 'rtl' is forced to 'ltr'.
	if ( ( $attribs['dir'] ?? null ) !== 'rtl' ) {
		$attribs['dir'] = 'ltr';
	}

	if ( isset( $args['inline'] ) ) {
		// Enforce inlineness. Stray newlines may result in unexpected list and
		// paragraph processing (also known as doBlockLevels()).
		$html = Html::rawElement( 'code', $attribs, str_replace( "\n", ' ', $html ) );
	} else {
		// Kept from earlier versions; the benefit is not documented.
		$attribs['class'] .= ' ' . 'mw-content-' . $attribs['dir'];

		// Strip the wrapper emitted by Pygments so that our own can be used.
		// Always passing 'nowrap' (i.e. 'inline') is not an option because it
		// disables other useful things like line highlighting. The quotes are
		// optional in the pattern to tolerate Html::element() output with
		// wgWellFormedXml=false.
		if ( $html !== '' ) {
			$matches = [];
			if ( !preg_match( '/^<div class="?mw-highlight"?>(.*)<\/div>$/s', trim( $html ), $matches ) ) {
				throw new MWException( 'Unexpected output from Pygments encountered' );
			}
			$html = trim( $matches[1] );
		}

		// Hide the inner HTML behind a 'nowiki' strip marker to prevent list
		// processing (doBlockLevels()), but keep the wrapping <div/> outside
		// of it to prevent <p/>-wrapping.
		$markerIndex = sprintf( '%08X', $parser->mMarkerIndex++ );
		$marker = $parser::MARKER_PREFIX . '-syntaxhighlightinner-' .
			$markerIndex . $parser::MARKER_SUFFIX;
		$parser->mStripState->addNoWiki( $marker, $html );

		$html = Html::openElement( 'div', $attribs ) .
			$marker .
			Html::closeElement( 'div' );
	}

	// Register CSS
	// TODO: Consider moving to a separate method so that public method
	// highlight() can be used without needing to know the module name.
	$parser->getOutput()->addModuleStyles( 'ext.pygments' );

	return $html;
}
174
/**
 * Get the path of the pygmentize executable.
 *
 * If $wgPygmentizePath is false, the global itself is overwritten with the
 * path of the bundled copy under ../pygments/ relative to this directory.
 *
 * @return string Value of $wgPygmentizePath after the default has been applied
 */
public static function getPygmentizePath() {
	global $wgPygmentizePath;

	if ( $wgPygmentizePath !== false ) {
		// Explicitly configured; use as is.
		return $wgPygmentizePath;
	}

	// Unset: fall back to (and remember) the bundled copy.
	$wgPygmentizePath = __DIR__ . '/../pygments/pygmentize';

	return $wgPygmentizePath;
}
188
/**
 * Produce unhighlighted output for a piece of code.
 *
 * @param string $code Code to wrap
 * @param bool $inline True to return only the escaped text, false to wrap it
 *  in a pre element inside a div carrying HIGHLIGHT_CSS_CLASS
 * @return string HTML
 */
private static function plainCodeWrap( $code, $inline ) {
	if ( !$inline ) {
		// Html::element() takes care of escaping the code.
		$pre = Html::element( 'pre', [], $code );

		return Html::rawElement( 'div', [ 'class' => self::HIGHLIGHT_CSS_CLASS ], $pre );
	}

	return htmlspecialchars( $code, ENT_NOQUOTES );
}
204
/**
 * Highlight a code-block using a particular lexer.
 *
 * The returned Status carries the HTML as its value. A warning is added and
 * plain, unhighlighted output is produced when the language is unknown, the
 * code is larger than HIGHLIGHT_MAX_BYTES, shell execution is disabled, or
 * the Pygments invocation fails.
 *
 * @param string $code Code to highlight
 * @param string|null $lang Language name; null selects plain output without
 *  an unknown-language warning
 * @param array $args Options; the keys read here are 'inline', 'line',
 *  'highlight' and 'start'
 * @return Status Status object with the HTML in its value
 */
public static function highlight( $code, $lang = null, $args = [] ) {
	$status = new Status;

	$lexer = self::getLexer( $lang );
	if ( $lang !== null && $lexer === null ) {
		$status->warning( 'syntaxhighlight-error-unknown-language', $lang );
	}

	// An empty tag yields no output at all rather than an empty <pre>.
	if ( $code === '' ) {
		$status->value = '';
		return $status;
	}

	$byteCount = strlen( $code );
	if ( $byteCount > self::HIGHLIGHT_MAX_BYTES ) {
		// Too large: give up on highlighting.
		$lexer = null;
		$status->warning(
			'syntaxhighlight-error-exceeds-size-limit',
			$byteCount,
			self::HIGHLIGHT_MAX_BYTES
		);
	} elseif ( Shell::isDisabled() ) {
		// Pygments cannot be run: give up on highlighting.
		$lexer = null;
		$status->warning( 'syntaxhighlight-error-pygments-invocation-failure' );
		wfWarn(
			'MediaWiki determined that it cannot invoke Pygments. ' .
			'As a result, SyntaxHighlight_GeSHi will not perform any syntax highlighting. ' .
			'See the debug log for details: ' .
			'https://www.mediawiki.org/wiki/Manual:$wgDebugLogFile'
		);
	}

	$inline = isset( $args['inline'] );
	if ( $inline ) {
		$code = trim( $code );
	}

	if ( $lexer === null ) {
		// Highlighting is off for this input; emit escaped plain code.
		$status->value = self::plainCodeWrap( $code, $inline );
		return $status;
	}

	// The insertion order of the options below is significant: it determines
	// both the cache key and the -O argument passed to pygmentize.
	$options = [
		'cssclass' => self::HIGHLIGHT_CSS_CLASS,
		'encoding' => 'utf-8',
	];

	// Line numbers
	if ( isset( $args['line'] ) ) {
		$options['linenos'] = 'inline';
	}

	// PHP code lacking an opening tag is lexed as if it were already inside one.
	if ( $lexer === 'php' && strpos( $code, '<?php' ) === false ) {
		$options['startinline'] = 1;
	}

	// Highlight specific lines
	if ( isset( $args['highlight'] ) ) {
		$highlighted = self::parseHighlightLines( $args['highlight'] );
		if ( $highlighted !== [] ) {
			$options['hl_lines'] = implode( ' ', $highlighted );
		}
	}

	// Starting line number
	if ( isset( $args['start'] ) && ctype_digit( $args['start'] ) ) {
		$options['linenostart'] = (int)$args['start'];
	}

	if ( $inline ) {
		$options['nowrap'] = 1;
	}

	$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
	$cacheKey = $cache->makeGlobalKey( 'highlight', self::makeCacheKeyHash( $code, $lexer, $options ) );
	$error = null;

	// Cache-miss callback: runs pygmentize on $code. On a non-zero exit code
	// the result is marked uncacheable and stderr is handed back via $error.
	$runPygments = function ( $oldValue, &$ttl ) use ( $code, $lexer, $options, &$error ) {
		$pairs = [];
		foreach ( $options as $name => $value ) {
			$pairs[] = $name . '=' . $value;
		}

		$result = Shell::command(
			self::getPygmentizePath(),
			'-l', $lexer,
			'-f', 'html',
			'-O', implode( ',', $pairs )
		)
			->input( $code )
			->restrict( Shell::RESTRICT_DEFAULT | Shell::NO_NETWORK )
			->execute();

		if ( $result->getExitCode() == 0 ) {
			return $result->getStdout();
		}

		$ttl = WANObjectCache::TTL_UNCACHEABLE;
		$error = $result->getStderr();
		return null;
	};

	$output = $cache->getWithSetCallback( $cacheKey, $cache::TTL_MONTH, $runPygments );

	if ( $error !== null || $output === null ) {
		$status->warning( 'syntaxhighlight-error-pygments-invocation-failure' );
		wfWarn( 'Failed to invoke Pygments: ' . $error );
		// Fall back to preformatted code without syntax highlighting
		$output = self::plainCodeWrap( $code, $inline );
	}

	if ( $inline ) {
		// The input was trimmed above, but Pygments appends a line break to
		// its standard output. Left in place it would be preserved in the
		// surrounding paragraph and show up as a space, so trim again.
		$output = trim( $output );
	}

	$status->value = $output;
	return $status;
}
349
/**
 * Construct a cache key for the results of a Pygments invocation.
 *
 * The hash covers the code, the lexer name, the JSON-encoded options and
 * CACHE_VERSION, joined with '|'.
 *
 * @param string $code Code to be highlighted
 * @param string $lexer Lexer name
 * @param array $options Options array
 * @return string MD5 hash
 */
private static function makeCacheKeyHash( $code, $lexer, $options ) {
	$parts = [
		$code,
		$lexer,
		FormatJson::encode( $options, false, FormatJson::ALL_OK ),
		self::CACHE_VERSION,
	];

	return md5( implode( '|', $parts ) );
}
362
/**
 * Take an input specifying a list of lines to highlight, returning
 * a raw list of matching line numbers.
 *
 * The specification is a comma-separated list whose items are either a
 * single number or a range such as "3-7". Items that are neither, and ranges
 * rejected by validHighlightRange(), are ignored. At most
 * HIGHLIGHT_MAX_LINES numbers are returned.
 *
 * @param string $lineSpec
 * @return int[] Line numbers, in the order given
 */
protected static function parseHighlightLines( $lineSpec ) {
	$selected = [];

	foreach ( explode( ',', $lineSpec ) as $item ) {
		$item = trim( $item );

		if ( ctype_digit( $item ) ) {
			// A single line number.
			$selected[] = (int)$item;
		} elseif ( strpos( $item, '-' ) !== false ) {
			// A range; only the first two dash-separated parts are considered.
			list( $start, $end ) = array_map( 'trim', explode( '-', $item ) );
			if ( self::validHighlightRange( $start, $end ) ) {
				for ( $lineNo = (int)$start; $lineNo <= $end; $lineNo++ ) {
					$selected[] = $lineNo;
				}
			}
		}

		// Stop as soon as the cap is exceeded, discarding the surplus.
		if ( count( $selected ) > self::HIGHLIGHT_MAX_LINES ) {
			$selected = array_slice( $selected, 0, self::HIGHLIGHT_MAX_LINES );
			break;
		}
	}

	return $selected;
}
393
/**
 * Validate a provided input range.
 *
 * @param string $start Lower bound of the range
 * @param string $end Upper bound of the range
 * @return bool True if both bounds consist of digits only, $start is above
 *  zero and below $end, and the range spans fewer than HIGHLIGHT_MAX_LINES lines
 */
protected static function validHighlightRange( $start, $end ) {
	if ( !ctype_digit( $start ) || !ctype_digit( $end ) ) {
		return false;
	}

	// The caller expands the range into an array holding every integer in it,
	// so an unbounded range would make it trivial to DoS the system. Impose
	// an arbitrary limit on the size of a single range to reduce the impact.
	return $start > 0 &&
		$start < $end &&
		$end - $start < self::HIGHLIGHT_MAX_LINES;
}
412
/**
 * Hook into Content::getParserOutput to provide syntax highlighting for
 * script content.
 *
 * @param Content $content
 * @param Title $title
 * @param mixed $revId Forwarded unchanged to the parser
 * @param ParserOptions $options
 * @param bool $generateHtml
 * @param ParserOutput &$output Receives the highlighted HTML
 * @return bool False if this handler set the output text, true to let
 *  MediaWiki handle the content itself
 */
public static function onContentGetParserOutput( Content $content, Title $title,
	$revId, ParserOptions $options, $generateHtml, ParserOutput &$output
) {
	// Both globals are read further down.
	global $wgParser, $wgTextModelsToParse;

	if ( !$generateHtml ) {
		// Nothing special for us to do, let MediaWiki handle this.
		return true;
	}

	// Determine the language
	$extension = ExtensionRegistry::getInstance();
	$models = $extension->getAttribute( 'SyntaxHighlightModels' );
	$model = $content->getModel();
	if ( !isset( $models[$model] ) ) {
		// We don't care about this model, carry on.
		return true;
	}
	$lexer = $models[$model];

	// Hope that $wgSyntaxHighlightModels does not contain silly types.
	$text = ContentHandler::getContentText( $content );
	if ( !$text ) {
		// Oops! Non-text content? Let MediaWiki handle this.
		return true;
	}

	// Parse using the standard parser to get links etc. into the database, HTML is replaced below.
	// We could do this using $content->fillParserOutput(), but alas it is 'protected'.
	if ( $content instanceof TextContent && in_array( $model, $wgTextModelsToParse ) ) {
		$output = $wgParser->parse( $text, $title, $options, true, true, $revId );
	}

	$status = self::highlight( $text, $lexer );
	if ( !$status->isOK() ) {
		return true;
	}
	$out = $status->getValue();

	$output->addModuleStyles( 'ext.pygments' );
	$output->setText( '<div dir="ltr">' . $out . '</div>' );

	// Inform MediaWiki that we have parsed this page and it shouldn't mess with it.
	return false;
}
471
/**
 * Hook to provide syntax highlighting for API pretty-printed output.
 *
 * @param IContextSource $context
 * @param string $text Text to highlight
 * @param string $mime MIME type of $text; only types listed in $mimeLexers are handled
 * @param string $format Not used by this handler
 * @return bool False if the highlighted HTML was added to the output, true
 *  to let MediaWiki handle the text itself
 */
public static function onApiFormatHighlight( IContextSource $context, $text, $mime, $format ) {
	if ( !isset( self::$mimeLexers[$mime] ) ) {
		return true;
	}

	$lexer = self::$mimeLexers[$mime];
	$status = self::highlight( $text, $lexer );
	if ( !$status->isOK() ) {
		return true;
	}

	$out = $status->getValue();
	if ( preg_match( '/^<pre([^>]*)>/i', $out, $m ) ) {
		// Add the api-pretty-content class to a leading <pre> tag.
		$attrs = Sanitizer::decodeTagAttributes( $m[1] );
		// The tag may carry no class attribute at all; appending to a missing
		// key would raise an undefined-index notice and leave a leading space.
		$attrs['class'] = isset( $attrs['class'] )
			? $attrs['class'] . ' api-pretty-content'
			: 'api-pretty-content';
		$encodedAttrs = Sanitizer::safeEncodeTagAttributes( $attrs );
		$out = '<pre' . $encodedAttrs . '>' . substr( $out, strlen( $m[0] ) );
	}
	$output = $context->getOutput();
	$output->addModuleStyles( 'ext.pygments' );
	$output->addHTML( '<div dir="ltr">' . $out . '</div>' );

	// Inform MediaWiki that we have parsed this page and it shouldn't mess with it.
	return false;
}
507
/**
 * Conditionally register resource loader modules that depends on the
 * VisualEditor MediaWiki extension.
 *
 * Nothing is registered unless the VisualEditor extension is loaded.
 *
 * @param ResourceLoader &$resourceLoader
 */
public static function onResourceLoaderRegisterModules( &$resourceLoader ) {
	if ( !ExtensionRegistry::getInstance()->isLoaded( 'VisualEditor' ) ) {
		return;
	}

	$resourceLoader->register( 'ext.geshi.visualEditor', [
		'class' => ResourceLoaderSyntaxHighlightVisualEditorModule::class,
		'localBasePath' => __DIR__ . '/../modules',
		'remoteExtPath' => 'SyntaxHighlight_GeSHi/modules',
		'scripts' => [
			've-syntaxhighlight/ve.dm.MWSyntaxHighlightNode.js',
			've-syntaxhighlight/ve.dm.MWBlockSyntaxHighlightNode.js',
			've-syntaxhighlight/ve.dm.MWInlineSyntaxHighlightNode.js',
			've-syntaxhighlight/ve.ce.MWSyntaxHighlightNode.js',
			've-syntaxhighlight/ve.ce.MWBlockSyntaxHighlightNode.js',
			've-syntaxhighlight/ve.ce.MWInlineSyntaxHighlightNode.js',
			've-syntaxhighlight/ve.ui.MWSyntaxHighlightWindow.js',
			've-syntaxhighlight/ve.ui.MWSyntaxHighlightDialog.js',
			've-syntaxhighlight/ve.ui.MWSyntaxHighlightDialogTool.js',
			've-syntaxhighlight/ve.ui.MWSyntaxHighlightInspector.js',
			've-syntaxhighlight/ve.ui.MWSyntaxHighlightInspectorTool.js',
		],
		'styles' => [
			've-syntaxhighlight/ve.ce.MWSyntaxHighlightNode.css',
			've-syntaxhighlight/ve.ui.MWSyntaxHighlightDialog.css',
			've-syntaxhighlight/ve.ui.MWSyntaxHighlightInspector.css',
		],
		'dependencies' => [
			'ext.visualEditor.mwcore',
			'oojs-ui.styles.icons-editing-advanced'
		],
		'messages' => [
			'syntaxhighlight-visualeditor-mwsyntaxhighlightinspector-code',
			'syntaxhighlight-visualeditor-mwsyntaxhighlightinspector-language',
			'syntaxhighlight-visualeditor-mwsyntaxhighlightinspector-none',
			'syntaxhighlight-visualeditor-mwsyntaxhighlightinspector-showlines',
			'syntaxhighlight-visualeditor-mwsyntaxhighlightinspector-startingline',
			'syntaxhighlight-visualeditor-mwsyntaxhighlightinspector-title',
		],
		'targets' => [ 'desktop', 'mobile' ],
	] );
}
556
/**
 * Backward-compatibility shim for extensions.
 *
 * Emits a deprecation warning, highlights the text and hands the resulting
 * HTML to a new GeSHi object.
 *
 * @deprecated
 * @param string $text
 * @param string $lang
 * @return GeSHi
 */
public static function prepare( $text, $lang ) {
	wfDeprecated( __METHOD__ );

	$html = self::highlight( $text, $lang )->getValue();

	return new GeSHi( $html );
}
565
/**
 * Backward-compatibility shim for extensions.
 *
 * Emits a deprecation warning and calls parse_code() on the given object;
 * the head item itself is always empty.
 *
 * @deprecated
 * @param GeSHi $geshi
 * @return string Always the empty string
 */
public static function buildHeadItem( $geshi ) {
	wfDeprecated( __METHOD__ );

	$headItem = '';
	$geshi->parse_code();

	return $headItem;
}
577}
// Make the class also available under the name SyntaxHighlight_GeSHi.
class_alias( SyntaxHighlight::class, 'SyntaxHighlight_GeSHi' );
$wgTextModelsToParse
Determines which types of text are parsed as wikitext.
wfWarn( $msg, $callerOffset=1, $level=E_USER_NOTICE)
Send a warning either to the debug log or in a PHP error depending on $wgDevelopmentWarnings.
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Throws a warning that $function is deprecated.
$wgParser
Definition Setup.php:891
static parseHighlightLines( $lineSpec)
Take an input specifying a list of lines to highlight, returning a raw list of matching line numbers.
static prepare( $text, $lang)
Backward-compatibility shim for extensions.
static onContentGetParserOutput(Content $content, Title $title, $revId, ParserOptions $options, $generateHtml, ParserOutput &$output)
Hook into Content::getParserOutput to provide syntax highlighting for script content.
static makeCacheKeyHash( $code, $lexer, $options)
Construct a cache key for the results of a Pygments invocation.
static onApiFormatHighlight(IContextSource $context, $text, $mime, $format)
Hook to provide syntax highlighting for API pretty-printed output.
static validHighlightRange( $start, $end)
Validate a provided input range.
static onResourceLoaderRegisterModules(&$resourceLoader)
Conditionally register resource loader modules that depends on the VisualEditor MediaWiki extension.
static buildHeadItem( $geshi)
Backward-compatibility shim for extensions.
if( $line===false) $args
Definition cdb.php:64
This program is free software; you can redistribute it and/or modify it under the terms of the GNU Ge...
Definition GeSHi.php:23
MediaWiki exception.
MediaWikiServices is the service locator for the application scope of MediaWiki.
Executes shell commands.
Definition Shell.php:44
Set options of the Parser.
addModuleStyles( $modules)
PHP Parser - Processes wiki markup (which uses a more user-friendly syntax, such as "[[link]]" for ma...
Definition Parser.php:74
setHook( $tag, callable $callback)
Create an HTML-style tag, e.g.
Definition Parser.php:5189
Generic operation result class Has warning/error list, boolean status and arbitrary value.
Definition Status.php:40
static highlight( $code, $lang=null, $args=[])
Highlight a code-block using a particular lexer.
static plainCodeWrap( $code, $inline)
static onParserFirstCallInit(Parser &$parser)
Register parser hook.
static array $mimeLexers
Mapping of MIME-types to lexer names.
static getLexer( $lang)
Get the Pygments lexer name for a particular language.
static parserHook( $text, $args, $parser)
Parser hook.
Content object implementation for representing flat text.
Represents a title within MediaWiki.
Definition Title.php:42
Base interface for content objects.
Definition Content.php:34
Interface for objects which can provide a MediaWiki context on request.
$resourceLoader
Definition load.php:44
$context
Definition load.php:45
$cache
Definition mcc.php:33
$lines
Definition router.php:61
$content
Definition router.php:78
if(!isset( $args[0])) $lang