MediaWiki fundraising/REL1_35
SyntaxHighlight.php
Go to the documentation of this file.
1<?php
21
23
/** Cap on the number of line numbers parseHighlightLines() returns; also bounds the span of a single range. */
25 const HIGHLIGHT_MAX_LINES = 1000;
26
/** Code longer than this many bytes (per strlen()) is output without syntax highlighting. */
28 const HIGHLIGHT_MAX_BYTES = 102400;
29
/** CSS class put on the wrapper element and passed to Pygments as its 'cssclass' option. */
31 const HIGHLIGHT_CSS_CLASS = 'mw-highlight';
32
/** Mixed into the hash built by makeCacheKeyHash(), so changing it changes every cache key. */
34 const CACHE_VERSION = 2;
35
/** Mapping of MIME types to lexer names, consulted by onApiFormatHighlight(). */
37 private static $mimeLexers = [
38 'text/javascript' => 'javascript',
39 'application/json' => 'javascript',
40 'text/xml' => 'xml',
41 ];
42
/**
 * Get the Pygments lexer name for a particular language.
 *
 * The name is lower-cased and looked up among the known lexers; if that
 * fails, it is translated through the GeSHi-to-Pygments compatibility map
 * and looked up again.
 *
 * @param string|null $lang Language name (case-insensitive)
 * @return string|null Lexer name, or null if no lexer matches
 */
private static function getLexer( $lang ) {
	static $lexers = null;

	if ( $lang === null ) {
		return null;
	}

	if ( !$lexers ) {
		// Kept in a static so the file is only required on the first call.
		$lexers = require __DIR__ . '/../SyntaxHighlight.lexers.php';
	}

	$lexer = strtolower( $lang );

	// The lexer list is keyed by lexer name, so membership is an isset() test.
	if ( isset( $lexers[$lexer] ) ) {
		return $lexer;
	}

	// NOTE(review): the assignment of $geshi2pygments is absent from this
	// listing; the call below is the upstream SyntaxHighlight_GeSHi source
	// of the map -- confirm against the repository.
	$geshi2pygments = SyntaxHighlightGeSHiCompat::getGeSHiToPygmentsMap();

	// Check if this is a GeSHi lexer name for which there exists
	// a compatible Pygments lexer with a different name.
	if ( isset( $geshi2pygments[$lexer] ) ) {
		$lexer = $geshi2pygments[$lexer];
		// Use the same key lookup as above. in_array() searched the array
		// values instead, and with loose comparison did not actually check
		// that the mapped name is a known lexer.
		if ( isset( $lexers[$lexer] ) ) {
			return $lexer;
		}
	}

	return null;
}
79
/**
 * Register the <source> and <syntaxhighlight> tag hooks on the parser.
 *
 * @param Parser $parser
 */
public static function onParserFirstCallInit( Parser $parser ) {
	$tagHandlers = [
		'source' => [ 'SyntaxHighlight', 'parserHookSource' ],
		'syntaxhighlight' => [ 'SyntaxHighlight', 'parserHook' ],
	];

	foreach ( $tagHandlers as $tagName => $handler ) {
		$parser->setHook( $tagName, $handler );
	}
}
89
/**
 * Parser hook for <source>: records the tracking category for the
 * deprecated tag, then delegates to parserHook().
 *
 * @param string $text
 * @param array $args
 * @param Parser $parser
 * @return string Whatever parserHook() returns for the same arguments
 */
public static function parserHookSource( $text, $args, $parser ) {
	$parser->addTrackingCategory( 'syntaxhighlight-source-category' );

	$rendered = self::parserHook( $text, $args, $parser );
	return $rendered;
}
103
/**
 * Parser hook for both <source> and <syntaxhighlight> logic.
 *
 * Highlights the tag content, wraps it in either an inline <code> element
 * or a block-level <div>, and registers the 'ext.pygments' styles on the
 * parser output.
 *
 * @param string $text Raw tag content
 * @param array $args Tag attributes; 'lang', 'inline', the deprecated
 *  'enclose', and the HTML attributes style/class/id/dir are read here
 * @param Parser $parser
 * @return string HTML
 * @throws MWException If non-empty block output lacks the expected
 *  <div class="mw-highlight"> wrapper
 */
public static function parserHook( $text, $args, $parser ) {
	// Replace strip markers (e.g. from {{#tag:syntaxhighlight|<nowiki>...}}),
	// then strip trailing whitespace and leading linefeeds only, so that
	// leading spaces on the first line survive.
	$code = $parser->mStripState->unstripNoWiki( $text );
	$code = preg_replace( '/^\n+/', '', rtrim( $code ) );

	// Translate the deprecated 'enclose' attribute
	if ( isset( $args['enclose'] ) ) {
		if ( $args['enclose'] === 'none' ) {
			$args['inline'] = true;
		}
		unset( $args['enclose'] );
		$parser->addTrackingCategory( 'syntaxhighlight-enclose-category' );
	}

	$lang = $args['lang'] ?? '';

	$status = self::highlight( $code, $lang, $args );
	if ( !$status->isGood() ) {
		$parser->addTrackingCategory( 'syntaxhighlight-error-category' );
	}
	$html = $status->getValue();

	// Only a fixed set of HTML attributes is passed through to the wrapper
	$allowedAttribs = array_flip( [ 'style', 'class', 'id', 'dir' ] );
	$htmlAttribs = Sanitizer::validateAttributes( $args, $allowedAttribs );
	$htmlAttribs['class'] = isset( $htmlAttribs['class'] )
		? $htmlAttribs['class'] . ' ' . self::HIGHLIGHT_CSS_CLASS
		: self::HIGHLIGHT_CSS_CLASS;

	$resolvedLexer = self::getLexer( $lang );
	if ( $resolvedLexer !== null ) {
		$htmlAttribs['class'] .= ' ' . self::HIGHLIGHT_CSS_CLASS . '-lang-' . $resolvedLexer;
	}

	// Anything other than an explicit dir="rtl" becomes ltr
	if ( !isset( $htmlAttribs['dir'] ) || $htmlAttribs['dir'] !== 'rtl' ) {
		$htmlAttribs['dir'] = 'ltr';
	}
	'@phan-var array{class:string,dir:string} $htmlAttribs';

	if ( isset( $args['inline'] ) ) {
		// Enforce inlineness. Stray newlines may trigger list and paragraph
		// processing (doBlockLevels()).
		$html = Html::rawElement( 'code', $htmlAttribs, str_replace( "\n", ' ', $html ) );
	} else {
		$htmlAttribs['class'] .= ' ' . 'mw-content-' . $htmlAttribs['dir'];

		// Strip the wrapper Pygments produced so that our own can be used.
		// 'nowrap' cannot be used unconditionally because it disables other
		// features such as line highlighting. Quotes around the class value
		// are optional in the pattern.
		if ( $html !== '' ) {
			$match = [];
			if ( !preg_match( '/^<div class="?mw-highlight"?>(.*)<\/div>$/s', trim( $html ), $match ) ) {
				throw new MWException( 'Unexpected output from Pygments encountered' );
			}
			$html = trim( $match[1] );
		}

		// Hide the inner HTML behind a 'nowiki' strip marker to prevent list
		// processing (doBlockLevels()); the wrapping <div> stays outside the
		// marker to prevent <p>-wrapping.
		$marker = $parser::MARKER_PREFIX . '-syntaxhighlightinner-' .
			sprintf( '%08X', $parser->mMarkerIndex++ ) . $parser::MARKER_SUFFIX;
		$parser->mStripState->addNoWiki( $marker, $html );

		$html = Html::openElement( 'div', $htmlAttribs ) . $marker . Html::closeElement( 'div' );
	}

	// Register CSS
	// TODO: Consider moving to a separate method so that public method
	// highlight() can be used without needing to know the module name.
	$parser->getOutput()->addModuleStyles( 'ext.pygments' );

	return $html;
}
195
/**
 * Get the path of the pygmentize executable.
 *
 * When $wgPygmentizePath is false, it is set to the bundled copy's path
 * (the global itself is overwritten) and that path is returned.
 *
 * @return string
 */
public static function getPygmentizePath() {
	global $wgPygmentizePath;

	if ( $wgPygmentizePath !== false ) {
		return $wgPygmentizePath;
	}

	// Nothing configured: fall back to the bundled copy.
	$wgPygmentizePath = __DIR__ . '/../pygments/pygmentize';
	return $wgPygmentizePath;
}
209
/**
 * Produce unhighlighted output for a piece of code.
 *
 * @param string $code
 * @param bool $inline If true, return only the escaped text; otherwise
 *  wrap it in <div class="mw-highlight"><pre>...</pre></div>
 * @return string HTML
 */
private static function plainCodeWrap( $code, $inline ) {
	if ( !$inline ) {
		// Html::element() is given the raw code here, as in the inline
		// branch's explicit escaping.
		$pre = Html::element( 'pre', [], $code );
		return Html::rawElement( 'div', [ 'class' => self::HIGHLIGHT_CSS_CLASS ], $pre );
	}

	return htmlspecialchars( $code, ENT_NOQUOTES );
}
226
/**
 * Highlight a code-block using a particular lexer.
 *
 * Falls back to plain, unhighlighted output (with a warning on the returned
 * Status) when the language is unknown, the code exceeds
 * HIGHLIGHT_MAX_BYTES, shell execution is disabled, or the Pygments
 * invocation fails.
 *
 * @param string $code Code to highlight
 * @param string|null $lang Language name, or null to skip highlighting
 * @param array $args Options read here:
 *  - 'inline': if set, produce output without a wrapper and trim it
 *  - 'line': if set, show line numbers
 *  - 'highlight': line specification handed to parseHighlightLines()
 *  - 'start': first line number; used only if it consists of digits
 * @return Status Status whose value is the resulting HTML
 */
public static function highlight( $code, $lang = null, $args = [] ) {
	$status = new Status;

	$lexer = self::getLexer( $lang );
	if ( $lang !== null && $lexer === null ) {
		$status->warning( 'syntaxhighlight-error-unknown-language', $lang );
	}

	// For empty tag, output nothing instead of empty <pre>.
	if ( $code === '' ) {
		$status->value = '';
		return $status;
	}

	$byteCount = strlen( $code );
	if ( $byteCount > self::HIGHLIGHT_MAX_BYTES ) {
		// Too large: disable syntax highlighting
		$lexer = null;
		$status->warning(
			'syntaxhighlight-error-exceeds-size-limit',
			$byteCount,
			self::HIGHLIGHT_MAX_BYTES
		);
	} elseif ( Shell::isDisabled() ) {
		// Pygments cannot be run: disable syntax highlighting
		$lexer = null;
		$status->warning( 'syntaxhighlight-error-pygments-invocation-failure' );
		wfWarn(
			'MediaWiki determined that it cannot invoke Pygments. ' .
			'As a result, SyntaxHighlight_GeSHi will not perform any syntax highlighting. ' .
			'See the debug log for details: ' .
			'https://www.mediawiki.org/wiki/Manual:$wgDebugLogFile'
		);
	}

	$inline = isset( $args['inline'] );
	if ( $inline ) {
		$code = trim( $code );
	}

	if ( $lexer === null ) {
		// Highlighting is disabled; emit the code as-is.
		$status->value = self::plainCodeWrap( $code, $inline );
		return $status;
	}

	// Options for the Pygments HTML formatter. The insertion order is kept
	// stable because the options feed both the cache key and the -O string.
	$options = [
		'cssclass' => self::HIGHLIGHT_CSS_CLASS,
		'encoding' => 'utf-8',
	];

	// Line numbers
	if ( isset( $args['line'] ) ) {
		$options['linenos'] = 'inline';
	}

	// PHP code without an opening tag
	if ( $lexer === 'php' && strpos( $code, '<?php' ) === false ) {
		$options['startinline'] = 1;
	}

	// Highlight specific lines
	if ( isset( $args['highlight'] ) ) {
		$hlLines = self::parseHighlightLines( $args['highlight'] );
		if ( count( $hlLines ) ) {
			$options['hl_lines'] = implode( ' ', $hlLines );
		}
	}

	// Starting line number
	if ( isset( $args['start'] ) && ctype_digit( $args['start'] ) ) {
		$options['linenostart'] = (int)$args['start'];
	}

	if ( $inline ) {
		$options['nowrap'] = 1;
	}

	$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
	$cacheKey = $cache->makeGlobalKey(
		'highlight',
		self::makeCacheKeyHash( $code, $lexer, $options )
	);

	$error = null;
	$output = $cache->getWithSetCallback(
		$cacheKey,
		$cache::TTL_MONTH,
		function ( $oldValue, &$ttl ) use ( $code, $lexer, $options, &$error ) {
			// Serialize the options as "name=value" pairs for pygmentize -O
			$optionPairs = array_map(
				static function ( $name, $value ) {
					return "{$name}={$value}";
				},
				array_keys( $options ),
				$options
			);

			$shellResult = Shell::command(
				self::getPygmentizePath(),
				'-l', $lexer,
				'-f', 'html',
				'-O', implode( ',', $optionPairs )
			)
				->input( $code )
				->restrict( Shell::RESTRICT_DEFAULT | Shell::NO_NETWORK )
				->execute();

			if ( $shellResult->getExitCode() != 0 ) {
				// Report the failure to the caller and do not cache it
				$ttl = WANObjectCache::TTL_UNCACHEABLE;
				$error = $shellResult->getStderr();
				return null;
			}

			return $shellResult->getStdout();
		}
	);

	if ( $error !== null || $output === null ) {
		$status->warning( 'syntaxhighlight-error-pygments-invocation-failure' );
		wfWarn(
			$error !== null
				? 'Failed to invoke Pygments: ' . $error
				: 'Invoking Pygments returned blank output with no error response'
		);

		// Fall back to preformatted code without syntax highlighting
		$output = self::plainCodeWrap( $code, $inline );
	}

	// The input was trimmed above, but Pygments appends a line break to its
	// output; inside the wrapping paragraph that would show up as a space,
	// so inline output is trimmed again.
	$status->value = $inline ? trim( $output ) : $output;
	return $status;
}
376
/**
 * Construct a cache key hash for the results of a Pygments invocation.
 *
 * The hash covers the code, the lexer, the JSON-encoded options and
 * CACHE_VERSION, joined with '|'.
 *
 * @param string $code Code to be highlighted
 * @param string $lexer Lexer name
 * @param array $options Options array
 * @return string md5 hash
 */
private static function makeCacheKeyHash( $code, $lexer, $options ) {
	$components = [
		$code,
		$lexer,
		FormatJson::encode( $options, false, FormatJson::ALL_OK ),
		self::CACHE_VERSION,
	];

	return md5( implode( '|', $components ) );
}
389
/**
 * Take an input specifying a list of lines to highlight, returning
 * a raw list of matching line numbers.
 *
 * The specification is a comma-separated list whose entries are either a
 * single number ("3") or a range ("5-8"). Ranges rejected by
 * validHighlightRange() are ignored, and at most HIGHLIGHT_MAX_LINES
 * numbers are returned.
 *
 * @param string $lineSpec
 * @return int[] Line numbers
 */
protected static function parseHighlightLines( $lineSpec ) {
	$selected = [];

	foreach ( explode( ',', $lineSpec ) as $rawEntry ) {
		$entry = trim( $rawEntry );

		if ( ctype_digit( $entry ) ) {
			$selected[] = (int)$entry;
		} elseif ( strpos( $entry, '-' ) !== false ) {
			// Only the first two dash-separated parts are considered
			$bounds = array_map( 'intval', explode( '-', $entry ) );
			$first = $bounds[0];
			$last = $bounds[1];
			if ( self::validHighlightRange( $first, $last ) ) {
				$selected = array_merge( $selected, range( $first, $last ) );
			}
		}

		// Stop as soon as the cap is exceeded, keeping the first entries
		if ( count( $selected ) > self::HIGHLIGHT_MAX_LINES ) {
			$selected = array_slice( $selected, 0, self::HIGHLIGHT_MAX_LINES );
			break;
		}
	}

	return $selected;
}
420
/**
 * Validate a provided input range.
 *
 * A range is accepted when it starts above zero, ends after its start, and
 * spans fewer than HIGHLIGHT_MAX_LINES lines.
 *
 * @param int $start
 * @param int $end
 * @return bool
 */
protected static function validHighlightRange( $start, $end ) {
	// The caller expands the range into an array of every integer in it,
	// so asking for a huge range would be a trivial way to DoS the
	// system. An arbitrary limit on the size of a single range reduces
	// the impact.
	if ( $start > 0 && $start < $end ) {
		return $end - $start < self::HIGHLIGHT_MAX_LINES;
	}

	return false;
}
437
/**
 * Hook into Content::getParserOutput to provide syntax highlighting for
 * script content.
 *
 * NOTE(review): the head of the signature and the `global` declaration are
 * missing from the listing this was taken from. They are restored here from
 * the documented signature and from the body's use of $wgTextModelsToParse;
 * `public` is assumed, as for the other hook handlers -- confirm against
 * the repository.
 *
 * @param Content $content
 * @param Title $title
 * @param int $revId
 * @param ParserOptions $options
 * @param bool $generateHtml
 * @param ParserOutput &$output Receives the highlighted HTML
 * @return bool False if the output was replaced here, true to let
 *  MediaWiki handle the content itself
 */
public static function onContentGetParserOutput( Content $content, Title $title,
	$revId, ParserOptions $options, $generateHtml, ParserOutput &$output
) {
	global $wgTextModelsToParse;

	if ( !$generateHtml ) {
		// Nothing special for us to do, let MediaWiki handle this.
		return true;
	}

	// Determine the language
	$extension = ExtensionRegistry::getInstance();
	$models = $extension->getAttribute( 'SyntaxHighlightModels' );
	$model = $content->getModel();
	if ( !isset( $models[$model] ) ) {
		// We don't care about this model, carry on.
		return true;
	}
	$lexer = $models[$model];

	// Hope that $wgSyntaxHighlightModels does not contain silly types.
	$text = ContentHandler::getContentText( $content );
	// Test explicitly for "no text" rather than falsiness: content that is
	// exactly "0" is valid text and must still be highlighted.
	if ( !is_string( $text ) || $text === '' ) {
		// Oops! Non-text content? Let MediaWiki handle this.
		return true;
	}

	// Parse using the standard parser to get links etc. into the database, HTML is replaced below.
	// We could do this using $content->fillParserOutput(), but alas it is 'protected'.
	if ( $content instanceof TextContent && in_array( $model, $wgTextModelsToParse ) ) {
		$output = MediaWikiServices::getInstance()->getParser()
			->parse( $text, $title, $options, true, true, $revId );
	}

	$status = self::highlight( $text, $lexer );
	if ( !$status->isOK() ) {
		return true;
	}
	$out = $status->getValue();

	$output->addModuleStyles( 'ext.pygments' );
	$output->setText( '<div dir="ltr">' . $out . '</div>' );

	// Inform MediaWiki that we have parsed this page and it shouldn't mess with it.
	return false;
}
497
/**
 * Hook to provide syntax highlighting for API pretty-printed output.
 *
 * @param IContextSource $context Supplies the output page the HTML is added to
 * @param string $text Text to highlight
 * @param string $mime MIME type of $text; only types listed in
 *  self::$mimeLexers are handled
 * @param string $format Not used by this handler
 * @return bool False if the highlighted output was added here, true to
 *  let MediaWiki handle the text itself
 */
public static function onApiFormatHighlight( IContextSource $context, $text, $mime, $format ) {
	if ( !isset( self::$mimeLexers[$mime] ) ) {
		return true;
	}

	$lexer = self::$mimeLexers[$mime];
	$status = self::highlight( $text, $lexer );
	if ( !$status->isOK() ) {
		return true;
	}

	$out = $status->getValue();
	// If the output starts with a <pre> tag, add the API's class to it.
	if ( preg_match( '/^<pre([^>]*)>/i', $out, $m ) ) {
		$attrs = Sanitizer::decodeTagAttributes( $m[1] );
		// The tag may carry no class attribute at all; appending to a
		// missing key would raise an undefined-index error and leave a
		// leading space in the value.
		$attrs['class'] = isset( $attrs['class'] )
			? $attrs['class'] . ' api-pretty-content'
			: 'api-pretty-content';
		$encodedAttrs = Sanitizer::safeEncodeTagAttributes( $attrs );
		$out = '<pre' . $encodedAttrs . '>' . substr( $out, strlen( $m[0] ) );
	}
	$output = $context->getOutput();
	$output->addModuleStyles( 'ext.pygments' );
	$output->addHTML( '<div dir="ltr">' . $out . '</div>' );

	// Inform MediaWiki that we have parsed this page and it shouldn't mess with it.
	return false;
}
533
/**
 * Register the 'ext.geshi.visualEditor' ResourceLoader module, but only
 * when the VisualEditor extension is loaded.
 *
 * @param ResourceLoader $resourceLoader
 */
public static function onResourceLoaderRegisterModules( $resourceLoader ) {
	$visualEditorLoaded = ExtensionRegistry::getInstance()->isLoaded( 'VisualEditor' );
	if ( !$visualEditorLoaded ) {
		return;
	}

	$moduleDefinition = [
		'class' => ResourceLoaderSyntaxHighlightVisualEditorModule::class,
		'localBasePath' => __DIR__ . '/../modules',
		'remoteExtPath' => 'SyntaxHighlight_GeSHi/modules',
		'scripts' => [
			've-syntaxhighlight/ve.dm.MWSyntaxHighlightNode.js',
			've-syntaxhighlight/ve.dm.MWBlockSyntaxHighlightNode.js',
			've-syntaxhighlight/ve.dm.MWInlineSyntaxHighlightNode.js',
			've-syntaxhighlight/ve.ce.MWSyntaxHighlightNode.js',
			've-syntaxhighlight/ve.ce.MWBlockSyntaxHighlightNode.js',
			've-syntaxhighlight/ve.ce.MWInlineSyntaxHighlightNode.js',
			've-syntaxhighlight/ve.ui.MWSyntaxHighlightWindow.js',
			've-syntaxhighlight/ve.ui.MWSyntaxHighlightDialog.js',
			've-syntaxhighlight/ve.ui.MWSyntaxHighlightDialogTool.js',
			've-syntaxhighlight/ve.ui.MWSyntaxHighlightInspector.js',
			've-syntaxhighlight/ve.ui.MWSyntaxHighlightInspectorTool.js',
		],
		'styles' => [
			've-syntaxhighlight/ve.ce.MWSyntaxHighlightNode.css',
			've-syntaxhighlight/ve.ui.MWSyntaxHighlightDialog.css',
			've-syntaxhighlight/ve.ui.MWSyntaxHighlightInspector.css',
		],
		'dependencies' => [
			'ext.visualEditor.mwcore',
			'oojs-ui.styles.icons-editing-advanced'
		],
		'messages' => [
			'syntaxhighlight-visualeditor-mwsyntaxhighlightinspector-code',
			'syntaxhighlight-visualeditor-mwsyntaxhighlightinspector-language',
			'syntaxhighlight-visualeditor-mwsyntaxhighlightinspector-none',
			'syntaxhighlight-visualeditor-mwsyntaxhighlightinspector-showlines',
			'syntaxhighlight-visualeditor-mwsyntaxhighlightinspector-startingline',
			'syntaxhighlight-visualeditor-mwsyntaxhighlightinspector-title',
		],
		'targets' => [ 'desktop', 'mobile' ],
	];

	$resourceLoader->register( 'ext.geshi.visualEditor', $moduleDefinition );
}
582}
$wgTextModelsToParse
Determines which types of text are parsed as wikitext.
wfWarn( $msg, $callerOffset=1, $level=E_USER_NOTICE)
Send a warning either to the debug log or in a PHP error depending on $wgDevelopmentWarnings.
static parseHighlightLines( $lineSpec)
Take an input specifying a list of lines to highlight, returning a raw list of matching line numbers.
static onContentGetParserOutput(Content $content, Title $title, $revId, ParserOptions $options, $generateHtml, ParserOutput &$output)
Hook into Content::getParserOutput to provide syntax highlighting for script content.
static onResourceLoaderRegisterModules( $resourceLoader)
Conditionally register resource loader modules that depend on the VisualEditor MediaWiki extension.
static makeCacheKeyHash( $code, $lexer, $options)
Construct a cache key for the results of a Pygments invocation.
static onApiFormatHighlight(IContextSource $context, $text, $mime, $format)
Hook to provide syntax highlighting for API pretty-printed output.
static validHighlightRange( $start, $end)
Validate a provided input range.
MediaWiki exception.
MediaWikiServices is the service locator for the application scope of MediaWiki.
Executes shell commands.
Definition Shell.php:44
Set options of the Parser.
addModuleStyles( $modules)
Generic operation result class Has warning/error list, boolean status and arbitrary value.
Definition Status.php:44
static highlight( $code, $lang=null, $args=[])
Highlight a code-block using a particular lexer.
static parserHookSource( $text, $args, $parser)
Parser hook for <source> to add deprecated tracking category.
static plainCodeWrap( $code, $inline)
static array $mimeLexers
Mapping of MIME-types to lexer names.
static getLexer( $lang)
Get the Pygments lexer name for a particular language.
static onParserFirstCallInit(Parser $parser)
Register parser hook.
static parserHook( $text, $args, $parser)
Parser hook for both <source> and <syntaxhighlight> logic.
Content object implementation for representing flat text.
Represents a title within MediaWiki.
Definition Title.php:42
Base interface for content objects.
Definition Content.php:35
Interface for objects which can provide a MediaWiki context on request.
$cache
Definition mcc.php:33
if( $line===false) $args
Definition mcc.php:124
$content
Definition router.php:76
$mime
Definition router.php:60
if(!isset( $args[0])) $lang
if(!file_exists( $CREDITS)) $lines