MediaWiki  1.34.0
SyntaxHighlight.php
Go to the documentation of this file.
1 <?php
21 
22 class SyntaxHighlight {
23 
25  const HIGHLIGHT_MAX_LINES = 1000;
26 
28  const HIGHLIGHT_MAX_BYTES = 102400;
29 
31  const HIGHLIGHT_CSS_CLASS = 'mw-highlight';
32 
34  const CACHE_VERSION = 2;
35 
37  private static $mimeLexers = [
38  'text/javascript' => 'javascript',
39  'application/json' => 'javascript',
40  'text/xml' => 'xml',
41  ];
42 
/**
 * Get the Pygments lexer name for a particular language.
 *
 * The name is lower-cased and looked up in the lexer table loaded from
 * SyntaxHighlight.lexers.php. If it is not found there, the GeSHi
 * compatibility map is consulted for an equivalent name.
 *
 * @param string|null $lang Language name as supplied by the caller
 * @return string|null Lexer name, or null if $lang is null or no lexer is known for it
 */
private static function getLexer( $lang ) {
	static $lexers = null;

	if ( $lang === null ) {
		return null;
	}

	// Load the lexer table on first use; the static local keeps it for later calls.
	if ( !$lexers ) {
		$lexers = require __DIR__ . '/../SyntaxHighlight.lexers.php';
	}

	$lexer = strtolower( $lang );

	if ( isset( $lexers[$lexer] ) ) {
		return $lexer;
	}

	// This map must be fetched before it is consulted below; without the
	// assignment the lookup reads an undefined variable and never matches.
	$geshi2pygments = SyntaxHighlightGeSHiCompat::getGeSHiToPygmentsMap();

	// Check if this is a GeSHi lexer name for which there exists
	// a compatible Pygments lexer with a different name.
	if ( isset( $geshi2pygments[$lexer] ) ) {
		$lexer = $geshi2pygments[$lexer];
		// The table is keyed by lexer name (see the lookup above), so test
		// the key. A loose in_array() would compare against the table's
		// values instead.
		if ( isset( $lexers[$lexer] ) ) {
			return $lexer;
		}
	}

	return null;
}
79 
/**
 * Register parser hook.
 *
 * Both the <source> and the <syntaxhighlight> tag are routed to parserHook().
 *
 * @param Parser &$parser Parser on which the tag hooks are installed
 */
public static function onParserFirstCallInit( Parser &$parser ) {
	$handler = [ 'SyntaxHighlight', 'parserHook' ];

	$parser->setHook( 'source', $handler );
	$parser->setHook( 'syntaxhighlight', $handler );
}
90 
/**
 * Parser hook.
 *
 * Highlights the tag content via highlight(), wraps the result in a <code>
 * (inline) or <div> (block) element carrying the allowed HTML attributes,
 * and registers the 'ext.pygments' style module on the parser output.
 *
 * @param string $text Raw tag content
 * @param array $args Tag attributes
 * @param Parser $parser
 * @return string HTML (block output contains a strip marker for the inner markup)
 * @throws MWException If block output is not wrapped in the expected <div>
 */
public static function parserHook( $text, $args, $parser ) {
	// Expand strip markers first (for e.g. {{#tag:syntaxhighlight|<nowiki>...}})
	$source = $parser->mStripState->unstripNoWiki( $text );

	// Trailing whitespace goes, and so do leading linefeeds - but leading spaces stay
	$source = preg_replace( '/^\n+/', '', rtrim( $source ) );

	// Translate the deprecated 'enclose' attribute into 'inline'
	if ( isset( $args['enclose'] ) ) {
		$encloseNone = $args['enclose'] === 'none';
		unset( $args['enclose'] );
		if ( $encloseNone ) {
			$args['inline'] = true;
		}
	}

	$status = self::highlight( $source, $args['lang'] ?? '', $args );
	if ( !$status->isGood() ) {
		$parser->addTrackingCategory( 'syntaxhighlight-error-category' );
	}
	$html = $status->getValue();

	// Only a fixed set of HTML attributes is passed through to the wrapper
	$attribs = Sanitizer::validateAttributes( $args, [ 'style', 'class', 'id', 'dir' ] );
	$attribs['class'] = isset( $attribs['class'] )
		? $attribs['class'] . ' ' . self::HIGHLIGHT_CSS_CLASS
		: self::HIGHLIGHT_CSS_CLASS;
	// Anything other than an explicit dir="rtl" is forced to left-to-right
	if ( !isset( $attribs['dir'] ) || $attribs['dir'] !== 'rtl' ) {
		$attribs['dir'] = 'ltr';
	}

	if ( isset( $args['inline'] ) ) {
		// Keep the output on one line: stray newlines may trigger list and
		// paragraph processing (also known as doBlockLevels()).
		$singleLine = str_replace( "\n", ' ', $html );
		$html = Html::rawElement( 'code', $attribs, $singleLine );
	} else {
		// Purpose unclear, kept because it has always been emitted
		$attribs['class'] .= ' ' . 'mw-content-' . $attribs['dir'];

		// Strip the wrapper Pygments produced so that our own can be used.
		// The 'nowrap' option (pass 'inline') is no alternative, since it
		// disables other useful things like line highlighting.
		// The quotes are optional to tolerate Html::element() with
		// wgWellFormedXml=false.
		if ( $html !== '' ) {
			$m = [];
			if ( !preg_match( '/^<div class="?mw-highlight"?>(.*)<\/div>$/s', trim( $html ), $m ) ) {
				throw new MWException( 'Unexpected output from Pygments encountered' );
			}
			$html = trim( $m[1] );
		}

		// Hide the inner markup behind a 'nowiki' strip marker to prevent list
		// processing (also known as doBlockLevels()), while the wrapping <div/>
		// stays outside the marker to prevent <p/>-wrapping.
		$markerIndex = sprintf( '%08X', $parser->mMarkerIndex++ );
		$marker = $parser::MARKER_PREFIX . '-syntaxhighlightinner-' .
			$markerIndex . $parser::MARKER_SUFFIX;
		$parser->mStripState->addNoWiki( $marker, $html );

		$html = Html::openElement( 'div', $attribs ) . $marker . Html::closeElement( 'div' );
	}

	// Register CSS
	// TODO: Consider moving to a separate method so that public method
	// highlight() can be used without needing to know the module name.
	$parser->getOutput()->addModuleStyles( 'ext.pygments' );

	return $html;
}
174 
/**
 * Get the path of the pygmentize executable.
 *
 * When $wgPygmentizePath is false, the global is overwritten with the path
 * of the bundled copy, so later reads of the global see that path as well.
 *
 * @return string
 */
public static function getPygmentizePath() {
	global $wgPygmentizePath;

	if ( $wgPygmentizePath !== false ) {
		return $wgPygmentizePath;
	}

	// Nothing configured: fall back to the bundled copy.
	$wgPygmentizePath = __DIR__ . '/../pygments/pygmentize';

	return $wgPygmentizePath;
}
188 
/**
 * Produce un-highlighted output for a piece of code.
 *
 * @param string $code Code to wrap
 * @param bool $inline Whether the output is meant for inline use
 * @return string HTML: the escaped code alone when $inline is truthy,
 *  otherwise a <pre> inside a <div> carrying the highlight CSS class
 */
private static function plainCodeWrap( $code, $inline ) {
	if ( !$inline ) {
		$pre = Html::element( 'pre', [], $code );
		$wrapperAttribs = [ 'class' => self::HIGHLIGHT_CSS_CLASS ];

		return Html::rawElement( 'div', $wrapperAttribs, $pre );
	}

	return htmlspecialchars( $code, ENT_NOQUOTES );
}
204 
/**
 * Highlight a code block using Pygments.
 *
 * Highlighting is skipped, and plain wrapped output returned, when no lexer
 * is known for $lang, when the code exceeds HIGHLIGHT_MAX_BYTES, when shell
 * execution is disabled, or when the Pygments invocation fails. Each of
 * these cases other than a null $lang adds a warning to the returned Status.
 * Successful Pygments output is cached.
 *
 * @param string $code Code to highlight
 * @param string|null $lang Language name, or null for no highlighting
 * @param array $args Options; the keys read here are:
 *  - 'inline': if set, produce inline output (input and output are trimmed)
 *  - 'line': if set, number the lines
 *  - 'highlight': specification of lines to emphasise, see parseHighlightLines()
 *  - 'start': number of the first line; only used when made of digits
 * @return Status Status whose value is the resulting HTML
 */
public static function highlight( $code, $lang = null, $args = [] ) {
	$status = new Status;

	$lexer = self::getLexer( $lang );
	if ( $lang !== null && $lexer === null ) {
		$status->warning( 'syntaxhighlight-error-unknown-language', $lang );
	}

	// An empty tag yields no output at all rather than an empty <pre>.
	if ( $code === '' ) {
		$status->value = '';
		return $status;
	}

	$byteCount = strlen( $code );
	if ( $byteCount > self::HIGHLIGHT_MAX_BYTES ) {
		// Too large: turn highlighting off
		$lexer = null;
		$status->warning(
			'syntaxhighlight-error-exceeds-size-limit',
			$byteCount,
			self::HIGHLIGHT_MAX_BYTES
		);
	} elseif ( Shell::isDisabled() ) {
		// Pygments cannot be run: turn highlighting off
		$lexer = null;
		$status->warning( 'syntaxhighlight-error-pygments-invocation-failure' );
		wfWarn(
			'MediaWiki determined that it cannot invoke Pygments. ' .
			'As a result, SyntaxHighlight_GeSHi will not perform any syntax highlighting. ' .
			'See the debug log for details: ' .
			'https://www.mediawiki.org/wiki/Manual:$wgDebugLogFile'
		);
	}

	$inline = isset( $args['inline'] );
	if ( $inline ) {
		$code = trim( $code );
	}

	// Without a usable lexer the code is merely wrapped.
	if ( $lexer === null ) {
		$status->value = self::plainCodeWrap( $code, $inline );
		return $status;
	}

	// Formatter options. The order in which keys are added is kept stable
	// because the array feeds both the cache key and the -O argument.
	$options = [
		'cssclass' => self::HIGHLIGHT_CSS_CLASS,
		'encoding' => 'utf-8',
	];

	// Line numbers
	if ( isset( $args['line'] ) ) {
		$options['linenos'] = 'inline';
	}

	// PHP code lacking an opening tag
	if ( $lexer === 'php' && strpos( $code, '<?php' ) === false ) {
		$options['startinline'] = 1;
	}

	// Highlight specific lines
	if ( isset( $args['highlight'] ) ) {
		$lines = self::parseHighlightLines( $args['highlight'] );
		if ( $lines !== [] ) {
			$options['hl_lines'] = implode( ' ', $lines );
		}
	}

	// Starting line number
	if ( isset( $args['start'] ) && ctype_digit( $args['start'] ) ) {
		$options['linenostart'] = (int)$args['start'];
	}

	if ( $inline ) {
		$options['nowrap'] = 1;
	}

	$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
	$cacheKey = $cache->makeGlobalKey(
		'highlight',
		self::makeCacheKeyHash( $code, $lexer, $options )
	);
	$error = null;
	$output = $cache->getWithSetCallback(
		$cacheKey,
		$cache::TTL_MONTH,
		function ( $oldValue, &$ttl ) use ( $code, $lexer, $options, &$error ) {
			// Render the options as "key=value" pairs for pygmentize's -O switch
			$optionPairs = array_map(
				function ( $name, $value ) {
					return "{$name}={$value}";
				},
				array_keys( $options ),
				array_values( $options )
			);

			$command = Shell::command(
				self::getPygmentizePath(),
				'-l', $lexer,
				'-f', 'html',
				'-O', implode( ',', $optionPairs )
			);
			$result = $command
				->input( $code )
				->restrict( Shell::RESTRICT_DEFAULT | Shell::NO_NETWORK )
				->execute();

			if ( $result->getExitCode() == 0 ) {
				return $result->getStdout();
			}

			// Failed run: report the error to the caller and keep it out of the cache
			$ttl = WANObjectCache::TTL_UNCACHEABLE;
			$error = $result->getStderr();
			return null;
		}
	);

	$succeeded = $error === null && $output !== null;
	if ( !$succeeded ) {
		$status->warning( 'syntaxhighlight-error-pygments-invocation-failure' );
		wfWarn( 'Failed to invoke Pygments: ' . $error );
		// Fall back to preformatted code without syntax highlighting
		$output = self::plainCodeWrap( $code, $inline );
	}

	if ( $inline ) {
		// The input was trimmed before highlighting, but Pygments appends a
		// line break to its standard output. Left in place it would be
		// preserved in the surrounding paragraph and show up as a space.
		$output = trim( $output );
	}

	$status->value = $output;
	return $status;
}
349 
/**
 * Construct a cache key for the results of a Pygments invocation.
 *
 * @param string $code Code to be highlighted
 * @param string $lexer Lexer name
 * @param array $options Options array
 * @return string MD5 hash over the code, the lexer, the JSON-encoded options
 *  and CACHE_VERSION
 */
private static function makeCacheKeyHash( $code, $lexer, $options ) {
	$parts = [
		$code,
		$lexer,
		FormatJson::encode( $options, false, FormatJson::ALL_OK ),
		self::CACHE_VERSION,
	];

	return md5( implode( '|', $parts ) );
}
362 
/**
 * Take an input specifying a list of lines to highlight, returning
 * a raw list of matching line numbers.
 *
 * The specification is a comma-separated list whose items are either a
 * single line number ("5") or a range ("2-7"). Items that are neither, and
 * ranges rejected by validHighlightRange(), are ignored. Parsing stops once
 * more than HIGHLIGHT_MAX_LINES numbers were collected, and the result is
 * cut down to that many entries.
 *
 * @param string $lineSpec
 * @return int[] Line numbers
 */
protected static function parseHighlightLines( $lineSpec ) {
	$lines = [];
	$values = array_map( 'trim', explode( ',', $lineSpec ) );
	foreach ( $values as $value ) {
		if ( ctype_digit( $value ) ) {
			$lines[] = (int)$value;
		} elseif ( strpos( $value, '-' ) !== false ) {
			list( $start, $end ) = array_map( 'trim', explode( '-', $value ) );
			if ( self::validHighlightRange( $start, $end ) ) {
				// Iterate over the validated span rather than counting up
				// until "$i <= $end" fails. With $end at PHP_INT_MAX the
				// counter overflows into a float that never exceeds $end,
				// and the loop would keep appending until memory runs out.
				$first = (int)$start;
				$span = (int)( $end - $start );
				for ( $offset = 0; $offset <= $span; $offset++ ) {
					$lines[] = $first + $offset;
				}
			}
		}
		if ( count( $lines ) > self::HIGHLIGHT_MAX_LINES ) {
			$lines = array_slice( $lines, 0, self::HIGHLIGHT_MAX_LINES );
			break;
		}
	}
	return $lines;
}
393 
/**
 * Validate a provided input range.
 *
 * A range is accepted when both ends consist of digits only, the start is
 * positive and smaller than the end, and the end is less than
 * HIGHLIGHT_MAX_LINES above the start.
 *
 * @param string $start
 * @param string $end
 * @return bool
 */
protected static function validHighlightRange( $start, $end ) {
	if ( !ctype_digit( $start ) || !ctype_digit( $end ) ) {
		return false;
	}

	if ( $start <= 0 || $start >= $end ) {
		return false;
	}

	// The caller expands this small range into an array holding every
	// integer in it, so asking for a huge range would be a trivial way to
	// DoS the system. An arbitrary limit on the number of lines in a
	// single range reduces the impact.
	return $end - $start < self::HIGHLIGHT_MAX_LINES;
}
412 
/**
 * Hook into Content::getParserOutput to provide syntax highlighting for
 * script content.
 *
 * @param Content $content
 * @param Title $title
 * @param int $revId
 * @param ParserOptions $options
 * @param bool $generateHtml
 * @param ParserOutput &$output Receives the highlighted HTML when this hook handles the content
 * @return bool False when the content was rendered here, true to let
 *  MediaWiki handle it
 */
public static function onContentGetParserOutput( Content $content, Title $title,
	$revId, ParserOptions $options, $generateHtml, ParserOutput &$output
) {
	// Both globals are read further down and have to be imported here.
	global $wgParser, $wgTextModelsToParse;

	if ( !$generateHtml ) {
		// Nothing special for us to do, let MediaWiki handle this.
		return true;
	}

	// Determine the language
	$extension = ExtensionRegistry::getInstance();
	$models = $extension->getAttribute( 'SyntaxHighlightModels' );
	$model = $content->getModel();
	if ( !isset( $models[$model] ) ) {
		// We don't care about this model, carry on.
		return true;
	}
	$lexer = $models[$model];

	// Hope that $wgSyntaxHighlightModels does not contain silly types.
	$text = ContentHandler::getContentText( $content );
	if ( !$text ) {
		// Oops! Non-text content? Let MediaWiki handle this.
		return true;
	}

	// Parse using the standard parser to get links etc. into the database, HTML is replaced below.
	// We could do this using $content->fillParserOutput(), but alas it is 'protected'.
	if ( $content instanceof TextContent && in_array( $model, $wgTextModelsToParse ) ) {
		$output = $wgParser->parse( $text, $title, $options, true, true, $revId );
	}

	$status = self::highlight( $text, $lexer );
	if ( !$status->isOK() ) {
		return true;
	}
	$out = $status->getValue();

	$output->addModuleStyles( 'ext.pygments' );
	$output->setText( '<div dir="ltr">' . $out . '</div>' );

	// Inform MediaWiki that we have parsed this page and it shouldn't mess with it.
	return false;
}
471 
/**
 * Hook to provide syntax highlighting for API pretty-printed output.
 *
 * @param IContextSource $context
 * @param string $text
 * @param string $mime MIME type of $text; only the types listed in $mimeLexers are handled
 * @param string $format
 * @return bool False when the output was rendered here, true to let
 *  MediaWiki handle it
 */
public static function onApiFormatHighlight( IContextSource $context, $text, $mime, $format ) {
	if ( !isset( self::$mimeLexers[$mime] ) ) {
		return true;
	}

	$lexer = self::$mimeLexers[$mime];
	$status = self::highlight( $text, $lexer );
	if ( !$status->isOK() ) {
		return true;
	}

	$out = $status->getValue();
	if ( preg_match( '/^<pre([^>]*)>/i', $out, $m ) ) {
		$attrs = Sanitizer::decodeTagAttributes( $m[1] );
		// The <pre> tag may have no class attribute at all. Start from an
		// empty string in that case instead of appending to an undefined index.
		$attrs['class'] = ( $attrs['class'] ?? '' ) . ' api-pretty-content';
		$encodedAttrs = Sanitizer::safeEncodeTagAttributes( $attrs );
		$out = '<pre' . $encodedAttrs . '>' . substr( $out, strlen( $m[0] ) );
	}
	$output = $context->getOutput();
	$output->addModuleStyles( 'ext.pygments' );
	$output->addHTML( '<div dir="ltr">' . $out . '</div>' );

	// Inform MediaWiki that we have parsed this page and it shouldn't mess with it.
	return false;
}
507 
/**
 * Conditionally register resource loader modules that depend on
 * the VisualEditor MediaWiki extension.
 *
 * Nothing is registered unless the 'VisualEditor' extension is loaded.
 *
 * @param ResourceLoader &$resourceLoader
 */
public static function onResourceLoaderRegisterModules( &$resourceLoader ) {
	if ( !ExtensionRegistry::getInstance()->isLoaded( 'VisualEditor' ) ) {
		return;
	}

	$resourceLoader->register( 'ext.geshi.visualEditor', [
		'class' => ResourceLoaderSyntaxHighlightVisualEditorModule::class,
		'localBasePath' => __DIR__ . '/../modules',
		'remoteExtPath' => 'SyntaxHighlight_GeSHi/modules',
		'scripts' => [
			've-syntaxhighlight/ve.dm.MWSyntaxHighlightNode.js',
			've-syntaxhighlight/ve.dm.MWBlockSyntaxHighlightNode.js',
			've-syntaxhighlight/ve.dm.MWInlineSyntaxHighlightNode.js',
			've-syntaxhighlight/ve.ce.MWSyntaxHighlightNode.js',
			've-syntaxhighlight/ve.ce.MWBlockSyntaxHighlightNode.js',
			've-syntaxhighlight/ve.ce.MWInlineSyntaxHighlightNode.js',
			've-syntaxhighlight/ve.ui.MWSyntaxHighlightWindow.js',
			've-syntaxhighlight/ve.ui.MWSyntaxHighlightDialog.js',
			've-syntaxhighlight/ve.ui.MWSyntaxHighlightDialogTool.js',
			've-syntaxhighlight/ve.ui.MWSyntaxHighlightInspector.js',
			've-syntaxhighlight/ve.ui.MWSyntaxHighlightInspectorTool.js',
		],
		'styles' => [
			've-syntaxhighlight/ve.ce.MWSyntaxHighlightNode.css',
			've-syntaxhighlight/ve.ui.MWSyntaxHighlightDialog.css',
			've-syntaxhighlight/ve.ui.MWSyntaxHighlightInspector.css',
		],
		'dependencies' => [
			'ext.visualEditor.mwcore',
			'oojs-ui.styles.icons-editing-advanced'
		],
		'messages' => [
			'syntaxhighlight-visualeditor-mwsyntaxhighlightinspector-code',
			'syntaxhighlight-visualeditor-mwsyntaxhighlightinspector-language',
			'syntaxhighlight-visualeditor-mwsyntaxhighlightinspector-none',
			'syntaxhighlight-visualeditor-mwsyntaxhighlightinspector-showlines',
			'syntaxhighlight-visualeditor-mwsyntaxhighlightinspector-startingline',
			'syntaxhighlight-visualeditor-mwsyntaxhighlightinspector-title',
		],
		'targets' => [ 'desktop', 'mobile' ],
	] );
}
556 
/**
 * Backward-compatibility shim for extensions.
 *
 * Emits a deprecation warning, then wraps the value produced by highlight()
 * in a GeSHi object.
 *
 * @deprecated
 * @param string $text
 * @param string $lang
 * @return GeSHi
 */
public static function prepare( $text, $lang ) {
	wfDeprecated( __METHOD__ );

	$status = self::highlight( $text, $lang );
	$highlighted = $status->getValue();

	return new GeSHi( $highlighted );
}
565 
/**
 * Backward-compatibility shim for extensions.
 *
 * Emits a deprecation warning and calls parse_code() on the given object;
 * the result of that call is discarded.
 *
 * @deprecated
 * @param GeSHi $geshi
 * @return string Always the empty string
 */
public static function buildHeadItem( $geshi ) {
	wfDeprecated( __METHOD__ );

	$headItem = '';
	$geshi->parse_code();

	return $headItem;
}
577 }
578 class_alias( SyntaxHighlight::class, 'SyntaxHighlight_GeSHi' );
ParserOptions
Set options of the Parser.
Definition: ParserOptions.php:42
MediaWiki\Shell\Shell
Executes shell commands.
Definition: Shell.php:44
parseHighlightLines
static parseHighlightLines( $lineSpec)
Take an input specifying a list of lines to highlight, returning a raw list of matching line numbers.
Definition: SyntaxHighlight.php:372
makeCacheKeyHash
static makeCacheKeyHash( $code, $lexer, $options)
Construct a cache key for the results of a Pygments invocation.
Definition: SyntaxHighlight.php:358
prepare
static prepare( $text, $lang)
Backward-compatibility shim for extensions.
Definition: SyntaxHighlight.php:561
ParserOutput
Definition: ParserOutput.php:25
$wgParser
$wgParser
Definition: Setup.php:892
MediaWiki\MediaWikiServices
MediaWikiServices is the service locator for the application scope of MediaWiki.
Definition: MediaWikiServices.php:117
$lang
if(!isset( $args[0])) $lang
Definition: testCompression.php:33
onParserFirstCallInit
static onParserFirstCallInit(Parser &$parser)
Register parser hook.
Definition: SyntaxHighlight.php:85
getLexer
static getLexer( $lang)
Get the Pygments lexer name for a particular language.
Definition: SyntaxHighlight.php:49
$resourceLoader
$resourceLoader
Definition: load.php:44
onResourceLoaderRegisterModules
static onResourceLoaderRegisterModules(&$resourceLoader)
Conditionally register resource loader modules that depend on the VisualEditor MediaWiki extension.
Definition: SyntaxHighlight.php:514
FormatJson\ALL_OK
const ALL_OK
Skip escaping as many characters as reasonably possible.
Definition: FormatJson.php:55
buildHeadItem
static buildHeadItem( $geshi)
Backward-compatibility shim for extensions.
Definition: SyntaxHighlight.php:572
validHighlightRange
static validHighlightRange( $start, $end)
Validate a provided input range.
Definition: SyntaxHighlight.php:400
Status
Generic operation result class. Has warning/error list, boolean status and arbitrary value.
Definition: Status.php:40
ExtensionRegistry\getInstance
static getInstance()
Definition: ExtensionRegistry.php:106
onApiFormatHighlight
static onApiFormatHighlight(IContextSource $context, $text, $mime, $format)
Hook to provide syntax highlighting for API pretty-printed output.
Definition: SyntaxHighlight.php:482
FormatJson\encode
static encode( $value, $pretty=false, $escaping=0)
Returns the JSON representation of a value.
Definition: FormatJson.php:115
MWException
MediaWiki exception.
Definition: MWException.php:26
wfDeprecated
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Throws a warning that $function is deprecated.
Definition: GlobalFunctions.php:1044
SyntaxHighlightGeSHiCompat\getGeSHiToPygmentsMap
static getGeSHiToPygmentsMap()
Definition: SyntaxHighlightGeSHiCompat.php:110
onContentGetParserOutput
static onContentGetParserOutput(Content $content, Title $title, $revId, ParserOptions $options, $generateHtml, ParserOutput &$output)
Hook into Content::getParserOutput to provide syntax highlighting for script content.
Definition: SyntaxHighlight.php:426
$mimeLexers
static array $mimeLexers
Mapping of MIME-types to lexer names.
Definition: SyntaxHighlight.php:37
$lines
$lines
Definition: router.php:61
$title
$title
Definition: testCompression.php:34
$output
$output
Definition: SyntaxHighlight.php:335
$content
$content
Definition: router.php:78
TextContent
Content object implementation for representing flat text.
Definition: TextContent.php:37
IContextSource
Interface for objects which can provide a MediaWiki context on request.
Definition: IContextSource.php:53
$context
$context
Definition: load.php:45
Content
Base interface for content objects.
Definition: Content.php:34
GeSHi
This program is free software; you can redistribute it and/or modify it under the terms of the GNU Ge...
Definition: GeSHi.php:23
$args
if( $line===false) $args
Definition: cdb.php:64
Title
Represents a title within MediaWiki.
Definition: Title.php:42
$status
return $status
Definition: SyntaxHighlight.php:347
ContentHandler\getContentText
static getContentText(Content $content=null)
Convenience function for getting flat text from a Content object.
Definition: ContentHandler.php:85
$cache
$cache
Definition: mcc.php:33
$wgTextModelsToParse
$wgTextModelsToParse
Determines which types of text are parsed as wikitext.
Definition: DefaultSettings.php:8654
wfWarn
wfWarn( $msg, $callerOffset=1, $level=E_USER_NOTICE)
Send a warning either to the debug log or in a PHP error depending on $wgDevelopmentWarnings.
Definition: GlobalFunctions.php:1065
parserHook
static parserHook( $text, $args, $parser)
Parser hook.
Definition: SyntaxHighlight.php:100