Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
27.05% |
33 / 122 |
|
26.67% |
4 / 15 |
CRAP | |
0.00% |
0 / 1 |
Pygmentize | |
27.05% |
33 / 122 |
|
26.67% |
4 / 15 |
482.80 | |
0.00% |
0 / 1 |
useBundled | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getPath | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
2 | |||
getVersion | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
12 | |||
getBundledVersion | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
fetchVersion | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
12 | |||
getGeneratedCSS | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
fetchGeneratedCSS | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
6 | |||
getLexers | |
25.00% |
2 / 8 |
|
0.00% |
0 / 1 |
3.69 | |||
pygmentsSupportsJsonOutput | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
fetchLexers | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
20 | |||
parseLexersFromJson | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
12 | |||
parseLexersFromText | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
20 | |||
highlight | |
100.00% |
20 / 20 |
|
100.00% |
1 / 1 |
4 | |||
boxedCommand | |
70.00% |
7 / 10 |
|
0.00% |
0 / 1 |
2.11 | |||
recordShellout | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | /** |
3 | * Copyright (C) 2021 Kunal Mehta <legoktm@debian.org> |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License along |
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
18 | * http://www.gnu.org/copyleft/gpl.html |
19 | */ |
20 | |
21 | namespace MediaWiki\SyntaxHighlight; |
22 | |
23 | use MediaWiki\MediaWikiServices; |
24 | use Shellbox\Command\BoxedCommand; |
25 | use Shellbox\ShellboxError; |
26 | |
27 | /** |
28 | * Wrapper around the `pygmentize` command |
29 | */ |
30 | class Pygmentize { |
31 | |
32 | /** |
33 | * If no pygmentize is configured, use bundled |
34 | */ |
35 | public static function useBundled(): bool { |
36 | global $wgPygmentizePath; |
37 | return $wgPygmentizePath === false; |
38 | } |
39 | |
40 | /** |
41 | * Get a real path to pygmentize |
42 | */ |
43 | private static function getPath(): string { |
44 | global $wgPygmentizePath; |
45 | |
46 | // If $wgPygmentizePath is unset, use the bundled copy. |
47 | return $wgPygmentizePath ?: __DIR__ . '/../pygments/pygmentize'; |
48 | } |
49 | |
50 | /** |
51 | * Get the version of pygments (cached) |
52 | */ |
53 | public static function getVersion(): string { |
54 | static $version; |
55 | if ( $version !== null ) { |
56 | return $version; |
57 | } |
58 | if ( self::useBundled() ) { |
59 | $version = self::getBundledVersion(); |
60 | return $version; |
61 | } |
62 | |
63 | // This is called a lot: during page views, edits, and load.php startup requests. |
64 | // It also gets called multiple times during the same request. As such, prefer |
65 | // low latency via php-apcu. |
66 | // |
67 | // This value also controls cache invalidation and propagation through embedding |
68 | // in other keys from this class, and thus has a low expiry. Avoid latency from |
69 | // frequent cache misses by sharing the values with other servers via Memcached |
70 | // as well. |
71 | |
72 | $srvCache = MediaWikiServices::getInstance()->getLocalServerObjectCache(); |
73 | return $srvCache->getWithSetCallback( |
74 | $srvCache->makeGlobalKey( 'pygmentize-version' ), |
75 | // Spread between 55 min and 1 hour |
76 | mt_rand( 55 * $srvCache::TTL_MINUTE, 60 * $srvCache::TTL_MINUTE ), |
77 | static function () { |
78 | $wanCache = MediaWikiServices::getInstance()->getMainWANObjectCache(); |
79 | return $wanCache->getWithSetCallback( |
80 | $wanCache->makeGlobalKey( 'pygmentize-version' ), |
81 | // Must be under 55 min to avoid renewing stale data in upper layer |
82 | 30 * $wanCache::TTL_MINUTE, |
83 | [ __CLASS__, 'fetchVersion' ] |
84 | ); |
85 | } |
86 | ); |
87 | } |
88 | |
89 | /** |
90 | * Get the version of bundled pygments |
91 | */ |
92 | private static function getBundledVersion(): string { |
93 | return trim( file_get_contents( __DIR__ . '/../pygments/VERSION' ) ); |
94 | } |
95 | |
96 | /** |
97 | * Shell out to get installed pygments version |
98 | * |
99 | * @internal For use by WANObjectCache/BagOStuff only |
100 | */ |
101 | public static function fetchVersion(): string { |
102 | $result = self::boxedCommand() |
103 | ->params( self::getPath(), '-V' ) |
104 | ->includeStderr() |
105 | ->execute(); |
106 | self::recordShellout( 'version' ); |
107 | |
108 | $output = $result->getStdout(); |
109 | if ( $result->getExitCode() != 0 || |
110 | !preg_match( '/^Pygments version (\S+),/', $output, $matches ) |
111 | ) { |
112 | throw new PygmentsException( $output ); |
113 | } |
114 | |
115 | return $matches[1]; |
116 | } |
117 | |
118 | /** |
119 | * Get the pygments generated CSS (cached) |
120 | * |
121 | * Note: if using bundled, the CSS is already available |
122 | * in modules/pygments.generated.css. |
123 | */ |
124 | public static function getGeneratedCSS(): string { |
125 | // This is rarely called as the result gets HTTP-cached via long-expiry load.php. |
126 | // It gets called once, after a deployment, during that brief spike of |
127 | // dedicated requests from each wiki. Leverage Memcached to share this. |
128 | // It's likely not needed again on the same server for a while after that. |
129 | $cache = MediaWikiServices::getInstance()->getMainWANObjectCache(); |
130 | return $cache->getWithSetCallback( |
131 | $cache->makeGlobalKey( 'pygmentize-css', self::getVersion() ), |
132 | $cache::TTL_WEEK, |
133 | [ __CLASS__, 'fetchGeneratedCSS' ] |
134 | ); |
135 | } |
136 | |
137 | /** |
138 | * Shell out to get generated CSS from pygments |
139 | * |
140 | * @internal Only public for updateCSS.php |
141 | */ |
142 | public static function fetchGeneratedCSS(): string { |
143 | $result = self::boxedCommand() |
144 | ->params( |
145 | self::getPath(), '-f', 'html', |
146 | '-S', 'default', '-a', '.mw-highlight' ) |
147 | ->includeStderr() |
148 | ->execute(); |
149 | self::recordShellout( 'generated_css' ); |
150 | $output = $result->getStdout(); |
151 | if ( $result->getExitCode() != 0 ) { |
152 | throw new PygmentsException( $output ); |
153 | } |
154 | return $output; |
155 | } |
156 | |
157 | /** |
158 | * Get the list of supported lexers by pygments (cached) |
159 | * |
160 | * @return array<string,true> |
161 | */ |
162 | public static function getLexers(): array { |
163 | if ( self::useBundled() ) { |
164 | return require __DIR__ . '/../SyntaxHighlight.lexers.php'; |
165 | } |
166 | |
167 | // This is called during page views and edits, and may be called |
168 | // repeatedly. Trade low latency for higher shell rate by caching |
169 | // on each server separately. This is made up for with a high TTL, |
170 | // which is fine because we vary by version, thus ensuring quick |
171 | // propagation separate from the TTL. |
172 | $cache = MediaWikiServices::getInstance()->getLocalServerObjectCache(); |
173 | return $cache->getWithSetCallback( |
174 | $cache->makeGlobalKey( 'pygmentize-lexers', self::getVersion() ), |
175 | $cache::TTL_WEEK, |
176 | [ __CLASS__, 'fetchLexers' ] |
177 | ); |
178 | } |
179 | |
180 | /** |
181 | * Determine if the pygments command line supports the --json option |
182 | * |
183 | * @return bool |
184 | */ |
185 | private static function pygmentsSupportsJsonOutput(): bool { |
186 | $version = self::getVersion(); |
187 | return ( version_compare( $version, '2.11.0' ) !== -1 ); |
188 | } |
189 | |
190 | /** |
191 | * Shell out to get supported lexers by pygments |
192 | * |
193 | * @internal Only public for updateLexerList.php |
194 | * @return array<string,true> |
195 | */ |
196 | public static function fetchLexers(): array { |
197 | $cliParams = [ self::getPath(), '-L', 'lexer' ]; |
198 | if ( self::pygmentsSupportsJsonOutput() ) { |
199 | $cliParams[] = '--json'; |
200 | } |
201 | |
202 | $result = self::boxedCommand() |
203 | ->params( $cliParams ) |
204 | ->includeStderr() |
205 | ->execute(); |
206 | self::recordShellout( 'fetch_lexers' ); |
207 | $output = $result->getStdout(); |
208 | if ( $result->getExitCode() != 0 ) { |
209 | throw new PygmentsException( $output ); |
210 | } |
211 | |
212 | if ( self::pygmentsSupportsJsonOutput() ) { |
213 | $lexers = self::parseLexersFromJson( $output ); |
214 | } else { |
215 | $lexers = self::parseLexersFromText( $output ); |
216 | } |
217 | |
218 | sort( $lexers ); |
219 | return array_fill_keys( $lexers, true ); |
220 | } |
221 | |
222 | /** |
223 | * Parse json output of the pygments lexers list and return as php array |
224 | * |
225 | * @param string $output JSON formatted output of pygments lexers list |
226 | * @return array |
227 | */ |
228 | private static function parseLexersFromJson( $output ): array { |
229 | $data = json_decode( $output, true ); |
230 | if ( $data === null ) { |
231 | throw new PygmentsException( |
232 | 'Got invalid JSON from Pygments: ' . $output ); |
233 | } |
234 | $lexers = []; |
235 | foreach ( array_values( $data['lexers'] ) as $lexer ) { |
236 | $lexers = array_merge( $lexers, $lexer['aliases'] ); |
237 | } |
238 | return $lexers; |
239 | } |
240 | |
241 | /** |
242 | * Parse original stdout of the pygments lexers list |
243 | * This was the only format available before pygments 2.11.0 |
244 | * NOTE: Should be removed when pygments 2.11 is the minimum version expected to be installed |
245 | * |
246 | * @param string $output Textual list of pygments lexers |
247 | * @return array |
248 | */ |
249 | private static function parseLexersFromText( $output ): array { |
250 | $lexers = []; |
251 | foreach ( explode( "\n", $output ) as $line ) { |
252 | if ( str_starts_with( $line, '*' ) ) { |
253 | $newLexers = explode( ', ', trim( $line, "* :\r\n" ) ); |
254 | |
255 | // Skip internal, unnamed lexers |
256 | if ( $newLexers[0] !== '' ) { |
257 | $lexers = array_merge( $lexers, $newLexers ); |
258 | } |
259 | } |
260 | } |
261 | return $lexers; |
262 | } |
263 | |
264 | /** |
265 | * Actually highlight some text |
266 | * |
267 | * @param string $lexer Lexer name |
268 | * @param string $code Code to highlight |
269 | * @param array $options Options to pass to pygments |
270 | * @return string |
271 | */ |
272 | public static function highlight( $lexer, $code, array $options ): string { |
273 | $optionPairs = []; |
274 | foreach ( $options as $k => $v ) { |
275 | $optionPairs[] = "{$k}={$v}"; |
276 | } |
277 | self::recordShellout( 'highlight' ); |
278 | |
279 | try { |
280 | $result = self::boxedCommand() |
281 | ->params( |
282 | self::getPath(), |
283 | '-l', $lexer, |
284 | '-f', 'html', |
285 | '-O', implode( ',', $optionPairs ), |
286 | 'file' |
287 | ) |
288 | ->inputFileFromString( 'file', $code ) |
289 | ->execute(); |
290 | } catch ( ShellboxError $exception ) { |
291 | // If we have trouble sending or receiving over the network to |
292 | // Shellbox, we technically don't know if the command succeeded or failed, |
293 | // but, treat the highlight() command as recoverable by wrapping this in |
294 | // PygmentsException. This permits the Parser tag to fall back to |
295 | // plainCodeWrap(), thus avoiding a fatal on pageviews (T292663). |
296 | throw new PygmentsException( 'ShellboxError', 0, $exception ); |
297 | } |
298 | |
299 | $output = $result->getStdout(); |
300 | if ( $result->getExitCode() != 0 ) { |
301 | throw new PygmentsException( $output ); |
302 | } |
303 | |
304 | return $output; |
305 | } |
306 | |
307 | private static function boxedCommand(): BoxedCommand { |
308 | $command = MediaWikiServices::getInstance()->getShellCommandFactory() |
309 | ->createBoxed( 'syntaxhighlight' ) |
310 | ->disableNetwork() |
311 | ->firejailDefaultSeccomp() |
312 | ->routeName( 'syntaxhighlight-pygments' ); |
313 | |
314 | if ( wfIsWindows() ) { |
315 | // Python requires the SystemRoot environment variable to initialize (T300223) |
316 | $command->environment( [ |
317 | 'SystemRoot' => getenv( 'SystemRoot' ), |
318 | ] ); |
319 | } |
320 | |
321 | return $command; |
322 | } |
323 | |
324 | /** |
325 | * Track how often we do each type of shellout in statsd |
326 | * |
327 | * @param string $type Type of shellout |
328 | */ |
329 | private static function recordShellout( $type ) { |
330 | $statsd = MediaWikiServices::getInstance()->getStatsdDataFactory(); |
331 | $statsd->increment( "syntaxhighlight_shell.$type" ); |
332 | } |
333 | } |