Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 203 |
|
0.00% |
0 / 12 |
CRAP | |
0.00% |
0 / 1 |
LabeledSectionTransclusion | |
0.00% |
0 / 203 |
|
0.00% |
0 / 12 |
4830 | |
0.00% |
0 / 1 |
open | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
12 | |||
parse | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
noop | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getAttrPattern | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
6 | |||
countHeadings | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
12 | |||
getTemplateText | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
20 | |||
setupPfunc12 | |
0.00% |
0 / 40 |
|
0.00% |
0 / 1 |
110 | |||
isSection | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
expandSectionNode | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
pfuncIncludeObj | |
0.00% |
0 / 48 |
|
0.00% |
0 / 1 |
306 | |||
pfuncExcludeObj | |
0.00% |
0 / 35 |
|
0.00% |
0 / 1 |
182 | |||
pfuncIncludeHeading | |
0.00% |
0 / 29 |
|
0.00% |
0 / 1 |
90 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Extension\LabeledSectionTransclusion; |
4 | |
5 | use MediaWiki\Title\Title; |
6 | use Parser; |
7 | use PPFrame; |
8 | use PPNode; |
9 | |
10 | class LabeledSectionTransclusion { |
11 | |
12 | /* |
13 | * To do transclusion from an extension, we need to interact with the parser |
14 | * at a low level. This is the general transclusion functionality |
15 | */ |
16 | |
/**
 * Record a transclusion key on the parser so a repeated request for the
 * same key can be recognised as a loop.
 *
 * The keys are kept in the ad-hoc $parser->mTemplatePath array, which is
 * created here when it is not set yet.
 *
 * @param Parser $parser
 * @param string $part1 Key identifying what is being transcluded
 * @return bool True when the key was newly recorded, false when it was
 *  already present (loop detected)
 */
private static function open( $parser, $part1 ) {
	// This property on Parser has been deprecated: T360573
	if ( !isset( $parser->mTemplatePath ) ) {
		$parser->mTemplatePath = [];
	}

	// Key already seen: refuse, so the caller can break the cycle.
	if ( isset( $parser->mTemplatePath[$part1] ) ) {
		wfDebug( __METHOD__ . ": template loop broken at '$part1'\n" );
		return false;
	}

	$parser->mTemplatePath[$part1] = 1;
	return true;
}
38 | |
/**
 * Handle recursive substitution here, so we can break cycles, and set up
 * return values so that edit sections will resolve correctly.
 *
 * Any closing section tag left in the text is removed first; the match is
 * case-insensitive, so </SECTION> and </Section> are stripped as well.
 *
 * @param Parser $parser
 * @param Title|null $title Target page; may be null when the requested
 *  page name could not be turned into a title
 * @param string $text
 * @param string $part1 Key for cycle detection
 * @param int $skiphead Number of source string headers to skip for numbering
 * @return array|string Array holding the text plus the 'title',
 *  'replaceHeadings', 'headingOffset', 'noparse' and 'noargs' keys, or a
 *  wikitext string with a warning comment when a loop was detected
 */
private static function parse( $parser, $title, $text, $part1, $skiphead = 0 ) {
	// if someone tries something like<section begin=blah>lst only</section>
	// text, may as well do the right thing (whatever the case of the tag).
	$text = str_ireplace( '</section>', '', $text );

	if ( self::open( $parser, $part1 ) ) {
		// Try to get edit sections correct by munging around the parser's guts.
		return [
			$text,
			'title' => $title,
			'replaceHeadings' => true,
			'headingOffset' => $skiphead,
			'noparse' => false,
			'noargs' => false,
		];
	}

	// Loop detected. The title can be null here (invalid page name), in
	// which case there is nothing to link to and only the warning is emitted.
	$link = $title !== null ? "[[" . $title->getPrefixedText() . "]]" : '';
	return $link . "<!-- WARNING: LST loop detected -->";
}
64 | |
65 | /* |
66 | * And now, the labeled section transclusion |
67 | */ |
68 | |
/**
 * Parser tag hook for <section>.
 *
 * Section markers are unpaired and carry no output of their own, so the
 * hook ignores all of its arguments and renders nothing.
 *
 * @param string $in Tag content (unused)
 * @param array $assocArgs Tag attributes (unused)
 * @param Parser|null $parser (unused)
 * @return string Always the empty string
 */
public static function noop( $in, $assocArgs = [], $parser = null ) {
	return '';
}
81 | |
/**
 * Build the regex fragment that matches the attribute part of a section
 * tag naming a given section.
 *
 * The attribute name is matched case-insensitively and may be either
 * $type itself or the local name returned by Hooks::getLocalName() for
 * $lang. The section name may be bare or wrapped in matching quotes.
 *
 * @param string $sec Name of the target section
 * @param string $type Either "begin" or "end" depending on the type of section tag to be matched
 * @param string $lang
 * @return string Regex fragment (no delimiters, no anchors)
 */
private static function getAttrPattern( $sec, $type, $lang ) {
	$quotedSec = preg_quote( $sec, '/' );

	// Optional run of other attributes (was like "\s*")
	$filler = '(?:\s+[^>]*)?';

	$names = [ $type ];
	$translated = Hooks::getLocalName( $type, $lang );
	if ( $translated !== null ) {
		$names[] = $translated;
	}
	$namePattern = '(?i:' . implode( '|', $names ) . ')';

	// Group 1 captures the optional opening quote; \1 requires the same closing quote.
	return $filler . '\s+' . $namePattern . '\s*=\s*(["\']?)' . $quotedSec . '\1' . $filler;
}
100 | |
/**
 * Count the wikitext headings that end at or before a given offset.
 *
 * The count lets the parser skip the headings that precede the
 * transcluded text, to prevent wrong heading links (see bug 6563).
 *
 * @param string $text
 * @param int $limit Cutoff point in the text to stop searching
 * @return int Number of headings whose line ends at or before $limit
 */
private static function countHeadings( $text, $limit ) {
	// The empty trailing group records the offset at which the heading line ends.
	$headingRegex = '/^(={1,6}).+\1\s*$()/im';

	$seen = 0;
	$cursor = 0;
	$match = [];
	while ( preg_match( $headingRegex, $text, $match, PREG_OFFSET_CAPTURE, $cursor ) ) {
		$lineEnd = $match[2][1];
		if ( $lineEnd > $limit ) {
			break;
		}

		// Resume the search right after the heading just counted.
		$cursor = $lineEnd;
		$seen++;
	}

	return $seen;
}
128 | |
/**
 * Resolve a page name and fetch the wikitext of that page.
 *
 * An invalid or external (interwiki) title yields empty text and is still
 * reported as success; in the invalid case $title is left null. A page the
 * parser cannot fetch yields wikitext linking to the page.
 *
 * @param Parser $parser
 * @param string $page title text of target page
 * @param Title|null &$title normalized title object
 * @param string &$text wikitext output
 * @return bool true if returning text, false if target not found
 */
private static function getTemplateText( $parser, $page, &$title, &$text ) {
	$title = Title::newFromText( $page );

	if ( $title === null || $title->isExternal() ) {
		$text = '';
		return true;
	}

	[ $text, $title ] = $parser->fetchTemplateAndTitle( $title );
	if ( $text !== false ) {
		return true;
	}

	// if article doesn't exist, return a red link.
	$text = "[[" . $title->getPrefixedText() . "]]";
	return false;
}
157 | |
/**
 * Shared argument handling for the #lst / #lstx style parser functions.
 *
 * Consumes the arguments in order: page title, begin section name, then
 * (for 'lstx' only) the replacement text, then the end section name. A
 * missing end name defaults to the begin name.
 *
 * @param Parser $parser
 * @param PPFrame $frame
 * @param array $args Unexpanded parser function arguments
 * @param string $func Either 'lst' or 'lstx'
 * @return array|string Array with the keys 'root', 'newFrame', 'repl',
 *  'beginRegex', 'begin' and 'endRegex' when the caller still has to scan
 *  for sections; otherwise the final wikitext/HTML string to return
 */
private static function setupPfunc12( $parser, $frame, $args, $func = 'lst' ) {
	// Pops the next argument, expands it in the calling frame and trims it.
	$takeArg = static function () use ( &$args, $frame ) {
		return trim( $frame->expand( array_shift( $args ) ) );
	};

	if ( count( $args ) === 0 ) {
		$parser->addTrackingCategory( "lst-invalid-section-category" );
		return '';
	}

	$title = Title::newFromText( $takeArg() );
	if ( !$title || $title->isExternal() ) {
		$parser->addTrackingCategory( "lst-invalid-section-category" );
		return '';
	}
	if ( !$frame->loopCheck( $title ) ) {
		$warning = wfMessage( 'parser-template-loop-warning', $title->getPrefixedText() )
			->inContentLanguage()->text();
		return '<span class="error">' . $warning . '</span>';
	}

	[ $root, $finalTitle ] = $parser->getTemplateDom( $title );

	// if article doesn't exist, return a red link.
	if ( $root === false ) {
		return "[[" . $title->getPrefixedText() . "]]";
	}

	$newFrame = $frame->newChild( false, $finalTitle );
	if ( count( $args ) === 0 ) {
		// No section requested: expand the whole page.
		return $newFrame->expand( $root );
	}

	$begin = $takeArg();

	// Only 'lstx' takes a replacement text argument.
	$repl = null;
	if ( $func == 'lstx' ) {
		$repl = count( $args ) ? $takeArg() : '';
	}

	$end = count( $args ) ? $takeArg() : $begin;

	$lang = $parser->getContentLanguage()->getCode();
	$beginRegex = '/^' . self::getAttrPattern( $begin, 'begin', $lang ) . '$/s';
	$endRegex = '/^' . self::getAttrPattern( $end, 'end', $lang ) . '$/s';

	return [
		'root' => $root,
		'newFrame' => $newFrame,
		'repl' => $repl,
		'beginRegex' => $beginRegex,
		'begin' => $begin,
		'endRegex' => $endRegex,
	];
}
228 | |
/**
 * Tell whether an extension tag name denotes the section tag.
 *
 * The comparison is done on lower-cased names and accepts both the
 * literal "section" and the local name that Hooks::getLocalName()
 * reports for $lang, when there is one.
 *
 * @param string $name
 * @param string $lang
 * @return bool
 */
private static function isSection( $name, $lang ) {
	$candidate = strtolower( $name );
	$localised = Hooks::getLocalName( 'section', $lang );

	if ( $candidate === 'section' ) {
		return true;
	}

	return $localised !== null && $candidate === strtolower( $localised );
}
243 | |
/**
 * Returns the text for the inside of a split <section> node.
 *
 * @param Parser $parser
 * @param PPFrame $frame
 * @param array $parts Parts of the split node; only the 'inner' entry is used
 * @return string The 'inner' part run through Parser::replaceVariables(),
 *  or the empty string when there is no 'inner' part
 */
private static function expandSectionNode( $parser, $frame, $parts ) {
	return isset( $parts['inner'] )
		? $parser->replaceVariables( $parts['inner'], $frame )
		: '';
}
258 | |
/**
 * Parser function callback that transcludes the labeled section(s) of a page.
 *
 * Walks the top-level nodes of the target page's DOM. Output is collected
 * between each matching begin marker and the following matching end
 * marker. With an empty begin name, output starts at the top of the page
 * and stops at the first matching end marker. When no marker matches at
 * all, the "lst-invalid-section-category" tracking category is added.
 *
 * @param Parser $parser
 * @param PPFrame $frame
 * @param array $args
 * @return array|string
 */
public static function pfuncIncludeObj( $parser, $frame, $args ) {
	$setup = self::setupPfunc12( $parser, $frame, $args, 'lst' );
	if ( !is_array( $setup ) ) {
		return $setup;
	}

	[
		'root' => $root,
		'newFrame' => $newFrame,
		'beginRegex' => $beginRegex,
		'endRegex' => $endRegex,
		'begin' => $begin,
	] = $setup;

	$lang = $parser->getContentLanguage()->getCode();

	// Returns the expanded parts of $node when it is a section tag, null otherwise.
	$sectionParts = static function ( $node ) use ( $newFrame, $lang ) {
		if ( $node->getName() !== 'ext' ) {
			return null;
		}
		$parts = array_map( [ $newFrame, 'expand' ], $node->splitExt() );
		return self::isSection( $parts['name'], $lang ) ? $parts : null;
	};

	$output = '';
	$sawMarker = false;
	$cursor = $root->getFirstChild();
	while ( $cursor ) {
		// If the name of the begin node was specified, find it.
		// Otherwise transclude everything from the beginning of the page.
		if ( $begin !== '' ) {
			$atBegin = false;
			for ( ; $cursor; $cursor = $cursor->getNextSibling() ) {
				$parts = $sectionParts( $cursor );
				if ( $parts === null ) {
					continue;
				}
				// @phan-suppress-next-line SecurityCheck-ReDoS
				if ( preg_match( $beginRegex, $parts['attr'] ) ) {
					$atBegin = true;
					break;
				}
			}
			if ( !$atBegin ) {
				// Ran off the end of the page without another begin marker.
				break;
			}
			$sawMarker = true;
		}

		// Write the text out while looking for the end node
		$atEnd = false;
		for ( ; $cursor; $cursor = $cursor->getNextSibling() ) {
			$parts = $sectionParts( $cursor );
			if ( $parts === null ) {
				// Not a section tag: ordinary content.
				$output .= $newFrame->expand( $cursor );
				continue;
			}
			// @phan-suppress-next-line SecurityCheck-ReDoS
			if ( preg_match( $endRegex, $parts['attr'] ) ) {
				$atEnd = true;
				$sawMarker = true;
				break;
			}
			// Some other section tag: keep only its inner text.
			$output .= self::expandSectionNode( $parser, $newFrame, $parts );
		}

		// Stop when the page ended first, or, when transcluding from the
		// beginning of the page, as soon as the end node was found.
		if ( !$atEnd || $begin === '' ) {
			break;
		}

		$cursor = $cursor->getNextSibling();
	}

	if ( !$sawMarker ) {
		$parser->addTrackingCategory( "lst-invalid-section-category" );
	}
	return $output;
}
349 | |
/**
 * Parser function callback that transcludes a page with the labeled
 * section(s) left out.
 *
 * Walks the top-level nodes of the target page's DOM. Output is collected
 * up to each matching begin marker, the replacement text is appended, and
 * everything up to the matching end marker is dropped, except the inner
 * text of that end marker itself.
 *
 * @param Parser $parser
 * @param PPFrame $frame
 * @param array $args
 * @return array|string
 */
public static function pfuncExcludeObj( $parser, $frame, $args ) {
	$setup = self::setupPfunc12( $parser, $frame, $args, 'lstx' );
	if ( !is_array( $setup ) ) {
		return $setup;
	}

	[
		'root' => $root,
		'newFrame' => $newFrame,
		'beginRegex' => $beginRegex,
		'endRegex' => $endRegex,
		'repl' => $repl,
	] = $setup;

	$lang = $parser->getContentLanguage()->getCode();

	// Returns the expanded parts of $node when it is a section tag, null otherwise.
	$sectionParts = static function ( $node ) use ( $newFrame, $lang ) {
		if ( $node->getName() != 'ext' ) {
			return null;
		}
		$parts = array_map( [ $newFrame, 'expand' ], $node->splitExt() );
		return self::isSection( $parts['name'], $lang ) ? $parts : null;
	};

	$output = '';
	$cursor = $root->getFirstChild();
	while ( $cursor ) {
		// Search for the start tag, copying everything in front of it
		$atBegin = false;
		for ( ; $cursor; $cursor = $cursor->getNextSibling() ) {
			$parts = $sectionParts( $cursor );
			if ( $parts === null ) {
				$output .= $newFrame->expand( $cursor );
				continue;
			}
			// @phan-suppress-next-line SecurityCheck-ReDoS
			if ( preg_match( $beginRegex, $parts['attr'] ) ) {
				$atBegin = true;
				break;
			}
			$output .= self::expandSectionNode( $parser, $newFrame, $parts );
		}

		if ( !$atBegin ) {
			break;
		}

		// Append replacement text
		$output .= $repl;

		// Search for the end tag, dropping everything in front of it
		for ( ; $cursor; $cursor = $cursor->getNextSibling() ) {
			$parts = $sectionParts( $cursor );
			if ( $parts === null ) {
				continue;
			}
			// @phan-suppress-next-line SecurityCheck-ReDoS
			if ( preg_match( $endRegex, $parts['attr'] ) ) {
				$output .= self::expandSectionNode( $parser, $newFrame, $parts );
				break;
			}
		}

		if ( !$cursor ) {
			// The page ended before the end tag was found.
			break;
		}
		$cursor = $cursor->getNextSibling();
	}
	return $output;
}
423 | |
/**
 * section inclusion - include all matching sections
 *
 * A parser extension that further extends labeled section transclusion,
 * adding a function, #lsth for transcluding marked sections of text.
 *
 * The transcluded text starts right after the heading named $sec (or at
 * the top of the page when $sec is empty) and ends just before the heading
 * named $to. When $to is empty or not found, it ends before the next
 * heading of the same or a higher level. When there is no such heading, it
 * runs to the end of the page.
 *
 * @todo MW 1.12 version, as per #lst/#lstx
 *
 * @param Parser $parser
 * @param string $page Title text of the page to transclude from
 * @param string $sec Heading text of the section to start after
 * @param string $to Heading text to stop before
 * @return mixed|string
 */
public static function pfuncIncludeHeading( $parser, $page = '', $sec = '', $to = '' ) {
	if ( self::getTemplateText( $parser, $page, $title, $text ) == false ) {
		// Target page not found: $text holds a link to it.
		return $text;
	}

	// Generate a regex to match the === classical heading section(s) === we're
	// interested in.
	if ( $sec == '' ) {
		$begin_off = 0;
		$head_len = 6;
	} else {
		// The trailing ($) group records where the heading line ends.
		$pat = '^(={1,6})\s*' . preg_quote( $sec, '/' ) . '\s*\1\s*($)';
		if ( !preg_match( "/$pat/im", $text, $m, PREG_OFFSET_CAPTURE ) ) {
			$parser->addTrackingCategory( "lst-invalid-section-category" );
			return '';
		}
		$begin_off = $m[2][1];
		$head_len = strlen( $m[1][0] );
	}

	$end_off = null;
	if ( $to != '' ) {
		// if $to is supplied, try and match it. If we don't match, just
		// ignore it.
		$pat = '^(={1,6})\s*' . preg_quote( $to, '/' ) . '\s*\1\s*$';
		if ( preg_match( "/$pat/im", $text, $m, PREG_OFFSET_CAPTURE, $begin_off ) ) {
			$end_off = $m[0][1] - 1;
		}
	}

	if ( $end_off === null ) {
		// Stop at the next heading with at most as many "=" as the start heading.
		$pat = '^(={1,' . $head_len . '})(?!=).*?\1\s*$';
		if ( preg_match( "/$pat/im", $text, $m, PREG_OFFSET_CAPTURE, $begin_off ) ) {
			$end_off = $m[0][1] - 1;
		}
	}

	$nhead = self::countHeadings( $text, $begin_off );

	if ( $end_off !== null ) {
		// A closing heading on the very first line of the page gives
		// $end_off = -1 with $begin_off = 0. Passed to substr() as is, that
		// negative length would return the whole page minus its last byte
		// rather than an empty section, so the length is clamped at 0.
		$result = substr( $text, $begin_off, max( 0, $end_off - $begin_off ) );
	} else {
		$result = substr( $text, $begin_off );
	}

	$frame = $parser->getPreprocessor()->newFrame();
	$dom = $parser->preprocessToDom( $result, Parser::PTD_FOR_INCLUSION );
	$result = $frame->expand( $dom );
	$result = trim( $result );

	return self::parse( $parser, $title, $result, "#lsth:{$page}|{$sec}", $nhead );
}
492 | } |