MediaWiki REL1_32
BlockLevelPass.php
Go to the documentation of this file.
1<?php
2
/** @var bool True while the most recently opened definition-list item is a <dt> */
private $DTopen = false;
/** @var bool True while the current line is treated as being inside a <pre> element */
private $inPre = false;
/** @var string Name of the currently open paragraph-level tag ('p' or 'pre'), or '' if none */
private $lastSection = '';
/** @var bool When false, the first input line is copied to the output unprocessed */
private $lineStart;
/** @var string The text handed to the constructor */
private $text;

# State constants for the definition list colon extraction.
# NOTE(review): only COLON_STATE_TAG (1) and COLON_STATE_LC (8) were present
# in this listing; the entries for 0 and 2-7 have been restored so that every
# state used by findColonNoLinks() is declared. Confirm names and values.
const COLON_STATE_TEXT = 0;
const COLON_STATE_TAG = 1;
const COLON_STATE_TAGSTART = 2;
const COLON_STATE_CLOSETAG = 3;
const COLON_STATE_TAGSLASH = 4;
const COLON_STATE_COMMENT = 5;
const COLON_STATE_COMMENTDASH = 6;
const COLON_STATE_COMMENTDASHDASH = 7;
const COLON_STATE_LC = 8;
42
/**
 * Make lists from lines starting with ':', '*', '#', etc.
 *
 * Static entry point: builds a pass object for the given text and
 * returns whatever execute() produces for it.
 *
 * @param string $text Text to process
 * @param bool $lineStart Passed through to the constructor; when false the
 *   first line of $text is emitted unprocessed by execute()
 * @return string The processed text
 */
public static function doBlockLevels( $text, $lineStart ) {
	return ( new self( $text, $lineStart ) )->execute();
}
54
/**
 * Private constructor; instances are created through doBlockLevels().
 *
 * @param string $text Text to process
 * @param bool $lineStart Stored as-is and consulted by execute() for the first line
 */
private function __construct( $text, $lineStart ) {
	$this->lineStart = $lineStart;
	$this->text = $text;
}
62
/**
 * If a pre or p is open, return the corresponding close tag and update the state.
 *
 * The section marker and the in-<pre> flag are always reset, whether or
 * not anything was open.
 *
 * @return string The closing tag followed by a newline, or '' if no section was open
 */
private function closeParagraph() {
	$closingTag = $this->lastSection === ''
		? ''
		: '</' . $this->lastSection . ">\n";

	$this->lastSection = '';
	$this->inPre = false;

	return $closingTag;
}
77
/**
 * Return the length of the common prefix of two strings, i.e. the number
 * of leading bytes at which both arguments agree.
 *
 * @param string $st1
 * @param string $st2
 * @return int
 */
private function getCommon( $st1, $st2 ) {
	$limit = min( strlen( $st1 ), strlen( $st2 ) );

	$matched = 0;
	while ( $matched < $limit && $st1[$matched] === $st2[$matched] ) {
		++$matched;
	}

	return $matched;
}
97
/**
 * Open the list item element identified by the prefix character.
 *
 * Any open paragraph is closed first; its closing markup is prepended to
 * the returned list markup.
 *
 * @param string $char One of '*', '#', ':' or ';'
 * @return string HTML that opens the list and its first item
 */
private function openList( $char ) {
	$closed = $this->closeParagraph();

	if ( $char === '*' ) {
		return $closed . "<ul><li>";
	}
	if ( $char === '#' ) {
		return $closed . "<ol><li>";
	}
	if ( $char === ':' ) {
		return $closed . "<dl><dd>";
	}
	if ( $char === ';' ) {
		# Remember that a term is open so the next item closes </dt>, not </dd>
		$this->DTopen = true;
		return $closed . "<dl><dt>";
	}

	# Unrecognised prefix: only the error marker is returned; the
	# paragraph-closing markup collected above is not included.
	return '<!-- ERR 1 -->';
}
123
/**
 * Close the current list item and open the next one.
 *
 * @param string $char Prefix character of the item being opened
 * @return string HTML closing the previous item and opening the new one
 */
private function nextItem( $char ) {
	if ( $char === '*' || $char === '#' ) {
		return "</li>\n<li>";
	}

	if ( $char !== ':' && $char !== ';' ) {
		return '<!-- ERR 2 -->';
	}

	# Definition list: what gets closed depends on what is currently open,
	# what gets opened depends on the new prefix character.
	$closeTag = $this->DTopen ? "</dt>\n" : "</dd>\n";

	if ( $char === ';' ) {
		$this->DTopen = true;
		$openTag = '<dt>';
	} else {
		$this->DTopen = false;
		$openTag = '<dd>';
	}

	return $closeTag . $openTag;
}
148
/**
 * Close the current list item identified by the prefix character.
 *
 * Only '*', '#' and ':' are recognised here; execute() passes prefixes in
 * which ';' has already been replaced by ':'.
 *
 * @param string $char Prefix character of the list being closed
 * @return string HTML closing the item and its list
 */
private function closeList( $char ) {
	if ( $char === '*' ) {
		return "</li></ul>";
	}
	if ( $char === '#' ) {
		return "</li></ol>";
	}
	if ( $char !== ':' ) {
		return '<!-- ERR 3 -->';
	}

	if ( $this->DTopen ) {
		# The last item was a term, so close <dt> and clear the flag
		$this->DTopen = false;
		return "</dt></dl>";
	}
	return "</dd></dl>";
}
172
/**
 * Execute the pass.
 *
 * Walks the text line by line. Leading list prefixes (* # : ;) are turned
 * into nested <ul>/<ol>/<dl> markup; lines without a prefix are wrapped in
 * <p>, or in <pre> when they start with a space, unless a block-level HTML
 * tag found on the line suppresses that wrapping.
 *
 * @return string The processed text
 */
private function execute() {
	# Parsing through the text line by line.  The main thing
	# happening here is handling of block-level elements p, pre,
	# and making lists from lines starting with * # : etc.
	# Read the text stored by the constructor; there is no local $text here.
	$textLines = StringUtils::explode( "\n", $this->text );

	$lastPrefix = $output = '';
	$this->DTopen = $inBlockElem = false;
	$prefixLength = 0;
	# Either false, or the paragraph tag(s) whose output has been deferred
	# until a later line shows whether they are needed.
	$pendingPTag = false;
	$inBlockquote = false;

	foreach ( $textLines as $inputLine ) {
		# Fix up $lineStart
		# When the text does not begin at a line start, the first line is
		# copied through untouched (without a trailing newline).
		if ( !$this->lineStart ) {
			$output .= $inputLine;
			$this->lineStart = true;
			continue;
		}
		# * = ul
		# # = ol
		# ; = dt
		# : = dd

		$lastPrefixLength = strlen( $lastPrefix );
		$preCloseMatch = preg_match( '/<\\/pre/i', $inputLine );
		$preOpenMatch = preg_match( '/<pre/i', $inputLine );
		# If not in a <pre> element, scan for and figure out what prefixes are there.
		if ( !$this->inPre ) {
			# Multiple prefixes may abut each other for nested lists.
			$prefixLength = strspn( $inputLine, '*#:;' );
			$prefix = substr( $inputLine, 0, $prefixLength );

			# eh?
			# ; and : are both from definition-lists, so they're equivalent
			# for the purposes of determining whether or not we need to open/close
			# elements.
			$prefix2 = str_replace( ';', ':', $prefix );
			$t = substr( $inputLine, $prefixLength );
			$this->inPre = (bool)$preOpenMatch;
		} else {
			# Don't interpret any other prefixes in preformatted text
			$prefixLength = 0;
			$prefix = $prefix2 = '';
			$t = $inputLine;
		}

		# List generation
		if ( $prefixLength && $lastPrefix === $prefix2 ) {
			# Same as the last item, so no need to deal with nesting or opening stuff
			$output .= $this->nextItem( substr( $prefix, -1 ) );
			$pendingPTag = false;

			if ( substr( $prefix, -1 ) === ';' ) {
				# The one nasty exception: definition lists work like this:
				# ; title : definition text
				# So we check for : in the remainder text to split up the
				# title and definition, without b0rking links.
				$term = $t2 = '';
				if ( $this->findColonNoLinks( $t, $term, $t2 ) !== false ) {
					$t = $t2;
					// Trim whitespace in list items
					$output .= trim( $term ) . $this->nextItem( ':' );
				}
			}
		} elseif ( $prefixLength || $lastPrefixLength ) {
			# We need to open or close prefixes, or both.

			# Either open or close a level...
			$commonPrefixLength = $this->getCommon( $prefix, $lastPrefix );
			$pendingPTag = false;

			# Close all the prefixes which aren't shared.
			while ( $commonPrefixLength < $lastPrefixLength ) {
				$output .= $this->closeList( $lastPrefix[$lastPrefixLength - 1] );
				--$lastPrefixLength;
			}

			# Continue the current prefix if appropriate.
			if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
				$output .= $this->nextItem( $prefix[$commonPrefixLength - 1] );
			}

			# Close an open <dt> if we have a <dd> (":") starting on this line
			if ( $this->DTopen && $commonPrefixLength > 0 && $prefix[$commonPrefixLength - 1] === ':' ) {
				$output .= $this->nextItem( ':' );
			}

			# Open prefixes where appropriate.
			if ( $lastPrefix && $prefixLength > $commonPrefixLength ) {
				$output .= "\n";
			}
			while ( $prefixLength > $commonPrefixLength ) {
				$char = $prefix[$commonPrefixLength];
				$output .= $this->openList( $char );

				if ( ';' === $char ) {
					# @todo FIXME: This is dupe of code above
					if ( $this->findColonNoLinks( $t, $term, $t2 ) !== false ) {
						$t = $t2;
						// Trim whitespace in list items
						$output .= trim( $term ) . $this->nextItem( ':' );
					}
				}
				++$commonPrefixLength;
			}
			if ( !$prefixLength && $lastPrefix ) {
				$output .= "\n";
			}
			$lastPrefix = $prefix2;
		}

		# If we have no prefixes, go to paragraph mode.
		if ( 0 == $prefixLength ) {
			# No prefix (not in list)--go to paragraph mode
			# @todo consider using a stack for nestable elements like span, table and div

			// P-wrapping and indent-pre are suppressed inside, not outside
			$blockElems = 'table|h1|h2|h3|h4|h5|h6|pre|p|ul|ol|dl';
			// P-wrapping and indent-pre are suppressed outside, not inside
			$antiBlockElems = 'td|th';

			$openMatch = preg_match(
				'/<('
					. "({$blockElems})|\\/({$antiBlockElems})|"
					// Always suppresses
					. '\\/?(tr|dt|dd|li)'
					. ')\\b/iS',
				$t
			);
			$closeMatch = preg_match(
				'/<('
					. "\\/({$blockElems})|({$antiBlockElems})|"
					// Never suppresses
					. '\\/?(center|blockquote|div|hr|mw:)'
					. ')\\b/iS',
				$t
			);

			// Any match closes the paragraph, but only when `!$closeMatch`
			// do we enter block mode.  The oddities with table rows and
			// cells are to avoid paragraph wrapping in interstitial spaces
			// leading to fostered content.

			if ( $openMatch || $closeMatch ) {
				$pendingPTag = false;
				// Only close the paragraph if we're not inside a <pre> tag, or if
				// that <pre> tag has just been opened
				if ( !$this->inPre || $preOpenMatch ) {
					// @todo T7718: paragraph closed
					$output .= $this->closeParagraph();
				}
				if ( $preOpenMatch && !$preCloseMatch ) {
					$this->inPre = true;
				}
				# Scan every <blockquote>/</blockquote> on the line; the last
				# one seen decides whether we are inside a blockquote afterwards.
				$bqOffset = 0;
				while ( preg_match( '/<(\\/?)blockquote[\s>]/i', $t,
					$bqMatch, PREG_OFFSET_CAPTURE, $bqOffset )
				) {
					$inBlockquote = !$bqMatch[1][0]; // is this a close tag?
					$bqOffset = $bqMatch[0][1] + strlen( $bqMatch[0][0] );
				}
				$inBlockElem = !$closeMatch;
			} elseif ( !$inBlockElem && !$this->inPre ) {
				if ( ' ' == substr( $t, 0, 1 )
					&& ( $this->lastSection === 'pre' || trim( $t ) != '' )
					&& !$inBlockquote
				) {
					# pre
					if ( $this->lastSection !== 'pre' ) {
						$pendingPTag = false;
						$output .= $this->closeParagraph() . '<pre>';
						$this->lastSection = 'pre';
					}
					# Drop the leading space that marked the line as preformatted
					$t = substr( $t, 1 );
				} elseif ( preg_match( '/^(?:<style\\b[^>]*>.*?<\\/style>\s*|<link\\b[^>]*>\s*)+$/iS', $t ) ) {
					# T186965: <style> or <link> by itself on a line shouldn't open or close paragraphs.
					# But it should clear $pendingPTag.
					if ( $pendingPTag ) {
						$output .= $this->closeParagraph();
						$pendingPTag = false;
						$this->lastSection = '';
					}
				} else {
					# paragraph
					if ( trim( $t ) === '' ) {
						if ( $pendingPTag ) {
							# Second blank line in a row: emit the deferred tag(s)
							# followed by a line break
							$output .= $pendingPTag . '<br />';
							$pendingPTag = false;
							$this->lastSection = 'p';
						} else {
							if ( $this->lastSection !== 'p' ) {
								$output .= $this->closeParagraph();
								$this->lastSection = '';
								$pendingPTag = '<p>';
							} else {
								$pendingPTag = '</p><p>';
							}
						}
					} else {
						if ( $pendingPTag ) {
							$output .= $pendingPTag;
							$pendingPTag = false;
							$this->lastSection = 'p';
						} elseif ( $this->lastSection !== 'p' ) {
							$output .= $this->closeParagraph() . '<p>';
							$this->lastSection = 'p';
						}
					}
				}
			}
		}
		# somewhere above we forget to get out of pre block (T2785)
		if ( $preCloseMatch && $this->inPre ) {
			$this->inPre = false;
		}
		# While a paragraph tag is pending, the (blank) line itself is not output
		if ( $pendingPTag === false ) {
			if ( $prefixLength === 0 ) {
				$output .= $t;
				$output .= "\n";
			} else {
				// Trim whitespace in list items
				$output .= trim( $t );
			}
		}
	}
	# Close any lists still open after the last line
	while ( $prefixLength ) {
		$output .= $this->closeList( $prefix2[$prefixLength - 1] );
		--$prefixLength;
		if ( !$prefixLength ) {
			$output .= "\n";
		}
	}
	# Close a trailing <p> or <pre>
	if ( $this->lastSection !== '' ) {
		$output .= '</' . $this->lastSection . '>';
		$this->lastSection = '';
	}

	return $output;
}
418
/**
 * Split up a string on ':', ignoring any occurrences inside tags
 * to prevent illegal overlapping.
 *
 * On success $before and $after receive the text on either side of the
 * colon; on failure they are left untouched.
 *
 * NOTE(review): in this listing the `case` labels of the outer switch and
 * the transitions into the tag-start, close-tag, tag-slash and comment
 * states were missing. They have been restored from the surrounding
 * comments and structure; confirm that each COLON_STATE_* constant used
 * below is declared on the class.
 *
 * @param string $str The string to split
 * @param string &$before Set to everything before the ':'
 * @param string &$after Set to everything after the ':'
 * @throws MWException If the state machine reaches an unknown state
 * @return int|false The byte offset of the ':', or false if none was found
 */
private function findColonNoLinks( $str, &$before, &$after ) {
	if ( !preg_match( '/:|<|-\{/', $str, $m, PREG_OFFSET_CAPTURE ) ) {
		# Nothing to find!
		return false;
	}

	if ( $m[0][0] === ':' ) {
		# Easy; no tag nesting to worry about
		$colonPos = $m[0][1];
		$before = substr( $str, 0, $colonPos );
		$after = substr( $str, $colonPos + 1 );
		return $colonPos;
	}

	# Ugly state machine to walk through avoiding tags.
	$state = self::COLON_STATE_TEXT;
	$ltLevel = 0;
	$lcLevel = 0;
	$len = strlen( $str );
	for ( $i = $m[0][1]; $i < $len; $i++ ) {
		$c = $str[$i];

		switch ( $state ) {
			case self::COLON_STATE_TEXT:
				switch ( $c ) {
					case "<":
						# Could be either a <start> tag or an </end> tag
						$state = self::COLON_STATE_TAGSTART;
						break;
					case ":":
						if ( $ltLevel === 0 ) {
							# We found it!
							$before = substr( $str, 0, $i );
							$after = substr( $str, $i + 1 );
							return $i;
						}
						# Embedded in a tag; don't break it.
						break;
					default:
						# Skip ahead looking for something interesting
						if ( !preg_match( '/:|<|-\{/', $str, $m, PREG_OFFSET_CAPTURE, $i ) ) {
							# Nothing else interesting
							return false;
						}
						if ( $m[0][0] === '-{' ) {
							$state = self::COLON_STATE_LC;
							$lcLevel++;
							# Land on the '{'; the loop increment then moves past it
							$i = $m[0][1] + 1;
						} else {
							# Skip ahead to next interesting character.
							$i = $m[0][1] - 1;
						}
						break;
				}
				break;
			case self::COLON_STATE_LC:
				# In language converter markup -{ ... }-
				if ( !preg_match( '/-\{|\}-/', $str, $m, PREG_OFFSET_CAPTURE, $i ) ) {
					# Nothing else interesting to find; abort!
					# We're nested in language converter markup, but there
					# are no close tags left.  Abort!
					break 2;
				} elseif ( $m[0][0] === '-{' ) {
					$i = $m[0][1] + 1;
					$lcLevel++;
				} elseif ( $m[0][0] === '}-' ) {
					$i = $m[0][1] + 1;
					$lcLevel--;
					if ( $lcLevel === 0 ) {
						$state = self::COLON_STATE_TEXT;
					}
				}
				break;
			case self::COLON_STATE_TAG:
				# In a <tag>
				switch ( $c ) {
					case ">":
						$ltLevel++;
						$state = self::COLON_STATE_TEXT;
						break;
					case "/":
						# Slash may be followed by >?
						$state = self::COLON_STATE_TAGSLASH;
						break;
					default:
						# ignore
				}
				break;
			case self::COLON_STATE_TAGSTART:
				# Just after a '<'
				switch ( $c ) {
					case "/":
						$state = self::COLON_STATE_CLOSETAG;
						break;
					case "!":
						$state = self::COLON_STATE_COMMENT;
						break;
					case ">":
						# Illegal early close? This shouldn't happen D:
						$state = self::COLON_STATE_TEXT;
						break;
					default:
						$state = self::COLON_STATE_TAG;
				}
				break;
			case self::COLON_STATE_CLOSETAG:
				# In a </tag>
				if ( $c === ">" ) {
					if ( $ltLevel > 0 ) {
						$ltLevel--;
					} else {
						# ignore the excess close tag, but keep looking for
						# colons. (This matches Parsoid behavior.)
						wfDebug( __METHOD__ . ": Invalid input; too many close tags\n" );
					}
					$state = self::COLON_STATE_TEXT;
				}
				break;
			case self::COLON_STATE_TAGSLASH:
				if ( $c === ">" ) {
					# Yes, a self-closed tag <blah/>
					$state = self::COLON_STATE_TEXT;
				} else {
					# Probably we're jumping the gun, and this is an attribute
					$state = self::COLON_STATE_TAG;
				}
				break;
			case self::COLON_STATE_COMMENT:
				if ( $c === "-" ) {
					$state = self::COLON_STATE_COMMENTDASH;
				}
				break;
			case self::COLON_STATE_COMMENTDASH:
				if ( $c === "-" ) {
					$state = self::COLON_STATE_COMMENTDASHDASH;
				} else {
					$state = self::COLON_STATE_COMMENT;
				}
				break;
			case self::COLON_STATE_COMMENTDASHDASH:
				if ( $c === ">" ) {
					$state = self::COLON_STATE_TEXT;
				} else {
					$state = self::COLON_STATE_COMMENT;
				}
				break;
			default:
				throw new MWException( "State machine error in " . __METHOD__ );
		}
	}
	if ( $ltLevel > 0 || $lcLevel > 0 ) {
		wfDebug(
			__METHOD__ . ": Invalid input; not enough close tags " .
			"(level $ltLevel/$lcLevel, state $state)\n"
		);
		return false;
	}
	return false;
}
587}
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
openList( $char)
Open the list item element identified by the prefix character.
const COLON_STATE_COMMENTDASHDASH
__construct( $text, $lineStart)
Private constructor.
nextItem( $char)
Close the current list item and open the next one.
execute()
Execute the pass.
getCommon( $st1, $st2)
getCommon() returns the length of the longest common substring of both arguments, starting at the beginning of both.
static doBlockLevels( $text, $lineStart)
Make lists from lines starting with ':', '*', '#', etc.
findColonNoLinks( $str, &$before, &$after)
Split up a string on ':', ignoring any occurrences inside tags to prevent illegal overlapping.
const COLON_STATE_COMMENTDASH
closeParagraph()
If a pre or p is open, return the corresponding close tag and update the state.
closeList( $char)
Close the current list item identified by the prefix character.
MediaWiki exception.
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
namespace being checked & $result
Definition hooks.txt:2385
For QUnit the mediawiki tests qunit testrunner dependency will be added to any module whereas SearchGetNearMatch runs after $term
Definition hooks.txt:2926
static configuration should be added through ResourceLoaderGetConfigVars instead can be used to get the real title e g db for database replication lag or jobqueue for job queue size converted to pseudo seconds It is possible to add more fields and they will be returned to the user in the API response after the basic globals have been set but before ordinary actions take place $output
Definition hooks.txt:2317