REL1_32/php/BlockLevelPass_8php_source.html

<?php


/**
 * Line-oriented pass that turns wikitext-style line prefixes and bare text
 * lines into block-level HTML: lists (`*`, `#`, `;`, `:`), paragraphs and
 * indent-`<pre>` blocks.
 *
 * Use the static entry point doBlockLevels(); the constructor is private.
 */
class BlockLevelPass {
    /** @var bool Whether a <dt> is currently open (as opposed to a <dd>) */
    private $DTopen = false;

    /** @var bool Whether we are currently inside a <pre> element */
    private $inPre = false;

    /** @var string Tag name of the open paragraph-level section: '', 'p' or 'pre' */
    private $lastSection = '';

    /** @var bool Whether the text begins at the start of a line */
    private $lineStart;

    /** @var string The text being processed */
    private $text;

    # State constants for the definition list colon extraction
    const COLON_STATE_TEXT = 0;
    const COLON_STATE_TAG = 1;
    const COLON_STATE_TAGSTART = 2;
    const COLON_STATE_CLOSETAG = 3;
    const COLON_STATE_TAGSLASH = 4;
    const COLON_STATE_COMMENT = 5;
    const COLON_STATE_COMMENTDASH = 6;
    const COLON_STATE_COMMENTDASHDASH = 7;
    const COLON_STATE_LC = 8;

    /**
     * Make lists from lines starting with ':', '*', '#', etc.
     *
     * @param string $text Text to process
     * @param bool $lineStart Whether $text begins at the start of a line. When
     *   false, the first line is copied to the output verbatim and processing
     *   starts with the second line.
     * @return string The processed text
     */
    public static function doBlockLevels( $text, $lineStart ) {
        $pass = new self( $text, $lineStart );
        return $pass->execute();
    }

    /**
     * Private constructor; use doBlockLevels().
     *
     * @param string $text
     * @param bool $lineStart
     */
    private function __construct( $text, $lineStart ) {
        $this->text = $text;
        $this->lineStart = $lineStart;
    }

    /**
     * If a pre or p is open, return the corresponding close tag and update
     * the state. If no tag is open, return an empty string.
     *
     * Always clears $inPre and $lastSection.
     *
     * @return string
     */
    private function closeParagraph() {
        $result = '';
        if ( $this->lastSection !== '' ) {
            $result = '</' . $this->lastSection . ">\n";
        }
        $this->inPre = false;
        $this->lastSection = '';
        return $result;
    }

    /**
     * Return the length of the longest common substring of both arguments,
     * starting at the beginning of both.
     *
     * @param string $st1
     * @param string $st2
     * @return int
     */
    private function getCommon( $st1, $st2 ) {
        $shorter = min( strlen( $st1 ), strlen( $st2 ) );

        for ( $i = 0; $i < $shorter; ++$i ) {
            if ( $st1[$i] !== $st2[$i] ) {
                break;
            }
        }
        return $i;
    }

    /**
     * Open the list item element identified by the prefix character.
     * Any open paragraph or pre is closed first.
     *
     * @param string $char One of '*', '#', ':' or ';'
     * @return string HTML to append to the output
     */
    private function openList( $char ) {
        $result = $this->closeParagraph();

        if ( $char === '*' ) {
            $result .= "<ul><li>";
        } elseif ( $char === '#' ) {
            $result .= "<ol><li>";
        } elseif ( $char === ':' ) {
            $result .= "<dl><dd>";
        } elseif ( $char === ';' ) {
            $result .= "<dl><dt>";
            $this->DTopen = true;
        } else {
            # closeParagraph() has already reset the section state, so its
            # close tag must still be emitted; append the marker instead of
            # replacing the result, otherwise the <p>/<pre> is left unclosed.
            $result .= '<!-- ERR 1 -->';
        }

        return $result;
    }

    /**
     * Close the current list item and open the next one.
     *
     * @param string $char One of '*', '#', ':' or ';'
     * @return string HTML to append to the output
     */
    private function nextItem( $char ) {
        if ( $char === '*' || $char === '#' ) {
            return "</li>\n<li>";
        }

        if ( $char === ':' || $char === ';' ) {
            # Close whichever of <dt>/<dd> is currently open
            $close = $this->DTopen ? "</dt>\n" : "</dd>\n";

            if ( $char === ';' ) {
                $this->DTopen = true;
                return $close . '<dt>';
            }

            $this->DTopen = false;
            return $close . '<dd>';
        }

        return '<!-- ERR 2 -->';
    }

    /**
     * Close the current list item identified by the prefix character.
     *
     * Note that ';' is not handled here: execute() only ever passes prefix
     * characters in which ';' has been normalised to ':'.
     *
     * @param string $char One of '*', '#' or ':'
     * @return string HTML to append to the output
     */
    private function closeList( $char ) {
        if ( $char === '*' ) {
            return "</li></ul>";
        }

        if ( $char === '#' ) {
            return "</li></ol>";
        }

        if ( $char === ':' ) {
            if ( $this->DTopen ) {
                $this->DTopen = false;
                return "</dt></dl>";
            }
            return "</dd></dl>";
        }

        return '<!-- ERR 3 -->';
    }

    /**
     * Handle the "; title : definition text" form of a definition-list line.
     *
     * If $t contains a colon that is not inside a tag or language converter
     * markup, $t is replaced by the text after that colon and the markup for
     * the trimmed term followed by the <dt> -> <dd> transition is returned.
     * Otherwise $t is left untouched and an empty string is returned.
     *
     * @param string &$t Remainder of the line after the list prefix
     * @return string HTML to append to the output
     */
    private function splitDefinitionTerm( &$t ) {
        $term = $definition = '';
        if ( $this->findColonNoLinks( $t, $term, $definition ) === false ) {
            return '';
        }

        $t = $definition;
        // Trim whitespace in list items
        return trim( $term ) . $this->nextItem( ':' );
    }

    /**
     * Execute the pass.
     *
     * @return string The processed text
     */
    private function execute() {
        $text = $this->text;
        # Parsing through the text line by line.  The main thing
        # happening here is handling of block-level elements p, pre,
        # and making lists from lines starting with * # : etc.
        $textLines = StringUtils::explode( "\n", $text );

        $lastPrefix = $output = '';
        $this->DTopen = $inBlockElem = false;
        $prefixLength = 0;
        # Either false, or the tag markup ('<p>' / '</p><p>') that a blank
        # line has deferred until we know what follows it.
        $pendingPTag = false;
        $inBlockquote = false;

        # The element lists and the regexes built from them do not depend on
        # the line being processed, so build them once.
        // P-wrapping and indent-pre are suppressed inside, not outside
        $blockElems = 'table|h1|h2|h3|h4|h5|h6|pre|p|ul|ol|dl';
        // P-wrapping and indent-pre are suppressed outside, not inside
        $antiBlockElems = 'td|th';

        $openRegex = '/<('
            . "({$blockElems})|\\/({$antiBlockElems})|"
            // Always suppresses
            . '\\/?(tr|dt|dd|li)'
            . ')\\b/iS';
        $closeRegex = '/<('
            . "\\/({$blockElems})|({$antiBlockElems})|"
            // Never suppresses
            . '\\/?(center|blockquote|div|hr|mw:)'
            . ')\\b/iS';

        foreach ( $textLines as $inputLine ) {
            # Fix up $lineStart
            if ( !$this->lineStart ) {
                $output .= $inputLine;
                $this->lineStart = true;
                continue;
            }
            # * = ul
            # # = ol
            # ; = dt
            # : = dd

            $lastPrefixLength = strlen( $lastPrefix );
            $preCloseMatch = preg_match( '/<\\/pre/i', $inputLine );
            $preOpenMatch = preg_match( '/<pre/i', $inputLine );
            # If not in a <pre> element, scan for and figure out what prefixes are there.
            if ( !$this->inPre ) {
                # Multiple prefixes may abut each other for nested lists.
                $prefixLength = strspn( $inputLine, '*#:;' );
                $prefix = substr( $inputLine, 0, $prefixLength );

                # ; and : are both from definition-lists, so they're equivalent
                #  for the purposes of determining whether or not we need to open/close
                #  elements.
                $prefix2 = str_replace( ';', ':', $prefix );
                $t = substr( $inputLine, $prefixLength );
                $this->inPre = (bool)$preOpenMatch;
            } else {
                # Don't interpret any other prefixes in preformatted text
                $prefixLength = 0;
                $prefix = $prefix2 = '';
                $t = $inputLine;
            }

            # List generation
            if ( $prefixLength && $lastPrefix === $prefix2 ) {
                # Same as the last item, so no need to deal with nesting or opening stuff
                $output .= $this->nextItem( substr( $prefix, -1 ) );
                $pendingPTag = false;

                if ( substr( $prefix, -1 ) === ';' ) {
                    # The one nasty exception: definition lists work like this:
                    # ; title : definition text
                    # So we check for : in the remainder text to split up the
                    # title and definition, without b0rking links.
                    $output .= $this->splitDefinitionTerm( $t );
                }
            } elseif ( $prefixLength || $lastPrefixLength ) {
                # We need to open or close prefixes, or both.

                # Either open or close a level...
                $commonPrefixLength = $this->getCommon( $prefix, $lastPrefix );
                $pendingPTag = false;

                # Close all the prefixes which aren't shared.
                while ( $commonPrefixLength < $lastPrefixLength ) {
                    $output .= $this->closeList( $lastPrefix[$lastPrefixLength - 1] );
                    --$lastPrefixLength;
                }

                # Continue the current prefix if appropriate.
                if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
                    $output .= $this->nextItem( $prefix[$commonPrefixLength - 1] );
                }

                # Close an open <dt> if we have a <dd> (":") starting on this line
                if ( $this->DTopen && $commonPrefixLength > 0 && $prefix[$commonPrefixLength - 1] === ':' ) {
                    $output .= $this->nextItem( ':' );
                }

                # Open prefixes where appropriate.
                if ( $lastPrefix && $prefixLength > $commonPrefixLength ) {
                    $output .= "\n";
                }
                while ( $prefixLength > $commonPrefixLength ) {
                    $char = $prefix[$commonPrefixLength];
                    $output .= $this->openList( $char );

                    if ( $char === ';' ) {
                        $output .= $this->splitDefinitionTerm( $t );
                    }
                    ++$commonPrefixLength;
                }
                if ( !$prefixLength && $lastPrefix ) {
                    $output .= "\n";
                }
                $lastPrefix = $prefix2;
            }

            # If we have no prefixes, go to paragraph mode.
            if ( $prefixLength === 0 ) {
                # No prefix (not in list)--go to paragraph mode
                # @todo consider using a stack for nestable elements like span, table and div

                $openMatch = preg_match( $openRegex, $t );
                $closeMatch = preg_match( $closeRegex, $t );

                // Any match closes the paragraph, but only when `!$closeMatch`
                // do we enter block mode.  The oddities with table rows and
                // cells are to avoid paragraph wrapping in interstitial spaces
                // leading to fostered content.

                if ( $openMatch || $closeMatch ) {
                    $pendingPTag = false;
                    // Only close the paragraph if we're not inside a <pre> tag, or if
                    // that <pre> tag has just been opened
                    if ( !$this->inPre || $preOpenMatch ) {
                        // @todo T7718: paragraph closed
                        $output .= $this->closeParagraph();
                    }
                    if ( $preOpenMatch && !$preCloseMatch ) {
                        $this->inPre = true;
                    }
                    # Walk every <blockquote> / </blockquote> on the line; the
                    # last one seen decides whether we end up inside one.
                    $bqOffset = 0;
                    while ( preg_match( '/<(\\/?)blockquote[\s>]/i', $t,
                        $bqMatch, PREG_OFFSET_CAPTURE, $bqOffset )
                    ) {
                        $inBlockquote = !$bqMatch[1][0]; // is this a close tag?
                        $bqOffset = $bqMatch[0][1] + strlen( $bqMatch[0][0] );
                    }
                    $inBlockElem = !$closeMatch;
                } elseif ( !$inBlockElem && !$this->inPre ) {
                    if ( substr( $t, 0, 1 ) === ' '
                        && ( $this->lastSection === 'pre' || trim( $t ) !== '' )
                        && !$inBlockquote
                    ) {
                        # pre
                        if ( $this->lastSection !== 'pre' ) {
                            $pendingPTag = false;
                            $output .= $this->closeParagraph() . '<pre>';
                            $this->lastSection = 'pre';
                        }
                        # Drop the single leading space that triggered indent-pre
                        $t = substr( $t, 1 );
                    } elseif ( preg_match( '/^(?:<style\\b[^>]*>.*?<\\/style>\s*|<link\\b[^>]*>\s*)+$/iS', $t ) ) {
                        # T186965: <style> or <link> by itself on a line shouldn't open or close paragraphs.
                        # But it should clear $pendingPTag.
                        if ( $pendingPTag ) {
                            $output .= $this->closeParagraph();
                            $pendingPTag = false;
                            $this->lastSection = '';
                        }
                    } else {
                        # paragraph
                        if ( trim( $t ) === '' ) {
                            if ( $pendingPTag ) {
                                # Second blank line in a row: emit the deferred
                                # tag plus a line break.
                                $output .= $pendingPTag . '<br />';
                                $pendingPTag = false;
                                $this->lastSection = 'p';
                            } else {
                                if ( $this->lastSection !== 'p' ) {
                                    $output .= $this->closeParagraph();
                                    $this->lastSection = '';
                                    $pendingPTag = '<p>';
                                } else {
                                    $pendingPTag = '</p><p>';
                                }
                            }
                        } else {
                            if ( $pendingPTag ) {
                                $output .= $pendingPTag;
                                $pendingPTag = false;
                                $this->lastSection = 'p';
                            } elseif ( $this->lastSection !== 'p' ) {
                                $output .= $this->closeParagraph() . '<p>';
                                $this->lastSection = 'p';
                            }
                        }
                    }
                }
            }
            # somewhere above we forget to get out of pre block (T2785)
            if ( $preCloseMatch && $this->inPre ) {
                $this->inPre = false;
            }
            # A pending paragraph tag means this was a blank line whose output
            # is deferred, so nothing is emitted for it here.
            if ( $pendingPTag === false ) {
                if ( $prefixLength === 0 ) {
                    $output .= $t;
                    $output .= "\n";
                } else {
                    // Trim whitespace in list items
                    $output .= trim( $t );
                }
            }
        }

        # Close any lists still open after the last line
        while ( $prefixLength ) {
            $output .= $this->closeList( $prefix2[$prefixLength - 1] );
            --$prefixLength;
            if ( !$prefixLength ) {
                $output .= "\n";
            }
        }
        # Close any paragraph or pre still open after the last line
        if ( $this->lastSection !== '' ) {
            $output .= '</' . $this->lastSection . '>';
            $this->lastSection = '';
        }

        return $output;
    }

    /**
     * Split up a string on ':', ignoring any occurrences inside tags
     * to prevent illegal overlapping.
     *
     * @param string $str The string to split
     * @param string &$before Set to everything before the ':'
     * @param string &$after Set to everything after the ':'
     * @throws MWException If the state machine reaches an unknown state
     * @return int|false The position of the ':', or false if none found.
     *   $before and $after are only written when a position is returned.
     */
    private function findColonNoLinks( $str, &$before, &$after ) {
        if ( !preg_match( '/:|<|-\{/', $str, $m, PREG_OFFSET_CAPTURE ) ) {
            # Nothing to find!
            return false;
        }

        if ( $m[0][0] === ':' ) {
            # Easy; no tag nesting to worry about
            $colonPos = $m[0][1];
            $before = substr( $str, 0, $colonPos );
            $after = substr( $str, $colonPos + 1 );
            return $colonPos;
        }

        # Ugly state machine to walk through avoiding tags.
        $state = self::COLON_STATE_TEXT;
        # Number of opened-but-not-yet-closed tags
        $ltLevel = 0;
        # Nesting depth of language converter markup -{ ... }-
        $lcLevel = 0;
        $len = strlen( $str );
        for ( $i = $m[0][1]; $i < $len; $i++ ) {
            $c = $str[$i];

            switch ( $state ) {
                case self::COLON_STATE_TEXT:
                    switch ( $c ) {
                        case "<":
                            # Could be either a <start> tag or an </end> tag
                            $state = self::COLON_STATE_TAGSTART;
                            break;
                        case ":":
                            if ( $ltLevel === 0 ) {
                                # We found it!
                                $before = substr( $str, 0, $i );
                                $after = substr( $str, $i + 1 );
                                return $i;
                            }
                            # Embedded in a tag; don't break it.
                            break;
                        default:
                            # Skip ahead looking for something interesting
                            if ( !preg_match( '/:|<|-\{/', $str, $m, PREG_OFFSET_CAPTURE, $i ) ) {
                                # Nothing else interesting
                                return false;
                            }
                            if ( $m[0][0] === '-{' ) {
                                $state = self::COLON_STATE_LC;
                                $lcLevel++;
                                # Land on the '{'; the loop increment steps past it
                                $i = $m[0][1] + 1;
                            } else {
                                # Skip ahead to next interesting character.
                                # (The loop increment lands exactly on it.)
                                $i = $m[0][1] - 1;
                            }
                            break;
                    }
                    break;
                case self::COLON_STATE_LC:
                    # In language converter markup -{ ... }-
                    if ( !preg_match( '/-\{|\}-/', $str, $m, PREG_OFFSET_CAPTURE, $i ) ) {
                        # Nothing else interesting to find; abort!
                        # We're nested in language converter markup, but there
                        # are no close tags left.  Abort!
                        break 2;
                    } elseif ( $m[0][0] === '-{' ) {
                        $i = $m[0][1] + 1;
                        $lcLevel++;
                    } elseif ( $m[0][0] === '}-' ) {
                        $i = $m[0][1] + 1;
                        $lcLevel--;
                        if ( $lcLevel === 0 ) {
                            $state = self::COLON_STATE_TEXT;
                        }
                    }
                    break;
                case self::COLON_STATE_TAG:
                    # In a <tag>
                    switch ( $c ) {
                        case ">":
                            $ltLevel++;
                            $state = self::COLON_STATE_TEXT;
                            break;
                        case "/":
                            # Slash may be followed by >?
                            $state = self::COLON_STATE_TAGSLASH;
                            break;
                        default:
                            # ignore
                    }
                    break;
                case self::COLON_STATE_TAGSTART:
                    switch ( $c ) {
                        case "/":
                            $state = self::COLON_STATE_CLOSETAG;
                            break;
                        case "!":
                            $state = self::COLON_STATE_COMMENT;
                            break;
                        case ">":
                            # Illegal early close? This shouldn't happen D:
                            $state = self::COLON_STATE_TEXT;
                            break;
                        default:
                            $state = self::COLON_STATE_TAG;
                    }
                    break;
                case self::COLON_STATE_CLOSETAG:
                    # In a </tag>
                    if ( $c === ">" ) {
                        if ( $ltLevel > 0 ) {
                            $ltLevel--;
                        } else {
                            # ignore the excess close tag, but keep looking for
                            # colons. (This matches Parsoid behavior.)
                            wfDebug( __METHOD__ . ": Invalid input; too many close tags\n" );
                        }
                        $state = self::COLON_STATE_TEXT;
                    }
                    break;
                case self::COLON_STATE_TAGSLASH:
                    if ( $c === ">" ) {
                        # Yes, a self-closed tag <blah/>
                        $state = self::COLON_STATE_TEXT;
                    } else {
                        # Probably we're jumping the gun, and this is an attribute
                        $state = self::COLON_STATE_TAG;
                    }
                    break;
                case self::COLON_STATE_COMMENT:
                    if ( $c === "-" ) {
                        $state = self::COLON_STATE_COMMENTDASH;
                    }
                    break;
                case self::COLON_STATE_COMMENTDASH:
                    if ( $c === "-" ) {
                        $state = self::COLON_STATE_COMMENTDASHDASH;
                    } else {
                        $state = self::COLON_STATE_COMMENT;
                    }
                    break;
                case self::COLON_STATE_COMMENTDASHDASH:
                    if ( $c === ">" ) {
                        $state = self::COLON_STATE_TEXT;
                    } else {
                        $state = self::COLON_STATE_COMMENT;
                    }
                    break;
                default:
                    throw new MWException( "State machine error in " . __METHOD__ );
            }
        }

        # Ran off the end of the string (or aborted) without finding a usable
        # colon. Log when that happened because of unbalanced markup.
        if ( $ltLevel > 0 || $lcLevel > 0 ) {
            wfDebug(
                __METHOD__ . ": Invalid input; not enough close tags " .
                "(level $ltLevel/$lcLevel, state $state)\n"
            );
        }
        return false;
    }
}


wfDebug
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Definition GlobalFunctions.php:1000

BlockLevelPass
Definition BlockLevelPass.php:25

BlockLevelPass\COLON_STATE_CLOSETAG
const COLON_STATE_CLOSETAG
Definition BlockLevelPass.php:36

BlockLevelPass\openList
openList( $char)
Open the list item element identified by the prefix character.
Definition BlockLevelPass.php:105

BlockLevelPass\$inPre
$inPre
Definition BlockLevelPass.php:27

BlockLevelPass\COLON_STATE_LC
const COLON_STATE_LC
Definition BlockLevelPass.php:41

BlockLevelPass\COLON_STATE_COMMENTDASHDASH
const COLON_STATE_COMMENTDASHDASH
Definition BlockLevelPass.php:40

BlockLevelPass\$DTopen
$DTopen
Definition BlockLevelPass.php:26

BlockLevelPass\$text
$text
Definition BlockLevelPass.php:30

BlockLevelPass\COLON_STATE_TAGSLASH
const COLON_STATE_TAGSLASH
Definition BlockLevelPass.php:37

BlockLevelPass\__construct
__construct( $text, $lineStart)
Private constructor.
Definition BlockLevelPass.php:58

BlockLevelPass\nextItem
nextItem( $char)
Close the current list item and open the next one.
Definition BlockLevelPass.php:130

BlockLevelPass\COLON_STATE_TAG
const COLON_STATE_TAG
Definition BlockLevelPass.php:34

BlockLevelPass\execute
execute()
Execute the pass.
Definition BlockLevelPass.php:177

BlockLevelPass\getCommon
getCommon( $st1, $st2)
getCommon() returns the length of the longest common substring of both arguments, starting at the beginning of both.
Definition BlockLevelPass.php:87

BlockLevelPass\doBlockLevels
static doBlockLevels( $text, $lineStart)
Make lists from lines starting with ':', '*', '#', etc.
Definition BlockLevelPass.php:50

BlockLevelPass\COLON_STATE_COMMENT
const COLON_STATE_COMMENT
Definition BlockLevelPass.php:38

BlockLevelPass\$lineStart
$lineStart
Definition BlockLevelPass.php:29

BlockLevelPass\findColonNoLinks
findColonNoLinks( $str, &$before, &$after)
Split up a string on ':', ignoring any occurrences inside tags to prevent illegal overlapping.
Definition BlockLevelPass.php:429

BlockLevelPass\COLON_STATE_COMMENTDASH
const COLON_STATE_COMMENTDASH
Definition BlockLevelPass.php:39

BlockLevelPass\closeParagraph
closeParagraph()
If a pre or p is open, return the corresponding close tag and update the state.
Definition BlockLevelPass.php:68

BlockLevelPass\closeList
closeList( $char)
Close the current list item identified by the prefix character.
Definition BlockLevelPass.php:155

BlockLevelPass\COLON_STATE_TEXT
const COLON_STATE_TEXT
Definition BlockLevelPass.php:33

BlockLevelPass\COLON_STATE_TAGSTART
const COLON_STATE_TAGSTART
Definition BlockLevelPass.php:35

BlockLevelPass\$lastSection
$lastSection
Definition BlockLevelPass.php:28

MWException
MediaWiki exception.
Definition MWException.php:26

StringUtils\explode
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
Definition StringUtils.php:336

$result
namespace being checked & $result
Definition hooks.txt:2385

$term
For QUnit the mediawiki tests qunit testrunner dependency will be added to any module whereas SearchGetNearMatch runs after $term
Definition hooks.txt:2926

$output
static configuration should be added through ResourceLoaderGetConfigVars instead can be used to get the real title e g db for database replication lag or jobqueue for job queue size converted to pseudo seconds It is possible to add more fields and they will be returned to the user in the API response after the basic globals have been set but before ordinary actions take place $output
Definition hooks.txt:2317

$t
$t
Definition testCompression.php:69