Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 168 |
|
0.00% |
0 / 21 |
CRAP | |
0.00% |
0 / 1 |
| PEGParserBase | |
0.00% |
0 / 168 |
|
0.00% |
0 / 21 |
2862 | |
0.00% |
0 / 1 |
| __construct | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
12 | |||
| traceCall | |
0.00% |
0 / 24 |
|
0.00% |
0 / 1 |
12 | |||
| text | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| location | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| expected | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
| error | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
| charAt | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
30 | |||
| charsAt | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
| consumeChar | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
30 | |||
| advanceChar | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
6 | |||
| newRef | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| computePosDetails | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
90 | |||
| computeLocation | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
6 | |||
| fail | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
| expandExpectations | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
20 | |||
| buildMessage | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
20 | |||
| buildException | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
6 | |||
| buildParseException | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
6 | |||
| initialize | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| initInternal | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
2 | |||
| parse | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
0 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace Wikimedia\WikiPEG; |
| 4 | |
| 5 | use stdClass; |
| 6 | |
/**
 * Shared runtime for PEG parsers: input/position state, UTF-8 helpers,
 * line/column computation, failure tracking and syntax error construction.
 *
 * Concrete parsers extend this class and implement parse().
 */
abstract class PEGParserBase {
	/** Sentinel object compared by identity to signal a failed match. */
	protected static ?stdClass $FAILED = null;
	/** Second sentinel object, created alongside $FAILED by the constructor. */
	protected static ?stdClass $UNDEFINED = null;
	/** Current byte offset into $input. */
	protected int $currPos;
	/** Byte offset marking the start of the span used by text() and location(). */
	protected int $savedPos;
	protected string $input;
	/** Length of $input in bytes (strlen). */
	protected int $inputLength;
	protected array $options;
	/** @var array */
	protected $cache;

	/** @var array<int,array{line:int,column:int,seenCR:bool}> */
	protected array $posDetailsCache;
	/** Furthest byte offset at which fail() has been called. */
	protected int $maxFailPos;
	/** Expectation indexes recorded by fail() at $maxFailPos. */
	protected array $maxFailExpected;

	/** @var array Associative arrays of expectation info */
	protected $expectations;

	/** @var Expectation[] Expectation objects built from $expectations, keyed by index */
	private array $expectationCache = [];

	protected Tracer $tracer;

	/**
	 * Lazily create the shared sentinel objects.
	 */
	public function __construct() {
		self::$FAILED ??= new \stdClass;
		self::$UNDEFINED ??= new \stdClass;
	}

	/**
	 * Invoke a rule function, reporting rule.enter and then rule.match or
	 * rule.fail events to the tracer.
	 *
	 * @param callable $parseFunc The rule implementation; called with $args
	 * @param string $name Rule name reported in the trace events
	 * @param array $argNames Names for $args, matched by key
	 * @param array $args Arguments passed on to $parseFunc
	 * @return mixed Whatever $parseFunc returned
	 */
	protected function traceCall( callable $parseFunc, string $name, array $argNames, array $args ) {
		$namedArgs = [];
		foreach ( $args as $key => $value ) {
			$namedArgs[$argNames[$key]] = $value;
		}

		$startPos = $this->currPos;
		$this->tracer->trace( [
			'type' => 'rule.enter',
			'rule' => $name,
			'location' => $this->computeLocation( $startPos, $startPos ),
			'args' => $namedArgs
		] );

		$result = $parseFunc( ...$args );

		if ( $result === self::$FAILED ) {
			$this->tracer->trace( [
				'type' => 'rule.fail',
				'rule' => $name,
				'result' => $result,
				'location' => $this->computeLocation( $startPos, $startPos )
			] );
		} else {
			$this->tracer->trace( [
				'type' => 'rule.match',
				'rule' => $name,
				'location' => $this->computeLocation( $startPos, $this->currPos ),
			] );
		}

		return $result;
	}

	/**
	 * Get the input between the saved position and the current position.
	 */
	protected function text(): string {
		return substr( $this->input, $this->savedPos, $this->currPos - $this->savedPos );
	}

	/**
	 * Get the location range between the saved position and the current position.
	 */
	protected function location(): LocationRange {
		return $this->computeLocation( $this->savedPos, $this->currPos );
	}

	/**
	 * Throw a syntax error saying that $description was expected at the
	 * current span.
	 *
	 * @param string $description
	 * @return never
	 * @throws SyntaxError
	 */
	protected function expected( $description ) {
		throw $this->buildException(
			null,
			[ [ 'type' => "other", 'description' => $description ] ],
			$this->text(),
			$this->computeLocation( $this->savedPos, $this->currPos )
		);
	}

	/**
	 * Throw a syntax error with a caller-supplied message for the current span.
	 *
	 * @param string $message
	 * @return never
	 * @throws SyntaxError
	 */
	protected function error( $message ) {
		throw $this->buildException(
			$message,
			null,
			$this->text(),
			$this->computeLocation( $this->savedPos, $this->currPos )
		);
	}

	/**
	 * Get the UTF-8 character starting at a byte offset.
	 *
	 * The sequence length is taken from the high nibble of the first byte
	 * only; following bytes are not validated. A sequence cut short by the end
	 * of the string yields the bytes that are available.
	 *
	 * @param string $s
	 * @param int $byteOffset
	 * @return string The character, or '' if the offset is outside the string
	 */
	public static function charAt( string $s, int $byteOffset ): string {
		if ( !isset( $s[$byteOffset] ) ) {
			return '';
		}
		$length = match ( ord( $s[$byteOffset] ) & 0xf0 ) {
			0xc0, 0xd0 => 2,
			0xe0 => 3,
			0xf0 => 4,
			default => 1,
		};
		// substr() stops at the end of the string, so a truncated sequence
		// does not trigger "Uninitialized string offset" warnings.
		return substr( $s, $byteOffset, $length );
	}

	/**
	 * Get up to $numChars consecutive characters starting at a byte offset.
	 *
	 * @param string $s
	 * @param int $byteOffset
	 * @param int $numChars
	 * @return string Shorter than requested if the end of the string is reached
	 */
	public static function charsAt( string $s, int $byteOffset, int $numChars ): string {
		$chars = '';
		for ( $remaining = $numChars; $remaining > 0; $remaining-- ) {
			// $byteOffset is a by-value parameter here, so only the local copy advances.
			$chars .= self::consumeChar( $s, $byteOffset );
		}
		return $chars;
	}

	/**
	 * Get the UTF-8 character at a byte offset and advance the offset past it.
	 *
	 * The offset is advanced by the length implied by the first byte, even
	 * when the string ends before the sequence is complete, which matches
	 * advanceChar().
	 *
	 * @param string $s
	 * @param int &$byteOffset Updated in place; unchanged if outside the string
	 * @return string The character, or '' if the offset is outside the string
	 */
	public static function consumeChar( string $s, int &$byteOffset ): string {
		if ( !isset( $s[$byteOffset] ) ) {
			return '';
		}
		$length = match ( ord( $s[$byteOffset] ) & 0xf0 ) {
			0xc0, 0xd0 => 2,
			0xe0 => 3,
			0xf0 => 4,
			default => 1,
		};
		$char = substr( $s, $byteOffset, $length );
		$byteOffset += $length;
		return $char;
	}

	/**
	 * Advance a byte offset past the UTF-8 character it points at.
	 *
	 * @param string $s
	 * @param int &$byteOffset Updated in place; unchanged if outside the string
	 */
	public static function advanceChar( string $s, int &$byteOffset ): void {
		if ( !isset( $s[$byteOffset] ) ) {
			return;
		}
		$byteOffset += match ( ord( $s[$byteOffset] ) & 0xf0 ) {
			0xc0, 0xd0 => 2,
			0xe0 => 3,
			0xf0 => 4,
			default => 1,
		};
	}

	/**
	 * Return a reference to a fresh variable holding $value.
	 *
	 * @param mixed $value
	 * @return mixed
	 */
	public static function &newRef( $value ) {
		return $value;
	}

	/**
	 * Compute (and cache) the line and column of a byte offset.
	 *
	 * Scanning resumes from the nearest cached offset below $pos; offset 0 is
	 * seeded by initInternal(). Columns advance once per byte. "\n", "\r",
	 * U+2028 and U+2029 start a new line, and a "\n" directly after one of
	 * the other three does not count as a further line.
	 *
	 * @param int $pos
	 * @return array{line:int,column:int,seenCR:bool}
	 */
	protected function computePosDetails( $pos ) {
		if ( isset( $this->posDetailsCache[$pos] ) ) {
			return $this->posDetailsCache[$pos];
		}

		$p = $pos - 1;
		while ( !isset( $this->posDetailsCache[$p] ) ) {
			$p--;
		}
		$details = $this->posDetailsCache[$p];

		while ( $p < $pos ) {
			$ch = self::charAt( $this->input, $p );
			if ( $ch === "\n" ) {
				if ( !$details['seenCR'] ) {
					$details['line']++;
				}
				$details['column'] = 1;
				$details['seenCR'] = false;
			} elseif ( $ch === "\r" || $ch === "\u{2028}" || $ch === "\u{2029}" ) {
				// PHP needs the braced \u{...} form; "\u2028" is a literal
				// backslash sequence and would never match.
				$details['line']++;
				$details['column'] = 1;
				$details['seenCR'] = true;
				// Step over the remaining bytes of a multi-byte separator so
				// they are not counted as columns on the new line.
				$p += strlen( $ch ) - 1;
			} else {
				$details['column']++;
				$details['seenCR'] = false;
			}

			$p++;
		}

		$this->posDetailsCache[$pos] = $details;
		return $details;
	}

	/**
	 * Build a LocationRange for a pair of byte offsets.
	 *
	 * An end offset beyond the input length is reduced by one before use.
	 *
	 * @param int $startPos
	 * @param int $endPos
	 * @return LocationRange
	 */
	protected function computeLocation( int $startPos, int $endPos ): LocationRange {
		if ( $endPos > $this->inputLength ) {
			$endPos--;
		}
		$start = $this->computePosDetails( $startPos );
		$end = $this->computePosDetails( $endPos );

		return new LocationRange(
			$startPos,
			$start['line'],
			$start['column'],
			$endPos,
			$end['line'],
			$end['column']
		);
	}

	/**
	 * Record a failed expectation at the current position.
	 *
	 * Only expectations at the furthest failure position are kept: failures
	 * before it are ignored and a failure beyond it resets the list.
	 *
	 * @param int $expected Expectation index
	 */
	protected function fail( int $expected ) {
		if ( $this->currPos < $this->maxFailPos ) {
			return;
		}

		if ( $this->currPos > $this->maxFailPos ) {
			$this->maxFailPos = $this->currPos;
			$this->maxFailExpected = [];
		}

		$this->maxFailExpected[] = $expected;
	}

	/**
	 * Convert expectation indexes and inline expectation arrays to objects.
	 *
	 * Integer entries are looked up in $this->expectations and the resulting
	 * object is cached; any other entry is passed directly to Expectation.
	 *
	 * @param array<int|array{type:string,value?:?string,description:string}> $expected
	 * @return Expectation[]
	 */
	private function expandExpectations( $expected ) {
		$expanded = [];
		foreach ( $expected as $item ) {
			if ( !is_int( $item ) ) {
				$expanded[] = new Expectation( $item );
				continue;
			}
			if ( !isset( $this->expectationCache[$item] ) ) {
				$this->expectationCache[$item] = new Expectation( $this->expectations[$item] );
			}
			$expanded[] = $this->expectationCache[$item];
		}
		return $expanded;
	}

	/**
	 * Build the default "Expected ... but ... found." message.
	 *
	 * @param array $expected Objects with a description property
	 * @param ?string $found The offending text; null or '' means end of input
	 * @return string
	 */
	private function buildMessage( array $expected, ?string $found ): string {
		$descriptions = [];
		foreach ( $expected as $info ) {
			$descriptions[] = $info->description;
		}

		$lastDesc = array_pop( $descriptions );
		$expectedDesc = $descriptions
			? implode( ', ', $descriptions ) . ' or ' . $lastDesc
			: $lastDesc;

		// Compare explicitly: a plain truthiness test would report the
		// character "0" as end of input.
		if ( $found === null || $found === '' ) {
			$foundDesc = "end of input";
		} else {
			// Substitute invalid UTF-8 rather than letting json_encode() fail.
			$foundDesc = json_encode( $found, JSON_INVALID_UTF8_SUBSTITUTE );
		}

		return "Expected " . $expectedDesc . " but " . $foundDesc . " found.";
	}

	/**
	 * Build a SyntaxError.
	 *
	 * @param ?string $message Message to use; if null one is built from
	 *   $expected and $found
	 * @param ?array $expected Expectation indexes and/or inline expectation
	 *   arrays; duplicates are removed
	 * @param ?string $found
	 * @param LocationRange $location
	 * @return SyntaxError
	 */
	protected function buildException(
		?string $message, ?array $expected, ?string $found, LocationRange $location
	): SyntaxError {
		$expandedExpected = [];
		if ( $expected !== null ) {
			sort( $expected );
			// SORT_REGULAR avoids "Array to string conversion" when the list
			// holds inline expectation arrays; integer lists are unaffected.
			$expected = array_unique( $expected, SORT_REGULAR );
			$expandedExpected = $this->expandExpectations( $expected );
			usort(
				$expandedExpected,
				static fn ( $a, $b ) => Expectation::compare( $a, $b )
			);
		}

		return new SyntaxError(
			$message ?? $this->buildMessage( $expandedExpected, $found ),
			$expandedExpected,
			$found,
			$location
		);
	}

	/**
	 * Build the SyntaxError for the furthest failure recorded by fail().
	 */
	protected function buildParseException(): SyntaxError {
		$char = self::charAt( $this->input, $this->maxFailPos );
		return $this->buildException(
			null,
			$this->maxFailExpected,
			$char === '' ? null : $char,
			$this->computeLocation( $this->maxFailPos, $this->maxFailPos + 1 )
		);
	}

	/**
	 * Hook called at the end of initInternal(); does nothing by default.
	 */
	protected function initialize() {
	}

	/**
	 * Reset all per-parse state for a new input, then call initialize().
	 *
	 * @param string $input
	 * @param array $options The 'tracer' key, if set, is used as the tracer;
	 *   otherwise a DefaultTracer is created
	 */
	protected function initInternal( string $input, array $options ) {
		$this->input = $input;
		$this->inputLength = strlen( $input );
		$this->options = $options;
		$this->tracer = $options['tracer'] ?? new DefaultTracer;

		$this->currPos = 0;
		$this->savedPos = 0;
		$this->maxFailPos = 0;
		$this->maxFailExpected = [];
		$this->cache = [];
		// Seed offset 0 so computePosDetails() always finds a starting point.
		$this->posDetailsCache = [ [ 'line' => 1, 'column' => 1, 'seenCR' => false ] ];

		$this->initialize();
	}

	/**
	 * Parse an input string.
	 *
	 * @param string $input
	 * @param array $options
	 * @return mixed
	 */
	abstract public function parse( string $input, array $options = [] );
}