Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 168 |
|
0.00% |
0 / 21 |
CRAP | |
0.00% |
0 / 1 |
| PEGParserBase | |
0.00% |
0 / 168 |
|
0.00% |
0 / 21 |
2862 | |
0.00% |
0 / 1 |
| __construct | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
12 | |||
| traceCall | |
0.00% |
0 / 24 |
|
0.00% |
0 / 1 |
12 | |||
| text | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| location | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| expected | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
| error | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
| charAt | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
30 | |||
| charsAt | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
| consumeChar | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
30 | |||
| advanceChar | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
6 | |||
| newRef | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| computePosDetails | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
90 | |||
| computeLocation | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
6 | |||
| fail | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
| expandExpectations | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
20 | |||
| buildMessage | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
20 | |||
| buildException | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
6 | |||
| buildParseException | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
6 | |||
| initialize | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| initInternal | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
2 | |||
| parse | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
0 | |||
| 1 | <?php |
| 2 | declare( strict_types = 1 ); |
| 3 | |
| 4 | namespace Wikimedia\WikiPEG; |
| 5 | |
| 6 | use stdClass; |
| 7 | |
/**
 * Shared runtime for PEG parsers: input and position state, UTF-8 character
 * helpers, line/column computation, failure tracking and construction of
 * SyntaxError objects. Concrete parsers extend this class and implement
 * parse().
 *
 * All positions handled by this class are byte offsets into the input string.
 */
abstract class PEGParserBase {
	/** @var ?stdClass Shared sentinel; traceCall() treats a result identical to it as a failed rule */
	protected static ?stdClass $FAILED = null;
	/** @var ?stdClass Second shared sentinel, created together with $FAILED */
	protected static ?stdClass $UNDEFINED = null;
	/** @var int Current byte offset into $input */
	protected int $currPos;
	/** @var int Byte offset at which the span reported by text() and location() starts */
	protected int $savedPos;
	/** @var string The text being parsed */
	protected string $input;
	/** @var int Length of $input in bytes */
	protected int $inputLength;
	/** @var array Options handed to initInternal() */
	protected array $options;
	/** @var array */
	protected $cache;

	/** @var array<int,array{line:int,column:int,seenCR:bool}> Position details keyed by byte offset */
	protected array $posDetailsCache;
	/** @var int Furthest position at which fail() has recorded an expectation */
	protected int $maxFailPos;
	/** @var array Expectations recorded by fail() at $maxFailPos */
	protected array $maxFailExpected;

	/** @var array Associative arrays of expectation info */
	protected $expectations;

	/** @var Expectation[] Expectation objects already built from $expectations, keyed by index */
	private array $expectationCache;

	/** @var Tracer Receiver of the events emitted by traceCall() */
	protected Tracer $tracer;

	/**
	 * Create the shared sentinel objects the first time any parser is constructed.
	 */
	public function __construct() {
		self::$FAILED ??= new stdClass;
		self::$UNDEFINED ??= new stdClass;
	}

	/**
	 * Invoke a rule function and report its progress to the tracer.
	 *
	 * A "rule.enter" event is emitted before the call, followed by either
	 * "rule.match" (spanning the consumed input) or "rule.fail" afterwards.
	 *
	 * @param callable $parseFunc Rule implementation, called with $args
	 * @param string $name Rule name reported in the events
	 * @param array $argNames Argument names, indexed like $args
	 * @param array $args Argument values passed to $parseFunc
	 * @return mixed Whatever $parseFunc returned
	 */
	protected function traceCall( callable $parseFunc, string $name, array $argNames, array $args ) {
		$namedArgs = [];
		foreach ( $args as $i => $argValue ) {
			$namedArgs[$argNames[$i]] = $argValue;
		}

		$startPos = $this->currPos;
		$this->tracer->trace( [
			'type' => 'rule.enter',
			'rule' => $name,
			'location' => $this->computeLocation( $startPos, $startPos ),
			'args' => $namedArgs
		] );

		$result = $parseFunc( ...$args );

		if ( $result === self::$FAILED ) {
			$this->tracer->trace( [
				'type' => 'rule.fail',
				'rule' => $name,
				'result' => $result,
				'location' => $this->computeLocation( $startPos, $startPos )
			] );
		} else {
			$this->tracer->trace( [
				'type' => 'rule.match',
				'rule' => $name,
				'location' => $this->computeLocation( $startPos, $this->currPos ),
			] );
		}

		return $result;
	}

	/**
	 * @return string The input between the saved position and the current position
	 */
	protected function text(): string {
		return substr( $this->input, $this->savedPos, $this->currPos - $this->savedPos );
	}

	/**
	 * @return LocationRange The range between the saved position and the current position
	 */
	protected function location(): LocationRange {
		return $this->computeLocation( $this->savedPos, $this->currPos );
	}

	/**
	 * Abort with a syntax error saying that $description was expected at the
	 * span between the saved and the current position.
	 *
	 * @param string $description
	 * @return never
	 * @throws SyntaxError
	 */
	protected function expected( $description ) {
		throw $this->buildException(
			null,
			[ [ 'type' => "other", 'description' => $description ] ],
			$this->text(),
			$this->computeLocation( $this->savedPos, $this->currPos )
		);
	}

	/**
	 * Abort with a syntax error carrying a caller-supplied message, located at
	 * the span between the saved and the current position.
	 *
	 * @param string $message
	 * @return never
	 * @throws SyntaxError
	 */
	protected function error( $message ) {
		throw $this->buildException(
			$message,
			null,
			$this->text(),
			$this->computeLocation( $this->savedPos, $this->currPos )
		);
	}

	/**
	 * Number of bytes announced by the first byte of a UTF-8 sequence.
	 *
	 * Bytes below 0xC0 (ASCII and continuation bytes) count as a single byte.
	 *
	 * @param int $leadByte Byte value, 0-255
	 * @return int Between 1 and 4
	 */
	private static function utf8SequenceLength( int $leadByte ): int {
		return match ( $leadByte & 0xf0 ) {
			0xc0, 0xd0 => 2,
			0xe0 => 3,
			0xf0 => 4,
			default => 1,
		};
	}

	/**
	 * Get the UTF-8 character starting at a byte offset.
	 *
	 * @param string $s
	 * @param int $byteOffset
	 * @return string The character, or '' when the offset is not inside $s. If
	 *  the string ends in the middle of a sequence, the bytes that are present
	 *  are returned.
	 */
	public static function charAt( string $s, int $byteOffset ): string {
		if ( !isset( $s[$byteOffset] ) ) {
			return '';
		}
		// substr() stops at the end of the string, so a truncated trailing
		// sequence does not trigger "Uninitialized string offset" warnings.
		return substr( $s, $byteOffset, self::utf8SequenceLength( ord( $s[$byteOffset] ) ) );
	}

	/**
	 * Get up to $numChars consecutive UTF-8 characters starting at a byte offset.
	 *
	 * @param string $s
	 * @param int $byteOffset
	 * @param int $numChars
	 * @return string Shorter than requested when the end of $s is reached
	 */
	public static function charsAt( string $s, int $byteOffset, int $numChars ): string {
		$chars = '';
		// $byteOffset is a by-value copy here; consumeChar() moves it forward.
		for ( $remaining = $numChars; $remaining > 0; $remaining-- ) {
			$chars .= self::consumeChar( $s, $byteOffset );
		}
		return $chars;
	}

	/**
	 * Get the UTF-8 character at a byte offset and move the offset past it.
	 *
	 * The offset advances by the length announced by the lead byte, even when
	 * the string ends before the sequence is complete. It is left untouched
	 * when it does not point inside $s.
	 *
	 * @param string $s
	 * @param int &$byteOffset
	 * @return string The character, or '' when the offset is not inside $s
	 */
	public static function consumeChar( string $s, int &$byteOffset ): string {
		if ( !isset( $s[$byteOffset] ) ) {
			return '';
		}
		$length = self::utf8SequenceLength( ord( $s[$byteOffset] ) );
		// substr() tolerates a sequence that is cut off by the end of the string.
		$char = substr( $s, $byteOffset, $length );
		$byteOffset += $length;
		return $char;
	}

	/**
	 * Move a byte offset past the UTF-8 character it points at, without
	 * building the character. Does nothing when the offset is not inside $s.
	 *
	 * @param string $s
	 * @param int &$byteOffset
	 */
	public static function advanceChar( string $s, int &$byteOffset ): void {
		if ( isset( $s[$byteOffset] ) ) {
			$byteOffset += self::utf8SequenceLength( ord( $s[$byteOffset] ) );
		}
	}

	/**
	 * Return the given value through a reference-returning function.
	 *
	 * @param mixed $value
	 * @return mixed
	 */
	public static function &newRef( $value ) {
		return $value;
	}

	/**
	 * Compute the line and column of a byte offset, caching the result.
	 *
	 * Scanning resumes from the nearest cached offset before $pos. "\n",
	 * "\r", U+2028 and U+2029 start a new line, with "\r\n" counted once.
	 * Every other byte scanned advances the column by one.
	 *
	 * @param int $pos
	 * @return array{line:int,column:int,seenCR:bool}
	 */
	protected function computePosDetails( $pos ) {
		if ( isset( $this->posDetailsCache[$pos] ) ) {
			return $this->posDetailsCache[$pos];
		}

		// Walk back to the closest offset whose details are already known.
		$p = $pos - 1;
		while ( !isset( $this->posDetailsCache[$p] ) ) {
			$p--;
		}
		$details = $this->posDetailsCache[$p];

		while ( $p < $pos ) {
			$ch = self::charAt( $this->input, $p );
			if ( $ch === "\n" ) {
				// The "\n" of a "\r\n" pair was already counted at the "\r".
				if ( !$details['seenCR'] ) {
					$details['line']++;
				}
				$details['column'] = 1;
				$details['seenCR'] = false;
			} elseif ( $ch === "\r" || $ch === "\u{2028}" || $ch === "\u{2029}" ) {
				// PHP only understands the braced \u{...} escape; the unbraced
				// form is a literal backslash sequence that never matches.
				$details['line']++;
				$details['column'] = 1;
				$details['seenCR'] = true;
				// The Unicode separators are three bytes long: step over their
				// continuation bytes so the new line starts at column 1.
				$p += strlen( $ch ) - 1;
			} else {
				$details['column']++;
				$details['seenCR'] = false;
			}

			$p++;
		}

		$this->posDetailsCache[$pos] = $details;
		return $details;
	}

	/**
	 * Build a LocationRange for a pair of byte offsets.
	 *
	 * @param int $startPos
	 * @param int $endPos Moved back by one when it lies beyond the input length
	 * @return LocationRange
	 */
	protected function computeLocation( int $startPos, int $endPos ): LocationRange {
		if ( $endPos > $this->inputLength ) {
			$endPos--;
		}
		$start = $this->computePosDetails( $startPos );
		$end = $this->computePosDetails( $endPos );

		return new LocationRange(
			$startPos,
			$start['line'],
			$start['column'],
			$endPos,
			$end['line'],
			$end['column']
		);
	}

	/**
	 * Record that an expectation was not met at the current position.
	 *
	 * Only expectations for the furthest failure position are kept: a failure
	 * before it is ignored, a failure beyond it discards the earlier ones.
	 *
	 * @param int $expected Expectation index
	 */
	protected function fail( int $expected ) {
		if ( $this->currPos < $this->maxFailPos ) {
			return;
		}

		if ( $this->currPos > $this->maxFailPos ) {
			$this->maxFailPos = $this->currPos;
			$this->maxFailExpected = [];
		}

		$this->maxFailExpected[] = $expected;
	}

	/**
	 * Turn expectation indexes and inline expectation arrays into Expectation objects.
	 *
	 * Objects built from an index into $this->expectations are cached and reused.
	 *
	 * @param array<int|array{type:string,value?:?string,description:string}> $expected
	 * @return Expectation[]
	 */
	private function expandExpectations( array $expected ): array {
		$expanded = [];
		foreach ( $expected as $item ) {
			if ( !is_int( $item ) ) {
				$expanded[] = new Expectation( $item );
				continue;
			}
			if ( !isset( $this->expectationCache[$item] ) ) {
				$this->expectationCache[$item] = new Expectation( $this->expectations[$item] );
			}
			$expanded[] = $this->expectationCache[$item];
		}
		return $expanded;
	}

	/**
	 * Compose the default "Expected ... but ... found." message.
	 *
	 * @param array $expected Objects exposing a `description` property
	 * @param ?string $found Text found at the failure position; null or ''
	 *  is reported as "end of input"
	 * @return string
	 */
	private function buildMessage( array $expected, ?string $found ): string {
		$descriptions = [];
		foreach ( $expected as $info ) {
			$descriptions[] = $info->description;
		}

		$lastDesc = array_pop( $descriptions );
		$expectedDesc = $descriptions
			? implode( ', ', $descriptions ) . ' or ' . $lastDesc
			: $lastDesc;

		// Compare explicitly instead of relying on truthiness: the character
		// "0" is falsy in PHP and must not be reported as the end of input.
		$foundDesc = ( $found === null || $found === '' )
			? "end of input"
			: json_encode( $found );

		return "Expected " . $expectedDesc . " but " . $foundDesc . " found.";
	}

	/**
	 * Create a SyntaxError.
	 *
	 * @param ?string $message Message to use; when null one is generated
	 *  from $expected and $found
	 * @param ?array $expected Expectation indexes or inline expectation
	 *  arrays; sorted and de-duplicated before use
	 * @param ?string $found Text found at the failure position
	 * @param LocationRange $location
	 * @return SyntaxError
	 */
	protected function buildException(
		?string $message, ?array $expected, ?string $found, LocationRange $location
	): SyntaxError {
		$expandedExpected = [];
		if ( $expected !== null ) {
			sort( $expected );
			$expected = array_unique( $expected );
			$expandedExpected = $this->expandExpectations( $expected );
			usort(
				$expandedExpected,
				static fn ( $a, $b ) => Expectation::compare( $a, $b )
			);
		}

		return new SyntaxError(
			$message ?? $this->buildMessage( $expandedExpected, $found ),
			$expandedExpected,
			$found,
			$location
		);
	}

	/**
	 * Create the SyntaxError describing the furthest failure recorded by fail().
	 *
	 * @return SyntaxError
	 */
	protected function buildParseException(): SyntaxError {
		$char = self::charAt( $this->input, $this->maxFailPos );
		return $this->buildException(
			null,
			$this->maxFailExpected,
			// No character at the failure position means the input ended there.
			$char === '' ? null : $char,
			$this->computeLocation( $this->maxFailPos, $this->maxFailPos + 1 )
		);
	}

	/**
	 * Hook called at the end of initInternal(); does nothing by default.
	 */
	protected function initialize() {
	}

	/**
	 * Reset the parser state for a new input, then call initialize().
	 *
	 * @param string $input Text to parse
	 * @param array $options Parser options; 'tracer' supplies the tracer,
	 *  otherwise a DefaultTracer is created
	 */
	protected function initInternal( string $input, array $options ) {
		$this->input = $input;
		$this->inputLength = strlen( $input );
		$this->options = $options;
		$this->tracer = $options['tracer'] ?? new DefaultTracer;

		$this->currPos = 0;
		$this->savedPos = 0;
		$this->cache = [];
		// Offset 0 is line 1, column 1; computePosDetails() scans forward from here.
		$this->posDetailsCache = [ [ 'line' => 1, 'column' => 1, 'seenCR' => false ] ];
		$this->maxFailPos = 0;
		$this->maxFailExpected = [];

		$this->initialize();
	}

	/**
	 * Parse the given input.
	 *
	 * @param string $input
	 * @param array $options
	 * @return mixed
	 */
	abstract public function parse( string $input, array $options = [] );
}