Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 102 |
|
0.00% |
0 / 9 |
CRAP | |
0.00% |
0 / 1 |
PgnGameParser | |
0.00% |
0 / 102 |
|
0.00% |
0 / 9 |
1190 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getParsedData | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
getMetadata | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
42 | |||
getMetadataKeyAndValue | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
2 | |||
mergeAdjacentComments | |
0.00% |
0 / 25 |
|
0.00% |
0 / 1 |
110 | |||
getMoves | |
0.00% |
0 / 33 |
|
0.00% |
0 / 1 |
110 | |||
getMovesAndComments | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
getMovesAndVariationFromString | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
getMoveString | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 |
1 | <?php |
2 | /** |
3 | * This file is a part of ChessBrowser. |
4 | * |
5 | * ChessBrowser is free software: you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation, either version 3 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License |
16 | * along with this program. If not, see <https://www.gnu.org/licenses/>. |
17 | * |
18 | * This file is a part of PgnParser |
19 | * |
20 | * PgnParser is free software: you can redistribute it and/or modify |
21 | * it under the terms of the GNU Lesser General Public License as published by |
22 | * the Free Software Foundation, either version 3 of the License, or |
23 | * (at your option) any later version. |
24 | * |
25 | * This program is distributed in the hope that it will be useful, |
26 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
27 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
28 | * GNU Lesser General Public License for more details. |
29 | * |
30 | * You should have received a copy of the GNU Lesser General Public License |
31 | * along with this program. If not, see <https://www.gnu.org/licenses/>. |
32 | * |
33 | * @file PgnGameParser |
34 | * @ingroup ChessBrowser |
35 | * @author Alf Magne Kalleland |
36 | */ |
37 | |
38 | namespace MediaWiki\Extension\ChessBrowser\PgnParser; |
39 | |
/**
 * Parser for the PGN text of a single chess game.
 *
 * The tag-pair section ("[Key "Value"]" lines) is turned into metadata and
 * the movetext section is tokenised into moves, variations and comments that
 * are fed to a MoveBuilder.
 */
class PgnGameParser {
	/** @var string PGN text of the game, trimmed by the constructor */
	private $pgnGame;
	/** @var string FEN used when the PGN carries no FEN tag */
	private $defaultFen = 'rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1';
	/**
	 * @var string[] Lower-cased tag names that are stored as top-level keys
	 *  of the parsed data rather than under ChessJson::GAME_METADATA
	 */
	private $specialMetadata = [
		'event',
		'site',
		'white',
		'black',
		'result',
		'plycount',
		'eco',
		'fen',
		'timecontrol',
		'round',
		'date',
		'annotator',
		'termination'
	];

	/**
	 * Set the parser's pgn
	 *
	 * @param string $pgnGame PGN text of one game; surrounding whitespace is removed
	 */
	public function __construct( $pgnGame ) {
		$this->pgnGame = trim( $pgnGame );
	}

	/**
	 * Get the parsed data
	 *
	 * Combines the metadata from the tag pairs with the result of getMoves().
	 *
	 * NOTE(review): ChessJson::MOVE_MOVES receives element 0 of the getMoves()
	 * result while ChessJson::MOVE_COMMENT receives the whole result. That looks
	 * like a missing index (possibly [1]) - confirm against
	 * MoveBuilder::getMoves() before changing it; behaviour is kept as is here.
	 *
	 * @return array
	 */
	public function getParsedData() {
		$gameData = $this->getMetadata();
		$moveReferences = $this->getMoves();
		$gameData[ChessJson::MOVE_MOVES] = $moveReferences[0];
		$gameData[ChessJson::MOVE_COMMENT] = $moveReferences;
		return $gameData;
	}

	/**
	 * Get the metadata
	 *
	 * Every line that starts with '[' and ends with ']' is treated as a tag
	 * pair. Tags listed in $specialMetadata become top-level keys; all other
	 * tags are collected under ChessJson::GAME_METADATA. If no FEN was set,
	 * the default start position is used.
	 *
	 * @return array
	 */
	private function getMetadata(): array {
		$ret = [
			ChessJson::GAME_METADATA => []
		];
		// TODO set lastmoves property by reading last 3-4 moves in moves array
		foreach ( explode( "\n", $this->pgnGame ) as $line ) {
			// trim() also removes a trailing "\r" from CRLF input
			$line = trim( $line );
			if ( $line === '' || $line[0] !== '[' || substr( $line, -1 ) !== ']' ) {
				continue;
			}

			$metadata = $this->getMetadataKeyAndValue( $line );
			// Keys are always strings, so compare strictly
			if ( in_array( $metadata['key'], $this->specialMetadata, true ) ) {
				$ret[$metadata['key']] = $metadata['value'];
			} else {
				$ret[ChessJson::GAME_METADATA][$metadata['key']] = $metadata['value'];
			}
		}

		if ( !isset( $ret[ChessJson::FEN] ) ) {
			$ret[ChessJson::FEN] = $this->defaultFen;
		}

		return $ret;
	}

	/**
	 * Get the metadata key and value from a string
	 *
	 * All square brackets and double quotes are removed from the line (including
	 * any that appear inside the value); the first space-separated token becomes
	 * the lower-cased key and the remaining tokens, re-joined with single spaces,
	 * become the value.
	 *
	 * @param string $metadataString A tag pair line such as `[Event "Name"]`
	 * @return array Array with the elements 'key' and 'value'
	 */
	private function getMetadataKeyAndValue( $metadataString ): array {
		$metadataString = preg_replace( "/[\[\]]/s", "", $metadataString );
		$metadataString = str_replace( '"', '', $metadataString );
		$tokens = explode( " ", $metadataString );

		$key = array_shift( $tokens );
		return [
			'key' => strtolower( $key ),
			'value' => implode( " ", $tokens )
		];
	}

	/**
	 * Determine how many elements are part of the comment
	 *
	 * getMovesAndComments() splits the PGN move string on the special
	 * characters `{`, `}`, `;`, and `\n`. These characters only *sometimes*
	 * delimit a comment. From the PGN Standard section 5:
	 *
	 * > Brace comments do not nest; a left brace character appearing in a brace
	 * > comment loses its special meaning and is ignored. A semicolon appearing
	 * > inside of a brace comment loses its special meaning and is ignored.
	 * > Braces appearing inside of a semicolon comments lose their special meaning
	 * > and are ignored.
	 *
	 * A single comment can therefore span several elements of $moveStringParts.
	 * This method is called by getMoves() with $bufferPos pointing at the `{` or
	 * `;` that opens the comment. The first loop iteration sets the context flag
	 * for that opener; the flags then decide which later delimiters have lost
	 * their meaning. When the terminator matching the context is reached, the
	 * offset to advance the main buffer by is returned.
	 *
	 * See T363230
	 *
	 * @param array $moveStringParts Output of getMovesAndComments()
	 * @param int $bufferPos Index of where in $moveStringParts the comment starts
	 * @return int Number of elements following $bufferPos that belong to the comment
	 */
	private function mergeAdjacentComments( array $moveStringParts, int $bufferPos ): int {
		// Context flags
		$inBraceComment = false;
		$inEOLComment = false;

		$endIdx = count( $moveStringParts ) - $bufferPos;
		for ( $idx = 0; $idx < $endIdx; $idx++ ) {
			$part = $moveStringParts[ $bufferPos + $idx ];

			/**
			 * The element following a comment start character, or a '}' that
			 * lost its special meaning, is always comment text (the split
			 * inserts a string between any two delimiters), so $idx is bumped
			 * past it instead of inspecting it.
			 */
			switch ( $part ) {
				case '{':
					// '{' only opens a brace comment outside an EOL comment
					if ( !$inEOLComment ) {
						$inBraceComment = true;
					}
					// Skip past following comment string
					$idx++;
					break;
				case ';':
					// ';' only opens an EOL comment outside a brace comment
					if ( !$inBraceComment ) {
						$inEOLComment = true;
					}
					// Skip past following comment string
					$idx++;
					break;
				case '}':
					// The first '}' always ends a brace comment
					if ( $inBraceComment ) {
						// Don't include the '}' itself in the buffer increment
						return $idx - 1;
					}
					// '}' in an EOL comment has no special meaning;
					// skip past following comment string
					$idx++;
					break;
				case "\n":
					// "\n" always ends an EOL comment
					if ( $inEOLComment ) {
						// Include the newline in the buffer increment, it gets
						// removed later as whitespace
						return $idx;
					}
					break;
			}
		}
		// Reached EOF so return a buffer increment past EOF
		return $idx + 1;
	}

	/**
	 * Process tokens in the move string
	 *
	 * Walks the parts returned by getMovesAndComments(). Comment openers hand
	 * the following comment text to the MoveBuilder; everything else is split
	 * into moves and variation markers.
	 *
	 * @return array Whatever MoveBuilder::getMoves() returns
	 */
	private function getMoves() {
		$moveBuilder = new MoveBuilder();

		$moveStringParts = $this->getMovesAndComments();
		$partCount = count( $moveStringParts );
		for ( $bufferPos = 0; $bufferPos < $partCount; $bufferPos++ ) {
			$part = trim( $moveStringParts[$bufferPos] );

			if ( $part === '{' || $part === ';' ) {
				$commentBufferIncrement = $this->mergeAdjacentComments(
					$moveStringParts,
					$bufferPos
				);
				$comment = implode( '', array_slice(
					$moveStringParts,
					$bufferPos + 1,
					$commentBufferIncrement
				) );
				if ( $bufferPos === 0 ) {
					$moveBuilder->addCommentBeforeFirstMove( $comment );
				} else {
					$moveBuilder->addComment( $comment );
				}
				// Jump over the elements consumed by the comment
				$bufferPos += $commentBufferIncrement;
				continue;
			}

			foreach ( $this->getMovesAndVariationFromString( $part ) as $token ) {
				if ( $token === '(' ) {
					$moveBuilder->startVariation();
				} elseif ( $token === ')' ) {
					$moveBuilder->endVariation();
				} else {
					$moveBuilder->addMoves( $token );
				}
			}
		}

		return $moveBuilder->getMoves();
	}

	/**
	 * Split the move string based on comment indicators
	 *
	 * The move string is split on the PGN comment characters `{`, `}`, `;`, and
	 * `\n`, and the delimiters themselves are kept in the result to aid later
	 * parsing. A string like:
	 *
	 * ```
	 * "e4 e5 { King's pawn opening } Nf3 ; Interesting! {Not really}\nNc6 ; A common response"
	 * ```
	 *
	 * is split into the following sequence (text elements keep their surrounding
	 * whitespace; it is omitted here for readability):
	 * ```
	 * [
	 *     "e4 e5", "{", "King's pawn opening", "}", "Nf3", ";", "Interesting!",
	 *     "{", "Not really", "}", "", "\n", "Nc6", ";", "A common response"
	 * ]
	 * ```
	 *
	 * Adjacent delimiters (`}` and `\n` above) are separated by an empty string.
	 * A leading empty element, produced when the move string starts with a
	 * delimiter, is dropped so that a comment before the first move sits at
	 * index 0.
	 *
	 * @return array
	 */
	private function getMovesAndComments(): array {
		$moveSectionParts = preg_split( "/({|}|;|\n)/s", $this->getMoveString(), 0, PREG_SPLIT_DELIM_CAPTURE );
		if ( $moveSectionParts === false ) {
			// PCRE failure: treat as a game without movetext
			return [];
		}
		// Compare against '' explicitly so a leading "0" is not discarded
		if ( $moveSectionParts[0] === '' ) {
			$moveSectionParts = array_slice( $moveSectionParts, 1 );
		}
		return $moveSectionParts;
	}

	/**
	 * Get the moves and variations from a string
	 *
	 * Move numbers ("12.") and the " .." left over from black move numbers are
	 * removed, runs of spaces are collapsed to one space, and the result is
	 * split on parentheses, which are kept as separate elements.
	 *
	 * TODO make static
	 *
	 * @param string $string
	 * @return array
	 */
	private function getMovesAndVariationFromString( $string ): array {
		// Leading space lets the " .." replacement match at the very start
		$string = " " . $string;

		$string = preg_replace( "/\d+?\./s", "", $string );
		$string = str_replace( " ..", "", $string );
		// Removing move numbers leaves repeated spaces behind; collapse them
		$string = preg_replace( "/ {2,}/", " ", $string );
		$string = trim( $string );

		$parts = preg_split( "/(\(|\))/s", $string, 0, PREG_SPLIT_DELIM_CAPTURE );
		return $parts === false ? [ $string ] : $parts;
	}

	/**
	 * Get a move string
	 *
	 * The movetext is everything after the first ']' that is followed by a
	 * blank line. Both LF and CRLF line endings are accepted; CRLF pairs in the
	 * returned text are normalised to LF so later splitting on "\n" works.
	 *
	 * @return string The movetext, or an empty string if none was found
	 */
	private function getMoveString(): string {
		// Limit of 2: a later "]" + blank line inside the movetext (e.g. in a
		// comment) must not cut the moves short
		$tokens = preg_split( "/\]\r?\n\r?\n/", $this->pgnGame, 2 );
		if ( $tokens === false || !isset( $tokens[1] ) ) {
			return "";
		}
		$moveText = str_replace( "\r\n", "\n", $tokens[1] );
		// \n is meaningful so don't trim them
		return trim( $moveText, " \r\t\v\x00" );
	}
}