Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
98.64% |
145 / 147 |
|
91.67% |
11 / 12 |
CRAP | |
0.00% |
0 / 1 |
SimpleStyleParser | |
98.64% |
145 / 147 |
|
91.67% |
11 / 12 |
74 | |
0.00% |
0 / 1 |
newFromParser | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
__construct | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
parse | |
66.67% |
4 / 6 |
|
0.00% |
0 / 1 |
4.59 | |||
parseObject | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
3 | |||
normalizeAndSanitize | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
updateMarkerSymbolCounters | |
100.00% |
17 / 17 |
|
100.00% |
1 / 1 |
9 | |||
findFirstMarkerSymbol | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
7 | |||
validateGeoJSON | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
7 | |||
recursivelySanitizeAndParseWikitext | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
8 | |||
recursivelyNormalizeExternalData | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
6 | |||
normalizeExternalDataServices | |
100.00% |
40 / 40 |
|
100.00% |
1 / 1 |
14 | |||
parseWikitextProperties | |
100.00% |
24 / 24 |
|
100.00% |
1 / 1 |
13 |
1 | <?php |
2 | |
3 | namespace Kartographer; |
4 | |
5 | use InvalidArgumentException; |
6 | use JsonConfig\JCMapDataContent; |
7 | use JsonConfig\JCSingleton; |
8 | use JsonSchema\Validator; |
9 | use MediaWiki\Json\FormatJson; |
10 | use MediaWiki\MediaWikiServices; |
11 | use MediaWiki\Parser\Parser; |
12 | use MediaWiki\Parser\PPFrame; |
13 | use StatusValue; |
14 | use stdClass; |
15 | |
/**
 * Parses and sanitizes text properties of GeoJSON/simplestyle by putting them through the MediaWiki
 * wikitext parser.
 *
 * Besides wikitext handling, this class validates the incoming GeoJSON against the bundled JSON
 * schema, rewrites "ExternalData" objects into a canonical form that carries a ready-to-use URL,
 * and offers static helpers to resolve auto-numbered/auto-lettered marker symbols.
 *
 * @license MIT
 */
class SimpleStyleParser {

	/**
	 * Maximum for marker-symbol="-number…" counters. See T141335 for discussion to possibly
	 * increase this to 199 or even 999.
	 */
	private const MAX_NUMERIC_COUNTER = 99;
	/** Highest counter value for marker-symbol="-letter…", i.e. the letters a..z */
	private const MAX_LETTER_COUNTER = 26;
	/** Names of the GeoJSON "properties" members that are run through the wikitext parser */
	public const WIKITEXT_PROPERTIES = [ 'title', 'description' ];

	private WikitextParser $parser;
	private array $options;
	private string $mapServer;

	/**
	 * Convenience factory that wraps a core parser (and optional frame) in a
	 * MediaWikiWikitextParser and uses the default options.
	 *
	 * @param Parser $parser
	 * @param PPFrame|null $frame
	 * @return self
	 */
	public static function newFromParser( Parser $parser, ?PPFrame $frame = null ): self {
		return new self( new MediaWikiWikitextParser( $parser, $frame ) );
	}

	/**
	 * @param WikitextParser $parser
	 * @param array $options Set ['saveUnparsed' => true] to back up the original values of title
	 *                       and description in _origtitle and _origdescription
	 */
	public function __construct( WikitextParser $parser, array $options = [] ) {
		$this->parser = $parser;
		$this->options = $options;
		// @fixme: More precise config?
		$this->mapServer = MediaWikiServices::getInstance()
			->getMainConfig()
			->get( 'KartographerMapServer' );
	}

	/**
	 * Parses string into JSON and performs validation/sanitization
	 *
	 * Input that is null, falsy (this includes the string "0") or whitespace-only is treated as
	 * "no data" and yields a good status with an empty data list.
	 *
	 * @param string|null $input
	 * @return StatusValue with the value being [ 'data' => stdClass[], 'schema-errors' => array[] ]
	 */
	public function parse( ?string $input ): StatusValue {
		if ( !$input || trim( $input ) === '' ) {
			return StatusValue::newGood( [ 'data' => [] ] );
		}

		$status = FormatJson::parse( $input, FormatJson::TRY_FIXING | FormatJson::STRIP_COMMENTS );
		if ( !$status->isOK() ) {
			return StatusValue::newFatal( 'kartographer-error-json', $status->getMessage() );
		}

		return $this->parseObject( $status->value );
	}

	/**
	 * Validate and sanitize a parsed GeoJSON data object
	 *
	 * A single (non-array) value is wrapped in a list first, so $data is always an array when
	 * this method returns. Normalization and sanitization only run when validation succeeded.
	 *
	 * @param array|stdClass &$data
	 * @return StatusValue
	 */
	public function parseObject( &$data ): StatusValue {
		if ( !is_array( $data ) ) {
			$data = [ $data ];
		}
		$status = $this->validateGeoJSON( $data );
		if ( $status->isOK() ) {
			$status = $this->normalizeAndSanitize( $data );
		}
		return $status;
	}

	/**
	 * Canonicalizes ExternalData objects first, then strips "_"-prefixed properties and parses
	 * the wikitext properties. The returned status is the one from the ExternalData step.
	 *
	 * @param stdClass[]|stdClass &$data
	 * @return StatusValue
	 */
	public function normalizeAndSanitize( &$data ): StatusValue {
		$status = $this->recursivelyNormalizeExternalData( $data );
		$this->recursivelySanitizeAndParseWikitext( $data );
		return $status;
	}

	/**
	 * Replaces "-number…" and "-letter…" marker symbols with the next number or letter of the
	 * respective counter group. Once a group reaches its maximum the last value is repeated.
	 *
	 * @param stdClass[] $values
	 * @param array<string,int> &$counters
	 * @return array{string,stdClass}|null [ string $firstMarkerSymbol, stdClass $firstMarkerProperties ]
	 */
	public static function updateMarkerSymbolCounters( array $values, array &$counters = [] ): ?array {
		$firstMarker = null;
		foreach ( $values as $item ) {
			// While the input should be validated, it's still arbitrary user input.
			if ( !( $item instanceof stdClass ) ) {
				continue;
			}

			$marker = self::getMarkerSymbol( $item );
			$isNumber = str_starts_with( $marker, '-number' );
			if ( $isNumber || str_starts_with( $marker, '-letter' ) ) {
				// numbers 1..99 or letters a..z
				$count = $counters[$marker] ?? 0;
				$max = $isNumber ? self::MAX_NUMERIC_COUNTER : self::MAX_LETTER_COUNTER;
				if ( $count < $max ) {
					$counters[$marker] = ++$count;
				}
				$marker = $isNumber ? strval( $count ) : chr( ord( 'a' ) + $count - 1 );
				$item->properties->{'marker-symbol'} = $marker;
				// GeoJSON is in lowercase, but the letter is shown as uppercase
				$firstMarker ??= [ mb_strtoupper( $marker ), $item->properties ];
			}

			// Recurse into FeatureCollection and GeometryCollection
			$children = self::getChildItems( $item );
			if ( $children ) {
				$firstMarker ??= self::updateMarkerSymbolCounters( $children, $counters );
			}
		}
		return $firstMarker;
	}

	/**
	 * @param stdClass[] $values
	 * @return array{string,stdClass}|null Same as {@see updateMarkerSymbolCounters}, but with the
	 *  $firstMarkerSymbol name not updated
	 */
	public static function findFirstMarkerSymbol( array $values ): ?array {
		foreach ( $values as $item ) {
			// While the input should be validated, it's still arbitrary user input.
			if ( !( $item instanceof stdClass ) ) {
				continue;
			}

			$marker = self::getMarkerSymbol( $item );
			if ( str_starts_with( $marker, '-number' ) || str_starts_with( $marker, '-letter' ) ) {
				return [ $marker, $item->properties ];
			}

			// Recurse into FeatureCollection and GeometryCollection
			$children = self::getChildItems( $item );
			if ( $children ) {
				$found = self::findFirstMarkerSymbol( $children );
				if ( $found ) {
					return $found;
				}
			}
		}

		return null;
	}

	/**
	 * Reads the marker-symbol of an item, tolerating missing or malformed properties.
	 *
	 * @param stdClass $item
	 * @return string Empty string when there is no marker symbol or it is not a string
	 */
	private static function getMarkerSymbol( stdClass $item ): string {
		$marker = $item->properties->{'marker-symbol'} ?? '';
		// Anything but a string (e.g. an array) can't be a counter and would make
		// str_starts_with() throw a TypeError.
		return is_string( $marker ) ? $marker : '';
	}

	/**
	 * Returns the nested items of a FeatureCollection ("features") or GeometryCollection
	 * ("geometries"); "features" wins when both are set.
	 *
	 * @param stdClass $item
	 * @return array Empty when the item has no nested items or they are not given as a list
	 */
	private static function getChildItems( stdClass $item ): array {
		$children = $item->features ?? $item->geometries ?? null;
		// A non-array value can't be iterated and must not reach the array-typed recursion.
		return is_array( $children ) ? $children : [];
	}

	/**
	 * Checks that every top-level element is an object with a non-empty string "type", then
	 * validates the whole list against the bundled GeoJSON JSON schema.
	 *
	 * @param stdClass[] $data
	 * @return StatusValue Fatal on failure; for schema failures the value is
	 *  [ 'schema-errors' => array[] ]
	 */
	private function validateGeoJSON( array $data ): StatusValue {
		// Basic top-level validation. The JSON schema validation below does this again, but gives
		// terrible, very hard to understand error messages.
		foreach ( $data as $geoJSON ) {
			if ( !( $geoJSON instanceof stdClass ) ) {
				return StatusValue::newFatal( 'kartographer-error-json-object' );
			}
			if ( !isset( $geoJSON->type ) || !is_string( $geoJSON->type ) || !$geoJSON->type ) {
				return StatusValue::newFatal( 'kartographer-error-json-type' );
			}
		}

		$schema = (object)[ '$ref' => 'file://' . dirname( __DIR__ ) . '/schemas/geojson.json' ];
		$validator = new Validator();
		$validator->check( $data, $schema );

		if ( !$validator->isValid() ) {
			$errors = $validator->getErrors( Validator::ERROR_DOCUMENT_VALIDATION );
			$status = StatusValue::newFatal( 'kartographer-error-bad_data' );
			$status->setResult( false, [ 'schema-errors' => $errors ] );
			return $status;
		}

		return StatusValue::newGood();
	}

	/**
	 * Performs recursive sanitization.
	 * Does not attempt to be smart, just recurses through everything that can be dangerous even
	 * if not a valid GeoJSON.
	 *
	 * Every property whose name starts with "_" is removed, and each object-valued "properties"
	 * member gets its wikitext properties parsed.
	 *
	 * @param stdClass[]|stdClass &$json
	 */
	private function recursivelySanitizeAndParseWikitext( &$json ): void {
		if ( is_array( $json ) ) {
			foreach ( $json as &$element ) {
				$this->recursivelySanitizeAndParseWikitext( $element );
			}
			// Break the reference so the last array element can't be modified by accident
			unset( $element );
		} elseif ( is_object( $json ) ) {
			foreach ( array_keys( get_object_vars( $json ) ) as $prop ) {
				// https://phabricator.wikimedia.org/T134719
				if ( str_starts_with( $prop, '_' ) ) {
					unset( $json->$prop );
				} else {
					$this->recursivelySanitizeAndParseWikitext( $json->$prop );
				}
			}

			// Done after the loop above, so the _orig… backups created here are not stripped
			if ( isset( $json->properties ) && is_object( $json->properties ) ) {
				$this->parseWikitextProperties( $json->properties );
			}
		}
	}

	/**
	 * Walks through (nested) lists and canonicalizes every object of type "ExternalData".
	 * Other objects are left alone and not descended into.
	 *
	 * @param stdClass[]|stdClass &$json
	 * @return StatusValue with the value being [ 'data' => … ] holding the processed $json
	 */
	private function recursivelyNormalizeExternalData( &$json ): StatusValue {
		$status = StatusValue::newGood();
		if ( is_array( $json ) ) {
			foreach ( $json as &$element ) {
				$status->merge( $this->recursivelyNormalizeExternalData( $element ) );
			}
			unset( $element );
		} elseif ( is_object( $json ) && isset( $json->type ) && $json->type === 'ExternalData' ) {
			$status->merge( $this->normalizeExternalDataServices( $json ) );
		}
		$status->value = [ 'data' => $json ];

		return $status;
	}

	/**
	 * Canonicalizes an ExternalData object
	 *
	 * On success $object is replaced by a fresh object that only holds the type, the service,
	 * the URL to fetch the data from and, for the geo* services, the original properties.
	 *
	 * @param stdClass &$object
	 * @return StatusValue Fatal when a "page" service title is not a valid map data page
	 * @throws InvalidArgumentException When the service name is not one of the supported ones
	 */
	private function normalizeExternalDataServices( stdClass &$object ): StatusValue {
		$service = $object->service ?? null;
		$ret = (object)[
			'type' => 'ExternalData',
			'service' => $service,
		];

		switch ( $service ) {
			case 'geoshape':
			case 'geopoint':
			case 'geoline':
			case 'geomask':
				$query = [ 'getgeojson' => 1 ];
				if ( isset( $object->ids ) ) {
					// Accepts a list of ids as well as a comma separated string
					$query['ids'] =
						is_array( $object->ids ) ? implode( ',', $object->ids )
							: preg_replace( '/\s*,\s*/', ',', $object->ids );
				}
				if ( isset( $object->query ) ) {
					$query['query'] = $object->query;
				}
				$ret->url = $this->mapServer . '/' .
					// 'geomask' service is the same as inverted geoshape service
					// Kartotherian does not support it, request it as geoshape
					( $service === 'geomask' ? 'geoshape' : $service ) .
					'?' . wfArrayToCgi( $query );
				if ( isset( $object->properties ) ) {
					$ret->properties = $object->properties;
				}
				break;

			case 'page':
				$jct = JCSingleton::parseTitle( $object->title, NS_DATA );
				if ( !$jct || JCSingleton::getContentClass( $jct->getConfig()->model ) !==
					JCMapDataContent::class
				) {
					return StatusValue::newFatal( 'kartographer-error-title', $object->title );
				}
				$query = [
					'format' => 'json',
					'formatversion' => '2',
					'action' => 'jsondata',
					'title' => $jct->getText(),
				];
				$ret->url = wfScript( 'api' ) . '?' . wfArrayToCgi( $query );
				break;

			default:
				throw new InvalidArgumentException( "Unexpected service name '$service'" );
		}

		$object = $ret;
		return StatusValue::newGood();
	}

	/**
	 * HACK: this function supports JsonConfig-style localization that doesn't pass validation
	 *
	 * Each wikitext property is either a non-empty string, which gets parsed, or an object
	 * mapping languages to strings, of which every non-empty string gets parsed and everything
	 * else is dropped. Properties of any other shape (including empty strings) are removed.
	 *
	 * @param stdClass $properties
	 */
	private function parseWikitextProperties( stdClass $properties ): void {
		$saveUnparsed = $this->options['saveUnparsed'] ?? false;

		foreach ( self::WIKITEXT_PROPERTIES as $prop ) {
			if ( !property_exists( $properties, $prop ) ) {
				continue;
			}

			$origProp = "_orig$prop";
			$property = &$properties->$prop;

			if ( is_string( $property ) && $property !== '' ) {
				if ( $saveUnparsed ) {
					$properties->$origProp = $property;
				}
				$property = $this->parser->parseWikitext( $property );
			} elseif ( is_object( $property ) ) {
				if ( $saveUnparsed ) {
					$properties->$origProp = (object)[];
				}
				foreach ( $property as $language => &$text ) {
					if ( !is_string( $text ) || $text === '' ) {
						unset( $property->$language );
					} else {
						if ( $saveUnparsed ) {
							$properties->$origProp->$language = $text;
						}
						$text = $this->parser->parseWikitext( $text );
					}
				}
				unset( $text );

				// Delete empty localizations
				if ( !get_object_vars( $property ) ) {
					unset( $properties->$prop );
					unset( $properties->$origProp );
				}
			} else {
				// Dunno what the hell it is, ditch
				unset( $properties->$prop );
			}
		}
	}

}