Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
98.65% |
146 / 148 |
|
91.67% |
11 / 12 |
CRAP | |
0.00% |
0 / 1 |
| SimpleStyleParser | |
98.65% |
146 / 148 |
|
91.67% |
11 / 12 |
74 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| newFromParser | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
| parse | |
66.67% |
4 / 6 |
|
0.00% |
0 / 1 |
4.59 | |||
| parseObject | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
3 | |||
| normalizeAndSanitize | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
| updateMarkerSymbolCounters | |
100.00% |
17 / 17 |
|
100.00% |
1 / 1 |
9 | |||
| findFirstMarkerSymbol | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
7 | |||
| validateGeoJSON | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
7 | |||
| recursivelySanitizeAndParseWikitext | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
8 | |||
| recursivelyNormalizeExternalData | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
6 | |||
| normalizeExternalDataServices | |
100.00% |
41 / 41 |
|
100.00% |
1 / 1 |
14 | |||
| parseWikitextProperties | |
100.00% |
24 / 24 |
|
100.00% |
1 / 1 |
13 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace Kartographer; |
| 4 | |
| 5 | use InvalidArgumentException; |
| 6 | use JsonConfig\JCMapDataContent; |
| 7 | use JsonConfig\JCSingleton; |
| 8 | use JsonSchema\Validator; |
| 9 | use MediaWiki\Config\Config; |
| 10 | use MediaWiki\Json\FormatJson; |
| 11 | use MediaWiki\Parser\Parser; |
| 12 | use MediaWiki\Parser\PPFrame; |
| 13 | use StatusValue; |
| 14 | use stdClass; |
| 15 | |
| 16 | /** |
| 17 | * Parses and sanitizes text properties of GeoJSON/simplestyle by putting them through the MediaWiki |
| 18 | * wikitext parser. |
| 19 | * |
| 20 | * @license MIT |
| 21 | */ |
| 22 | class SimpleStyleParser { |
| 23 | |
| 24 | /** |
| 25 | * Maximum for marker-symbol="-number…" counters. See T141335 for discussion to possibly |
| 26 | * increase this to 199 or even 999. |
| 27 | */ |
| 28 | private const MAX_NUMERIC_COUNTER = 99; |
| 29 | public const WIKITEXT_PROPERTIES = [ 'title', 'description' ]; |
| 30 | |
| 31 | private readonly string $mapServer; |
| 32 | |
/**
 * Stores the wikitext parser and options, and reads the map server from the configuration.
 *
 * @param WikitextParser $parser Used to turn wikitext properties into their parsed form
 * @param Config $config The "KartographerMapServer" setting is read from here
 * @param array $options Set ['saveUnparsed' => true] to back up the original values of title
 *   and description in _origtitle and _origdescription. An optional 'uselang' entry is
 *   forwarded to the API URL built for ExternalData objects using the "page" service.
 */
public function __construct(
	private readonly WikitextParser $parser,
	Config $config,
	private readonly array $options = [],
) {
	// @fixme: More precise config?
	$mapServer = $config->get( 'KartographerMapServer' );
	$this->mapServer = $mapServer;
}
| 47 | |
/**
 * Convenience factory: wraps a core Parser (and an optional frame) in a
 * MediaWikiWikitextParser and hands it to the constructor.
 *
 * @param Parser $parser
 * @param Config $config
 * @param PPFrame|null $frame
 * @param array $options Passed to the constructor unchanged
 * @return self
 */
public static function newFromParser(
	Parser $parser,
	Config $config,
	?PPFrame $frame = null,
	array $options = []
): self {
	$wikitextParser = new MediaWikiWikitextParser( $parser, $frame );

	return new self( $wikitextParser, $config, $options );
}
| 60 | |
/**
 * Decodes a JSON string and runs the result through validation and sanitization.
 *
 * Falsy input (null, "", "0") and whitespace-only input is treated as "no data" and yields a
 * good status with an empty data array, without attempting to decode anything.
 *
 * @param string|null $input
 * @return StatusValue<array> with the value being [ 'data' => stdClass[], 'schema-errors' => array[] ]
 */
public function parse( ?string $input ): StatusValue {
	$hasContent = $input && trim( $input ) !== '';
	if ( !$hasContent ) {
		return StatusValue::newGood( [ 'data' => [] ] );
	}

	$decoded = FormatJson::parse( $input, FormatJson::TRY_FIXING | FormatJson::STRIP_COMMENTS );
	if ( $decoded->isOK() ) {
		// @phan-suppress-next-line PhanTypeMismatchPropertyByRef phan confused by generic and pass-by-ref
		return $this->parseObject( $decoded->value );
	}

	return StatusValue::newFatal( 'kartographer-error-json', $decoded->getMessage() );
}
| 80 | |
/**
 * Validates an already decoded GeoJSON value and, if it is valid, normalizes and sanitizes it.
 *
 * A single non-array value is wrapped in an array first; because the parameter is taken by
 * reference, the caller's variable is changed as well.
 *
 * @param array|stdClass &$data
 * @return StatusValue<array> The failed validation status, or the result of
 *   {@see normalizeAndSanitize}
 */
public function parseObject( &$data ): StatusValue {
	if ( !is_array( $data ) ) {
		$data = [ $data ];
	}

	$validation = $this->validateGeoJSON( $data );
	if ( !$validation->isOK() ) {
		return $validation;
	}

	return $this->normalizeAndSanitize( $data );
}
| 97 | |
/**
 * Canonicalizes ExternalData objects first, then strips "_"-prefixed properties and parses
 * the wikitext properties. The data is modified in place.
 *
 * @param stdClass[]|stdClass &$data
 * @return StatusValue<array> The status produced by the ExternalData normalization step
 */
public function normalizeAndSanitize( &$data ): StatusValue {
	$normalization = $this->recursivelyNormalizeExternalData( $data );
	$this->recursivelySanitizeAndParseWikitext( $data );

	return $normalization;
}
| 107 | |
/**
 * Replaces every "-number…" and "-letter…" marker symbol with the next value of the matching
 * counter (1, 2, 3, … or a, b, c, …), recursing into FeatureCollection and
 * GeometryCollection members.
 *
 * Each distinct marker-symbol string has its own counter. Once a counter reaches its maximum
 * (MAX_NUMERIC_COUNTER for numbers, 26 for letters) it stops advancing, and all further
 * markers of that kind get the maximum value.
 *
 * @param stdClass[] $values
 * @param array<string,int> &$counters Current counter per marker-symbol string, updated in place
 * @return array{string,stdClass}|null [ string $firstMarkerSymbol, stdClass $firstMarkerProperties ]
 *   for the first auto-numbered marker found, or null if there is none
 */
public static function updateMarkerSymbolCounters( array $values, array &$counters = [] ): ?array {
	$firstMarker = null;
	foreach ( $values as $item ) {
		// While the input should be validated, it's still arbitrary user input.
		if ( !( $item instanceof stdClass ) ) {
			continue;
		}

		$marker = $item->properties->{'marker-symbol'} ?? '';
		$isNumber = str_starts_with( $marker, '-number' );
		if ( $isNumber || str_starts_with( $marker, '-letter' ) ) {
			// numbers 1..99 or letters a..z
			$count = $counters[$marker] ?? 0;
			if ( $count < ( $isNumber ? self::MAX_NUMERIC_COUNTER : 26 ) ) {
				$counters[$marker] = ++$count;
			}
			$marker = $isNumber ? strval( $count ) : chr( ord( 'a' ) + $count - 1 );
			$item->properties->{'marker-symbol'} = $marker;
			// GeoJSON is in lowercase, but the letter is shown as uppercase
			$firstMarker ??= [ mb_strtoupper( $marker ), $item->properties ];
		}

		// Recurse into FeatureCollection and GeometryCollection
		$features = $item->features ?? $item->geometries ?? null;
		if ( $features ) {
			// The recursion must always run, because it rewrites the nested markers and
			// advances the counters. Writing "$firstMarker ??= self::update…()" would
			// short-circuit and skip it as soon as a first marker is known.
			$nestedFirstMarker = self::updateMarkerSymbolCounters( $features, $counters );
			$firstMarker ??= $nestedFirstMarker;
		}
	}
	return $firstMarker;
}
| 143 | |
/**
 * Depth-first search for the first "-number…" or "-letter…" marker symbol, without modifying
 * anything.
 *
 * @param stdClass[] $values
 * @return array{string,stdClass}|null Same as {@see updateMarkerSymbolCounters}, but with the
 *  $firstMarkerSymbol name not updated
 */
public static function findFirstMarkerSymbol( array $values ): ?array {
	foreach ( $values as $candidate ) {
		// While the input should be validated, it's still arbitrary user input.
		if ( $candidate instanceof stdClass ) {
			$symbol = $candidate->properties->{'marker-symbol'} ?? '';
			$isCounter = str_starts_with( $symbol, '-number' ) ||
				str_starts_with( $symbol, '-letter' );
			if ( $isCounter ) {
				return [ $symbol, $candidate->properties ];
			}

			// Recurse into FeatureCollection and GeometryCollection
			$children = $candidate->features ?? $candidate->geometries ?? null;
			$nested = $children ? self::findFirstMarkerSymbol( $children ) : null;
			if ( $nested ) {
				return $nested;
			}
		}
	}

	return null;
}
| 173 | |
/**
 * Checks that every top-level element is an object with a non-empty string "type", then
 * validates the whole array against the bundled GeoJSON JSON schema.
 *
 * @param stdClass[] $data
 * @return StatusValue Good if valid. On schema failure the status is fatal and its value is
 *   [ 'schema-errors' => array[] ].
 */
private function validateGeoJSON( array $data ): StatusValue {
	// Basic top-level validation. The JSON schema validation below does this again, but gives
	// terrible, very hard to understand error messages.
	foreach ( $data as $entry ) {
		if ( !( $entry instanceof stdClass ) ) {
			return StatusValue::newFatal( 'kartographer-error-json-object' );
		}
		$type = $entry->type ?? null;
		if ( !is_string( $type ) || !$type ) {
			return StatusValue::newFatal( 'kartographer-error-json-type' );
		}
	}

	$schema = (object)[ '$ref' => 'file://' . dirname( __DIR__ ) . '/schemas/geojson.json' ];
	$validator = new Validator();
	$validator->check( $data, $schema );

	if ( $validator->isValid() ) {
		return StatusValue::newGood();
	}

	$schemaErrors = $validator->getErrors( Validator::ERROR_DOCUMENT_VALIDATION );
	$failure = StatusValue::newFatal( 'kartographer-error-bad_data' );
	$failure->setResult( false, [ 'schema-errors' => $schemaErrors ] );
	return $failure;
}
| 203 | |
/**
 * Performs recursive sanitization.
 * Does not attempt to be smart, just recurses through everything that can be dangerous even
 * if not a valid GeoJSON.
 *
 * For every object encountered, properties whose name starts with "_" are removed, all other
 * properties are visited recursively, and finally the wikitext properties of an object-valued
 * "properties" member are parsed.
 *
 * @param stdClass[]|stdClass &$json Modified in place
 */
private function recursivelySanitizeAndParseWikitext( &$json ): void {
	if ( is_array( $json ) ) {
		foreach ( $json as &$element ) {
			$this->recursivelySanitizeAndParseWikitext( $element );
		}
		// Break the reference to the last element, same as recursivelyNormalizeExternalData does
		unset( $element );
	} elseif ( is_object( $json ) ) {
		foreach ( array_keys( get_object_vars( $json ) ) as $prop ) {
			// https://phabricator.wikimedia.org/T134719
			if ( str_starts_with( $prop, '_' ) ) {
				unset( $json->$prop );
			} else {
				$this->recursivelySanitizeAndParseWikitext( $json->$prop );
			}
		}

		if ( isset( $json->properties ) && is_object( $json->properties ) ) {
			$this->parseWikitextProperties( $json->properties );
		}
	}
}
| 231 | |
/**
 * Walks nested arrays and canonicalizes every object whose type is "ExternalData". Objects
 * of any other type are left alone and are not descended into.
 *
 * @param stdClass[]|stdClass &$json Modified in place
 * @return StatusValue<array> Merged status of all normalizations, with the value being
 *   [ 'data' => $json ]
 */
private function recursivelyNormalizeExternalData( &$json ): StatusValue {
	$result = StatusValue::newGood();

	if ( is_array( $json ) ) {
		// Address the elements by key so each one is passed on by reference
		foreach ( array_keys( $json ) as $key ) {
			$result->merge( $this->recursivelyNormalizeExternalData( $json[$key] ) );
		}
	} elseif ( is_object( $json ) && ( $json->type ?? null ) === 'ExternalData' ) {
		$result->merge( $this->normalizeExternalDataServices( $json ) );
	}

	$result->value = [ 'data' => $json ];
	return $result;
}
| 250 | |
/**
 * Canonicalizes an ExternalData object
 *
 * The object is replaced by a fresh one that only carries "type", "service", the computed
 * "url" and, for the geo services, the original "properties". On failure the object is left
 * untouched.
 *
 * @param stdClass &$object Replaced in place on success
 * @return StatusValue Fatal if a "page" service title is not a valid map data page
 * @throws InvalidArgumentException If the service name is not one of the known services
 */
private function normalizeExternalDataServices( stdClass &$object ): StatusValue {
	$service = $object->service ?? null;
	$normalized = new stdClass();
	$normalized->type = 'ExternalData';
	$normalized->service = $service;

	switch ( $service ) {
		case 'geoshape':
		case 'geopoint':
		case 'geoline':
		case 'geomask':
			$params = [ 'getgeojson' => 1 ];
			if ( isset( $object->ids ) ) {
				if ( is_array( $object->ids ) ) {
					$params['ids'] = implode( ',', $object->ids );
				} else {
					// Drop the whitespace around the commas of a comma-separated string
					$params['ids'] = preg_replace( '/\s*,\s*/', ',', $object->ids );
				}
			}
			if ( isset( $object->query ) ) {
				$params['query'] = $object->query;
			}
			// 'geomask' service is the same as inverted geoshape service
			// Kartotherian does not support it, request it as geoshape
			$endpoint = $service === 'geomask' ? 'geoshape' : $service;
			$normalized->url = $this->mapServer . '/' . $endpoint . '?' . wfArrayToCgi( $params );
			if ( isset( $object->properties ) ) {
				$normalized->properties = $object->properties;
			}
			break;

		case 'page':
			$jct = JCSingleton::parseTitle( $object->title, NS_DATA );
			$isMapData = $jct &&
				JCSingleton::getContentClass( $jct->getConfig()->model ) === JCMapDataContent::class;
			if ( !$isMapData ) {
				return StatusValue::newFatal( 'kartographer-error-title', $object->title );
			}
			$params = [
				'format' => 'json',
				'formatversion' => '2',
				'action' => 'jsondata',
				'title' => $jct->getText(),
				'uselang' => $this->options['uselang'] ?? null,
			];
			$normalized->url = wfScript( 'api' ) . '?' . wfArrayToCgi( $params );
			break;

		default:
			throw new InvalidArgumentException( "Unexpected service name '$service'" );
	}

	$object = $normalized;
	return StatusValue::newGood();
}
| 312 | |
/**
 * Runs the properties listed in WIKITEXT_PROPERTIES through the wikitext parser.
 *
 * A property may be a non-empty string, or an object mapping language codes to strings.
 * Anything else (including the empty string) is removed. With the 'saveUnparsed' option the
 * original text is kept in a sibling property prefixed with "_orig".
 *
 * HACK: this function supports JsonConfig-style localization that doesn't pass validation
 *
 * @param stdClass $properties Modified in place
 */
private function parseWikitextProperties( stdClass $properties ) {
	$keepOriginals = $this->options['saveUnparsed'] ?? false;

	foreach ( self::WIKITEXT_PROPERTIES as $name ) {
		if ( !property_exists( $properties, $name ) ) {
			continue;
		}

		$backupName = "_orig$name";
		$value = $properties->$name;

		if ( is_object( $value ) ) {
			if ( $keepOriginals ) {
				$properties->$backupName = (object)[];
			}
			// Iterate over a snapshot, so entries can be replaced and removed safely
			foreach ( get_object_vars( $value ) as $language => $text ) {
				if ( is_string( $text ) && $text !== '' ) {
					if ( $keepOriginals ) {
						$properties->$backupName->$language = $text;
					}
					$value->$language = $this->parser->parseWikitext( $text );
				} else {
					unset( $value->$language );
				}
			}

			// Delete empty localizations
			if ( !get_object_vars( $value ) ) {
				unset( $properties->$name );
				unset( $properties->$backupName );
			}
		} elseif ( is_string( $value ) && $value !== '' ) {
			if ( $keepOriginals ) {
				$properties->$backupName = $value;
			}
			$properties->$name = $this->parser->parseWikitext( $value );
		} else {
			// Dunno what the hell it is, ditch
			unset( $properties->$name );
		}
	}
}
| 359 | |
| 360 | } |