Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 407 |
|
0.00% |
0 / 13 |
CRAP | |
0.00% |
0 / 1 |
| Test | |
0.00% |
0 / 407 |
|
0.00% |
0 / 13 |
20022 | |
0.00% |
0 / 1 |
| __construct | |
0.00% |
0 / 24 |
|
0.00% |
0 / 1 |
110 | |||
| __clone | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
| matchesFilter | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
30 | |||
| pageName | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
| computeTestModes | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
30 | |||
| applyManualChanges | |
0.00% |
0 / 117 |
|
0.00% |
0 / 1 |
420 | |||
| applyChanges | |
0.00% |
0 / 96 |
|
0.00% |
0 / 1 |
992 | |||
| isDuplicateChangeTree | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
| generateChanges | |
0.00% |
0 / 65 |
|
0.00% |
0 / 1 |
702 | |||
| testAllModes | |
0.00% |
0 / 38 |
|
0.00% |
0 / 1 |
342 | |||
| normalizeHTML | |
0.00% |
0 / 28 |
|
0.00% |
0 / 1 |
240 | |||
| normalizeKnownFailure | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| normalizeWT | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
| 1 | <?php |
| 2 | declare( strict_types = 1 ); |
| 3 | |
| 4 | namespace Wikimedia\Parsoid\ParserTests; |
| 5 | |
| 6 | use Error; |
| 7 | use Psr\Log\LogLevel; |
| 8 | use Wikimedia\Alea\Alea; |
| 9 | use Wikimedia\Assert\Assert; |
| 10 | use Wikimedia\Parsoid\DOM\Document; |
| 11 | use Wikimedia\Parsoid\DOM\Element; |
| 12 | use Wikimedia\Parsoid\DOM\Node; |
| 13 | use Wikimedia\Parsoid\Utils\ContentUtils; |
| 14 | use Wikimedia\Parsoid\Utils\DOMCompat; |
| 15 | use Wikimedia\Parsoid\Utils\DOMUtils; |
| 16 | use Wikimedia\Parsoid\Utils\PHPUtils; |
| 17 | use Wikimedia\Parsoid\Utils\Utils; |
| 18 | use Wikimedia\Parsoid\Utils\WTUtils; |
| 19 | |
| 20 | /** |
| 21 | * Represents a parser test |
| 22 | */ |
| 23 | class Test extends Item { |
| 24 | |
| 25 | // 'testAllModes' and 'TestRunner::runTest' assume that test modes are added |
| 26 | // in this order for caching to work properly (and even when test objects are cloned). |
| 27 | // This ordering is enforced in computeTestModes. |
| 28 | public const ALL_TEST_MODES = [ 'wt2html', 'wt2wt', 'html2html', 'html2wt', 'selser' ]; |
| 29 | |
| 30 | /* --- These are test properties from the test file --- */ |
| 31 | |
| 32 | /** This is the test name, not page title for the test */ |
| 33 | public ?string $testName = null; |
| 34 | |
| 35 | /** @var array<string,string|bool|array> */ |
| 36 | public array $options = []; |
| 37 | |
| 38 | /** @var array<string,string|array> */ |
| 39 | public array $config = []; |
| 40 | |
| 41 | /** @var array<string,string> */ |
| 42 | public array $sections = []; |
| 43 | |
| 44 | /** @var array Known failures for this test, indexed by testing mode. */ |
| 45 | public array $knownFailures = []; |
| 46 | |
| 47 | /* --- These next are computed based on an ordered list of preferred |
| 48 | * section keys --- */ |
| 49 | |
| 50 | public ?string $wikitext = null; |
| 51 | |
| 52 | public ?string $parsoidHtml = null; |
| 53 | |
| 54 | public ?string $legacyHtml = null; |
| 55 | |
| 56 | /* --- The rest below are computed by Parsoid while running tests -- */ |
| 57 | |
| 58 | private ?string $pageName = null; |
| 59 | |
| 60 | private ?int $pageNs = null; |
| 61 | |
| 62 | /** @var list */ |
| 63 | public array $selserChangeTrees = []; |
| 64 | |
| 65 | /** @var ?list */ |
| 66 | public ?array $changetree = null; |
| 67 | |
| 68 | public bool $duplicateChange = false; |
| 69 | |
| 70 | public ?string $seed = null; |
| 71 | |
| 72 | public ?string $resultWT = null; |
| 73 | |
| 74 | public ?bool $wt2wtPassed = null; |
| 75 | |
| 76 | public ?string $wt2wtResult = null; |
| 77 | |
| 78 | public ?string $selser = null; |
| 79 | |
| 80 | public ?string $changedHTMLStr = null; |
| 81 | |
| 82 | public ?string $cachedBODYstr = null; |
| 83 | |
| 84 | public ?string $cachedWTstr = null; |
| 85 | |
| 86 | public ?string $cachedNormalizedHTML = null; |
| 87 | |
| 88 | public array $time = []; |
| 89 | |
| 90 | private const DIRECT_KEYS = [ |
| 91 | 'type', |
| 92 | 'filename', |
| 93 | 'lineNumStart', |
| 94 | 'lineNumEnd', |
| 95 | 'testName', |
| 96 | 'options', |
| 97 | 'config', |
| 98 | ]; |
| 99 | private const WIKITEXT_KEYS = [ |
| 100 | 'wikitext', |
| 101 | # deprecated |
| 102 | 'input', |
| 103 | ]; |
| 104 | private const LEGACY_HTML_KEYS = [ |
| 105 | 'html/php', 'html/*', 'html', |
| 106 | # deprecated |
| 107 | 'result', |
| 108 | 'html/php+tidy', |
| 109 | 'html/*+tidy', |
| 110 | 'html+tidy', |
| 111 | ]; |
| 112 | private const PARSOID_HTML_KEYS = [ |
| 113 | 'html/parsoid', 'html/*', 'html', |
| 114 | # deprecated |
| 115 | 'result', |
| 116 | 'html/*+tidy', |
| 117 | 'html+tidy', |
| 118 | ]; |
| 119 | private const WARN_DEPRECATED_KEYS = [ |
| 120 | 'input', |
| 121 | 'result', |
| 122 | 'html/php+tidy', |
| 123 | 'html/*+tidy', |
| 124 | 'html+tidy', |
| 125 | 'html/php+untidy', |
| 126 | 'html+untidy', |
| 127 | ]; |
| 128 | |
/**
 * Build a test from the raw properties read out of a parser test file.
 *
 * Keys listed in DIRECT_KEYS are copied onto the same-named object
 * properties; every other key is recorded in $this->sections. The
 * wikitext, legacyHtml and parsoidHtml properties are then filled from
 * the first section present in their respective priority lists.
 *
 * @param array $testProperties key-value mapping of properties
 * @param array $knownFailures Known failures for this test, indexed by testing mode
 * @param ?string $comment Optional comment describing the test
 * @param ?callable $warnFunc Optional callback used to emit
 *   deprecation warnings.
 */
public function __construct(
	array $testProperties,
	array $knownFailures = [],
	?string $comment = null,
	?callable $warnFunc = null
) {
	parent::__construct( $testProperties, $comment );
	$this->knownFailures = $knownFailures;

	// Split the raw properties into direct properties and named sections.
	foreach ( $testProperties as $name => $val ) {
		if ( in_array( $name, self::DIRECT_KEYS, true ) ) {
			$this->$name = $val;
			continue;
		}
		if ( isset( $this->sections[$name] ) ) {
			$this->error( "Duplicate test section", $name );
		}
		$this->sections[$name] = $val;
	}

	// For each computed property, the first section found in its
	// priority list wins.
	$priorities = [
		'wikitext' => self::WIKITEXT_KEYS,
		'legacyHtml' => self::LEGACY_HTML_KEYS,
		'parsoidHtml' => self::PARSOID_HTML_KEYS,
	];
	foreach ( $priorities as $property => $candidates ) {
		foreach ( $candidates as $section ) {
			if ( !isset( $this->sections[$section] ) ) {
				continue;
			}
			$this->$property = $this->sections[$section];
			break;
		}
	}

	// Deprecation warnings are only emitted when a callback was supplied.
	if ( !$warnFunc ) {
		return;
	}
	foreach ( self::WARN_DEPRECATED_KEYS as $key ) {
		if ( !isset( $this->sections[$key] ) ) {
			continue;
		}
		$warnFunc( $this->errorMsg(
			"Parser test section $key is deprecated"
		) );
	}
}
| 182 | |
/**
 * Copy the array-valued properties via Utils::cloneArray() so that a
 * cloned test gets its own copies rather than the originals.
 */
public function __clone() {
	$this->options = Utils::cloneArray( $this->options );
	$this->config = Utils::cloneArray( $this->config );
	$this->sections = Utils::cloneArray( $this->sections );
	$this->knownFailures = Utils::cloneArray( $this->knownFailures );
	$this->selserChangeTrees = Utils::cloneArray( $this->selserChangeTrees );
	$this->time = Utils::cloneArray( $this->time );

	// changetree is nullable, so only copy it when it has been set.
	if ( $this->changetree !== null ) {
		$this->changetree = Utils::cloneArray( $this->changetree );
	}
}
| 193 | |
/**
 * Check whether this test's name matches the given test filter.
 *
 * A filter with a non-empty 'regex' entry is matched with preg_match();
 * when 'raw' is also set the pattern is built as '/' . raw . '/',
 * otherwise the 'regex' entry is used as the pattern itself. A filter
 * with a non-empty 'string' entry is matched as a plain substring search
 * for the 'raw' entry. Anything else is treated as a trivial match.
 *
 * NOTE(review): the raw pattern is wrapped in '/' delimiters without
 * escaping, so a raw pattern containing an unescaped '/' will not compile.
 *
 * @param array $testFilter Test Filter as set in TestRunner
 * @return bool if test matches the filter
 */
public function matchesFilter( $testFilter ): bool {
	if ( !$testFilter ) {
		return true; // Trivial match
	}

	// $testName is nullable, and under strict_types preg_match()/strpos()
	// throw a TypeError when handed null, so normalise it to '' first.
	$name = $this->testName ?? '';

	if ( !empty( $testFilter['regex'] ) ) {
		$regex = isset( $testFilter['raw'] ) ?
			( '/' . $testFilter['raw'] . '/' ) :
			$testFilter['regex'];
		return (bool)preg_match( $regex, $name );
	}

	if ( !empty( $testFilter['string'] ) ) {
		return strpos( $name, $testFilter['raw'] ) !== false;
	}

	return true; // Trivial match because of a bad test filter
}
| 216 | |
/**
 * Return the page title to use for this test.
 *
 * The title comes from the 'title' test option. When that option is
 * missing or is not a string (options may also hold booleans or arrays),
 * the default 'Parser test' is used. The result is cached in
 * $this->pageName.
 *
 * @return string
 */
public function pageName(): string {
	if ( !$this->pageName ) {
		// Validate in a local first: $pageName is a ?string typed property,
		// so assigning a non-string option value to it directly would throw
		// a TypeError before any fallback could be applied.
		$title = $this->options['title'] ?? null;
		$this->pageName = is_string( $title ) ? $title : 'Parser test';
	}

	return $this->pageName;
}
| 227 | |
/**
 * Work out which test modes should actually run for this test, given the
 * modes the test runner is running with ($testRunnerModes) and the modes
 * the test itself has opted in to (the 'parsoid' => 'modes' option).
 *
 * @param array $testRunnerModes What test modes is the test runner running with?
 * @return array Modes to run, in ALL_TEST_MODES order (keys are those of
 *   ALL_TEST_MODES, so they may not be consecutive).
 */
public function computeTestModes( array $testRunnerModes ): array {
	// Intersecting *from* ALL_TEST_MODES keeps the modes in the order
	// specified there; caching in the presence of test cloning relies on
	// tests running in this order.
	$runnable = array_intersect( self::ALL_TEST_MODES, $testRunnerModes );

	$optedIn = $this->options['parsoid']['modes'] ?? null;
	if ( !$optedIn ) {
		// The test places no restriction on modes.
		return $runnable;
	}

	// Avoid filtering out the selser test: a test that opts in to wt2wt
	// also runs selser whenever the runner has selser enabled.
	if ( in_array( 'selser', $testRunnerModes, true ) &&
		!in_array( 'selser', $optedIn, true ) &&
		in_array( 'wt2wt', $optedIn, true )
	) {
		$optedIn[] = 'selser';
	}

	return array_intersect( $runnable, $optedIn );
}
| 258 | |
| 259 | // Random string used as selser comment content |
| 260 | public const STATIC_RANDOM_STRING = 'ahseeyooxooZ8Oon0boh'; |
| 261 | |
/**
 * Apply manually-specified changes, which are provided in a pseudo-jQuery
 * format.
 *
 * Each change is a list [x, y, z...] that is interpreted as $(x)[y](z...);
 * that is, ['fig', 'attr', 'width', '120'] is interpreted as
 * $('fig').attr('width', '120'). See http://api.jquery.com/ for
 * documentation of these methods. "contents" as the second entry applies
 * the method that follows it to the child nodes of the elements matched by
 * the selector, which is a good way to get at text and comment nodes.
 *
 * Processing stops at the first bad change; the error is printed in red
 * and then thrown.
 *
 * @param Document $doc Document whose body is modified in place.
 * @throws Error if a change is malformed, its selector matches no
 *   elements, or it names an unsupported mutator.
 */
public function applyManualChanges( Document $doc ) {
	$changes = $this->options['parsoid']['changes'] ?? [];

	// Shared implementation of 'after' and 'before': parse $html in a
	// context suited to $node's parent and move the resulting nodes into
	// that parent, in front of $ref (or at the end when $ref is null).
	$insertBeside = static function ( Node $node, string $html, ?Node $ref ): void {
		$parent = $node->parentNode;
		$parentName = DOMCompat::nodeName( $parent );
		if ( $parentName === 'tbody' ) {
			$tbl = $node->ownerDocument->createElement( 'table' );
			DOMCompat::setInnerHTML( $tbl, $html );
			// <tbody> is implicitly added when inner html is set to <tr>..</tr>
			DOMUtils::migrateChildren( $tbl->firstChild, $parent, $ref );
		} elseif ( $parentName === 'tr' ) {
			$tbl = $node->ownerDocument->createElement( 'table' );
			DOMCompat::setInnerHTML( $tbl, '<tbody><tr></tr></tbody>' );
			$tr = $tbl->firstChild->firstChild;
			'@phan-var Element $tr'; // @var Element $tr
			DOMCompat::setInnerHTML( $tr, $html );
			DOMUtils::migrateChildren( $tr, $parent, $ref );
		} else {
			$div = $node->ownerDocument->createElement( 'div' );
			DOMCompat::setInnerHTML( $div, $html );
			DOMUtils::migrateChildren( $div, $parent, $ref );
		}
	};

	// Supported mutators, keyed by their jQuery method name. Each receives
	// the target node followed by the remaining entries of the change.
	$jquery = [
		'after' => static function ( Node $node, string $html ) use ( $insertBeside ) {
			$insertBeside( $node, $html, $node->nextSibling );
		},
		'append' => static function ( Node $node, string $html ) {
			if ( DOMCompat::nodeName( $node ) === 'tr' ) {
				$tbl = $node->ownerDocument->createElement( 'table' );
				DOMCompat::setInnerHTML( $tbl, $html );
				// <tbody> is implicitly added when inner html is set to <tr>..</tr>
				DOMUtils::migrateChildren( $tbl->firstChild, $node );
			} else {
				$div = $node->ownerDocument->createElement( 'div' );
				DOMCompat::setInnerHTML( $div, $html );
				DOMUtils::migrateChildren( $div, $node );
			}
		},
		'attr' => static function ( Node $node, string $name, string $val ) {
			'@phan-var Element $node'; // @var Element $node
			$node->setAttribute( $name, $val );
		},
		'before' => static function ( Node $node, string $html ) use ( $insertBeside ) {
			$insertBeside( $node, $html, $node );
		},
		'removeAttr' => static function ( Node $node, string $name ) {
			'@phan-var Element $node'; // @var Element $node
			$node->removeAttribute( $name );
		},
		'removeClass' => static function ( Node $node, string $c ) {
			'@phan-var Element $node'; // @var Element $node
			DOMCompat::getClassList( $node )->remove( $c );
		},
		'addClass' => static function ( Node $node, string $c ) {
			'@phan-var Element $node'; // @var Element $node
			DOMCompat::getClassList( $node )->add( $c );
		},
		'text' => static function ( Node $node, string $t ) {
			$node->textContent = $t;
		},
		'html' => static function ( Node $node, string $h ) {
			'@phan-var Element $node'; // @var Element $node
			DOMCompat::setInnerHTML( $node, $h );
		},
		'remove' => static function ( Node $node, ?string $optSelector = null ) {
			// jquery lets us specify an optional selector to further
			// restrict the removed elements.
			// text nodes don't have the "querySelectorAll" method, so
			// just include them by default (jquery excludes them, which
			// is less useful)
			if ( !$optSelector ) {
				$what = [ $node ];
			} elseif ( !( $node instanceof Element ) ) {
				$what = [ $node ];/* text node hack! */
			} else {
				'@phan-var Element $node'; // @var Element $node
				$what = DOMCompat::querySelectorAll( $node, $optSelector );
			}
			foreach ( $what as $node ) {
				if ( $node->parentNode ) {
					$node->parentNode->removeChild( $node );
				}
			}
		},
		'empty' => static function ( Node $node ) {
			'@phan-var Element $node'; // @var Element $node
			DOMCompat::replaceChildren( $node );
		},
		'wrap' => static function ( Node $node, string $w ) {
			$frag = $node->ownerDocument->createElement( 'div' );
			DOMCompat::setInnerHTML( $frag, $w );
			$first = $frag->firstChild;
			$node->parentNode->replaceChild( $first, $node );
			// Descend to the innermost first child of the wrapper markup
			// and place the original node there.
			while ( $first->firstChild ) {
				$first = $first->firstChild;
			}
			$first->appendChild( $node );
		}
	];

	$body = DOMCompat::getBody( $doc );
	$err = null;

	foreach ( $changes as $change ) {
		// A change needs at least a selector and a method. json_encode()
		// is used for the message because a change is an array and cannot
		// be concatenated to a string directly.
		if ( !is_array( $change ) || count( $change ) < 2 ) {
			$err = new Error( 'bad change: ' . json_encode( $change ) );
			break;
		}
		// use document.querySelectorAll as a poor man's $(...)
		$els = PHPUtils::iterable_to_array(
			DOMCompat::querySelectorAll( $body, $change[0] )
		);
		if ( !count( $els ) ) {
			$err = new Error( $change[0] .
				' did not match any elements: ' . DOMCompat::getOuterHTML( $body ) );
			break;
		}
		if ( $change[1] === 'contents' ) {
			// Drop the selector so the method name is at index 1 again,
			// and retarget the change at the children of the matches.
			$change = array_slice( $change, 1 );
			$acc = [];
			foreach ( $els as $el ) {
				PHPUtils::pushArray( $acc, iterator_to_array( $el->childNodes ) );
			}
			$els = $acc;
		}
		// After a 'contents' shift there may be no method left at index 1.
		$method = $change[1] ?? null;
		$fn = is_string( $method ) ? ( $jquery[$method] ?? null ) : null;
		if ( !$fn ) {
			$err = new Error( 'bad mutator function: ' .
				( is_string( $method ) ? $method : json_encode( $method ) ) );
			break;
		}
		foreach ( $els as $el ) {
			$fn( $el, ...array_slice( $change, 2 ) );
		}
	}

	if ( $err ) {
		print TestUtils::colorString( (string)$err, "red" ) . "\n";
		throw $err;
	}
}
| 436 | |
/**
 * Make changes to a DOM in order to run a selser test on it.
 *
 * The change list mirrors the body's child list: entry i applies to child
 * i. A nested array recurses into that child; a scalar selects an edit:
 * 0 = no change, 1 = modify the node's attributes, 2 = insert a new node
 * before it, 3 = delete it, 4 = insert a new node and delete it. The
 * special change list [ 5 ] appends a comment with known content to the
 * body instead.
 *
 * The change list is also stored in $this->changetree.
 *
 * @param array $dumpOpts May hold a 'logger' and a 'dom:post-changes' flag;
 *   when both are set the DOM is dumped before and after the edits.
 * @param Document $doc Document whose body is modified in place.
 * @param array $changelist
 */
public function applyChanges( array $dumpOpts, Document $doc, array $changelist ) {
	$logger = $dumpOpts['logger'] ?? null;
	$dumpDom = $logger && ( $dumpOpts['dom:post-changes'] ?? false );

	// Seed the random-number generator based on the item title and changelist
	$alea = new Alea( ( json_encode( $changelist ) ) . ( $this->testName ?? '' ) );

	// Keep the changes in the test object
	// to check for duplicates while building tasks
	$this->changetree = $changelist;

	// Produces the next pseudo-random string from the seeded generator.
	$randomString = static function () use ( &$alea ): string {
		return base_convert( (string)$alea->uint32(), 10, 36 );
	};

	// Inserts a freshly generated node immediately before $target. A text
	// node is used where that is safe, wrapped in a suitable element where
	// a bare text node would be buggy, and a comment is used in fosterable
	// positions. In every case dom-diff should register a new node.
	$insertNewNode = static function ( Node $target ) use ( $randomString ): void {
		$content = $randomString();
		$doc = $target->ownerDocument;

		// Don't separate legacy IDs from their H? node.
		if ( WTUtils::isFallbackIdSpan( $target ) ) {
			$target = $target->nextSibling ?? $target->parentNode;
		}

		// Pick the element (if any) the new text has to be wrapped in,
		// based on the container it is going into.
		$parentName = DOMCompat::nodeName( $target->parentNode );
		$wrapper = null;
		if ( $parentName === 'ol' || $parentName === 'ul' ) {
			$wrapper = 'li';
		} elseif ( $parentName === 'dl' ) {
			$wrapper = 'dd';
		} elseif ( $parentName === 'tr' ) {
			// Copy the cell type (TH or TD) of a neighbouring cell,
			// preferring the previous one; default to TD.
			$neighbour = DOMCompat::getPreviousElementSibling( $target )
				?? DOMCompat::getNextElementSibling( $target );
			$wrapper = $neighbour ? DOMCompat::nodeName( $neighbour ) : 'td';
		} elseif ( $parentName === 'body' ) {
			$wrapper = 'p';
		} elseif ( DOMUtils::isWikitextBlockNode( $target ) ) {
			// We're trying to determine if it is safe to place a bare
			// text node. If the target is a block, wrap the text node
			// we're putting beside it.
			$wrapper = 'p';
		}

		if ( DOMUtils::isFosterablePosition( $target ) && $parentName !== 'tr' ) {
			$inserted = $doc->createComment( $content );
		} elseif ( $wrapper ) {
			$inserted = $doc->createElement( $wrapper );
			$inserted->appendChild( $doc->createTextNode( $content ) );
		} else {
			$inserted = $doc->createTextNode( $content );
		}

		$target->parentNode->insertBefore( $inserted, $target );
	};

	$removeNode = static function ( Node $target ): void {
		$target->parentNode->removeChild( $target );
	};

	// Recursively applies a (sub)tree of changes to the children of $parent.
	$applyChangesInternal = static function ( Node $parent, array $changes ) use (
		&$applyChangesInternal, $removeNode, $insertNewNode,
		$randomString, $logger
	): void {
		if ( count( $parent->childNodes ) < count( $changes ) ) {
			throw new Error( "Error: more changes than nodes to apply them to!" );
		}

		// Snapshot the children since we are mutating them in the loop below
		$snapshot = iterator_to_array( $parent->childNodes, false );

		foreach ( $changes as $idx => $edit ) {
			$target = $snapshot[$idx];

			if ( is_array( $edit ) ) {
				$applyChangesInternal( $target, $edit );
				continue;
			}

			switch ( $edit ) {
				case 0:
					// No change
					break;

				case 1:
					// Change node wrapper
					// (sufficient to insert a random attr)
					if ( $target instanceof Element ) {
						$target->setAttribute( 'data-foobar', $randomString() );
					} elseif ( $logger ) {
						$logger->log(
							LogLevel::ERROR,
							'Buggy changetree. changetype 1 (modify attribute)' .
							' cannot be applied on text/comment nodes.'
						);
					}
					break;

				case 2:
					// Insert new node before child
					$insertNewNode( $target );
					break;

				case 3:
					// Delete tree rooted at child
					$removeNode( $target );
					break;

				case 4:
					// Change tree rooted at child
					$insertNewNode( $target );
					$removeNode( $target );
					break;
			}
		}
	};

	$body = DOMCompat::getBody( $doc );

	if ( $dumpDom ) {
		$logger->log( LogLevel::ERROR, "----- Original DOM -----" );
		$logger->log( LogLevel::ERROR, ContentUtils::dumpDOM( $body, '', [ 'quiet' => true ] ) );
	}

	if ( $this->changetree === [ 5 ] ) {
		// Hack so that we can work on the parent node rather than just the
		// children: Append a comment with known content. This is later
		// stripped from the output, and the result is compared to the
		// original wikitext rather than the non-selser wt2wt result.
		$body->appendChild( $doc->createComment( self::STATIC_RANDOM_STRING ) );
	} elseif ( $this->changetree !== [] ) {
		$applyChangesInternal( $body, $this->changetree );
	}

	if ( $dumpDom ) {
		$logger->log( LogLevel::ERROR, "----- Change Tree -----" );
		$logger->log( LogLevel::ERROR, json_encode( $this->changetree ) );
		$logger->log( LogLevel::ERROR, "----- Edited DOM -----" );
		$logger->log( LogLevel::ERROR, ContentUtils::dumpDOM( $body, '', [ 'quiet' => true ] ) );
	}
}
| 610 | |
/**
 * For a selser test, report whether a candidate change tree equals one
 * already recorded in $this->selserChangeTrees.
 * Used for generating unique tests.
 *
 * @param array $change Candidate change.
 * @return bool
 */
public function isDuplicateChangeTree( array $change ): bool {
	// Deliberately non-strict: trees are compared with loose (==) equality.
	return in_array( $change, $this->selserChangeTrees );
}
| 628 | |
/**
 * Generate a change object for a document, so we can apply it during a selser test.
 *
 * The result mirrors the child structure of the document body: one entry
 * per child, either a change type or a nested list for that child's own
 * children. Up to 1000 attempts are made to produce a tree that is
 * non-empty and not already in $this->selserChangeTrees; if the final
 * attempt still fails, $this->duplicateChange is set to true.
 *
 * The random-number generator is seeded from $this->seed and the test name.
 *
 * @param Document $doc
 * @return array The list of changes (the last attempt, even if unusable).
 */
public function generateChanges( Document $doc ): array {
	// Seed the random-number generator based on the seed and item title
	$alea = new Alea( ( $this->seed ?? '' ) . ( $this->testName ?? '' ) );

	/**
	 * If no node in the DOM subtree rooted at 'node' is editable in the VE,
	 * this function should return false.
	 *
	 * Currently true for template and extension content, and for entities.
	 */
	$domSubtreeIsEditable = static function ( Node $node ): bool {
		return !( $node instanceof Element ) ||
			( !WTUtils::isEncapsulationWrapper( $node ) &&
				// These wrappers can only be edited in restricted ways.
				// Simpler to just block all editing on them.
				!DOMUtils::matchTypeOf( $node,
					'#^mw:(Entity|Placeholder|DisplaySpace|Annotation|ExtendedAnnRange)(/|$)#'
				) &&
				// Deleting these wrappers is tantamount to removing the
				// references-tag encapsulation wrappers, which results in errors.
				!DOMUtils::hasClass( $node, 'mw-references-wrap' )
			);
	};

	/**
	 * Even if a DOM subtree might be editable in the VE,
	 * certain nodes in the DOM might not be directly editable.
	 *
	 * Currently, this restriction is only applied to DOMs generated for images.
	 * Possibly, there are other candidates.
	 */
	$nodeIsUneditable = static function ( Node $node ) use ( &$nodeIsUneditable ): bool {
		// Text and comment nodes are always editable
		if ( !( $node instanceof Element ) ) {
			return false;
		}

		if ( WTUtils::isMarkerAnnotation( $node ) ) {
			return true;
		}

		// - File wrapper is an uneditable elt.
		// - Any node nested in a file wrapper that is not a figcaption
		//   is an uneditable elt.
		// - Entity spans are uneditable as well
		// - Placeholder is defined to be uneditable in the spec
		// - ExtendedAnnRange is an "unknown" type in the spec, and hence uneditable
		return DOMUtils::matchTypeOf( $node,
			'#^mw:(File|Entity|Placeholder|DisplaySpace|ExtendedAnnRange)(/|$)#' ) || (
			DOMCompat::nodeName( $node ) !== 'figcaption' &&
			$node->parentNode &&
			DOMCompat::nodeName( $node->parentNode ) !== 'body' &&
			$nodeIsUneditable( $node->parentNode )
		);
	};

	$defaultChangeType = 0;

	$hasChangeMarkers = static function ( array $list ) use (
		&$hasChangeMarkers, $defaultChangeType
	): bool {
		// If all recorded changes are 0, then nothing has been modified
		foreach ( $list as $c ) {
			if ( ( is_array( $c ) && $hasChangeMarkers( $c ) ) ||
				( !is_array( $c ) && $c !== $defaultChangeType )
			) {
				return true;
			}
		}
		return false;
	};

	// Builds the change list for the children of $node. The order and number
	// of calls on $alea determine the generated tree for a given seed, so
	// they must not be rearranged.
	$genChangesInternal = static function ( Node $node ) use (
		&$genChangesInternal, &$hasChangeMarkers,
		$domSubtreeIsEditable, $nodeIsUneditable, $alea,
		$defaultChangeType
	): array {
		$changelist = [];
		$children = $node->childNodes ? iterator_to_array( $node->childNodes ) : [];
		foreach ( $children as $child ) {
			$changeType = $defaultChangeType;
			if ( $domSubtreeIsEditable( $child ) ) {
				if ( $nodeIsUneditable( $child ) || $alea->random() < 0.5 ) {
					// This call to random is a hack to preserve the current
					// determined state of our knownFailures entries after a
					// refactor.
					$alea->uint32();
					$changeType = $genChangesInternal( $child );
					// `$genChangesInternal` returns an array, which can be
					// empty. Revert to the `$defaultChangeType` if that's
					// the case.
					if ( count( $changeType ) === 0 ) {
						$changeType = $defaultChangeType;
					}
				} else {
					// NOTE: floor() yields a float, so these change types
					// are floats (2.0, 3.0, ...) rather than ints.
					if ( !( $child instanceof Element ) ) {
						// Text or comment node -- valid changes: 2, 3, 4
						// since we cannot set attributes on these
						$changeType = floor( $alea->random() * 3 ) + 2;
					} else {
						$changeType = floor( $alea->random() * 4 ) + 1;
					}
				}
			}

			$changelist[] = $changeType;
		}

		// A list holding only "no change" markers collapses to [].
		return $hasChangeMarkers( $changelist ) ? $changelist : [];
	};

	$body = DOMCompat::getBody( $doc );

	$maxAttempts = 1000;
	$numAttempts = 0;
	do {
		$numAttempts++;
		$changetree = $genChangesInternal( $body );
		$unusable = count( $changetree ) === 0 ||
			$this->isDuplicateChangeTree( $changetree );
	} while ( $unusable && $numAttempts < $maxAttempts );

	// Flag failure based on the final tree itself rather than on the attempt
	// counter, so a usable tree produced on the very last attempt is not
	// mistaken for a failure.
	if ( $unusable ) {
		// couldn't generate a change ... marking as such
		$this->duplicateChange = true;
	}

	return $changetree;
}
| 767 | |
/**
 * Run this test once per requested target mode by invoking $runTest.
 *
 * Selser handling depends on the runner options:
 * - $runnerOpts['selser'] === 'noauto': only the manual changes are run,
 *   and only if the test declares them in its 'parsoid' options.
 * - $runnerOpts['changetree'] is set (and 'noauto' is not): a single run is
 *   handed to $runTest just like any non-selser mode.
 * - otherwise: manual changes (if declared), then changetree [ 5 ], then up
 *   to $runnerOpts['numchanges'] seeded runs, stopping at the first
 *   duplicate change tree.
 *
 * FIXME: clean up this mess!
 * - generate all changes at once (generateChanges should return a tree really)
 *   rather than going to all these lengths of interleaving change
 *   generation with tests
 * - set up the changes in item directly rather than juggling around with
 *   indexes etc
 * - indicate whether to compare to wt2wt or the original input
 * - maybe make a full selser test one method that uses others rather than the
 *   current chain of methods that sometimes do something for selser
 *
 * @param array $targetModes
 * @param array $runnerOpts
 * @param callable $runTest Called as $runTest( $test, $mode, $runnerOpts )
 */
public function testAllModes( // phpcs:ignore MediaWiki.Commenting.MissingCovers.MissingCovers
	array $targetModes, array $runnerOpts, callable $runTest
): void {
	if ( !$this->testName ) {
		throw new Error( 'Missing title from test case.' );
	}
	$selserNoAuto = ( ( $runnerOpts['selser'] ?? false ) === 'noauto' );

	foreach ( $targetModes as $targetMode ) {
		// Selser without an explicit changetree on the command line (or with
		// 'noauto' requested) is driven from here; everything else falls
		// through to the generic path at the bottom of the loop.
		if (
			$targetMode === 'selser' &&
			( $selserNoAuto || !isset( $runnerOpts['changetree'] ) )
		) {
			// Order of selser runs:
			// 1. Manual changes (if provided)
			// 2. changetree 5 (oracle exists for verifying output)
			// 3. All other change trees (no oracle exists for verifying output)

			if ( isset( $this->options['parsoid']['changes'] ) ) {
				// The item has to be mutated so that 'manual' shows up in the
				// test's title and so knownFailures can tell it apart.
				$this->changetree = [ 'manual' ];
				$runTest( $this, 'selser', $runnerOpts );
			}

			// Stop after the manual run when automatic change trees were
			// disabled, either on the command line or by the test itself.
			if (
				$selserNoAuto ||
				( $this->options['parsoid']['selser'] ?? '' ) === 'noauto'
			) {
				continue;
			}

			// Changetree 5 (append a comment to the root node)
			$this->changetree = [ 5 ];
			$runTest( $this, 'selser', $runnerOpts );

			// Automatically generated change trees, one per seed
			$this->selserChangeTrees = [];
			for ( $seedIdx = 0; $seedIdx < $runnerOpts['numchanges']; $seedIdx++ ) {
				// Reset changetree to null so $runTest doesn't assume [ 5 ]
				$this->changetree = null;
				$this->seed = (string)$seedIdx;
				$runTest( $this, 'selser', $runnerOpts );
				if ( $this->isDuplicateChangeTree( $this->changetree ) ) {
					// A duplicate change tree means no new tests can be
					// generated and run any more, so stop here.
					break;
				}
				$this->selserChangeTrees[$seedIdx] = $this->changetree;
			}
			continue;
		}

		$hasLangconvSection = isset( $this->sections['html/parsoid+langconv'] );
		if ( $targetMode === 'wt2html' && $hasLangconvSection ) {
			// options and parsoidHtml get clobbered, so work on a clone
			$langconvTest = clone $this;
			$langconvTest->options['langconv'] = true;
			$langconvTest->parsoidHtml = $this->sections['html/parsoid+langconv'];
			$runTest( $langconvTest, $targetMode, $runnerOpts );
			if ( $this->parsoidHtml === null ) {
				// Without a non-langconv section there is nothing to run
				// in non-langconv mode
				continue;
			}
		}

		Assert::invariant(
			$targetMode !== 'selser' ||
				( isset( $runnerOpts['changetree'] ) && !$selserNoAuto ),
			"Unexpected target mode $targetMode" );

		$runTest( $this, $targetMode, $runnerOpts );
	}
}
| 863 | |
/**
 * Normalize the actual HTML, and (when not already supplied) the expected
 * HTML, so that irrelevant differences don't show up in comparisons.
 *
 * Which normalization is applied depends on the HTML sections the test
 * provides and on its Parsoid-specific options. When the expected HTML is
 * computed here it is also stored in $this->cachedNormalizedHTML.
 *
 * @param Element|string $actual
 * @param ?string $normExpected Already-normalized expected HTML, or null
 *   to have it computed from the test's sections
 * @param bool $standalone
 * @return array [ normalized actual, normalized expected ]
 */
public function normalizeHTML( $actual, ?string $normExpected, bool $standalone = true ): array {
	$opts = $this->options;
	$standaloneKey = 'html/parsoid+standalone';
	$integratedKey = 'html/parsoid+integrated';

	// At most one of these can be true, since they need opposite $standalone values
	$useStandalone = $standalone && isset( $this->sections[$standaloneKey] );
	$useIntegrated = !$standalone && isset( $this->sections[$integratedKey] );

	// Any Parsoid-specific HTML section selects Parsoid-only normalization;
	// failing that, a 'parsoid' option block without 'normalizePhp' does too.
	$parsoidOnly = true;
	if (
		!isset( $this->sections['html/parsoid'] ) &&
		!$useStandalone &&
		!$useIntegrated &&
		!isset( $this->sections['html/parsoid+langconv'] )
	) {
		$parsoidOnly = isset( $opts['parsoid'] ) &&
			!isset( $opts['parsoid']['normalizePhp'] );
	}

	// Enabled by the test option or by the presence of any of these config settings
	$externalLinkTarget = (bool)( $opts['externallinktarget'] ?? false );
	foreach ( [ 'wgExternalLinkTarget', 'wgNoFollowLinks', 'wgNoFollowDomainExceptions' ] as $setting ) {
		$externalLinkTarget = $externalLinkTarget || isset( $this->config[$setting] );
	}

	$normOpts = [
		'parsoidOnly' => $parsoidOnly,
		'preserveIEW' => isset( $opts['parsoid']['preserveIEW'] ),
		'externallinktarget' => $externalLinkTarget,
	];

	if ( $normExpected === null ) {
		// Prefer the section matching the requested mode, else the default Parsoid HTML
		$expectedSource = $useIntegrated
			? $this->sections[$integratedKey]
			: ( $useStandalone ? $this->sections[$standaloneKey] : $this->parsoidHtml );

		$normExpected = $parsoidOnly
			? TestUtils::normalizeOut( $expectedSource, $normOpts )
			: TestUtils::normalizeHTML( $expectedSource );
		$this->cachedNormalizedHTML = $normExpected;
	}

	$normActual = TestUtils::normalizeOut( $actual, $normOpts );
	return [ $normActual, $normExpected ];
}
| 911 | |
/**
 * Normalize output destined for the "known failure" file.
 *
 * The normalization is deliberately minimal: the known failure file exists
 * to flag any change in output, even when the "correct" output is unknown.
 * Only the 'about' numbering is removed, because it is not guaranteed to
 * be consistent from one run to the next.
 */
public function normalizeKnownFailure( string $out ): string {
	$withoutAboutNumbering = TestUtils::normalizeAbout( $out );
	return $withoutAboutNumbering;
}
| 923 | |
/**
 * Normalize actual and expected wikitext so irrelevant differences are
 * ignored when comparing them.
 *
 * Because of selser as well as manual edit trees, the expected wikitext
 * does not always come from the same section for all tests ending in WT
 * (unlike normalizeHTML). As a consequence,
 * (a) this code is structured differently from normalizeHTML
 * (b) normalized wikitext cannot be cached
 *
 * @param string $actual
 * @param string $expected
 * @param bool $standalone
 * @return array [ normalized actual, normalized expected ]
 */
public function normalizeWT( string $actual, string $expected, bool $standalone = true ): array {
	// Stripping trailing newlines is the only normalization at this time
	return array_map(
		static function ( string $wikitext ): string {
			return rtrim( $wikitext, "\n" );
		},
		[ $actual, $expected ]
	);
}
| 945 | } |