Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 398 |
|
0.00% |
0 / 13 |
CRAP | |
0.00% |
0 / 1 |
| Test | |
0.00% |
0 / 398 |
|
0.00% |
0 / 13 |
19460 | |
0.00% |
0 / 1 |
| __construct | |
0.00% |
0 / 24 |
|
0.00% |
0 / 1 |
110 | |||
| __clone | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
| matchesFilter | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
30 | |||
| pageName | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
| computeTestModes | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
30 | |||
| applyManualChanges | |
0.00% |
0 / 113 |
|
0.00% |
0 / 1 |
420 | |||
| applyChanges | |
0.00% |
0 / 93 |
|
0.00% |
0 / 1 |
930 | |||
| isDuplicateChangeTree | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
| generateChanges | |
0.00% |
0 / 63 |
|
0.00% |
0 / 1 |
650 | |||
| testAllModes | |
0.00% |
0 / 38 |
|
0.00% |
0 / 1 |
342 | |||
| normalizeHTML | |
0.00% |
0 / 28 |
|
0.00% |
0 / 1 |
240 | |||
| normalizeKnownFailure | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| normalizeWT | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
| 1 | <?php |
| 2 | declare( strict_types = 1 ); |
| 3 | |
| 4 | namespace Wikimedia\Parsoid\ParserTests; |
| 5 | |
| 6 | use Error; |
| 7 | use Psr\Log\LogLevel; |
| 8 | use Wikimedia\Alea\Alea; |
| 9 | use Wikimedia\Assert\Assert; |
| 10 | use Wikimedia\Parsoid\DOM\Document; |
| 11 | use Wikimedia\Parsoid\DOM\Element; |
| 12 | use Wikimedia\Parsoid\DOM\Node; |
| 13 | use Wikimedia\Parsoid\Utils\ContentUtils; |
| 14 | use Wikimedia\Parsoid\Utils\DOMCompat; |
| 15 | use Wikimedia\Parsoid\Utils\DOMUtils; |
| 16 | use Wikimedia\Parsoid\Utils\PHPUtils; |
| 17 | use Wikimedia\Parsoid\Utils\Utils; |
| 18 | use Wikimedia\Parsoid\Utils\WTUtils; |
| 19 | |
| 20 | /** |
| 21 | * Represents a parser test |
| 22 | */ |
| 23 | class Test extends Item { |
| 24 | |
// 'testAllModes' and 'TestRunner::runTest' assume that test modes are added
// in this order for caching to work properly (and even when test objects are cloned).
// This ordering is enforced in computeTestModes.
public const ALL_TEST_MODES = [ 'wt2html', 'wt2wt', 'html2html', 'html2wt', 'selser' ];

/* --- These are test properties from the test file --- */

/** This is the test name, not page title for the test */
public ?string $testName = null;

/**
 * Test options. Keys read in this class include 'title', 'langconv' and
 * 'parsoid' (with sub-keys 'modes', 'changes' and 'selser').
 * @var array<string,string|bool|array>
 */
public array $options = [];

/** @var array<string,string|array> */
public array $config = [];

/**
 * Every test property that is not one of DIRECT_KEYS, indexed by section key
 * (filled in by the constructor).
 * @var array<string,string>
 */
public array $sections = [];

/** @var array Known failures for this test, indexed by testing mode. */
public array $knownFailures = [];

/* --- These next are computed based on an ordered list of preferred
 * section keys --- */

/** First section found among WIKITEXT_KEYS, or null if none is present. */
public ?string $wikitext = null;

/** First section found among PARSOID_HTML_KEYS, or null if none is present. */
public ?string $parsoidHtml = null;

/** First section found among LEGACY_HTML_KEYS, or null if none is present. */
public ?string $legacyHtml = null;

/* --- The rest below are computed by Parsoid while running tests -- */

/** Lazily computed by pageName() from the 'title' option. */
private ?string $pageName = null;

private ?int $pageNs = null;

/**
 * Change trees already used for automatically generated selser tests in the
 * current round; consulted by isDuplicateChangeTree() and reset by testAllModes().
 * @var list
 */
public array $selserChangeTrees = [];

/**
 * Change tree for the current selser run. testAllModes() sets it to
 * [ 'manual' ], [ 5 ] or null before invoking the runner; applyChanges()
 * stores the change list it was given here.
 * @var ?list
 */
public ?array $changetree = null;

/** Set to true by generateChanges() when it runs out of attempts. */
public bool $duplicateChange = false;

/** Seed mixed into the random-number generator used by generateChanges(). */
public ?string $seed = null;

// NOTE(review): the properties from here to $time are not written by any
// method visible in this part of the file; presumably they are populated
// by the test runner -- confirm against TestRunner.

public ?string $resultWT = null;

public ?bool $wt2wtPassed = null;

public ?string $wt2wtResult = null;

public ?string $selser = null;

public ?string $changedHTMLStr = null;

public ?string $cachedBODYstr = null;

public ?string $cachedWTstr = null;

public ?string $cachedNormalizedHTML = null;

public array $time = [];

/** Test property keys that the constructor copies verbatim onto same-named properties. */
private const DIRECT_KEYS = [
	'type',
	'filename',
	'lineNumStart',
	'lineNumEnd',
	'testName',
	'options',
	'config',
];
/** Section keys that can provide $wikitext, in priority order. */
private const WIKITEXT_KEYS = [
	'wikitext',
	# deprecated
	'input',
];
/** Section keys that can provide $legacyHtml, in priority order. */
private const LEGACY_HTML_KEYS = [
	'html/php', 'html/*', 'html',
	# deprecated
	'result',
	'html/php+tidy',
	'html/*+tidy',
	'html+tidy',
];
/** Section keys that can provide $parsoidHtml, in priority order. */
private const PARSOID_HTML_KEYS = [
	'html/parsoid', 'html/*', 'html',
	# deprecated
	'result',
	'html/*+tidy',
	'html+tidy',
];
/** Section keys whose presence triggers a deprecation warning in the constructor. */
private const WARN_DEPRECATED_KEYS = [
	'input',
	'result',
	'html/php+tidy',
	'html/*+tidy',
	'html+tidy',
	'html/php+untidy',
	'html+untidy',
];
| 128 | |
/**
 * Build a test from the raw key/value properties read from a test file.
 *
 * Properties named in DIRECT_KEYS are copied onto the matching object
 * property; every other property is stored as a test section. The
 * wikitext, legacyHtml and parsoidHtml properties are then taken from the
 * first matching section in their respective priority lists.
 *
 * @param array $testProperties key-value mapping of properties
 * @param array $knownFailures Known failures for this test, indexed by testing mode
 * @param ?string $comment Optional comment describing the test
 * @param ?callable $warnFunc Optional callback used to emit
 *   deprecation warnings.
 */
public function __construct(
	array $testProperties,
	array $knownFailures = [],
	?string $comment = null,
	?callable $warnFunc = null
) {
	parent::__construct( $testProperties, $comment );
	$this->knownFailures = $knownFailures;

	// Split the raw properties into direct properties and sections.
	foreach ( $testProperties as $name => $content ) {
		if ( in_array( $name, self::DIRECT_KEYS, true ) ) {
			$this->$name = $content;
			continue;
		}
		if ( isset( $this->sections[$name] ) ) {
			$this->error( "Duplicate test section", $name );
		}
		$this->sections[$name] = $content;
	}

	// Each derived property takes the first section present in its
	// priority-ordered candidate list.
	$candidatesByProperty = [
		'wikitext' => self::WIKITEXT_KEYS,
		'legacyHtml' => self::LEGACY_HTML_KEYS,
		'parsoidHtml' => self::PARSOID_HTML_KEYS,
	];
	foreach ( $candidatesByProperty as $property => $candidates ) {
		foreach ( $candidates as $candidate ) {
			if ( !isset( $this->sections[$candidate] ) ) {
				continue;
			}
			$this->$property = $this->sections[$candidate];
			break;
		}
	}

	// Deprecation warnings are only emitted when a callback was supplied.
	if ( !$warnFunc ) {
		return;
	}
	foreach ( self::WARN_DEPRECATED_KEYS as $key ) {
		if ( isset( $this->sections[$key] ) ) {
			$message = $this->errorMsg( "Parser test section $key is deprecated" );
			$warnFunc( $message );
		}
	}
}
| 182 | |
/**
 * Re-copy the array-valued properties with Utils::cloneArray() when the
 * test object is cloned.
 */
public function __clone() {
	// Properties that need deep cloning
	$arrayProperties = [
		'options',
		'config',
		'sections',
		'knownFailures',
		'selserChangeTrees',
		'time',
	];
	// The change tree is nullable, so it is only copied when present.
	if ( $this->changetree !== null ) {
		$arrayProperties[] = 'changetree';
	}
	foreach ( $arrayProperties as $property ) {
		$this->$property = Utils::cloneArray( $this->$property );
	}
}
| 193 | |
/**
 * Check whether this test's name matches the given test filter.
 *
 * The filter keys read here are:
 *  - 'regex': when non-empty, the name is matched against a pattern. If
 *    'raw' is set the pattern is '/' . raw . '/', otherwise the 'regex'
 *    value itself is used as the pattern.
 *  - 'string': when non-empty, the name must contain the 'raw' string.
 *
 * A test without a name is matched as if its name were the empty string.
 *
 * @param array $testFilter Test Filter as set in TestRunner
 * @return bool if test matches the filter
 */
public function matchesFilter( $testFilter ): bool {
	if ( !$testFilter ) {
		return true; // Trivial match
	}

	// testName is nullable; under strict_types passing null to
	// preg_match()/str_contains() would throw a TypeError.
	$name = $this->testName ?? '';

	if ( !empty( $testFilter['regex'] ) ) {
		$regex = isset( $testFilter['raw'] ) ?
			( '/' . $testFilter['raw'] . '/' ) :
			$testFilter['regex'];
		return (bool)preg_match( $regex, $name );
	}

	if ( !empty( $testFilter['string'] ) ) {
		$needle = $testFilter['raw'] ?? null;
		if ( !is_string( $needle ) ) {
			// Nothing usable to search for: treat it as a bad filter.
			return true;
		}
		return str_contains( $name, $needle );
	}

	return true; // Trivial match because of a bad test filter
}
| 216 | |
/**
 * Return the page name to use for this test, computing it on first use.
 *
 * The name comes from the 'title' option when that option is a string;
 * any other value (missing, array, boolean, ...) yields 'Parser test'.
 *
 * @return string
 */
public function pageName(): string {
	if ( !$this->pageName ) {
		$title = $this->options['title'] ?? null;
		// Validate before assigning: $pageName is a typed ?string property,
		// so storing a non-string option value in it would throw a
		// TypeError before any fallback could be applied.
		$this->pageName = is_string( $title ) ? $title : 'Parser test';
	}

	return $this->pageName;
}
| 227 | |
/**
 * Given a test runner that runs in a specific set of test modes ($testRunnerModes)
 * compute the list of valid test modes based on what modes have been enabled on the
 * test itself.
 *
 * The result keeps the keys and ordering of ALL_TEST_MODES.
 *
 * @param array $testRunnerModes What test modes is the test runner running with?
 * @return array
 */
public function computeTestModes( array $testRunnerModes ): array {
	// Intersect starting from ALL_TEST_MODES so the modes come out in the
	// order specified there: caching in the presence of test cloning
	// relies on tests running in this order.
	$runnableModes = array_intersect( self::ALL_TEST_MODES, $testRunnerModes );

	// Modes the test has opted in for; nothing to filter when unset or empty.
	$optedInModes = $this->options['parsoid']['modes'] ?? null;
	if ( !$optedInModes ) {
		return $runnableModes;
	}

	// Avoid filtering out the selser test: a test that opted in to wt2wt
	// also gets selser when the runner has selser enabled.
	$implySelser = in_array( 'selser', $testRunnerModes, true )
		&& !in_array( 'selser', $optedInModes, true )
		&& in_array( 'wt2wt', $optedInModes, true );
	if ( $implySelser ) {
		$optedInModes[] = 'selser';
	}

	return array_intersect( $runnableModes, $optedInModes );
}
| 258 | |
// Random string used as selser comment content: applyChanges() appends a
// comment with this text to <body> when the change tree is [ 5 ].
public const STATIC_RANDOM_STRING = 'ahseeyooxooZ8Oon0boh';
| 261 | |
/**
 * Apply manually-specified changes, which are provided in a pseudo-jQuery
 * format.
 *
 * The changes are read from $this->options['parsoid']['changes']. Each
 * change is a list [ selector, method, ...args ]: the selector is run
 * against the document body and the named method is applied to every
 * match. Processing stops at the first invalid change; the error is
 * printed in red and then thrown.
 *
 * @param Document $doc Document whose body is edited in place.
 * @throws Error If a change is malformed, its selector matches nothing,
 *   or it names an unknown mutator.
 */
public function applyManualChanges( Document $doc ): void {
	// A test without manual changes is a no-op rather than a source of
	// undefined-index warnings.
	$changes = $this->options['parsoid']['changes'] ?? [];

	// Shared implementation of 'before' and 'after': parse $html in a
	// context suited to $node's parent and move the resulting nodes into
	// that parent, in front of $ref (or at the end when $ref is null).
	$insertHtml = static function ( Node $node, string $html, ?Node $ref ): void {
		$parentName = DOMUtils::nodeName( $node->parentNode );
		if ( $parentName === 'tbody' ) {
			$tbl = $node->ownerDocument->createElement( 'table' );
			DOMCompat::setInnerHTML( $tbl, $html );
			// <tbody> is implicitly added when inner html is set to <tr>..</tr>
			DOMUtils::migrateChildren( $tbl->firstChild, $node->parentNode, $ref );
		} elseif ( $parentName === 'tr' ) {
			$tbl = $node->ownerDocument->createElement( 'table' );
			DOMCompat::setInnerHTML( $tbl, '<tbody><tr></tr></tbody>' );
			$tr = $tbl->firstChild->firstChild;
			'@phan-var Element $tr'; // @var Element $tr
			DOMCompat::setInnerHTML( $tr, $html );
			DOMUtils::migrateChildren( $tbl->firstChild->firstChild, $node->parentNode, $ref );
		} else {
			$div = $node->ownerDocument->createElement( 'div' );
			DOMCompat::setInnerHTML( $div, $html );
			DOMUtils::migrateChildren( $div, $node->parentNode, $ref );
		}
	};

	// changes are specified using jquery methods.
	//  [x,y,z...] becomes $(x)[y](z....)
	// that is, ['fig', 'attr', 'width', '120'] is interpreted as
	//   $('fig').attr('width', '120')
	// See http://api.jquery.com/ for documentation of these methods.
	// "contents" as second argument calls the jquery .contents() method
	// on the results of the selector in the first argument, which is
	// a good way to get at the text and comment nodes
	$jquery = [
		'after' => static function ( Node $node, string $html ) use ( $insertHtml ): void {
			$insertHtml( $node, $html, $node->nextSibling );
		},
		'append' => static function ( Node $node, string $html ): void {
			if ( DOMUtils::nodeName( $node ) === 'tr' ) {
				$tbl = $node->ownerDocument->createElement( 'table' );
				DOMCompat::setInnerHTML( $tbl, $html );
				// <tbody> is implicitly added when inner html is set to <tr>..</tr>
				DOMUtils::migrateChildren( $tbl->firstChild, $node );
			} else {
				$div = $node->ownerDocument->createElement( 'div' );
				DOMCompat::setInnerHTML( $div, $html );
				DOMUtils::migrateChildren( $div, $node );
			}
		},
		'attr' => static function ( Node $node, string $name, string $val ): void {
			'@phan-var Element $node'; // @var Element $node
			$node->setAttribute( $name, $val );
		},
		'before' => static function ( Node $node, string $html ) use ( $insertHtml ): void {
			$insertHtml( $node, $html, $node );
		},
		'removeAttr' => static function ( Node $node, string $name ): void {
			'@phan-var Element $node'; // @var Element $node
			$node->removeAttribute( $name );
		},
		'removeClass' => static function ( Node $node, string $c ): void {
			'@phan-var Element $node'; // @var Element $node
			DOMCompat::getClassList( $node )->remove( $c );
		},
		'addClass' => static function ( Node $node, string $c ): void {
			'@phan-var Element $node'; // @var Element $node
			DOMCompat::getClassList( $node )->add( $c );
		},
		'text' => static function ( Node $node, string $t ): void {
			$node->textContent = $t;
		},
		'html' => static function ( Node $node, string $h ): void {
			'@phan-var Element $node'; // @var Element $node
			DOMCompat::setInnerHTML( $node, $h );
		},
		'remove' => static function ( Node $node, ?string $optSelector = null ): void {
			// jquery lets us specify an optional selector to further
			// restrict the removed elements.
			// text nodes don't have the "querySelectorAll" method, so
			// just include them by default (jquery excludes them, which
			// is less useful)
			if ( !$optSelector ) {
				$what = [ $node ];
			} elseif ( !( $node instanceof Element ) ) {
				$what = [ $node ];/* text node hack! */
			} else {
				'@phan-var Element $node'; // @var Element $node
				$what = DOMCompat::querySelectorAll( $node, $optSelector );
			}
			foreach ( $what as $target ) {
				if ( $target->parentNode ) {
					$target->parentNode->removeChild( $target );
				}
			}
		},
		'empty' => static function ( Node $node ): void {
			'@phan-var Element $node'; // @var Element $node
			DOMCompat::replaceChildren( $node );
		},
		'wrap' => static function ( Node $node, string $w ): void {
			$frag = $node->ownerDocument->createElement( 'div' );
			DOMCompat::setInnerHTML( $frag, $w );
			$first = $frag->firstChild;
			$node->parentNode->replaceChild( $first, $node );
			// Descend to the innermost first child of the wrapper markup
			// and put the original node there.
			while ( $first->firstChild ) {
				$first = $first->firstChild;
			}
			$first->appendChild( $node );
		}
	];

	$body = DOMCompat::getBody( $doc );
	$err = null;

	foreach ( $changes as $change ) {
		if ( !is_array( $change ) || count( $change ) < 2 ) {
			// json_encode: $change is normally an array, which cannot be
			// concatenated into a string directly.
			$err = new Error( 'bad change: ' . json_encode( $change ) );
			break;
		}
		// use document.querySelectorAll as a poor man's $(...)
		$els = PHPUtils::iterable_to_array(
			DOMCompat::querySelectorAll( $body, $change[0] )
		);
		if ( !count( $els ) ) {
			$err = new Error( $change[0] .
				' did not match any elements: ' . DOMCompat::getOuterHTML( $body ) );
			break;
		}
		if ( $change[1] === 'contents' ) {
			// Drop the selector so the mutator name moves to index 1, and
			// operate on the child nodes of the matched elements instead.
			$change = array_slice( $change, 1 );
			$acc = [];
			foreach ( $els as $el ) {
				PHPUtils::pushArray( $acc, DOMUtils::childNodes( $el ) );
			}
			$els = $acc;
		}
		// After a 'contents' shift there may be no mutator name left.
		$mutator = $change[1] ?? '';
		$fn = is_string( $mutator ) ? ( $jquery[$mutator] ?? null ) : null;
		if ( !$fn ) {
			$err = new Error( 'bad mutator function: ' .
				( is_string( $mutator ) ? $mutator : json_encode( $mutator ) ) );
			break;
		}
		$args = array_slice( $change, 2 );
		foreach ( $els as $el ) {
			$fn( $el, ...$args );
		}
	}

	if ( $err ) {
		print TestUtils::colorString( (string)$err, "red" ) . "\n";
		throw $err;
	}
}
| 430 | |
/**
 * Make changes to a DOM in order to run a selser test on it.
 *
 * The change list mirrors the child-node structure of the document body:
 * entry i applies to child i, and a nested array recurses into that child.
 * Scalar change codes are:
 *  - 0: no change
 *  - 1: modify the node wrapper (set a random 'data-foobar' attribute)
 *  - 2: insert a new node before the child
 *  - 3: delete the tree rooted at the child
 *  - 4: replace the tree rooted at the child (insert, then delete)
 * The special change list [ 5 ] appends a comment containing
 * STATIC_RANDOM_STRING to the body instead.
 *
 * @param array $dumpOpts Reads 'logger' and the 'dom:post-changes' flag;
 *   when both are set the DOM is logged before and after the edit.
 * @param Document $doc Document to edit in place.
 * @param array $changelist Change tree to apply; also stored in $this->changetree.
 */
public function applyChanges( array $dumpOpts, Document $doc, array $changelist ): void {
	$logger = $dumpOpts['logger'] ?? null;
	$dumpDom = $logger && ( $dumpOpts['dom:post-changes'] ?? false );

	// Seed the random-number generator based on the item title and changelist
	$alea = new Alea( ( json_encode( $changelist ) ) . ( $this->testName ?? '' ) );

	// Keep the changes in the test object
	// to check for duplicates while building tasks
	$this->changetree = $changelist;

	// Helper function for getting a random string
	$randomString = static fn (): string => base_convert( (string)$alea->uint32(), 10, 36 );

	// Parents whose children must always be a specific wrapper element.
	$fixedWrappers = [ 'ol' => 'li', 'ul' => 'li', 'dl' => 'dd', 'body' => 'p' ];

	$insertNewNode = static function ( Node $n ) use ( $randomString, $fixedWrappers ): void {
		// Insert a text node, if not in a fosterable position.
		// If in foster position, enter a comment.
		// In either case, dom-diff should register a new node
		$content = $randomString();
		$ownerDoc = $n->ownerDocument;

		// Don't separate legacy IDs from their H? node.
		if ( WTUtils::isFallbackIdSpan( $n ) ) {
			$n = $n->nextSibling ?? $n->parentNode;
		}

		// For these container nodes, it would be buggy
		// to insert text nodes as children
		$parentName = DOMUtils::nodeName( $n->parentNode );
		$wrapperName = null;
		if ( isset( $fixedWrappers[$parentName] ) ) {
			$wrapperName = $fixedWrappers[$parentName];
		} elseif ( $parentName === 'tr' ) {
			// Copy the cell type (TH or TD) of the nearest element
			// sibling, preferring the previous one; default to TD.
			$cell = DOMCompat::getPreviousElementSibling( $n )
				?: DOMCompat::getNextElementSibling( $n );
			$wrapperName = $cell ? DOMUtils::nodeName( $cell ) : 'td';
		} elseif ( DOMUtils::isWikitextBlockNode( $n ) ) {
			// We're trying to determine if it is safe to place a
			// bare text node. If $n is a block, wrap the text node
			// we're putting beside it.
			$wrapperName = 'p';
		}

		if ( DOMUtils::isFosterablePosition( $n ) && $parentName !== 'tr' ) {
			$newNode = $ownerDoc->createComment( $content );
		} elseif ( $wrapperName ) {
			$newNode = $ownerDoc->createElement( $wrapperName );
			$newNode->appendChild( $ownerDoc->createTextNode( $content ) );
		} else {
			$newNode = $ownerDoc->createTextNode( $content );
		}

		$n->parentNode->insertBefore( $newNode, $n );
	};

	$applyChangesInternal = static function ( Node $node, array $changes ) use (
		&$applyChangesInternal, $insertNewNode, $randomString, $logger
	): void {
		// Snapshot of the children: insertions and removals below do not
		// shift the indexes used to pair children with changes.
		$children = DOMUtils::childNodes( $node );
		if ( count( $children ) < count( $changes ) ) {
			throw new Error( "Error: more changes than nodes to apply them to!" );
		}

		foreach ( $changes as $i => $change ) {
			$child = $children[$i];

			if ( is_array( $change ) ) {
				$applyChangesInternal( $child, $change );
				continue;
			}

			// Loose comparison on purpose: generated change codes are floats.
			switch ( $change ) {
				case 0:
					// No change
					break;

				case 1:
					// Change node wrapper
					// (sufficient to insert a random attr)
					if ( $child instanceof Element ) {
						$child->setAttribute( 'data-foobar', $randomString() );
					} elseif ( $logger ) {
						$logger->log(
							LogLevel::ERROR,
							'Buggy changetree. changetype 1 (modify attribute)' .
							' cannot be applied on text/comment nodes.'
						);
					}
					break;

				case 2:
					// Insert new node before child
					$insertNewNode( $child );
					break;

				case 3:
					// Delete tree rooted at child
					$child->parentNode->removeChild( $child );
					break;

				case 4:
					// Change tree rooted at child
					$insertNewNode( $child );
					$child->parentNode->removeChild( $child );
					break;
			}
		}
	};

	$body = DOMCompat::getBody( $doc );

	if ( $dumpDom ) {
		$logger->log( LogLevel::ERROR, "----- Original DOM -----" );
		$logger->log( LogLevel::ERROR, ContentUtils::dumpDOM( $body, '', [ 'quiet' => true ] ) );
	}

	if ( $this->changetree === [ 5 ] ) {
		// Hack so that we can work on the parent node rather than just the
		// children: Append a comment with known content. This is later
		// stripped from the output, and the result is compared to the
		// original wikitext rather than the non-selser wt2wt result.
		$body->appendChild( $doc->createComment( self::STATIC_RANDOM_STRING ) );
	} elseif ( $this->changetree !== [] ) {
		$applyChangesInternal( $body, $this->changetree );
	}

	if ( $dumpDom ) {
		$logger->log( LogLevel::ERROR, "----- Change Tree -----" );
		$logger->log( LogLevel::ERROR, json_encode( $this->changetree ) );
		$logger->log( LogLevel::ERROR, "----- Edited DOM -----" );
		$logger->log( LogLevel::ERROR, ContentUtils::dumpDOM( $body, '', [ 'quiet' => true ] ) );
	}
}
| 594 | |
/**
 * For a selser test, check if a change we could make has already been
 * tested in this round, i.e. whether it equals one of the trees recorded
 * in $this->selserChangeTrees.
 * Used for generating unique tests.
 *
 * @param array $change Candidate change.
 * @return bool
 */
public function isDuplicateChangeTree( array $change ): bool {
	// Deliberately a loose (==) search: generateChanges() produces float
	// change codes via floor(), which must still compare equal to
	// integer codes.
	return in_array( $change, $this->selserChangeTrees, false );
}
| 612 | |
/**
 * Generate a change object for a document, so we can apply it during a selser test.
 *
 * The tree mirrors the child-node structure of the document body. Each
 * entry is 0 (no change), a change code between 1 and 4 (produced with
 * floor(), so a float), or a nested list for the child's own subtree.
 * Generation is retried, up to 1000 attempts, while the result is empty
 * or duplicates a tree already recorded in $this->selserChangeTrees;
 * reaching the attempt limit sets $this->duplicateChange.
 *
 * @param Document $doc
 * @return array The list of changes.
 */
public function generateChanges( Document $doc ): array {
	// Seed the random-number generator based on the seed and the item title
	$alea = new Alea( ( $this->seed ?? '' ) . ( $this->testName ?? '' ) );

	/**
	 * If no node in the DOM subtree rooted at 'node' is editable in the VE,
	 * this function should return false.
	 *
	 * Currently true for template and extension content, and for entities.
	 */
	$domSubtreeIsEditable = static function ( Node $node ): bool {
		if ( !( $node instanceof Element ) ) {
			return true;
		}
		if ( WTUtils::isEncapsulationWrapper( $node ) ) {
			return false;
		}
		// These wrappers can only be edited in restricted ways.
		// Simpler to just block all editing on them.
		if ( DOMUtils::matchTypeOf( $node,
			'#^mw:(Entity|Placeholder|DisplaySpace|Annotation|ExtendedAnnRange)(/|$)#'
		) ) {
			return false;
		}
		// Deleting these wrappers is tantamount to removing the
		// references-tag encapsulation wrappers, which results in errors.
		return !DOMUtils::hasClass( $node, 'mw-references-wrap' );
	};

	/**
	 * Even if a DOM subtree might be editable in the VE,
	 * certain nodes in the DOM might not be directly editable.
	 *
	 * Currently, this restriction is only applied to DOMs generated for images.
	 * Possibly, there are other candidates.
	 */
	$nodeIsUneditable = static function ( Node $node ) use ( &$nodeIsUneditable ): bool {
		// Text and comment nodes are always editable
		if ( !( $node instanceof Element ) ) {
			return false;
		}

		if ( WTUtils::isMarkerAnnotation( $node ) ) {
			return true;
		}

		// - File wrapper is an uneditable elt.
		// - Entity spans are uneditable as well
		// - Placeholder is defined to be uneditable in the spec
		// - ExtendedAnnRange is an "unknown" type in the spec, and hence uneditable
		if ( DOMUtils::matchTypeOf( $node,
			'#^mw:(File|Entity|Placeholder|DisplaySpace|ExtendedAnnRange)(/|$)#' )
		) {
			return true;
		}

		// - Any node nested in a file wrapper that is not a figcaption
		//   is an uneditable elt: inherit the answer from the ancestors,
		//   stopping below <body>.
		if ( DOMUtils::nodeName( $node ) === 'figcaption' ) {
			return false;
		}
		$parent = $node->parentNode;
		return $parent &&
			DOMUtils::nodeName( $parent ) !== 'body' &&
			$nodeIsUneditable( $parent );
	};

	$defaultChangeType = 0;

	// If all recorded changes are 0, then nothing has been modified
	$hasChangeMarkers = static function ( array $list ) use (
		&$hasChangeMarkers, $defaultChangeType
	): bool {
		foreach ( $list as $entry ) {
			$modified = is_array( $entry )
				? $hasChangeMarkers( $entry )
				: ( $entry !== $defaultChangeType );
			if ( $modified ) {
				return true;
			}
		}
		return false;
	};

	$genChangesInternal = static function ( Node $node ) use (
		&$genChangesInternal, $hasChangeMarkers,
		$domSubtreeIsEditable, $nodeIsUneditable, $alea,
		$defaultChangeType
	): array {
		$changelist = [];
		foreach ( DOMUtils::childNodes( $node ) as $child ) {
			if ( !$domSubtreeIsEditable( $child ) ) {
				$changelist[] = $defaultChangeType;
				continue;
			}

			if ( $nodeIsUneditable( $child ) || $alea->random() < 0.5 ) {
				// This call to random is a hack to preserve the current
				// determined state of our knownFailures entries after a
				// refactor.
				$alea->uint32();
				$subtree = $genChangesInternal( $child );
				// `$genChangesInternal` returns an array, which can be
				// empty. Revert to the `$defaultChangeType` if that's
				// the case.
				$changelist[] = $subtree ?: $defaultChangeType;
			} elseif ( $child instanceof Element ) {
				// Element -- valid changes: 1, 2, 3, 4
				$changelist[] = floor( $alea->random() * 4 ) + 1;
			} else {
				// Text or comment node -- valid changes: 2, 3, 4
				// since we cannot set attributes on these
				$changelist[] = floor( $alea->random() * 3 ) + 2;
			}
		}

		return $hasChangeMarkers( $changelist ) ? $changelist : [];
	};

	$body = DOMCompat::getBody( $doc );

	$maxAttempts = 1000;
	$numAttempts = 0;
	do {
		$numAttempts++;
		$changetree = $genChangesInternal( $body );
		$retry = $numAttempts < $maxAttempts &&
			( count( $changetree ) === 0 ||
				$this->isDuplicateChangeTree( $changetree ) );
	} while ( $retry );

	if ( $numAttempts === $maxAttempts ) {
		// couldn't generate a change ... marking as such
		$this->duplicateChange = true;
	}

	return $changetree;
}
| 749 | |
/**
 * Run this test in each of the target modes by invoking $runTest, expanding
 * the 'selser' mode into its manual, changetree-5 and automatically
 * generated variants where applicable.
 *
 * FIXME: clean up this mess!
 * - generate all changes at once (generateChanges should return a tree really)
 *   rather than going to all these lengths of interleaving change
 *   generation with tests
 * - set up the changes in item directly rather than juggling around with
 *   indexes etc
 * - indicate whether to compare to wt2wt or the original input
 * - maybe make a full selser test one method that uses others rather than the
 *   current chain of methods that sometimes do something for selser
 *
 * @param array $targetModes Modes to run, in order.
 * @param array $runnerOpts Runner options; 'selser', 'changetree' and
 *   'numchanges' are read here and the whole array is passed to $runTest.
 * @param callable $runTest Invoked as $runTest( Test, string $mode, array $runnerOpts ).
 */
public function testAllModes( // phpcs:ignore MediaWiki.Commenting.MissingCovers.MissingCovers
	array $targetModes, array $runnerOpts, callable $runTest
): void {
	if ( !$this->testName ) {
		throw new Error( 'Missing title from test case.' );
	}
	$selserNoAuto = ( ( $runnerOpts['selser'] ?? false ) === 'noauto' );

	foreach ( $targetModes as $targetMode ) {
		$isSelser = ( $targetMode === 'selser' );

		if ( $isSelser && $selserNoAuto ) {
			// Manual changes were requested on the command line,
			// check that the item does have them.
			if ( isset( $this->options['parsoid']['changes'] ) ) {
				$this->changetree = [ 'manual' ];
				$runTest( $this, 'selser', $runnerOpts );
			}
			continue;
		}

		if ( $isSelser && !isset( $runnerOpts['changetree'] ) ) {
			// Run selser tests in the following order:
			// 1. Manual changes (if provided)
			// 2. changetree 5 (oracle exists for verifying output)
			// 3. All other change trees (no oracle exists for verifying output)

			if ( isset( $this->options['parsoid']['changes'] ) ) {
				// Mutating the item here is necessary to output 'manual' in
				// the test's title and to differentiate it for knownFailures.
				$this->changetree = [ 'manual' ];
				$runTest( $this, 'selser', $runnerOpts );
			}

			// Skip the rest if the test doesn't want changetrees
			if ( ( $this->options['parsoid']['selser'] ?? '' ) === 'noauto' ) {
				continue;
			}

			// Changetree 5 (append a comment to the root node)
			$this->changetree = [ 5 ];
			$runTest( $this, 'selser', $runnerOpts );

			// Automatically generated changed trees
			$this->selserChangeTrees = [];
			for ( $j = 0; $j < $runnerOpts['numchanges']; $j++ ) {
				// Set changetree to null to ensure we don't assume [ 5 ] in $runTest
				$this->changetree = null;
				$this->seed = (string)$j;
				$runTest( $this, 'selser', $runnerOpts );
				if ( $this->isDuplicateChangeTree( $this->changetree ) ) {
					// Once we get a duplicate change tree, we can no longer
					// generate and run new tests. So, be done now!
					break;
				}
				$this->selserChangeTrees[$j] = $this->changetree;
			}
			continue;
		}

		// Any non-selser mode, or selser with an explicit changetree
		// supplied in the runner options.
		$langconvSection = 'html/parsoid+langconv';
		if ( $targetMode === 'wt2html' && isset( $this->sections[$langconvSection] ) ) {
			// Since we are clobbering options and parsoidHtml, clone the test object
			$langconvTest = clone $this;
			$langconvTest->options['langconv'] = true;
			$langconvTest->parsoidHtml = $this->sections[$langconvSection];
			$runTest( $langconvTest, $targetMode, $runnerOpts );
			if ( $this->parsoidHtml === null ) {
				// Don't run the same test in non-langconv mode
				// unless we have a non-langconv section
				continue;
			}
		}

		Assert::invariant(
			$targetMode !== 'selser' ||
			( isset( $runnerOpts['changetree'] ) && !$selserNoAuto ),
			"Unexpected target mode $targetMode" );

		$runTest( $this, $targetMode, $runnerOpts );
	}
}
| 845 | |
| 846 | /** |
| 847 | * Normalize expected and actual HTML to suppress irrelevant differences. |
| 848 | * The normalization is determined by the HTML sections present in the test |
| 849 | * as well as other Parsoid-specific test options. |
| 850 | * |
| 851 | * @param Element|string $actual |
| 852 | * @param ?string $normExpected |
| 853 | * @param bool $standalone |
| 854 | * |
| 855 | * @return list{string, string} |
| 856 | */ |
public function normalizeHTML( $actual, ?string $normExpected, bool $standalone = true ): array {
	$sections = $this->sections;
	$testOpts = $this->options;

	// Mode-specific HTML sections only count when running in that mode.
	$haveStandaloneHTML = $standalone && isset( $sections['html/parsoid+standalone'] );
	$haveIntegratedHTML = !$standalone && isset( $sections['html/parsoid+integrated'] );

	// Any Parsoid-specific HTML section, or a 'parsoid' option block without
	// 'normalizePhp', selects the Parsoid-only normalization.
	$parsoidOnly = isset( $sections['html/parsoid'] )
		|| $haveStandaloneHTML
		|| $haveIntegratedHTML
		|| isset( $sections['html/parsoid+langconv'] )
		|| ( isset( $testOpts['parsoid'] ) && !isset( $testOpts['parsoid']['normalizePhp'] ) );

	// Either the test option or one of the link-related config settings
	// turns on the 'externallinktarget' normalization option.
	$externalLinkTarget = ( $testOpts['externallinktarget'] ?? false )
		|| isset( $this->config['wgExternalLinkTarget'] )
		|| isset( $this->config['wgNoFollowLinks'] )
		|| isset( $this->config['wgNoFollowDomainExceptions'] );

	$normOpts = [
		'parsoidOnly' => $parsoidOnly,
		'preserveIEW' => isset( $testOpts['parsoid']['preserveIEW'] ),
		'externallinktarget' => $externalLinkTarget,
	];

	if ( $normExpected === null ) {
		// No normalized expectation was supplied: pick the expected HTML
		// (integrated first, then standalone, then the default Parsoid
		// HTML), normalize it and remember the result on the test.
		$expectedHtml = $haveIntegratedHTML
			? $sections['html/parsoid+integrated']
			: ( $haveStandaloneHTML
				? $sections['html/parsoid+standalone']
				: $this->parsoidHtml );

		$normExpected = $parsoidOnly
			? TestUtils::normalizeOut( $expectedHtml, $normOpts )
			: TestUtils::normalizeHTML( $expectedHtml );

		$this->cachedNormalizedHTML = $normExpected;
	}

	$normActual = TestUtils::normalizeOut( $actual, $normOpts );

	return [ $normActual, $normExpected ];
}
| 894 | |
| 895 | /** |
| 896 | * Normalize "known failure" output. |
| 897 | * |
| 898 | * This is an extremely light normalization, since the point of the |
| 899 | * known failure file is to catch changes in output, even if we don't |
| 900 | * know what "correct" is. But we do remove 'about' numbering, since |
| 901 | * that is not guaranteed consistent from run to run. |
| 902 | */ |
public function normalizeKnownFailure( string $out ): string {
	// Deliberately minimal: the only normalization applied is the one
	// TestUtils::normalizeAbout() performs.
	$normalized = TestUtils::normalizeAbout( $out );

	return $normalized;
}
| 906 | |
| 907 | /** |
| 908 | * Normalize expected and actual wikitext to suppress irrelevant differences. |
| 909 | * |
| 910 | * Because of selser as well as manual edit trees, expected wikitext isn't always |
| 911 | * found in the same section for all tests ending in WT (unlike normalizeHTML). |
| 912 | * Hence, |
| 913 | * (a) this code has a different structure than normalizeHTML |
| 914 | * (b) we cannot cache normalized wikitext |
| 915 | * |
| 916 | * @param string $actual |
| 917 | * @param string $expected |
| 918 | * @param bool $standalone |
| 919 | * |
| 920 | * @return list{string, string} |
| 921 | */ |
public function normalizeWT( string $actual, string $expected, bool $standalone = true ): array {
	// The only normalization at this time is dropping trailing newlines;
	// $standalone is accepted but not consulted.
	$stripTrailingNewlines = static function ( string $wikitext ): string {
		return rtrim( $wikitext, "\n" );
	};

	// Result order is [ actual, expected ].
	return array_map( $stripTrailingNewlines, [ $actual, $expected ] );
}
| 929 | } |