Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 398 |
|
0.00% |
0 / 13 |
CRAP | |
0.00% |
0 / 1 |
Test | |
0.00% |
0 / 398 |
|
0.00% |
0 / 13 |
19460 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 24 |
|
0.00% |
0 / 1 |
110 | |||
__clone | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
matchesFilter | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
30 | |||
pageName | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
computeTestModes | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
30 | |||
applyManualChanges | |
0.00% |
0 / 113 |
|
0.00% |
0 / 1 |
420 | |||
applyChanges | |
0.00% |
0 / 93 |
|
0.00% |
0 / 1 |
930 | |||
isDuplicateChangeTree | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
generateChanges | |
0.00% |
0 / 63 |
|
0.00% |
0 / 1 |
650 | |||
testAllModes | |
0.00% |
0 / 38 |
|
0.00% |
0 / 1 |
342 | |||
normalizeHTML | |
0.00% |
0 / 28 |
|
0.00% |
0 / 1 |
240 | |||
normalizeKnownFailure | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
normalizeWT | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\ParserTests; |
5 | |
6 | use Error; |
7 | use Psr\Log\LogLevel; |
8 | use Wikimedia\Alea\Alea; |
9 | use Wikimedia\Assert\Assert; |
10 | use Wikimedia\Parsoid\DOM\Document; |
11 | use Wikimedia\Parsoid\DOM\Element; |
12 | use Wikimedia\Parsoid\DOM\Node; |
13 | use Wikimedia\Parsoid\Utils\ContentUtils; |
14 | use Wikimedia\Parsoid\Utils\DOMCompat; |
15 | use Wikimedia\Parsoid\Utils\DOMUtils; |
16 | use Wikimedia\Parsoid\Utils\PHPUtils; |
17 | use Wikimedia\Parsoid\Utils\Utils; |
18 | use Wikimedia\Parsoid\Utils\WTUtils; |
19 | |
20 | /** |
21 | * Represents a parser test |
22 | */ |
23 | class Test extends Item { |
24 | |
25 | // 'testAllModes' and 'TestRunner::runTest' assume that test modes are added |
26 | // in this order for caching to work properly (and even when test objects are cloned). |
27 | // This ordering is enforced in computeTestModes. |
28 | public const ALL_TEST_MODES = [ 'wt2html', 'wt2wt', 'html2html', 'html2wt', 'selser' ]; |
29 | |
30 | /* --- These are test properties from the test file --- */ |
31 | |
32 | /** This is the test name, not page title for the test */ |
33 | public ?string $testName = null; |
34 | |
35 | /** @var array<string,string|bool|array> */ |
36 | public array $options = []; |
37 | |
38 | /** @var array<string,string|array> */ |
39 | public array $config = []; |
40 | |
41 | /** @var array<string,string> */ |
42 | public array $sections = []; |
43 | |
44 | /** @var array Known failures for this test, indexed by testing mode. */ |
45 | public array $knownFailures = []; |
46 | |
47 | /* --- These next are computed based on an ordered list of preferred |
48 | * section keys --- */ |
49 | |
50 | public ?string $wikitext = null; |
51 | |
52 | public ?string $parsoidHtml = null; |
53 | |
54 | public ?string $legacyHtml = null; |
55 | |
56 | /* --- The rest below are computed by Parsoid while running tests -- */ |
57 | |
58 | private ?string $pageName = null; |
59 | |
60 | private ?int $pageNs = null; |
61 | |
62 | /** @var list */ |
63 | public array $selserChangeTrees = []; |
64 | |
65 | /** @var ?list */ |
66 | public ?array $changetree = null; |
67 | |
68 | public bool $duplicateChange = false; |
69 | |
70 | public ?string $seed = null; |
71 | |
72 | public ?string $resultWT = null; |
73 | |
74 | public ?bool $wt2wtPassed = null; |
75 | |
76 | public ?string $wt2wtResult = null; |
77 | |
78 | public ?string $selser = null; |
79 | |
80 | public ?string $changedHTMLStr = null; |
81 | |
82 | public ?string $cachedBODYstr = null; |
83 | |
84 | public ?string $cachedWTstr = null; |
85 | |
86 | public ?string $cachedNormalizedHTML = null; |
87 | |
88 | public array $time = []; |
89 | |
90 | private const DIRECT_KEYS = [ |
91 | 'type', |
92 | 'filename', |
93 | 'lineNumStart', |
94 | 'lineNumEnd', |
95 | 'testName', |
96 | 'options', |
97 | 'config', |
98 | ]; |
99 | private const WIKITEXT_KEYS = [ |
100 | 'wikitext', |
101 | # deprecated |
102 | 'input', |
103 | ]; |
104 | private const LEGACY_HTML_KEYS = [ |
105 | 'html/php', 'html/*', 'html', |
106 | # deprecated |
107 | 'result', |
108 | 'html/php+tidy', |
109 | 'html/*+tidy', |
110 | 'html+tidy', |
111 | ]; |
112 | private const PARSOID_HTML_KEYS = [ |
113 | 'html/parsoid', 'html/*', 'html', |
114 | # deprecated |
115 | 'result', |
116 | 'html/*+tidy', |
117 | 'html+tidy', |
118 | ]; |
119 | private const WARN_DEPRECATED_KEYS = [ |
120 | 'input', |
121 | 'result', |
122 | 'html/php+tidy', |
123 | 'html/*+tidy', |
124 | 'html+tidy', |
125 | 'html/php+untidy', |
126 | 'html+untidy', |
127 | ]; |
128 | |
/**
 * Build a test from the raw properties parsed out of a parser test file.
 *
 * Properties named in DIRECT_KEYS are copied onto same-named object
 * properties; every other property is recorded as a test section. The
 * wikitext, legacyHtml and parsoidHtml properties are then filled from the
 * first section present in their respective priority lists.
 *
 * @param array $testProperties key-value mapping of properties
 * @param array $knownFailures Known failures for this test, indexed by testing mode
 * @param ?string $comment Optional comment describing the test
 * @param ?callable $warnFunc Optional callback used to emit
 *   deprecation warnings; called once per deprecated section present.
 */
public function __construct(
	array $testProperties,
	array $knownFailures = [],
	?string $comment = null,
	?callable $warnFunc = null
) {
	parent::__construct( $testProperties, $comment );
	$this->knownFailures = $knownFailures;

	// Split the raw properties into direct properties and sections
	foreach ( $testProperties as $name => $val ) {
		if ( in_array( $name, self::DIRECT_KEYS, true ) ) {
			$this->$name = $val;
			continue;
		}
		if ( isset( $this->sections[$name] ) ) {
			$this->error( "Duplicate test section", $name );
		}
		$this->sections[$name] = $val;
	}

	# Each computed property takes the first section found in its
	# priority-ordered candidate list
	$priorities = [
		'wikitext' => self::WIKITEXT_KEYS,
		'legacyHtml' => self::LEGACY_HTML_KEYS,
		'parsoidHtml' => self::PARSOID_HTML_KEYS,
	];
	foreach ( $priorities as $prop => $candidates ) {
		foreach ( $candidates as $candidate ) {
			if ( isset( $this->sections[$candidate] ) ) {
				$this->$prop = $this->sections[$candidate];
				continue 2;
			}
		}
	}

	# Deprecation warnings are only emitted when a sink was provided
	if ( !$warnFunc ) {
		return;
	}
	foreach ( self::WARN_DEPRECATED_KEYS as $key ) {
		if ( isset( $this->sections[$key] ) ) {
			$warnFunc( $this->errorMsg(
				"Parser test section $key is deprecated"
			) );
		}
	}
}
182 | |
/**
 * Deep-clone the array-valued properties so that a cloned test does not
 * share them with the test it was cloned from.
 */
public function __clone() {
	$this->options = Utils::cloneArray( $this->options );
	$this->config = Utils::cloneArray( $this->config );
	$this->sections = Utils::cloneArray( $this->sections );
	$this->knownFailures = Utils::cloneArray( $this->knownFailures );
	$this->selserChangeTrees = Utils::cloneArray( $this->selserChangeTrees );
	$this->time = Utils::cloneArray( $this->time );

	// changetree is nullable, so it can only be cloned when it is set
	if ( $this->changetree !== null ) {
		$this->changetree = Utils::cloneArray( $this->changetree );
	}
}
193 | |
/**
 * Check whether this test's name matches the given filter.
 *
 * A filter with a non-empty 'regex' entry is matched as a regular
 * expression: '/' . raw . '/' when 'raw' is set, otherwise the 'regex'
 * entry itself. A filter with a non-empty 'string' entry is matched as a
 * substring search for 'raw'. Anything else matches trivially.
 *
 * @param array $testFilter Test Filter as set in TestRunner
 * @return bool if test matches the filter
 */
public function matchesFilter( $testFilter ): bool {
	if ( !$testFilter ) {
		return true; // Trivial match
	}

	// testName is nullable; with strict_types enabled, handing null to
	// preg_match() / strpos() would raise a TypeError, so match against
	// the empty string instead.
	$name = $this->testName ?? '';

	if ( !empty( $testFilter['regex'] ) ) {
		$regex = isset( $testFilter['raw'] ) ?
			( '/' . $testFilter['raw'] . '/' ) :
			$testFilter['regex'];
		return (bool)preg_match( $regex, $name );
	}

	if ( !empty( $testFilter['string'] ) ) {
		return strpos( $name, $testFilter['raw'] ) !== false;
	}

	return true; // Trivial match because of a bad test filter
}
216 | |
/**
 * Return the page name to use for this test, computing it on first use.
 *
 * The name comes from the 'title' option when that option is a string,
 * and is 'Parser test' otherwise.
 *
 * @return string
 */
public function pageName(): string {
	if ( !$this->pageName ) {
		// Validate in a local variable first: $this->pageName is typed
		// ?string, so assigning an array (or, under strict_types, a bool)
		// directly to it would raise a TypeError before any fallback
		// check could run.
		$title = $this->options['title'] ?? 'Parser test';
		$this->pageName = is_string( $title ) ? $title : 'Parser test';
	}

	return $this->pageName;
}
227 | |
/**
 * Given a test runner that runs in a specific set of test modes ($testRunnerModes)
 * compute the list of valid test modes based on what modes have been enabled on the
 * test itself.
 *
 * The result is built with array_intersect(), so it keeps the order (and
 * the keys) of ALL_TEST_MODES.
 *
 * @param array $testRunnerModes What test modes is the test runner running with?
 * @return array
 */
public function computeTestModes( array $testRunnerModes ): array {
	// Valid modes are computed in the order specified in ALL_TEST_MODES, since
	// caching in the presence of test cloning relies on tests running in this order.
	$runnable = array_intersect( self::ALL_TEST_MODES, $testRunnerModes );

	// Without an explicit opt-in list on the test, every runnable mode is valid
	$optedIn = $this->options['parsoid']['modes'] ?? null;
	if ( !$optedIn ) {
		return $runnable;
	}

	// Avoid filtering out the selser test: a test that opted in to wt2wt
	// also gets selser when the runner has selser enabled
	if ( in_array( 'selser', $testRunnerModes, true ) &&
		!in_array( 'selser', $optedIn, true ) &&
		in_array( 'wt2wt', $optedIn, true )
	) {
		$optedIn[] = 'selser';
	}

	return array_intersect( $runnable, $optedIn );
}
258 | |
259 | // Random string used as selser comment content |
260 | public const STATIC_RANDOM_STRING = 'ahseeyooxooZ8Oon0boh'; |
261 | |
/**
 * Apply manually-specified changes, which are provided in a pseudo-jQuery
 * format.
 *
 * The change list is read from $this->options['parsoid']['changes']. Each
 * change is an array [ selector, method, ...args ] and is interpreted like
 * the jQuery call $(selector)[method](...args); that is,
 * [ 'fig', 'attr', 'width', '120' ] is interpreted as
 * $('fig').attr('width', '120').
 * See http://api.jquery.com/ for documentation of these methods.
 * "contents" as second entry switches the targets to the child nodes of
 * the elements matched by the selector, which is a good way to get at the
 * text and comment nodes; the method name and its arguments then follow.
 *
 * Processing stops at the first invalid change. Changes preceding it have
 * already been applied by then. The error is printed in red and thrown.
 *
 * @param Document $doc Document whose body is modified in place
 * @throws Error if a change has fewer than two entries, if its selector
 *   matches no element, or if it names an unsupported mutator
 */
public function applyManualChanges( Document $doc ): void {
	$changes = $this->options['parsoid']['changes'];
	$err = null;

	// Emulations of the supported jQuery mutators, keyed by method name.
	// Each receives the target node followed by the remaining change entries.
	$jquery = [
		'after' => static function ( Node $node, string $html ): void {
			// Table rows and cells cannot be parsed inside a <div>, so
			// the HTML is parsed inside a scratch table in those contexts.
			if ( DOMCompat::nodeName( $node->parentNode ) === 'tbody' ) {
				$tbl = $node->ownerDocument->createElement( 'table' );
				DOMCompat::setInnerHTML( $tbl, $html );
				// <tbody> is implicitly added when inner html is set to <tr>..</tr>
				DOMUtils::migrateChildren( $tbl->firstChild, $node->parentNode, $node->nextSibling );
			} elseif ( DOMCompat::nodeName( $node->parentNode ) === 'tr' ) {
				$tbl = $node->ownerDocument->createElement( 'table' );
				DOMCompat::setInnerHTML( $tbl, '<tbody><tr></tr></tbody>' );
				$tr = $tbl->firstChild->firstChild;
				'@phan-var Element $tr'; // @var Element $tr
				DOMCompat::setInnerHTML( $tr, $html );
				DOMUtils::migrateChildren( $tbl->firstChild->firstChild,
					$node->parentNode, $node->nextSibling );
			} else {
				$div = $node->ownerDocument->createElement( 'div' );
				DOMCompat::setInnerHTML( $div, $html );
				DOMUtils::migrateChildren( $div, $node->parentNode, $node->nextSibling );
			}
		},
		'append' => static function ( Node $node, string $html ): void {
			if ( DOMCompat::nodeName( $node ) === 'tr' ) {
				$tbl = $node->ownerDocument->createElement( 'table' );
				DOMCompat::setInnerHTML( $tbl, $html );
				// <tbody> is implicitly added when inner html is set to <tr>..</tr>
				DOMUtils::migrateChildren( $tbl->firstChild, $node );
			} else {
				$div = $node->ownerDocument->createElement( 'div' );
				DOMCompat::setInnerHTML( $div, $html );
				DOMUtils::migrateChildren( $div, $node );
			}
		},
		'attr' => static function ( Node $node, string $name, string $val ): void {
			'@phan-var Element $node'; // @var Element $node
			$node->setAttribute( $name, $val );
		},
		'before' => static function ( Node $node, string $html ): void {
			// Same table-context handling as 'after', inserting before $node
			if ( DOMCompat::nodeName( $node->parentNode ) === 'tbody' ) {
				$tbl = $node->ownerDocument->createElement( 'table' );
				DOMCompat::setInnerHTML( $tbl, $html );
				// <tbody> is implicitly added when inner html is set to <tr>..</tr>
				DOMUtils::migrateChildren( $tbl->firstChild, $node->parentNode, $node );
			} elseif ( DOMCompat::nodeName( $node->parentNode ) === 'tr' ) {
				$tbl = $node->ownerDocument->createElement( 'table' );
				DOMCompat::setInnerHTML( $tbl, '<tbody><tr></tr></tbody>' );
				$tr = $tbl->firstChild->firstChild;
				'@phan-var Element $tr'; // @var Element $tr
				DOMCompat::setInnerHTML( $tr, $html );
				DOMUtils::migrateChildren( $tbl->firstChild->firstChild, $node->parentNode, $node );
			} else {
				$div = $node->ownerDocument->createElement( 'div' );
				DOMCompat::setInnerHTML( $div, $html );
				DOMUtils::migrateChildren( $div, $node->parentNode, $node );
			}
		},
		'removeAttr' => static function ( Node $node, string $name ): void {
			'@phan-var Element $node'; // @var Element $node
			$node->removeAttribute( $name );
		},
		'removeClass' => static function ( Node $node, string $c ): void {
			'@phan-var Element $node'; // @var Element $node
			DOMCompat::getClassList( $node )->remove( $c );
		},
		'addClass' => static function ( Node $node, string $c ): void {
			'@phan-var Element $node'; // @var Element $node
			DOMCompat::getClassList( $node )->add( $c );
		},
		'text' => static function ( Node $node, string $t ): void {
			$node->textContent = $t;
		},
		'html' => static function ( Node $node, string $h ): void {
			'@phan-var Element $node'; // @var Element $node
			DOMCompat::setInnerHTML( $node, $h );
		},
		'remove' => static function ( Node $node, ?string $optSelector = null ): void {
			// jquery lets us specify an optional selector to further
			// restrict the removed elements.
			// text nodes don't have the "querySelectorAll" method, so
			// just include them by default (jquery excludes them, which
			// is less useful)
			if ( !$optSelector ) {
				$what = [ $node ];
			} elseif ( !( $node instanceof Element ) ) {
				$what = [ $node ];/* text node hack! */
			} else {
				'@phan-var Element $node'; // @var Element $node
				$what = DOMCompat::querySelectorAll( $node, $optSelector );
			}
			foreach ( $what as $victim ) {
				if ( $victim->parentNode ) {
					$victim->parentNode->removeChild( $victim );
				}
			}
		},
		'empty' => static function ( Node $node ): void {
			'@phan-var Element $node'; // @var Element $node
			DOMCompat::replaceChildren( $node );
		},
		'wrap' => static function ( Node $node, string $w ): void {
			$frag = $node->ownerDocument->createElement( 'div' );
			DOMCompat::setInnerHTML( $frag, $w );
			$first = $frag->firstChild;
			$node->parentNode->replaceChild( $first, $node );
			// Descend to the innermost first child of the wrapper and
			// re-attach the wrapped node there
			while ( $first->firstChild ) {
				$first = $first->firstChild;
			}
			$first->appendChild( $node );
		}
	];

	$body = DOMCompat::getBody( $doc );

	foreach ( $changes as $change ) {
		if ( count( $change ) < 2 ) {
			// $change is an array: concatenating it directly would yield
			// the literal "Array" (plus a conversion warning), so it is
			// serialized to make the message useful.
			$err = new Error( 'bad change: ' . json_encode( $change ) );
			break;
		}
		// use document.querySelectorAll as a poor man's $(...)
		$els = PHPUtils::iterable_to_array(
			DOMCompat::querySelectorAll( $body, $change[0] )
		);
		if ( !count( $els ) ) {
			$err = new Error( $change[0] .
				' did not match any elements: ' . DOMCompat::getOuterHTML( $body ) );
			break;
		}
		if ( $change[1] === 'contents' ) {
			// Drop the selector so that the entry following 'contents'
			// becomes the method, and retarget to the matched elements'
			// child nodes.
			$change = array_slice( $change, 1 );
			$acc = [];
			foreach ( $els as $el ) {
				PHPUtils::pushArray( $acc, DOMUtils::childNodes( $el ) );
			}
			$els = $acc;
		}
		// The method entry is missing when 'contents' was the last entry
		$method = $change[1] ?? '';
		$fn = $jquery[$method] ?? null;
		if ( !$fn ) {
			$err = new Error( 'bad mutator function: ' . $method );
			break;
		}
		foreach ( $els as $el ) {
			$fn( $el, ...array_slice( $change, 2 ) );
		}
	}

	if ( $err ) {
		print TestUtils::colorString( (string)$err, "red" ) . "\n";
		throw $err;
	}
}
430 | |
/**
 * Make changes to a DOM in order to run a selser test on it.
 *
 * Change codes found in the tree: 0 leaves the node alone, 1 sets a random
 * 'data-foobar' attribute on it, 2 inserts a new node before it, 3 removes
 * it, and 4 inserts a new node before it and then removes it. A nested
 * array is applied to the node's children. The tree [ 5 ] is special-cased
 * and only appends a marker comment to the body.
 *
 * @param array $dumpOpts Dump options; the 'logger' and 'dom:post-changes'
 *   entries are consulted
 * @param Document $doc Document whose body is edited in place
 * @param array $changelist Change tree to apply; also kept in $this->changetree
 */
public function applyChanges( array $dumpOpts, Document $doc, array $changelist ): void {
	$logger = $dumpOpts['logger'] ?? null;
	$dumpDom = $logger && ( $dumpOpts['dom:post-changes'] ?? false );

	// The random-number generator is seeded from the changelist and the test name
	$alea = new Alea( ( json_encode( $changelist ) ) . ( $this->testName ?? '' ) );

	// The changes are kept in the test object
	// to check for duplicates while building tasks
	$this->changetree = $changelist;

	// Produces a random base-36 string
	$randomString = static function () use ( $alea ): string {
		return base_convert( (string)$alea->uint32(), 10, 36 );
	};

	// Inserts a freshly created node before $n. A comment is used in
	// fosterable positions, otherwise a text node, wrapped in an element
	// where a bare text node would be unsafe. In either case, dom-diff
	// should register a new node.
	$insertNewNode = static function ( Node $n ) use ( $randomString ): void {
		$str = $randomString();
		$ownerDoc = $n->ownerDocument;

		// Don't separate legacy IDs from their H? node.
		if ( WTUtils::isFallbackIdSpan( $n ) ) {
			$n = $n->nextSibling ?? $n->parentNode;
		}

		// For some container nodes, it would be buggy
		// to insert text nodes as children
		$parentName = DOMCompat::nodeName( $n->parentNode );
		$wrapperName = null;
		if ( $parentName === 'ol' || $parentName === 'ul' ) {
			$wrapperName = 'li';
		} elseif ( $parentName === 'dl' ) {
			$wrapperName = 'dd';
		} elseif ( $parentName === 'tr' ) {
			// Mirror a neighbouring cell (TH or TD), defaulting to TD
			$cell = DOMCompat::getPreviousElementSibling( $n ) ??
				DOMCompat::getNextElementSibling( $n );
			$wrapperName = $cell ? DOMCompat::nodeName( $cell ) : 'td';
		} elseif ( $parentName === 'body' ) {
			$wrapperName = 'p';
		} elseif ( DOMUtils::isWikitextBlockNode( $n ) ) {
			// A bare text node next to a block is not safe, so the text
			// node placed beside it gets wrapped.
			$wrapperName = 'p';
		}

		if ( DOMUtils::isFosterablePosition( $n ) && $parentName !== 'tr' ) {
			$newNode = $ownerDoc->createComment( $str );
		} elseif ( $wrapperName ) {
			$newNode = $ownerDoc->createElement( $wrapperName );
			$newNode->appendChild( $ownerDoc->createTextNode( $str ) );
		} else {
			$newNode = $ownerDoc->createTextNode( $str );
		}

		$n->parentNode->insertBefore( $newNode, $n );
	};

	// Walks the change tree in parallel with the children of $node
	$applyChangesInternal = static function ( Node $node, array $changes ) use (
		&$applyChangesInternal, $insertNewNode, $randomString, $logger
	): void {
		// Children are collected up front, so indexes are unaffected by
		// the insertions and removals performed below
		$children = DOMUtils::childNodes( $node );
		if ( count( $children ) < count( $changes ) ) {
			throw new Error( "Error: more changes than nodes to apply them to!" );
		}

		foreach ( $changes as $idx => $change ) {
			$child = $children[$idx];

			if ( is_array( $change ) ) {
				$applyChangesInternal( $child, $change );
				continue;
			}

			switch ( $change ) {
				// No change
				case 0:
					break;

				// Change node wrapper
				// (sufficient to insert a random attr)
				case 1:
					if ( $child instanceof Element ) {
						$child->setAttribute( 'data-foobar', $randomString() );
					} elseif ( $logger ) {
						$logger->log(
							LogLevel::ERROR,
							'Buggy changetree. changetype 1 (modify attribute)' .
							' cannot be applied on text/comment nodes.'
						);
					}
					break;

				// Insert new node before child
				case 2:
					$insertNewNode( $child );
					break;

				// Delete tree rooted at child
				case 3:
					$child->parentNode->removeChild( $child );
					break;

				// Change tree rooted at child
				case 4:
					$insertNewNode( $child );
					$child->parentNode->removeChild( $child );
					break;
			}
		}
	};

	$body = DOMCompat::getBody( $doc );

	if ( $dumpDom ) {
		$logger->log( LogLevel::ERROR, "----- Original DOM -----" );
		$logger->log( LogLevel::ERROR, ContentUtils::dumpDOM( $body, '', [ 'quiet' => true ] ) );
	}

	if ( $this->changetree === [ 5 ] ) {
		// Hack so that we can work on the parent node rather than just the
		// children: Append a comment with known content. This is later
		// stripped from the output, and the result is compared to the
		// original wikitext rather than the non-selser wt2wt result.
		$body->appendChild( $doc->createComment( self::STATIC_RANDOM_STRING ) );
	} elseif ( $this->changetree !== [] ) {
		$applyChangesInternal( $body, $this->changetree );
	}

	if ( $dumpDom ) {
		$logger->log( LogLevel::ERROR, "----- Change Tree -----" );
		$logger->log( LogLevel::ERROR, json_encode( $this->changetree ) );
		$logger->log( LogLevel::ERROR, "----- Edited DOM -----" );
		$logger->log( LogLevel::ERROR, ContentUtils::dumpDOM( $body, '', [ 'quiet' => true ] ) );
	}
}
594 | |
/**
 * For a selser test, check if a change we could make has already been
 * tested in this round, i.e. whether it equals one of the trees recorded
 * in $this->selserChangeTrees.
 * Used for generating unique tests.
 *
 * @param array $change Candidate change.
 * @return bool
 */
public function isDuplicateChangeTree( array $change ): bool {
	// The comparison is intentionally loose (==), so the third argument
	// must stay false.
	return in_array( $change, $this->selserChangeTrees, false );
}
612 | |
/**
 * Generate a change object for a document, so we can apply it during a selser test.
 *
 * Up to 1000 attempts are made to produce a change tree that is non-empty
 * and not already recorded in $this->selserChangeTrees. If none of them
 * succeeds, $this->duplicateChange is set to true and the result of the
 * last attempt is returned.
 *
 * @param Document $doc
 * @return array The list of changes.
 */
public function generateChanges( Document $doc ): array {
	// Seed the random-number generator based on the seed and the test name
	$alea = new Alea( ( $this->seed ?? '' ) . ( $this->testName ?? '' ) );

	/**
	 * If no node in the DOM subtree rooted at 'node' is editable in the VE,
	 * this function should return false.
	 *
	 * Currently true for template and extension content, and for entities.
	 */
	$domSubtreeIsEditable = static function ( Node $node ): bool {
		return !( $node instanceof Element ) ||
			( !WTUtils::isEncapsulationWrapper( $node ) &&
				// These wrappers can only be edited in restricted ways.
				// Simpler to just block all editing on them.
				!DOMUtils::matchTypeOf( $node,
					'#^mw:(Entity|Placeholder|DisplaySpace|Annotation|ExtendedAnnRange)(/|$)#'
				) &&
				// Deleting these wrappers is tantamount to removing the
				// references-tag encapsulation wrappers, which results in errors.
				!DOMUtils::hasClass( $node, 'mw-references-wrap' )
			);
	};

	/**
	 * Even if a DOM subtree might be editable in the VE,
	 * certain nodes in the DOM might not be directly editable.
	 *
	 * Currently, this restriction is only applied to DOMs generated for images.
	 * Possibly, there are other candidates.
	 */
	$nodeIsUneditable = static function ( Node $node ) use ( &$nodeIsUneditable ): bool {
		// Text and comment nodes are always editable
		if ( !( $node instanceof Element ) ) {
			return false;
		}

		if ( WTUtils::isMarkerAnnotation( $node ) ) {
			return true;
		}

		// - File wrapper is an uneditable elt.
		// - Any node nested in a file wrapper that is not a figcaption
		//   is an uneditable elt.
		// - Entity spans are uneditable as well
		// - Placeholder is defined to be uneditable in the spec
		// - ExtendedAnnRange is an "unknown" type in the spec, and hence uneditable
		return DOMUtils::matchTypeOf( $node,
			'#^mw:(File|Entity|Placeholder|DisplaySpace|ExtendedAnnRange)(/|$)#' ) || (
			DOMCompat::nodeName( $node ) !== 'figcaption' &&
			$node->parentNode &&
			DOMCompat::nodeName( $node->parentNode ) !== 'body' &&
			$nodeIsUneditable( $node->parentNode )
		);
	};

	$defaultChangeType = 0;

	// Reports whether a (possibly nested) change list holds anything other
	// than the default change type
	$hasChangeMarkers = static function ( array $list ) use (
		&$hasChangeMarkers, $defaultChangeType
	): bool {
		// If all recorded changes are 0, then nothing has been modified
		foreach ( $list as $c ) {
			if ( ( is_array( $c ) && $hasChangeMarkers( $c ) ) ||
				( !is_array( $c ) && $c !== $defaultChangeType )
			) {
				return true;
			}
		}
		return false;
	};

	// Builds the change list for the children of $node; returns [] when
	// that list would contain no actual change
	$genChangesInternal = static function ( Node $node ) use (
		&$genChangesInternal, $hasChangeMarkers,
		$domSubtreeIsEditable, $nodeIsUneditable, $alea,
		$defaultChangeType
	): array {
		$changelist = [];
		foreach ( DOMUtils::childNodes( $node ) as $child ) {
			$changeType = $defaultChangeType;
			if ( $domSubtreeIsEditable( $child ) ) {
				if ( $nodeIsUneditable( $child ) || $alea->random() < 0.5 ) {
					// This call to random is a hack to preserve the current
					// determined state of our knownFailures entries after a
					// refactor.
					$alea->uint32();
					$changeType = $genChangesInternal( $child );
					// `$genChangesInternal` returns an array, which can be
					// empty. Revert to the `$defaultChangeType` if that's
					// the case.
					if ( count( $changeType ) === 0 ) {
						$changeType = $defaultChangeType;
					}
				} else {
					// NOTE: floor() returns a float, so these change types
					// are floats (2.0, 3.0, ...). Left untouched on purpose,
					// since converting them would alter how the tree is
					// serialized by json_encode().
					if ( !( $child instanceof Element ) ) {
						// Text or comment node -- valid changes: 2, 3, 4
						// since we cannot set attributes on these
						$changeType = floor( $alea->random() * 3 ) + 2;
					} else {
						$changeType = floor( $alea->random() * 4 ) + 1;
					}
				}
			}

			$changelist[] = $changeType;
		}

		return $hasChangeMarkers( $changelist ) ? $changelist : [];
	};

	$body = DOMCompat::getBody( $doc );

	$maxAttempts = 1000;
	$changetree = [];
	$usable = false;
	// Success is tracked explicitly rather than inferred from the attempt
	// counter, so that a usable tree produced by the very last attempt is
	// not reported as a failure.
	for ( $numAttempts = 0; $numAttempts < $maxAttempts && !$usable; $numAttempts++ ) {
		$changetree = $genChangesInternal( $body );
		$usable = count( $changetree ) !== 0 &&
			!$this->isDuplicateChangeTree( $changetree );
	}

	if ( !$usable ) {
		// couldn't generate a change ... marking as such
		$this->duplicateChange = true;
	}

	return $changetree;
}
749 | |
/**
 * Run this test in each of the given target modes by invoking $runTest.
 *
 * FIXME: clean up this mess!
 * - generate all changes at once (generateChanges should return a tree really)
 *   rather than going to all these lengths of interleaving change
 *   generation with tests
 * - set up the changes in item directly rather than juggling around with
 *   indexes etc
 * - indicate whether to compare to wt2wt or the original input
 * - maybe make a full selser test one method that uses others rather than the
 *   current chain of methods that sometimes do something for selser
 *
 * @param array $targetModes
 * @param array $runnerOpts
 * @param callable $runTest Invoked as $runTest( $test, $mode, $runnerOpts )
 */
public function testAllModes( // phpcs:ignore MediaWiki.Commenting.MissingCovers.MissingCovers
	array $targetModes, array $runnerOpts, callable $runTest
): void {
	if ( !$this->testName ) {
		throw new Error( 'Missing title from test case.' );
	}
	$selserNoAuto = ( ( $runnerOpts['selser'] ?? false ) === 'noauto' );

	foreach ( $targetModes as $targetMode ) {
		$isSelser = ( $targetMode === 'selser' );

		if ( $isSelser && $selserNoAuto ) {
			// Manual changes were requested on the command line,
			// check that the item does have them.
			if ( isset( $this->options['parsoid']['changes'] ) ) {
				$this->changetree = [ 'manual' ];
				$runTest( $this, 'selser', $runnerOpts );
			}
			continue;
		}

		if ( $isSelser && !isset( $runnerOpts['changetree'] ) ) {
			// Run selser tests in the following order:
			// 1. Manual changes (if provided)
			// 2. changetree 5 (oracle exists for verifying output)
			// 3. All other change trees (no oracle exists for verifying output)

			if ( isset( $this->options['parsoid']['changes'] ) ) {
				// Mutating the item here is necessary to output 'manual' in
				// the test's title and to differentiate it for knownFailures.
				$this->changetree = [ 'manual' ];
				$runTest( $this, 'selser', $runnerOpts );
			}

			// Skip the rest if the test doesn't want changetrees
			if ( ( $this->options['parsoid']['selser'] ?? '' ) === 'noauto' ) {
				continue;
			}

			// Changetree 5 (append a comment to the root node)
			$this->changetree = [ 5 ];
			$runTest( $this, 'selser', $runnerOpts );

			// Automatically generated changed trees
			$this->selserChangeTrees = [];
			for ( $j = 0; $j < $runnerOpts['numchanges']; $j++ ) {
				// Set changetree to null to ensure we don't assume [ 5 ] in $runTest
				$this->changetree = null;
				$this->seed = (string)$j;
				$runTest( $this, 'selser', $runnerOpts );
				if ( $this->isDuplicateChangeTree( $this->changetree ) ) {
					// Once we get a duplicate change tree, we can no longer
					// generate and run new tests. So, be done now!
					break;
				}
				$this->selserChangeTrees[$j] = $this->changetree;
			}
			continue;
		}

		// Remaining cases: any non-selser mode, or selser with an
		// explicit changetree supplied in the runner options
		if ( $targetMode === 'wt2html' && isset( $this->sections['html/parsoid+langconv'] ) ) {
			// Since we are clobbering options and parsoidHtml, clone the test object
			$langconvTest = clone $this;
			$langconvTest->options['langconv'] = true;
			$langconvTest->parsoidHtml = $this->sections['html/parsoid+langconv'];
			$runTest( $langconvTest, $targetMode, $runnerOpts );
			if ( $this->parsoidHtml === null ) {
				// Don't run the same test in non-langconv mode
				// unless we have a non-langconv section
				continue;
			}
		}

		Assert::invariant(
			$targetMode !== 'selser' ||
			( isset( $runnerOpts['changetree'] ) && !$selserNoAuto ),
			"Unexpected target mode $targetMode" );

		$runTest( $this, $targetMode, $runnerOpts );
	}
}
845 | |
/**
 * Normalize expected and actual HTML to suppress irrelevant differences.
 * The normalization is determined by the HTML sections present in the test
 * as well as other Parsoid-specific test options.
 *
 * When $normExpected is null, the expected HTML is normalized here and the
 * result is also stored in $this->cachedNormalizedHTML.
 *
 * @param Element|string $actual
 * @param ?string $normExpected Already-normalized expected HTML, or null
 *   to have it computed from the test's sections
 * @param bool $standalone Selects the 'html/parsoid+standalone' section
 *   when true and the 'html/parsoid+integrated' section when false
 *
 * @return list{string, string}
 */
public function normalizeHTML( $actual, ?string $normExpected, bool $standalone = true ): array {
	$opts = $this->options;

	// Only the section matching the current mode is ever considered
	$variantKey = $standalone ? 'html/parsoid+standalone' : 'html/parsoid+integrated';
	$haveVariantHTML = isset( $this->sections[$variantKey] );

	$parsoidOnly = isset( $this->sections['html/parsoid'] ) ||
		$haveVariantHTML ||
		isset( $this->sections['html/parsoid+langconv'] ) ||
		( isset( $opts['parsoid'] ) && !isset( $opts['parsoid']['normalizePhp'] ) );

	$externalLinkTarget = ( $opts['externallinktarget'] ?? false ) ||
		isset( $this->config['wgExternalLinkTarget'] ) ||
		isset( $this->config['wgNoFollowLinks'] ) ||
		isset( $this->config['wgNoFollowDomainExceptions'] );

	$normOpts = [
		'parsoidOnly' => $parsoidOnly,
		'preserveIEW' => isset( $opts['parsoid']['preserveIEW'] ),
		'externallinktarget' => $externalLinkTarget,
	];

	if ( $normExpected === null ) {
		// The mode-specific section wins over the generic Parsoid HTML
		$expectedSource = $haveVariantHTML ?
			$this->sections[$variantKey] :
			$this->parsoidHtml;
		$normExpected = $parsoidOnly ?
			TestUtils::normalizeOut( $expectedSource, $normOpts ) :
			TestUtils::normalizeHTML( $expectedSource );
		$this->cachedNormalizedHTML = $normExpected;
	}

	$normActual = TestUtils::normalizeOut( $actual, $normOpts );

	return [ $normActual, $normExpected ];
}
894 | |
/**
 * Normalize "known failure" output.
 *
 * Kept deliberately light: the known failure file exists to catch changes
 * in output, even if we don't know what "correct" is. The 'about'
 * numbering is still normalized, since that is not guaranteed consistent
 * from run to run.
 *
 * @param string $out Raw output
 * @return string Output as returned by TestUtils::normalizeAbout()
 */
public function normalizeKnownFailure( string $out ): string {
	$normalized = TestUtils::normalizeAbout( $out );

	return $normalized;
}
906 | |
/**
 * Normalize expected and actual wikitext to suppress irrelevant differences.
 *
 * Because of selser as well as manual edit trees, expected wikitext isn't always
 * found in the same section for all tests ending in WT (unlike normalizeHTML).
 * Hence,
 * (a) this code has a different structure than normalizeHTML
 * (b) we cannot cache normalized wikitext
 *
 * @param string $actual
 * @param string $expected
 * @param bool $standalone Currently unused
 *
 * @return list{string, string} Normalized actual and expected wikitext, in that order
 */
public function normalizeWT( string $actual, string $expected, bool $standalone = true ): array {
	// Stripping trailing newlines is the only normalization at this time
	return array_map(
		static function ( string $wt ): string {
			return rtrim( $wt, "\n" );
		},
		[ $actual, $expected ]
	);
}
929 | } |