Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 402 |
|
0.00% |
0 / 12 |
CRAP | |
0.00% |
0 / 1 |
Test | |
0.00% |
0 / 402 |
|
0.00% |
0 / 12 |
19182 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 24 |
|
0.00% |
0 / 1 |
110 | |||
matchesFilter | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
30 | |||
pageName | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
computeTestModes | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
30 | |||
applyManualChanges | |
0.00% |
0 / 117 |
|
0.00% |
0 / 1 |
420 | |||
applyChanges | |
0.00% |
0 / 96 |
|
0.00% |
0 / 1 |
992 | |||
isDuplicateChangeTree | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
generateChanges | |
0.00% |
0 / 65 |
|
0.00% |
0 / 1 |
702 | |||
testAllModes | |
0.00% |
0 / 38 |
|
0.00% |
0 / 1 |
342 | |||
normalizeHTML | |
0.00% |
0 / 28 |
|
0.00% |
0 / 1 |
240 | |||
normalizeKnownFailure | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
normalizeWT | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\ParserTests; |
5 | |
6 | use Error; |
7 | use Psr\Log\LogLevel; |
8 | use Wikimedia\Alea\Alea; |
9 | use Wikimedia\Assert\Assert; |
10 | use Wikimedia\Parsoid\DOM\Document; |
11 | use Wikimedia\Parsoid\DOM\Element; |
12 | use Wikimedia\Parsoid\DOM\Node; |
13 | use Wikimedia\Parsoid\Utils\ContentUtils; |
14 | use Wikimedia\Parsoid\Utils\DOMCompat; |
15 | use Wikimedia\Parsoid\Utils\DOMUtils; |
16 | use Wikimedia\Parsoid\Utils\PHPUtils; |
17 | use Wikimedia\Parsoid\Utils\Utils; |
18 | use Wikimedia\Parsoid\Utils\WTUtils; |
19 | |
20 | /** |
21 | * Represents a parser test |
22 | */ |
23 | class Test extends Item { |
24 | |
25 | // 'testAllModes' and 'TestRunner::runTest' assume that test modes are added |
26 | // in this order for caching to work properly (and even when test objects are cloned). |
27 | // This ordering is enforced in computeTestModes. |
28 | public const ALL_TEST_MODES = [ 'wt2html', 'wt2wt', 'html2html', 'html2wt', 'selser' ]; |
29 | |
30 | /* --- These are test properties from the test file --- */ |
31 | |
32 | /** @var ?string This is the test name, not page title for the test */ |
33 | public $testName = null; |
34 | |
35 | /** @var array */ |
36 | public $options = []; |
37 | |
38 | /** @var array */ |
39 | public $config = []; |
40 | |
41 | /** @var array */ |
42 | public $sections = []; |
43 | |
44 | /** @var array Known failures for this test, indexed by testing mode. */ |
45 | public $knownFailures = []; |
46 | |
47 | /* --- These next are computed based on an ordered list of preferred |
48 | * section keys --- */ |
49 | |
50 | /** @var ?string */ |
51 | public $wikitext = null; |
52 | |
53 | /** @var ?string */ |
54 | public $parsoidHtml = null; |
55 | |
56 | /** @var ?string */ |
57 | public $legacyHtml = null; |
58 | |
59 | /* --- The rest below are computed by Parsoid while running tests -- */ |
60 | |
61 | /** @var string */ |
62 | private $pageName; |
63 | |
64 | /** @var int */ |
65 | private $pageNs; |
66 | |
67 | /** @var array */ |
68 | public $selserChangeTrees = []; |
69 | |
70 | /** @var ?array */ |
71 | public $changetree = null; |
72 | |
73 | /** @var bool */ |
74 | public $duplicateChange = false; |
75 | |
76 | /** @var ?string */ |
77 | public $seed = null; |
78 | |
79 | /** @var ?string */ |
80 | public $resultWT = null; |
81 | |
82 | /** @var ?bool */ |
83 | public $wt2wtPassed = null; |
84 | |
85 | /** @var ?string */ |
86 | public $wt2wtResult = null; |
87 | |
88 | /** @var ?string */ |
89 | public $selser = null; |
90 | |
91 | /** @var ?string */ |
92 | public $changedHTMLStr = null; |
93 | |
94 | /** @var ?string */ |
95 | public $cachedBODYstr = null; |
96 | |
97 | /** @var ?string */ |
98 | public $cachedWTstr = null; |
99 | |
100 | /** @var ?string */ |
101 | public $cachedNormalizedHTML = null; |
102 | |
103 | /** @var array */ |
104 | public $time = []; |
105 | |
106 | private const DIRECT_KEYS = [ |
107 | 'type', |
108 | 'filename', |
109 | 'lineNumStart', |
110 | 'lineNumEnd', |
111 | 'testName', |
112 | 'options', |
113 | 'config', |
114 | ]; |
115 | private const WIKITEXT_KEYS = [ |
116 | 'wikitext', |
117 | # deprecated |
118 | 'input', |
119 | ]; |
120 | private const LEGACY_HTML_KEYS = [ |
121 | 'html/php', 'html/*', 'html', |
122 | # deprecated |
123 | 'result', |
124 | 'html/php+tidy', |
125 | 'html/*+tidy', |
126 | 'html+tidy', |
127 | ]; |
128 | private const PARSOID_HTML_KEYS = [ |
129 | 'html/parsoid', 'html/*', 'html', |
130 | # deprecated |
131 | 'result', |
132 | 'html/*+tidy', |
133 | 'html+tidy', |
134 | ]; |
135 | private const WARN_DEPRECATED_KEYS = [ |
136 | 'input', |
137 | 'result', |
138 | 'html/php+tidy', |
139 | 'html/*+tidy', |
140 | 'html+tidy', |
141 | 'html/php+untidy', |
142 | 'html+untidy', |
143 | ]; |
144 | |
/**
 * Build a test from the parsed properties of one test-file entry.
 *
 * Properties whose key appears in DIRECT_KEYS are copied onto the
 * same-named object property; every other property is recorded as a
 * test section. The wikitext, legacyHtml and parsoidHtml properties are
 * then filled from the first section found, walking the matching
 * *_KEYS constant in priority order.
 *
 * @param array $testProperties key-value mapping of properties
 * @param array $knownFailures Known failures for this test, indexed by testing mode
 * @param ?string $comment Optional comment describing the test
 * @param ?callable $warnFunc Optional callback used to emit
 *   deprecation warnings.
 */
public function __construct(
	array $testProperties,
	array $knownFailures = [],
	?string $comment = null,
	?callable $warnFunc = null
) {
	parent::__construct( $testProperties, $comment );
	$this->knownFailures = $knownFailures;

	// Split the raw properties into direct properties and sections.
	foreach ( $testProperties as $name => $content ) {
		if ( in_array( $name, self::DIRECT_KEYS, true ) ) {
			$this->$name = $content;
			continue;
		}
		if ( isset( $this->sections[$name] ) ) {
			$this->error( "Duplicate test section", $name );
		}
		$this->sections[$name] = $content;
	}

	// Priority order for wikitext, legacyHtml, and parsoidHtml properties:
	// the first section present (and non-null) wins.
	$priorities = [
		'wikitext' => self::WIKITEXT_KEYS,
		'legacyHtml' => self::LEGACY_HTML_KEYS,
		'parsoidHtml' => self::PARSOID_HTML_KEYS,
	];
	foreach ( $priorities as $property => $candidates ) {
		foreach ( $candidates as $sectionName ) {
			if ( isset( $this->sections[$sectionName] ) ) {
				$this->$property = $this->sections[$sectionName];
				break;
			}
		}
	}

	// Deprecation warnings are only produced when a callback was supplied.
	if ( !$warnFunc ) {
		return;
	}
	foreach ( self::WARN_DEPRECATED_KEYS as $deprecated ) {
		if ( isset( $this->sections[$deprecated] ) ) {
			$warnFunc( $this->errorMsg(
				"Parser test section $deprecated is deprecated"
			) );
		}
	}
}
198 | |
/**
 * Check whether this test's name is selected by a test filter.
 *
 * The filter is an array in which 'raw' holds the pattern text and a
 * non-empty 'regex' or 'string' entry selects how it is applied:
 *  - 'regex': the name is matched against '/' . raw . '/' (or against
 *    the 'regex' entry itself when 'raw' is not set);
 *  - 'string': the name must contain 'raw' as a substring.
 * An empty filter, or one that selects neither mode, matches everything.
 *
 * @param array $testFilter Test Filter as set in TestRunner
 * @return bool if test matches the filter
 */
public function matchesFilter( $testFilter ): bool {
	if ( !$testFilter ) {
		return true; // Trivial match
	}

	// testName is declared ?string. This file uses strict_types, so
	// handing null to preg_match()/strpos() would throw a TypeError;
	// treat a missing name as the empty string instead.
	$name = $this->testName ?? '';

	if ( !empty( $testFilter['regex'] ) ) {
		$regex = isset( $testFilter['raw'] ) ?
			( '/' . $testFilter['raw'] . '/' ) :
			$testFilter['regex'];
		return (bool)preg_match( $regex, $name );
	}

	// A string filter without a 'raw' needle is a bad filter; let it
	// fall through to the trivial match below rather than crash.
	if ( !empty( $testFilter['string'] ) && isset( $testFilter['raw'] ) ) {
		return strpos( $name, $testFilter['raw'] ) !== false;
	}

	return true; // Trivial match because of a bad test filter
}
221 | |
/**
 * Name of the page this test is run as.
 *
 * Taken from the 'title' test option; falls back to 'Parser test' when
 * that option is absent or is an array. The value is stored in
 * $this->pageName and reused on later calls while it is truthy.
 *
 * @return string
 */
public function pageName(): string {
	if ( !$this->pageName ) {
		$title = $this->options['title'] ?? 'Parser test';
		$this->pageName = is_array( $title ) ? 'Parser test' : $title;
	}

	return $this->pageName;
}
232 | |
/**
 * Work out which test modes should actually be run for this test.
 *
 * Starts from the modes the test runner is running with
 * ($testRunnerModes) and, when the test declares its own list in the
 * 'parsoid' => 'modes' option, narrows the result to that list.
 *
 * @param array $testRunnerModes What test modes is the test runner running with?
 * @return array Modes in ALL_TEST_MODES order; keys are those of
 *   ALL_TEST_MODES (array_intersect() preserves keys), so they are not
 *   necessarily consecutive.
 */
public function computeTestModes( array $testRunnerModes ): array {
	// ALL_TEST_MODES goes first so that the result keeps the order
	// specified there: caching in the presence of test cloning relies
	// on tests running in that order.
	$runnable = array_intersect( self::ALL_TEST_MODES, $testRunnerModes );

	// Modes the test itself has opted in for, if it lists any.
	$optedIn = $this->options['parsoid']['modes'] ?? null;
	if ( !$optedIn ) {
		return $runnable;
	}

	// Avoid filtering out the selser test: a test that opts in to
	// wt2wt without naming selser still gets selser when the runner
	// has it enabled.
	$runnerHasSelser = in_array( 'selser', $testRunnerModes, true );
	if ( $runnerHasSelser &&
		!in_array( 'selser', $optedIn, true ) &&
		in_array( 'wt2wt', $optedIn, true )
	) {
		$optedIn[] = 'selser';
	}

	return array_intersect( $runnable, $optedIn );
}
263 | |
264 | // Random string used as selser comment content |
265 | public const STATIC_RANDOM_STRING = 'ahseeyooxooZ8Oon0boh'; |
266 | |
/**
 * Apply manually-specified changes, which are provided in a pseudo-jQuery
 * format.
 *
 * The changes are read from the 'parsoid' => 'changes' test option. Each
 * change is a list [ selector, method, args... ] that is interpreted as
 * $(selector)[method](args...); for example
 * [ 'fig', 'attr', 'width', '120' ] means $('fig').attr('width', '120').
 * See http://api.jquery.com/ for documentation of these methods.
 * Using "contents" as the second entry first replaces the selected
 * elements by their child nodes (like jQuery's .contents()), which is a
 * good way to get at text and comment nodes; the method name and its
 * arguments then follow.
 *
 * Processing stops at the first invalid change: the error is printed in
 * red and then thrown.
 *
 * @param Document $doc Document whose body is modified in place
 * @throws Error if a change is malformed, its selector matches nothing,
 *   or it names an unknown mutator
 */
public function applyManualChanges( Document $doc ) {
	$changes = $this->options['parsoid']['changes'];

	// Shared implementation of 'after' and 'before': parse $html and
	// move the resulting nodes into $node's parent in front of $ref
	// (null appends at the end). Table rows and cells have to be parsed
	// inside a table context, hence the scratch <table> elements.
	$insertBeside = static function ( Node $node, string $html, ?Node $ref ): void {
		$parent = $node->parentNode;
		$parentName = DOMCompat::nodeName( $parent );
		$ownerDoc = $node->ownerDocument;
		if ( $parentName === 'tbody' ) {
			$tbl = $ownerDoc->createElement( 'table' );
			DOMCompat::setInnerHTML( $tbl, $html );
			// <tbody> is implicitly added when inner html is set to <tr>..</tr>
			DOMUtils::migrateChildren( $tbl->firstChild, $parent, $ref );
		} elseif ( $parentName === 'tr' ) {
			$tbl = $ownerDoc->createElement( 'table' );
			DOMCompat::setInnerHTML( $tbl, '<tbody><tr></tr></tbody>' );
			$tr = $tbl->firstChild->firstChild;
			'@phan-var Element $tr'; // @var Element $tr
			DOMCompat::setInnerHTML( $tr, $html );
			DOMUtils::migrateChildren( $tr, $parent, $ref );
		} else {
			$div = $ownerDoc->createElement( 'div' );
			DOMCompat::setInnerHTML( $div, $html );
			DOMUtils::migrateChildren( $div, $parent, $ref );
		}
	};

	// Supported mutators, keyed by their jQuery method name.
	$jquery = [
		'after' => static function ( Node $node, string $html ) use ( $insertBeside ) {
			$insertBeside( $node, $html, $node->nextSibling );
		},
		'append' => static function ( Node $node, string $html ) {
			if ( DOMCompat::nodeName( $node ) === 'tr' ) {
				$tbl = $node->ownerDocument->createElement( 'table' );
				DOMCompat::setInnerHTML( $tbl, $html );
				// <tbody> is implicitly added when inner html is set to <tr>..</tr>
				DOMUtils::migrateChildren( $tbl->firstChild, $node );
			} else {
				$div = $node->ownerDocument->createElement( 'div' );
				DOMCompat::setInnerHTML( $div, $html );
				DOMUtils::migrateChildren( $div, $node );
			}
		},
		'attr' => static function ( Node $node, string $name, string $val ) {
			'@phan-var Element $node'; // @var Element $node
			$node->setAttribute( $name, $val );
		},
		'before' => static function ( Node $node, string $html ) use ( $insertBeside ) {
			$insertBeside( $node, $html, $node );
		},
		'removeAttr' => static function ( Node $node, string $name ) {
			'@phan-var Element $node'; // @var Element $node
			$node->removeAttribute( $name );
		},
		'removeClass' => static function ( Node $node, string $c ) {
			'@phan-var Element $node'; // @var Element $node
			DOMCompat::getClassList( $node )->remove( $c );
		},
		'addClass' => static function ( Node $node, string $c ) {
			'@phan-var Element $node'; // @var Element $node
			DOMCompat::getClassList( $node )->add( $c );
		},
		'text' => static function ( Node $node, string $t ) {
			$node->textContent = $t;
		},
		'html' => static function ( Node $node, string $h ) {
			'@phan-var Element $node'; // @var Element $node
			DOMCompat::setInnerHTML( $node, $h );
		},
		'remove' => static function ( Node $node, ?string $optSelector = null ) {
			// jquery lets us specify an optional selector to further
			// restrict the removed elements.
			// text nodes don't have the "querySelectorAll" method, so
			// just include them by default (jquery excludes them, which
			// is less useful)
			if ( !$optSelector || !( $node instanceof Element ) ) {
				$victims = [ $node ];
			} else {
				$victims = DOMCompat::querySelectorAll( $node, $optSelector );
			}
			foreach ( $victims as $victim ) {
				if ( $victim->parentNode ) {
					$victim->parentNode->removeChild( $victim );
				}
			}
		},
		'empty' => static function ( Node $node ) {
			'@phan-var Element $node'; // @var Element $node
			DOMCompat::replaceChildren( $node );
		},
		'wrap' => static function ( Node $node, string $w ) {
			$frag = $node->ownerDocument->createElement( 'div' );
			DOMCompat::setInnerHTML( $frag, $w );
			$first = $frag->firstChild;
			$node->parentNode->replaceChild( $first, $node );
			// Descend to the innermost first child of the wrapper and
			// re-attach the wrapped node there.
			while ( $first->firstChild ) {
				$first = $first->firstChild;
			}
			$first->appendChild( $node );
		}
	];

	$body = DOMCompat::getBody( $doc );
	$err = null;

	foreach ( $changes as $change ) {
		if ( !is_array( $change ) || count( $change ) < 2 ) {
			// Encode the offending change: concatenating an array
			// directly would only yield the word "Array".
			$err = new Error( 'bad change: ' . json_encode( $change ) );
			break;
		}
		// use document.querySelectorAll as a poor man's $(...)
		$els = PHPUtils::iterable_to_array(
			DOMCompat::querySelectorAll( $body, $change[0] )
		);
		if ( !count( $els ) ) {
			$err = new Error( $change[0] .
				' did not match any elements: ' . DOMCompat::getOuterHTML( $body ) );
			break;
		}
		if ( $change[1] === 'contents' ) {
			// Drop the selector so that the real method name moves to
			// index 1, and operate on the children of the matches.
			$change = array_slice( $change, 1 );
			$acc = [];
			foreach ( $els as $el ) {
				PHPUtils::pushArray( $acc, iterator_to_array( $el->childNodes ) );
			}
			$els = $acc;
		}
		$fn = $jquery[$change[1]] ?? null;
		if ( !$fn ) {
			$err = new Error( 'bad mutator function: ' . $change[1] );
			break;
		}
		foreach ( $els as $el ) {
			// Deliberately call_user_func_array() rather than a direct
			// $fn( ... ) call: callbacks invoked by internal functions
			// are not subject to this file's strict_types, so scalar
			// arguments from the test file (e.g. a numeric attribute
			// value) are still coerced to the mutators' string params.
			call_user_func_array( $fn, array_merge( [ $el ], array_slice( $change, 2 ) ) );
		}
	}

	if ( $err ) {
		print TestUtils::colorString( (string)$err, "red" ) . "\n";
		throw $err;
	}
}
441 | |
/**
 * Make changes to a DOM in order to run a selser test on it.
 *
 * $changelist mirrors the child structure of the document body: entry i
 * applies to the i-th child node. A nested array recurses into that
 * child; a scalar is one of the change codes
 *   0 - no change,
 *   1 - change the node wrapper (sets a random data-foobar attribute;
 *       only possible on elements),
 *   2 - insert a new node before the child,
 *   3 - delete the tree rooted at the child,
 *   4 - insert a new node before the child, then delete the child.
 * The special list [ 5 ] appends a comment containing
 * STATIC_RANDOM_STRING to the body instead.
 *
 * The list is also stored in $this->changetree. Random content comes
 * from an Alea generator seeded with the JSON-encoded change list and
 * the test name.
 *
 * @param array $dumpOpts Reads 'logger' (an object with a
 *   log( $level, $message ) method) and 'dom:post-changes' (when truthy
 *   and a logger is given, the DOM is dumped before and after editing)
 * @param Document $doc
 * @param array $changelist
 * @throws Error if a (sub)list has more entries than the node has children
 */
public function applyChanges( array $dumpOpts, Document $doc, array $changelist ) {
	$logger = $dumpOpts['logger'] ?? null;
	$dumpDom = $logger && ( $dumpOpts['dom:post-changes'] ?? false );

	// Seed the random-number generator based on the item title and changelist
	$alea = new Alea( ( json_encode( $changelist ) ) . ( $this->testName ?? '' ) );

	// Keep the changes in the test object
	// to check for duplicates while building tasks
	$this->changetree = $changelist;

	// Next random value from the generator, rendered in base 36.
	$randomString = static function () use ( $alea ): string {
		return base_convert( (string)$alea->uint32(), 10, 36 );
	};

	// Element name the inserted text has to be wrapped in so that it is
	// valid next to $n, or null when a bare text node is fine.
	$wrapperFor = static function ( Node $n ): ?string {
		// For these container nodes, it would be buggy
		// to insert text nodes as children
		switch ( DOMCompat::nodeName( $n->parentNode ) ) {
			case 'ol':
			case 'ul':
				return 'li';
			case 'dl':
				return 'dd';
			case 'tr':
				// Copy TH or TD from the nearest element sibling,
				// preferring the previous one; default to TD.
				$neighbor = DOMCompat::getPreviousElementSibling( $n ) ?:
					DOMCompat::getNextElementSibling( $n );
				return $neighbor ? DOMCompat::nodeName( $neighbor ) : 'td';
			case 'body':
				return 'p';
			default:
				// We're trying to determine if it is safe to place a
				// bare text node. If $n is a block, wrap the text node
				// we're putting beside it.
				return DOMUtils::isWikitextBlockNode( $n ) ? 'p' : null;
		}
	};

	$insertNewNode = static function ( Node $n ) use ( $randomString, $wrapperFor ): void {
		// Insert a text node, if not in a fosterable position.
		// If in foster position, enter a comment.
		// In either case, dom-diff should register a new node
		$content = $randomString();
		$ownerDoc = $n->ownerDocument;

		// Don't separate legacy IDs from their H? node.
		if ( WTUtils::isFallbackIdSpan( $n ) ) {
			$n = $n->nextSibling ?? $n->parentNode;
		}

		$wrapperName = $wrapperFor( $n );

		if ( DOMUtils::isFosterablePosition( $n ) && DOMCompat::nodeName( $n->parentNode ) !== 'tr' ) {
			$inserted = $ownerDoc->createComment( $content );
		} elseif ( $wrapperName ) {
			$inserted = $ownerDoc->createElement( $wrapperName );
			$inserted->appendChild( $ownerDoc->createTextNode( $content ) );
		} else {
			$inserted = $ownerDoc->createTextNode( $content );
		}

		$n->parentNode->insertBefore( $inserted, $n );
	};

	$removeNode = static function ( Node $n ): void {
		$n->parentNode->removeChild( $n );
	};

	$applyChangesInternal = static function ( Node $node, array $changes ) use (
		&$applyChangesInternal, $removeNode, $insertNewNode,
		$randomString, $logger
	): void {
		if ( count( $node->childNodes ) < count( $changes ) ) {
			throw new Error( "Error: more changes than nodes to apply them to!" );
		}

		// Snapshot the children since we are mutating them in the
		// changes loop below
		$snapshot = iterator_to_array( $node->childNodes, false );

		foreach ( $changes as $i => $change ) {
			$child = $snapshot[$i];

			if ( is_array( $change ) ) {
				$applyChangesInternal( $child, $change );
				continue;
			}

			// The switch's loose comparison is intentional: it accepts
			// the change codes whether they arrive as ints or floats.
			switch ( $change ) {
				// No change
				case 0:
					break;

				// Change node wrapper
				// (sufficient to insert a random attr)
				case 1:
					if ( $child instanceof Element ) {
						$child->setAttribute( 'data-foobar', $randomString() );
					} elseif ( $logger ) {
						$logger->log(
							LogLevel::ERROR,
							'Buggy changetree. changetype 1 (modify attribute)' .
							' cannot be applied on text/comment nodes.'
						);
					}
					break;

				// Insert new node before child
				case 2:
					$insertNewNode( $child );
					break;

				// Delete tree rooted at child
				case 3:
					$removeNode( $child );
					break;

				// Change tree rooted at child
				case 4:
					$insertNewNode( $child );
					$removeNode( $child );
					break;
			}
		}
	};

	$body = DOMCompat::getBody( $doc );

	if ( $dumpDom ) {
		$logger->log( LogLevel::ERROR, "----- Original DOM -----" );
		$logger->log( LogLevel::ERROR, ContentUtils::dumpDOM( $body, '', [ 'quiet' => true ] ) );
	}

	if ( $this->changetree === [ 5 ] ) {
		// Hack so that we can work on the parent node rather than just the
		// children: Append a comment with known content. This is later
		// stripped from the output, and the result is compared to the
		// original wikitext rather than the non-selser wt2wt result.
		$body->appendChild( $doc->createComment( self::STATIC_RANDOM_STRING ) );
	} elseif ( $this->changetree !== [] ) {
		$applyChangesInternal( $body, $this->changetree );
	}

	if ( $dumpDom ) {
		$logger->log( LogLevel::ERROR, "----- Change Tree -----" );
		$logger->log( LogLevel::ERROR, json_encode( $this->changetree ) );
		$logger->log( LogLevel::ERROR, "----- Edited DOM -----" );
		$logger->log( LogLevel::ERROR, ContentUtils::dumpDOM( $body, '', [ 'quiet' => true ] ) );
	}
}
615 | |
/**
 * For a selser test, tell whether a candidate change tree equals one of
 * the trees already recorded in $this->selserChangeTrees.
 * Used for generating unique tests.
 *
 * @param array $change Candidate change.
 * @return bool
 */
public function isDuplicateChangeTree( array $change ): bool {
	// Loose (==) comparison is kept on purpose: it is what this check
	// has always used, so trees with equal values but different scalar
	// types still count as duplicates.
	return in_array( $change, $this->selserChangeTrees );
}
633 | |
/**
 * Generate a change object for a document, so we can apply it during a selser test.
 *
 * The tree is built by walking the children of the document body with
 * an Alea generator seeded from $this->seed and the test name. Each
 * child receives either a change code (0 = unchanged, 1-4 = a change)
 * or a nested list for its own children. Generation is retried, up to
 * 1000 attempts, until the tree is non-empty and not already present
 * in $this->selserChangeTrees. If the last attempt still yields no
 * usable tree, $this->duplicateChange is set to true.
 *
 * @param Document $doc
 * @return array The list of changes.
 */
public function generateChanges( Document $doc ): array {
	$alea = new Alea( ( $this->seed ?? '' ) . ( $this->testName ?? '' ) );

	/**
	 * If no node in the DOM subtree rooted at 'node' is editable in the VE,
	 * this function should return false.
	 *
	 * Currently true for template and extension content, and for entities.
	 */
	$domSubtreeIsEditable = static function ( Node $node ): bool {
		return !( $node instanceof Element ) ||
			( !WTUtils::isEncapsulationWrapper( $node ) &&
				// These wrappers can only be edited in restricted ways.
				// Simpler to just block all editing on them.
				!DOMUtils::matchTypeOf( $node,
					'#^mw:(Entity|Placeholder|DisplaySpace|Annotation|ExtendedAnnRange)(/|$)#'
				) &&
				// Deleting these wrappers is tantamount to removing the
				// references-tag encapsulation wrappers, which results in errors.
				!DOMUtils::hasClass( $node, 'mw-references-wrap' )
			);
	};

	/**
	 * Even if a DOM subtree might be editable in the VE,
	 * certain nodes in the DOM might not be directly editable.
	 *
	 * Currently, this restriction is only applied to DOMs generated for images.
	 * Possibly, there are other candidates.
	 */
	$nodeIsUneditable = static function ( Node $node ) use ( &$nodeIsUneditable ): bool {
		// Text and comment nodes are always editable
		if ( !( $node instanceof Element ) ) {
			return false;
		}

		if ( WTUtils::isMarkerAnnotation( $node ) ) {
			return true;
		}

		// - File wrapper is an uneditable elt.
		// - Any node nested in a file wrapper that is not a figcaption
		//   is an uneditable elt.
		// - Entity spans are uneditable as well
		// - Placeholder is defined to be uneditable in the spec
		// - ExtendedAnnRange is an "unknown" type in the spec, and hence uneditable
		return DOMUtils::matchTypeOf( $node,
			'#^mw:(File|Entity|Placeholder|DisplaySpace|ExtendedAnnRange)(/|$)#' ) || (
			DOMCompat::nodeName( $node ) !== 'figcaption' &&
			$node->parentNode &&
			DOMCompat::nodeName( $node->parentNode ) !== 'body' &&
			$nodeIsUneditable( $node->parentNode )
		);
	};

	$defaultChangeType = 0;

	$hasChangeMarkers = static function ( array $list ) use (
		&$hasChangeMarkers, $defaultChangeType
	): bool {
		// If all recorded changes are 0, then nothing has been modified
		foreach ( $list as $c ) {
			if ( ( is_array( $c ) && $hasChangeMarkers( $c ) ) ||
				( !is_array( $c ) && $c !== $defaultChangeType )
			) {
				return true;
			}
		}
		return false;
	};

	// Builds the change list for the children of $node; returns [] when
	// no child (at any depth) ended up with an actual change.
	// The order of the calls on $alea must not change: the generated
	// trees depend on it.
	$genChangesInternal = static function ( Node $node ) use (
		&$genChangesInternal, $hasChangeMarkers,
		$domSubtreeIsEditable, $nodeIsUneditable, $alea,
		$defaultChangeType
	): array {
		$changelist = [];
		$children = $node->childNodes ? iterator_to_array( $node->childNodes ) : [];
		foreach ( $children as $child ) {
			$changeType = $defaultChangeType;
			if ( $domSubtreeIsEditable( $child ) ) {
				if ( $nodeIsUneditable( $child ) || $alea->random() < 0.5 ) {
					// This call to random is a hack to preserve the current
					// determined state of our knownFailures entries after a
					// refactor.
					$alea->uint32();
					$changeType = $genChangesInternal( $child );
					// `$genChangesInternal` returns an array, which can be
					// empty. Revert to the `$defaultChangeType` if that's
					// the case.
					if ( count( $changeType ) === 0 ) {
						$changeType = $defaultChangeType;
					}
				} else {
					// floor() returns a float, so these codes are floats.
					// NOTE(review): left uncast on purpose; applyChanges()
					// JSON-encodes the change list for its seed, so a cast
					// could alter generated test content. Confirm before
					// changing.
					if ( !( $child instanceof Element ) ) {
						// Text or comment node -- valid changes: 2, 3, 4
						// since we cannot set attributes on these
						$changeType = floor( $alea->random() * 3 ) + 2;
					} else {
						$changeType = floor( $alea->random() * 4 ) + 1;
					}
				}
			}

			$changelist[] = $changeType;
		}

		return $hasChangeMarkers( $changelist ) ? $changelist : [];
	};

	$body = DOMCompat::getBody( $doc );

	$maxAttempts = 1000;
	$numAttempts = 0;
	do {
		$numAttempts++;
		$changetree = $genChangesInternal( $body );
		$unusable = count( $changetree ) === 0 ||
			$this->isDuplicateChangeTree( $changetree );
	} while ( $unusable && $numAttempts < $maxAttempts );

	// Judge the outcome by the last tree itself rather than by the
	// attempt counter: a usable tree produced on the very last attempt
	// must not be reported as a failure.
	if ( $unusable ) {
		// couldn't generate a change ... marking as such
		$this->duplicateChange = true;
	}

	return $changetree;
}
772 | |
/**
 * Run this test in every requested mode by calling
 * $runTest( $test, $mode, $runnerOpts ) once per run.
 *
 * Non-selser modes (and selser with an explicit 'changetree' runner
 * option) produce a single run, preceded in wt2html mode by a
 * language-converter run on a clone when the test has an
 * 'html/parsoid+langconv' section. Selser otherwise expands into
 * several runs; see the comments in the body.
 *
 * FIXME: clean up this mess!
 * - generate all changes at once (generateChanges should return a tree really)
 *   rather than going to all these lengths of interleaving change
 *   generation with tests
 * - set up the changes in item directly rather than juggling around with
 *   indexes etc
 * - indicate whether to compare to wt2wt or the original input
 * - maybe make a full selser test one method that uses others rather than the
 *   current chain of methods that sometimes do something for selser
 *
 * @param array $targetModes
 * @param array $runnerOpts
 * @param callable $runTest
 */
public function testAllModes( // phpcs:ignore MediaWiki.Commenting.MissingCovers.MissingCovers
	array $targetModes, array $runnerOpts, callable $runTest
): void {
	if ( !$this->testName ) {
		throw new Error( 'Missing title from test case.' );
	}
	$selserNoAuto = ( ( $runnerOpts['selser'] ?? false ) === 'noauto' );

	foreach ( $targetModes as $targetMode ) {
		$plainRun = $targetMode !== 'selser' ||
			( !$selserNoAuto && isset( $runnerOpts['changetree'] ) );

		if ( $plainRun ) {
			if ( $targetMode === 'wt2html' && isset( $this->sections['html/parsoid+langconv'] ) ) {
				// Since we are clobbering options and parsoidHtml, clone the test object
				$langconvTest = Utils::clone( $this );
				$langconvTest->options['langconv'] = true;
				$langconvTest->parsoidHtml = $this->sections['html/parsoid+langconv'];
				$runTest( $langconvTest, $targetMode, $runnerOpts );
				if ( $this->parsoidHtml === null ) {
					// Don't run the same test in non-langconv mode
					// unless we have a non-langconv section
					continue;
				}
			}

			Assert::invariant(
				$targetMode !== 'selser' ||
				( isset( $runnerOpts['changetree'] ) && !$selserNoAuto ),
				"Unexpected target mode $targetMode" );

			$runTest( $this, $targetMode, $runnerOpts );
			continue;
		}

		// From here on $targetMode is 'selser'.

		if ( $selserNoAuto ) {
			// Manual changes were requested on the command line,
			// check that the item does have them.
			if ( isset( $this->options['parsoid']['changes'] ) ) {
				$this->changetree = [ 'manual' ];
				$runTest( $this, 'selser', $runnerOpts );
			}
			continue;
		}

		// Run selser tests in the following order:
		// 1. Manual changes (if provided)
		// 2. changetree 5 (oracle exists for verifying output)
		// 3. All other change trees (no oracle exists for verifying output)

		if ( isset( $this->options['parsoid']['changes'] ) ) {
			// Mutating the item here is necessary to output 'manual' in
			// the test's title and to differentiate it for knownFailures.
			$this->changetree = [ 'manual' ];
			$runTest( $this, 'selser', $runnerOpts );
		}

		// Skip the rest if the test doesn't want changetrees
		if ( ( $this->options['parsoid']['selser'] ?? '' ) === 'noauto' ) {
			continue;
		}

		// Changetree 5 (append a comment to the root node)
		$this->changetree = [ 5 ];
		$runTest( $this, 'selser', $runnerOpts );

		// Automatically generated change trees
		$this->selserChangeTrees = [];
		for ( $j = 0; $j < $runnerOpts['numchanges']; $j++ ) {
			// Set changetree to null to ensure we don't assume [ 5 ] in $runTest
			$this->changetree = null;
			$this->seed = (string)$j;
			$runTest( $this, 'selser', $runnerOpts );
			if ( $this->isDuplicateChangeTree( $this->changetree ) ) {
				// Once we get a duplicate change tree, we can no longer
				// generate and run new tests. So, be done now!
				break;
			}
			$this->selserChangeTrees[$j] = $this->changetree;
		}
	}
}
868 | |
/**
 * Normalize expected and actual HTML to suppress irrelevant differences.
 * The normalization is determined by the HTML sections present in the test
 * as well as other Parsoid-specific test options.
 *
 * When $normExpected is null, the expected HTML is taken from the
 * 'html/parsoid+integrated' or 'html/parsoid+standalone' section (when
 * present and matching $standalone) or else from $this->parsoidHtml,
 * normalized, and also stored in $this->cachedNormalizedHTML.
 *
 * @param Element|string $actual
 * @param ?string $normExpected Already-normalized expected HTML, or
 *   null to compute it
 * @param bool $standalone
 * @return array [ normalized actual, normalized expected ]
 */
public function normalizeHTML( $actual, ?string $normExpected, bool $standalone = true ): array {
	$opts = $this->options;
	$sections = $this->sections;
	$config = $this->config;

	$haveStandaloneHTML = $standalone && isset( $sections['html/parsoid+standalone'] );
	$haveIntegratedHTML = !$standalone && isset( $sections['html/parsoid+integrated'] );

	// Any Parsoid-specific HTML section selects Parsoid-only
	// normalization, as does a 'parsoid' option block that does not
	// ask for normalizePhp.
	$hasParsoidSection = isset( $sections['html/parsoid'] ) ||
		$haveStandaloneHTML ||
		$haveIntegratedHTML ||
		isset( $sections['html/parsoid+langconv'] );
	$parsoidOptsOnly = isset( $opts['parsoid'] ) && !isset( $opts['parsoid']['normalizePhp'] );
	$parsoidOnly = $hasParsoidSection || $parsoidOptsOnly;

	$externalLinkTarget = ( $opts['externallinktarget'] ?? false ) ||
		isset( $config['wgExternalLinkTarget'] ) ||
		isset( $config['wgNoFollowLinks'] ) ||
		isset( $config['wgNoFollowDomainExceptions'] );

	$normOpts = [
		'parsoidOnly' => $parsoidOnly,
		'preserveIEW' => isset( $opts['parsoid']['preserveIEW'] ),
		'externallinktarget' => $externalLinkTarget,
	];

	if ( $normExpected === null ) {
		// Pick the most specific expected HTML available.
		$expectedSource = $this->parsoidHtml;
		if ( $haveIntegratedHTML ) {
			$expectedSource = $sections['html/parsoid+integrated'];
		} elseif ( $haveStandaloneHTML ) {
			$expectedSource = $sections['html/parsoid+standalone'];
		}

		$normExpected = $parsoidOnly ?
			TestUtils::normalizeOut( $expectedSource, $normOpts ) :
			TestUtils::normalizeHTML( $expectedSource );
		$this->cachedNormalizedHTML = $normExpected;
	}

	$normActual = TestUtils::normalizeOut( $actual, $normOpts );
	return [ $normActual, $normExpected ];
}
916 | |
/**
 * Normalize "known failure" output.
 *
 * The normalization is deliberately minimal: the known failure file
 * exists to catch changes in output, even if we don't know what
 * "correct" is. Only the 'about' numbering is normalized, since that is
 * not guaranteed consistent from run to run.
 *
 * @param string $out Raw output of a failing test
 * @return string $out as returned by TestUtils::normalizeAbout()
 */
public function normalizeKnownFailure( string $out ): string {
	$normalized = TestUtils::normalizeAbout( $out );
	return $normalized;
}
928 | |
/**
 * Normalize expected and actual wikitext to suppress irrelevant differences.
 *
 * Currently this only strips trailing newlines from both strings.
 *
 * Because of selser as well as manual edit trees, expected wikitext isn't always
 * found in the same section for all tests ending in WT (unlike normalizeHTML).
 * Hence,
 * (a) this code has a different structure than normalizeHTML
 * (b) we cannot cache normalized wikitext
 *
 * @param string $actual
 * @param string $expected
 * @param bool $standalone Not used by the current implementation
 * @return array [ normalized actual, normalized expected ]
 */
public function normalizeWT( string $actual, string $expected, bool $standalone = true ): array {
	// No other normalizations at this time
	return [
		rtrim( $actual, "\n" ),
		rtrim( $expected, "\n" ),
	];
}
950 | } |