Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.70% |
3 / 431 |
|
6.25% |
1 / 16 |
CRAP | |
0.00% |
0 / 1 |
References | |
0.70% |
3 / 431 |
|
6.25% |
1 / 16 |
15425.99 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
hasRef | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
30 | |||
createReferences | |
0.00% |
0 / 35 |
|
0.00% |
0 / 1 |
56 | |||
extractRefFromNode | |
0.00% |
0 / 186 |
|
0.00% |
0 / 1 |
3080 | |||
setMisnested | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
addErrorsToNode | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
insertReferencesIntoDOM | |
0.00% |
0 / 52 |
|
0.00% |
0 / 1 |
380 | |||
insertMissingReferencesIntoDOM | |
0.00% |
0 / 20 |
|
0.00% |
0 / 1 |
6 | |||
processEmbeddedRefs | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
processRefs | |
0.00% |
0 / 27 |
|
0.00% |
0 / 1 |
90 | |||
addEmbeddedErrors | |
0.00% |
0 / 20 |
|
0.00% |
0 / 1 |
42 | |||
sourceToDom | |
0.00% |
0 / 33 |
|
0.00% |
0 / 1 |
30 | |||
processAttributeEmbeddedHTML | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
domToWikitext | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
30 | |||
lintHandler | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
diffHandler | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
12 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | // phpcs:disable MediaWiki.WhiteSpace.SpaceBeforeSingleLineComment.NewLineComment |
4 | // @phan-file-suppress MediaWikiNoEmptyIfDefined Phan thinks the @property docs in the DataParsoid |
5 | // class are a guarantee, but they aren't |
6 | |
7 | namespace Cite\Parsoid; |
8 | |
9 | use Cite\Cite; |
10 | use Cite\MarkSymbolRenderer; |
11 | use Closure; |
12 | use MediaWiki\Config\Config; |
13 | use MediaWiki\MediaWikiServices; |
14 | use stdClass; |
15 | use Wikimedia\Message\MessageValue; |
16 | use Wikimedia\Parsoid\Core\DomSourceRange; |
17 | use Wikimedia\Parsoid\DOM\DocumentFragment; |
18 | use Wikimedia\Parsoid\DOM\Element; |
19 | use Wikimedia\Parsoid\DOM\Node; |
20 | use Wikimedia\Parsoid\Ext\DOMDataUtils; |
21 | use Wikimedia\Parsoid\Ext\DOMUtils; |
22 | use Wikimedia\Parsoid\Ext\ExtensionTagHandler; |
23 | use Wikimedia\Parsoid\Ext\ParsoidExtensionAPI; |
24 | use Wikimedia\Parsoid\Ext\PHPUtils; |
25 | use Wikimedia\Parsoid\Ext\WTUtils; |
26 | use Wikimedia\Parsoid\NodeData\DataMw; |
27 | use Wikimedia\Parsoid\NodeData\DataMwError; |
28 | use Wikimedia\Parsoid\NodeData\DataParsoid; |
29 | use Wikimedia\Parsoid\Utils\DOMCompat; |
30 | |
31 | /** |
32 | * @license GPL-2.0-or-later |
33 | */ |
34 | class References extends ExtensionTagHandler { |
35 | private Config $mainConfig; |
36 | private MarkSymbolRenderer $markSymbolRenderer; |
37 | |
/**
 * @param Config $mainConfig Configuration object consulted later for the
 *   responsive-references settings
 */
public function __construct( Config $mainConfig ) {
	// The renderer is looked up from the global service container by name.
	$services = MediaWikiServices::getInstance();
	$this->markSymbolRenderer = $services->getService( 'Cite.MarkSymbolRenderer' );

	$this->mainConfig = $mainConfig;
}
43 | |
/**
 * Reports whether any element below $node is a sealed fragment of type 'ref'.
 *
 * Children are visited in document order and every element child is
 * searched recursively; non-element children are ignored.
 *
 * @param Node $node Root of the subtree to search ($node itself is not tested)
 * @return bool True as soon as one sealed ref fragment is found
 */
private static function hasRef( Node $node ): bool {
	for ( $child = $node->firstChild; $child; $child = $child->nextSibling ) {
		if ( !( $child instanceof Element ) ) {
			continue;
		}
		if ( WTUtils::isSealedFragmentOfType( $child, 'ref' ) || self::hasRef( $child ) ) {
			return true;
		}
	}
	return false;
}
59 | |
/**
 * Builds the list element that will hold the rendered references.
 *
 * Creates an <ol class="mw-references references">, moves the children of
 * $domFragment into it, optionally wraps it in a
 * <div class="mw-references-wrap">, and registers the Cite modules and
 * styles on the page metadata.
 *
 * @param ParsoidExtensionAPI $extApi
 * @param DocumentFragment $domFragment Fragment whose children are moved into the list
 * @param array $refsOpts Options; the keys 'group' and 'responsive' are read
 * @param ?callable $modifyDp Optional callback invoked with the data-parsoid
 *   object of the returned element
 * @param bool $autoGenerated When true, typeof/about/data-mw are set on the
 *   returned element as they would be for a real extension tag
 * @return Element The wrapping <div> when responsive wrapping applies,
 *   otherwise the <ol> itself
 */
private function createReferences(
	ParsoidExtensionAPI $extApi, DocumentFragment $domFragment,
	array $refsOpts, ?callable $modifyDp, bool $autoGenerated = false
): Element {
	$doc = $domFragment->ownerDocument;

	// Compare against the empty string explicitly. A plain truthiness test
	// would also reject the group name "0", because PHP treats the string
	// "0" as false, and that group would silently lose its group metadata.
	$group = (string)( $refsOpts['group'] ?? '' );
	$hasGroup = $group !== '';

	$ol = $doc->createElement( 'ol' );
	DOMCompat::getClassList( $ol )->add( 'mw-references', 'references' );

	DOMUtils::migrateChildren( $domFragment, $ol );

	// Support the `responsive` parameter: an explicit value wins over the
	// site configuration, and only the literal '0' switches wrapping off.
	if ( $refsOpts['responsive'] !== null ) {
		$responsiveWrap = $refsOpts['responsive'] !== '0';
	} else {
		$responsiveWrap = (bool)$this->mainConfig->get( 'CiteResponsiveReferences' );
	}

	if ( $responsiveWrap ) {
		$div = $doc->createElement( 'div' );
		DOMCompat::getClassList( $div )->add( 'mw-references-wrap' );
		$div->appendChild( $ol );
		$frag = $div;
	} else {
		$frag = $ol;
	}

	if ( $autoGenerated ) {
		// FIXME: This is very much trying to copy ExtensionHandler::onDocument
		DOMUtils::addAttributes( $frag, [
			'typeof' => 'mw:Extension/references',
			'about' => $extApi->newAboutId()
		] );
		$dataMw = new DataMw( [
			'name' => 'references',
			'attrs' => new stdClass
		] );
		// Don't emit empty keys
		if ( $hasGroup ) {
			$dataMw->attrs->group = $group;
		}
		DOMDataUtils::setDataMw( $frag, $dataMw );
	}

	$dp = DOMDataUtils::getDataParsoid( $frag );
	if ( $hasGroup ) { // No group for the empty string either
		$dp->group = $group;
		$ol->setAttribute( 'data-mw-group', $group );
	}
	if ( $modifyDp ) {
		$modifyDp( $dp );
	}

	// These module names are copied from the Cite extension.
	// They are hardcoded there as well.
	$metadata = $extApi->getMetadata();
	$metadata->addModules( [ 'ext.cite.ux-enhancements' ] );
	$metadata->addModuleStyles( [ 'ext.cite.parsoid.styles', 'ext.cite.styles' ] );

	return $frag;
}
121 | |
/**
 * Turns one sealed <ref> fragment wrapper into its <sup> linkback element and
 * records the ref in $refsData.
 *
 * Broad sequence, as implemented below:
 *  - read data-parsoid / data-mw from the wrapper and from the first child of
 *    the fragment's content DOM,
 *  - validate the ref attributes, collecting DataMwError objects in $errs,
 *  - process refs nested in the content via processRefs(),
 *  - look the ref up by name / follow in its group, or add a new one,
 *  - build the <sup><a><span> linkback and put it in place of $node, or just
 *    after an enclosing <a> (marked misnested) when $node is inside a link,
 *  - keep the content fragment as the ref's content when the ref has none yet
 *    and this one is not empty; otherwise remove it and clear the content DOM.
 *
 * @param ParsoidExtensionAPI $extApi
 * @param Element $node The sealed fragment wrapper; it is removed from or
 *   replaced in its parent
 * @param ReferencesData $refsData Ref bookkeeping that is read and updated
 */
122 | private function extractRefFromNode( |
123 | ParsoidExtensionAPI $extApi, Element $node, ReferencesData $refsData |
124 | ): void { |
125 | $doc = $node->ownerDocument; |
126 | $errs = []; |
127 | |
128 | // This is data-parsoid from the dom fragment node that's gone through |
129 | // dsr computation and template wrapping. |
130 | $nodeDp = DOMDataUtils::getDataParsoid( $node ); |
131 | $isTplWrapper = DOMUtils::hasTypeOf( $node, 'mw:Transclusion' ); |
132 | $contentId = $nodeDp->html; |
133 | $tplDmw = $isTplWrapper ? DOMDataUtils::getDataMw( $node ) : null; |
134 | |
135 | // This is the <sup> that's the meat of the sealed fragment |
136 | $c = $extApi->getContentDOM( $contentId )->firstChild; |
137 | DOMUtils::assertElt( $c ); |
138 | $cDp = DOMDataUtils::getDataParsoid( $c ); |
139 | $refDmw = DOMDataUtils::getDataMw( $c ); |
140 | |
141 | // Use the about attribute on the wrapper with priority, since it's |
142 | // only added when the wrapper is a template sibling. |
143 | $about = DOMCompat::getAttribute( $node, 'about' ) ?? |
144 | DOMCompat::getAttribute( $c, 'about' ); |
145 | '@phan-var string $about'; // assert that $about is non-null |
146 | |
147 | // FIXME(SSS): Need to clarify semantics here. |
148 | // If both the containing <references> elt as well as the nested <ref> |
149 | // elt has a group attribute, what takes precedence? |
150 | $groupName = $refDmw->attrs->group ?? $refsData->referencesGroup; |
151 | $group = $refsData->getRefGroup( $groupName ); |
152 | |
153 | if ( |
154 | $refsData->inReferencesContent() && |
155 | $groupName !== $refsData->referencesGroup |
156 | ) { |
157 | $errs[] = new DataMwError( |
158 | 'cite_error_references_group_mismatch', |
159 | [ $refDmw->attrs->group ] |
160 | ); |
161 | } |
162 | |
163 | static $validAttributes = [ |
164 | 'group' => true, |
165 | 'name' => true, |
166 | Cite::SUBREF_ATTRIBUTE => true, |
167 | 'follow' => true, |
168 | 'dir' => true |
169 | ]; |
170 | |
171 | if ( array_diff_key( (array)$refDmw->attrs, $validAttributes ) !== [] ) { |
172 | $errs[] = new DataMwError( 'cite_error_ref_too_many_keys' ); |
173 | } |
174 | |
175 | // NOTE: This will have been trimmed in Utils::getExtArgInfo()'s call |
176 | // to TokenUtils::kvToHash() and ExtensionHandler::normalizeExtOptions() |
177 | $refName = $refDmw->attrs->name ?? ''; |
178 | $followName = $refDmw->attrs->follow ?? ''; |
179 | $refDir = strtolower( $refDmw->attrs->dir ?? '' ); |
// NOTE(review): this reads the literal `extends` attribute, while
// $validAttributes above whitelists Cite::SUBREF_ATTRIBUTE. Confirm that the
// constant really names the same attribute; otherwise an `extends` value is
// both reported as an unknown key and still passed on to $refsData->add().
180 | $extendsRef = $refDmw->attrs->extends ?? null; |
181 | |
182 | // Add ref-index linkback |
183 | $linkBack = $doc->createElement( 'sup' ); |
184 | |
185 | $ref = null; |
186 | |
187 | $hasRefName = strlen( $refName ) > 0; |
188 | $hasFollow = strlen( $followName ) > 0; |
189 | |
190 | $validFollow = false; |
191 | |
192 | if ( $hasFollow ) { |
193 | // Always wrap follows content so that there's no ambiguity |
194 | // where to find it when roundtripping |
195 | $span = $doc->createElement( 'span' ); |
196 | DOMUtils::addTypeOf( $span, 'mw:Cite/Follow' ); |
197 | $span->setAttribute( 'about', $about ); |
198 | $span->appendChild( |
199 | $doc->createTextNode( ' ' ) |
200 | ); |
201 | DOMUtils::migrateChildren( $c, $span ); |
202 | $c->appendChild( $span ); |
203 | } |
204 | |
// $html receives the serialized content of this ref when it repeats an
// existing name; $contentDiffers is set only when that serialization is not
// identical to the cached serialization of the first definition.
205 | $html = ''; |
206 | $contentDiffers = false; |
207 | |
208 | if ( $hasRefName ) { |
209 | if ( $hasFollow ) { |
210 | // Presumably, "name" has higher precedence |
211 | $errs[] = new DataMwError( 'cite_error_ref_follow_conflicts' ); |
212 | } |
213 | if ( isset( $group->indexByName[$refName] ) ) { |
214 | $ref = $group->indexByName[$refName]; |
215 | // If there are multiple <ref>s with the same name, but different content, |
216 | // the content of the first <ref> shows up in the <references> section. |
217 | // in order to ensure lossless RT-ing for later <refs>, we have to record |
218 | // HTML inline for all of them. |
219 | if ( $ref->contentId ) { |
220 | if ( $ref->cachedHtml === null ) { |
221 | // @phan-suppress-next-line PhanTypeMismatchArgumentNullable False positive |
222 | $refContent = $extApi->getContentDOM( $ref->contentId )->firstChild; |
223 | $ref->cachedHtml = $extApi->domToHtml( $refContent, true, false ); |
224 | } |
225 | $html = $extApi->domToHtml( $c, true, false ); |
226 | $contentDiffers = ( $html !== $ref->cachedHtml ); |
227 | } |
228 | } else { |
229 | if ( $refsData->inReferencesContent() ) { |
230 | $errs[] = new DataMwError( |
231 | 'cite_error_references_missing_key', |
232 | [ $refDmw->attrs->name ] |
233 | ); |
234 | } |
235 | } |
236 | } else { |
237 | if ( $hasFollow ) { |
238 | // This is a follows ref, so check that a named ref has already |
239 | // been defined |
240 | if ( isset( $group->indexByName[$followName] ) ) { |
241 | $validFollow = true; |
242 | $ref = $group->indexByName[$followName]; |
243 | } else { |
244 | // FIXME: This key isn't exactly appropriate since this |
245 | // is more general than just being in a <references> |
246 | // section and it's the $followName we care about, but the |
247 | // extension to the legacy parser doesn't have an |
248 | // equivalent key and just outputs something wacky. |
249 | $errs[] = new DataMwError( |
250 | 'cite_error_references_missing_key', |
251 | [ $refDmw->attrs->follow ] |
252 | ); |
253 | } |
254 | } elseif ( $refsData->inReferencesContent() ) { |
255 | $errs[] = new DataMwError( 'cite_error_references_no_key' ); |
256 | } |
257 | } |
258 | |
259 | // Process nested ref-in-ref |
260 | // |
261 | // Do this before possibly adding a ref below or |
262 | // migrating contents out of $c if we have a valid follow |
263 | if ( empty( $cDp->empty ) && self::hasRef( $c ) ) { |
264 | if ( $contentDiffers ) { |
265 | $refsData->pushEmbeddedContentFlag(); |
266 | } |
267 | $this->processRefs( $extApi, $refsData, $c ); |
268 | if ( $contentDiffers ) { |
269 | $refsData->popEmbeddedContentFlag(); |
270 | // If we have refs and the content differs, we need to |
271 | // reserialize now that we processed the refs. Unfortunately, |
272 | // the cachedHtml we compared against already had its refs |
273 | // processed so that would presumably never match and this will |
274 | // always be considered a redefinition. The implementation for |
275 | // the legacy parser also considers this a redefinition so |
276 | // there is likely little content out there like this :) |
277 | $html = $extApi->domToHtml( $c, true, true ); |
278 | } |
279 | } |
280 | |
281 | if ( $validFollow ) { |
282 | // Migrate content from the follow to the ref |
283 | if ( $ref->contentId ) { |
284 | // @phan-suppress-next-line PhanTypeMismatchArgumentNullable False positive |
285 | $refContent = $extApi->getContentDOM( $ref->contentId )->firstChild; |
286 | DOMUtils::migrateChildren( $c, $refContent ); |
287 | } else { |
288 | // Otherwise, we have a follow that comes after a named |
289 | // ref without content so use the follow fragment as |
290 | // the content |
291 | // This will be set below with `$ref->contentId = $contentId;` |
292 | } |
293 | } else { |
294 | // If we have !$ref, one might have been added in the call to |
295 | // processRefs, ie. a self-referential ref. We could try to look |
296 | // it up again, but Parsoid is choosing not to support that. |
297 | // Even worse would be if it tried to redefine itself! |
298 | |
299 | if ( !$ref ) { |
300 | $ref = $refsData->add( $extApi, $groupName, $refName, $extendsRef, $refDir ); |
301 | } |
302 | |
303 | // Handle linkbacks |
304 | if ( $refsData->inEmbeddedContent() ) { |
305 | $ref->embeddedNodes[] = $about; |
306 | } else { |
307 | $ref->nodes[] = $linkBack; |
308 | $ref->linkbacks[] = $ref->key . '-' . count( $ref->linkbacks ); |
309 | } |
310 | } |
311 | |
312 | if ( isset( $refDmw->attrs->dir ) ) { |
313 | if ( $refDir !== 'rtl' && $refDir !== 'ltr' ) { |
314 | $errs[] = new DataMwError( 'cite_error_ref_invalid_dir', [ $refDir ] ); |
315 | } elseif ( $ref->dir !== '' && $ref->dir !== $refDir ) { |
316 | $errs[] = new DataMwError( 'cite_error_ref_conflicting_dir', [ $ref->name ] ); |
317 | } |
318 | } |
319 | |
320 | // FIXME: At some point this error message can be changed to a warning, as Parsoid Cite now |
321 | // supports numerals as a name without it being an actual error, but core Cite does not. |
322 | // Follow refs do not duplicate the error which can be correlated with the original ref. |
323 | if ( ctype_digit( $refName ) ) { |
324 | $errs[] = new DataMwError( 'cite_error_ref_numeric_key' ); |
325 | } |
326 | |
327 | // Check for missing content, added ?? '' to fix T259676 crasher |
328 | // FIXME: See T260082 for a more complete description of cause and deeper fix |
329 | $missingContent = ( !empty( $cDp->empty ) || trim( $refDmw->body->extsrc ?? '' ) === '' ); |
330 | |
331 | if ( $missingContent ) { |
332 | // Check for missing name and content to generate error code |
333 | // |
334 | // In references content, refs should be used for definition so missing content |
335 | // is an error. It's possible that no name is present (!hasRefName), which also |
336 | // gets the error "cite_error_references_no_key" above, so protect against that. |
337 | if ( $refsData->inReferencesContent() ) { |
338 | $errs[] = new DataMwError( |
339 | 'cite_error_empty_references_define', |
340 | [ $refDmw->attrs->name ?? '', $refDmw->attrs->group ?? '' ] |
341 | ); |
342 | } elseif ( !$hasRefName ) { |
343 | if ( !empty( $cDp->selfClose ) ) { |
344 | $errs[] = new DataMwError( 'cite_error_ref_no_key' ); |
345 | } else { |
346 | $errs[] = new DataMwError( 'cite_error_ref_no_input' ); |
347 | } |
348 | } |
349 | |
350 | if ( !empty( $cDp->selfClose ) ) { |
351 | unset( $refDmw->body ); |
352 | } else { |
353 | // Empty the <sup> since we've serialized its children and |
354 | // removing it below asserts everything has been migrated out |
355 | DOMCompat::replaceChildren( $c ); |
356 | $refDmw->body = (object)[ 'html' => $refDmw->body->extsrc ?? '' ]; |
357 | } |
358 | } else { |
359 | if ( $ref->contentId && !$validFollow ) { |
360 | // Empty the <sup> since we've serialized its children and |
361 | // removing it below asserts everything has been migrated out |
362 | DOMCompat::replaceChildren( $c ); |
363 | } |
364 | if ( $contentDiffers ) { |
365 | // TODO: Since this error is being placed on the ref, the |
366 | // key should arguably be "cite_error_ref_duplicate_key" |
367 | $errs[] = new DataMwError( |
368 | 'cite_error_references_duplicate_key', |
369 | [ $refDmw->attrs->name ] |
370 | ); |
371 | $refDmw->body = (object)[ 'html' => $html ]; |
372 | } else { |
373 | $refDmw->body = (object)[ 'id' => 'mw-reference-text-' . $ref->target ]; |
374 | } |
375 | } |
376 | |
377 | $class = 'mw-ref reference'; |
378 | if ( $validFollow ) { |
379 | $class .= ' mw-ref-follow'; |
380 | } |
381 | |
// Last entry of the ref's linkback id list, or null when the list is empty.
// It is used as the element id for named refs that are neither embedded nor
// a valid follow.
382 | $lastLinkback = $ref->linkbacks[count( $ref->linkbacks ) - 1] ?? null; |
383 | DOMUtils::addAttributes( $linkBack, [ |
384 | 'about' => $about, |
385 | 'class' => $class, |
386 | 'id' => ( $refsData->inEmbeddedContent() || $validFollow ) ? |
387 | null : ( $ref->name ? $lastLinkback : $ref->id ), |
388 | 'rel' => 'dc:references', |
389 | 'typeof' => DOMCompat::getAttribute( $node, 'typeof' ), |
390 | ] |
391 | ); |
392 | DOMUtils::removeTypeOf( $linkBack, 'mw:DOMFragment/sealed/ref' ); |
393 | DOMUtils::addTypeOf( $linkBack, 'mw:Extension/ref' ); |
394 | |
// Only src, dsr and pi are carried over from the wrapper's data-parsoid.
395 | $dataParsoid = new DataParsoid; |
396 | if ( isset( $nodeDp->src ) ) { |
397 | $dataParsoid->src = $nodeDp->src; |
398 | } |
399 | if ( isset( $nodeDp->dsr ) ) { |
400 | $dataParsoid->dsr = $nodeDp->dsr; |
401 | } |
402 | if ( isset( $nodeDp->pi ) ) { |
403 | $dataParsoid->pi = $nodeDp->pi; |
404 | } |
405 | DOMDataUtils::setDataParsoid( $linkBack, $dataParsoid ); |
406 | |
407 | $dmw = $isTplWrapper ? $tplDmw : $refDmw; |
408 | DOMDataUtils::setDataMw( $linkBack, $dmw ); |
409 | |
410 | // FIXME(T214241): Should the errors be added to data-mw if |
411 | // $isTplWrapper? Here and other calls to addErrorsToNode. |
412 | if ( $errs ) { |
413 | self::addErrorsToNode( $linkBack, $errs ); |
414 | } |
415 | |
416 | // refLink is the link to the citation |
417 | $refLink = $doc->createElement( 'a' ); |
418 | DOMUtils::addAttributes( $refLink, [ |
419 | 'href' => $extApi->getPageUri() . '#' . $ref->target, |
420 | 'style' => 'counter-reset: mw-Ref ' . $ref->groupIndex . ';', |
421 | ] ); |
422 | if ( $ref->group ) { |
423 | $refLink->setAttribute( 'data-mw-group', $ref->group ); |
424 | } |
425 | |
426 | // refLink-span which will contain a default rendering of the cite link |
427 | // for browsers that don't support counters |
428 | $refLinkSpan = $doc->createElement( 'span' ); |
429 | $refLinkSpan->setAttribute( 'class', 'mw-reflink-text' ); |
430 | $refLinkSpan->appendChild( $doc->createTextNode( |
431 | '[' . $this->markSymbolRenderer->makeLabel( $ref->group, $ref->groupIndex ) . ']' |
432 | ) ); |
433 | |
434 | $refLink->appendChild( $refLinkSpan ); |
435 | $linkBack->appendChild( $refLink ); |
436 | |
437 | // Checking if the <ref> is nested in a link |
438 | $aParent = DOMUtils::findAncestorOfName( $node, 'a' ); |
439 | if ( $aParent !== null ) { |
440 | // If we find a parent link, we hoist the reference up, just after the link |
441 | // But if there's multiple references in a single link, we want to insert in order - |
442 | // so we look for other misnested references before inserting |
443 | $insertionPoint = $aParent->nextSibling; |
444 | while ( $insertionPoint instanceof Element && |
445 | DOMCompat::nodeName( $insertionPoint ) === 'sup' && |
446 | !empty( DOMDataUtils::getDataParsoid( $insertionPoint )->misnested ) |
447 | ) { |
448 | $insertionPoint = $insertionPoint->nextSibling; |
449 | } |
450 | $aParent->parentNode->insertBefore( $linkBack, $insertionPoint ); |
451 | // set misnested to true and DSR to zero-sized to avoid round-tripping issues |
452 | $dsrOffset = DOMDataUtils::getDataParsoid( $aParent )->dsr->end ?? null; |
453 | // we created that node hierarchy above, so we know that it only contains these nodes, |
454 | // hence there's no need for a visitor |
455 | self::setMisnested( $linkBack, $dsrOffset ); |
456 | self::setMisnested( $refLink, $dsrOffset ); |
457 | self::setMisnested( $refLinkSpan, $dsrOffset ); |
458 | $parentAbout = DOMCompat::getAttribute( $aParent, 'about' ); |
459 | if ( $parentAbout !== null ) { |
460 | $linkBack->setAttribute( 'about', $parentAbout ); |
461 | } |
462 | $node->parentNode->removeChild( $node ); |
463 | } else { |
464 | // if not, we insert it where we planned in the first place |
465 | $node->parentNode->replaceChild( $linkBack, $node ); |
466 | } |
467 | |
468 | // Keep the first content to compare multiple <ref>s with the same name. |
469 | if ( $ref->contentId === null && !$missingContent ) { |
470 | $ref->contentId = $contentId; |
471 | // Use the dir parameter only from the full definition of a named ref tag |
472 | $ref->dir = $refDir; |
473 | } else { |
474 | DOMCompat::remove( $c ); |
475 | $extApi->clearContentDOM( $contentId ); |
476 | } |
477 | } |
478 | |
/**
 * Sets a node as misnested and its DSR as zero-width.
 *
 * The node's data-parsoid object is modified in place: `misnested` is set
 * to true and `dsr` is replaced by a range that starts and ends at $offset.
 *
 * @param Element $node Element whose data-parsoid is updated
 * @param ?int $offset Offset used for both ends of the new range; may be null
 */
private static function setMisnested( Element $node, ?int $offset ): void {
	$dataParsoid = DOMDataUtils::getDataParsoid( $node );
	$dataParsoid->misnested = true;
	$dataParsoid->dsr = new DomSourceRange( $offset, $offset, null, null );
}
487 | |
/**
 * Marks an element as erroneous and records the given errors in its data-mw.
 *
 * Adds the 'mw:Error' type to $node. Errors already stored as an array in
 * data-mw are kept and the new ones are appended after them; any other
 * existing value is replaced by $errs.
 *
 * @param Element $node
 * @param list<DataMwError> $errs
 */
private static function addErrorsToNode( Element $node, array $errs ): void {
	DOMUtils::addTypeOf( $node, 'mw:Error' );

	$dataMw = DOMDataUtils::getDataMw( $node );
	if ( is_array( $dataMw->errors ?? null ) ) {
		$dataMw->errors = array_merge( $dataMw->errors, $errs );
	} else {
		$dataMw->errors = $errs;
	}
}
498 | |
/**
 * Fills a references list element with the refs collected for its group.
 *
 * Steps performed:
 *  - back-patch errors that are only known now onto the refs of the group,
 *  - update data-mw of the references node (unless it is a template wrapper),
 *  - handle the responsive wrapper, then empty the list and render one line
 *    per ref,
 *  - remove the group from $refsData.
 *
 * @param ParsoidExtensionAPI $extApi
 * @param Element $refsNode The element produced for the references tag
 * @param ReferencesData $refsData
 * @param bool $autoGenerated Whether the list was created implicitly
 */
private function insertReferencesIntoDOM(
	ParsoidExtensionAPI $extApi, Element $refsNode,
	ReferencesData $refsData, bool $autoGenerated = false
): void {
	$isTplWrapper = DOMUtils::hasTypeOf( $refsNode, 'mw:Transclusion' );
	$dp = DOMDataUtils::getDataParsoid( $refsNode );
	$group = $dp->group ?? '';
	$refGroup = $refsData->getRefGroup( $group );

	// Walk the ref list to back-patch typeof and data-mw error information
	// for errors that can only be detected at insertion time. Linkbacks in
	// the top level DOM are patched right away; refs in embedded content are
	// only noted by about id, because that content has to be parsed again
	// before it can be touched.
	if ( $refGroup ) {
		$missingReferencesTag = $autoGenerated && $group !== '';
		foreach ( $refGroup->refs as $ref ) {
			$errs = [];
			if ( $missingReferencesTag ) {
				// Every ref of a named group whose list is autogenerated
				$errs[] = new DataMwError(
					'cite_error_group_refs_without_references',
					[ $group ]
				);
			}
			if ( $ref->name !== '' && $ref->contentId === null ) {
				// Named ref that never received content.
				// TODO: Since this error is being placed on the ref,
				// the key should arguably be "cite_error_ref_no_text"
				$errs[] = new DataMwError(
					'cite_error_references_no_text',
					[ $ref->name ]
				);
			}
			if ( !$errs ) {
				continue;
			}
			foreach ( $ref->nodes as $node ) {
				self::addErrorsToNode( $node, $errs );
			}
			foreach ( $ref->embeddedNodes as $about ) {
				$refsData->embeddedErrors[$about] = $errs;
			}
		}
	}

	// Note that the `<sup>`s collected here are probably all we really need
	// to check for errors caught with `$refsData->inReferencesContent()`,
	// but it's probably easier to just know that state while they're being
	// constructed.
	$nestedSups = PHPUtils::iterable_to_array( DOMCompat::querySelectorAll(
		$refsNode, 'sup[typeof~=\'mw:Extension/ref\']'
	) );
	$nestedRefsHTML = array_map(
		static fn ( Element $sup ) => $extApi->domToHtml( $sup, false, true ) . "\n",
		$nestedSups
	);

	if ( !$isTplWrapper ) {
		$dataMw = DOMDataUtils::getDataMw( $refsNode );
		if ( $autoGenerated ) {
			// Mark this auto-generated so that we can skip this during
			// html -> wt and so that clients can strip it if necessary.
			$dataMw->autoGenerated = true;
		} elseif ( $nestedRefsHTML ) {
			$dataMw->body = (object)[ 'html' => "\n" . implode( $nestedRefsHTML ) ];
		} elseif ( !empty( $dp->selfClose ) ) {
			unset( $dataMw->body );
		} else {
			$dataMw->body = (object)[ 'html' => '' ];
		}
		unset( $dp->selfClose );
	}

	// Deal with responsive wrapper
	if ( DOMUtils::hasClass( $refsNode, 'mw-references-wrap' ) ) {
		// NOTE: The default Cite implementation hardcodes this threshold to 10.
		// We use a configurable parameter here primarily for test coverage purposes.
		// See citeParserTests.txt where we set a threshold of 1 or 2.
		$rrThreshold = $this->mainConfig->get( 'CiteResponsiveReferencesThreshold' ) ?? 10;
		if ( $refGroup && count( $refGroup->refs ) > $rrThreshold ) {
			DOMCompat::getClassList( $refsNode )->add( 'mw-references-columns' );
		}
		// From here on, work on the wrapper's first child.
		$refsNode = $refsNode->firstChild;
	}
	DOMUtils::assertElt( $refsNode );

	// Remove all children from the references node
	//
	// Ex: When {{Reflist}} is reused from the cache, it comes with
	// a bunch of references as well. We have to remove all those cached
	// references before generating fresh references.
	DOMCompat::replaceChildren( $refsNode );

	if ( $refGroup ) {
		foreach ( $refGroup->refs as $ref ) {
			$refGroup->renderLine( $extApi, $refsNode, $ref );
		}
	}

	// This group is done; drop it from refsData
	$refsData->removeRefGroup( $group );
}
602 | |
/**
 * Handles the `<ref>`s that are still pending once the DOM has been fully
 * processed, as if an implicit `<references />` tag closed the document.
 *
 * For each remaining ref group, an autogenerated references list is
 * created, appended to $node after a newline text node, and populated.
 *
 * @param ParsoidExtensionAPI $extApi
 * @param ReferencesData $refsData
 * @param Node $node Node the generated lists are appended to
 */
public function insertMissingReferencesIntoDOM(
	ParsoidExtensionAPI $extApi, ReferencesData $refsData, Node $node
): void {
	$doc = $node->ownerDocument;

	// The new references come out of "nowhere", so to make selser work
	// properly, add a zero-sized DSR pointing to the end of the document.
	$setEndOfDocumentDsr = static function ( $dp ) use ( $extApi ) {
		$content = $extApi->getPageConfig()->getRevisionContent()->getContent( 'main' );
		$contentLength = strlen( $content );
		$dp->dsr = new DomSourceRange( $contentLength, $contentLength, 0, 0 );
	};

	foreach ( $refsData->getRefGroups() as $groupName => $refsGroup ) {
		$refsOpts = [
			// Force string cast here since $groupName is an array key of
			// the foreach above. In that context, number-like strings are
			// silently converted to a numeric value!
			// Ex: In <ref group="2" />, the "2" becomes 2 in the foreach
			'group' => (string)$groupName,
			'responsive' => null,
		];
		$frag = $this->createReferences(
			$extApi,
			$doc->createDocumentFragment(),
			$refsOpts,
			$setEndOfDocumentDsr,
			true
		);

		// Add a \n before the <ol> so that when serialized to wikitext,
		// each <references /> tag appears on its own line.
		$node->appendChild( $doc->createTextNode( "\n" ) );
		$node->appendChild( $frag );

		$this->insertReferencesIntoDOM( $extApi, $frag, $refsData, true );
	}
}
647 | |
/**
 * Runs ref processing over an HTML string and returns the resulting HTML.
 *
 * @param ParsoidExtensionAPI $extApi
 * @param ReferencesData $refsData
 * @param string $str HTML to parse into a fragment
 * @return string HTML serialized from the processed fragment
 */
private function processEmbeddedRefs(
	ParsoidExtensionAPI $extApi, ReferencesData $refsData, string $str
): string {
	$fragment = $extApi->htmlToDom( $str );
	$this->processRefs( $extApi, $refsData, $fragment );
	$processedHtml = $extApi->domToHtml( $fragment, true, true );
	return $processedHtml;
}
655 | |
/**
 * Walks the children of $node and processes every ref and references list.
 *
 * - A sealed ref fragment is handed to extractRefFromNode().
 * - A references element has its children processed with the 'references'
 *   flag pushed; when it is not itself inside references content, its list
 *   is inserted afterwards.
 * - Any other element has its attribute-embedded HTML processed and is then
 *   descended into.
 *
 * @param ParsoidExtensionAPI $extApi
 * @param ReferencesData $refsData
 * @param Node $node
 */
public function processRefs(
	ParsoidExtensionAPI $extApi, ReferencesData $refsData, Node $node
): void {
	$nextChild = null;
	for ( $child = $node->firstChild; $child !== null; $child = $nextChild ) {
		// Remember the sibling up front: handling $child may replace or move it.
		$nextChild = $child->nextSibling;

		if ( !( $child instanceof Element ) ) {
			continue;
		}

		if ( WTUtils::isSealedFragmentOfType( $child, 'ref' ) ) {
			$this->extractRefFromNode( $extApi, $child, $refsData );
			continue;
		}

		if ( DOMUtils::hasTypeOf( $child, 'mw:Extension/references' ) ) {
			if ( !$refsData->inReferencesContent() ) {
				$refsData->referencesGroup =
					DOMDataUtils::getDataParsoid( $child )->group ?? '';
			}
			$refsData->pushEmbeddedContentFlag( 'references' );
			if ( $child->hasChildNodes() ) {
				$this->processRefs( $extApi, $refsData, $child );
			}
			$refsData->popEmbeddedContentFlag();
			if ( !$refsData->inReferencesContent() ) {
				$refsData->referencesGroup = '';
				$this->insertReferencesIntoDOM( $extApi, $child, $refsData, false );
			}
			continue;
		}

		// Look for <ref>s embedded in data attributes
		$refsData->pushEmbeddedContentFlag();
		$extApi->processAttributeEmbeddedHTML(
			$child,
			fn ( string $html ) => $this->processEmbeddedRefs( $extApi, $refsData, $html )
		);
		$refsData->popEmbeddedContentFlag();

		if ( $child->hasChildNodes() ) {
			$this->processRefs( $extApi, $refsData, $child );
		}
	}
}
696 | |
/**
 * Walks all embedded content and marks up the refs in there whose errors
 * were not yet known when that content was serialized.
 *
 * Some errors only become known while the references lists are inserted.
 * By then embedded content has already been serialized and stored, so there
 * is no live access to it any more. Errors are therefore recorded per about
 * id at that time, and this final walk of the DOM peeks into all embedded
 * content and applies them where necessary.
 *
 * @param ParsoidExtensionAPI $extApi
 * @param ReferencesData $refsData
 * @param Node $node
 */
public function addEmbeddedErrors(
	ParsoidExtensionAPI $extApi, ReferencesData $refsData, Node $node
): void {
	// Similar to processEmbeddedRefs: parse, recurse, serialize again.
	$processEmbeddedErrors = function ( string $html ) use ( $extApi, $refsData ) {
		$fragment = $extApi->htmlToDom( $html );
		$this->addEmbeddedErrors( $extApi, $refsData, $fragment );
		return $extApi->domToHtml( $fragment, true, true );
	};

	$nextChild = null;
	for ( $child = $node->firstChild; $child !== null; $child = $nextChild ) {
		$nextChild = $child->nextSibling;
		if ( !( $child instanceof Element ) ) {
			continue;
		}

		$extApi->processAttributeEmbeddedHTML( $child, $processEmbeddedErrors );

		if ( DOMUtils::hasTypeOf( $child, 'mw:Extension/ref' ) ) {
			$about = DOMCompat::getAttribute( $child, 'about' );
			'@phan-var string $about'; // assert $about is non-null
			$errs = $refsData->embeddedErrors[$about] ?? null;
			if ( $errs ) {
				self::addErrorsToNode( $child, $errs );
			}
		}

		if ( $child->hasChildNodes() ) {
			$this->addEmbeddedErrors( $extApi, $refsData, $child );
		}
	}
}
743 | |
/**
 * Converts the source of a references tag into a DOM fragment.
 *
 * The tag content is parsed, wrapped by createReferences(), and appended
 * back to the fragment. If an attribute other than `group` or `responsive`
 * is present, an error is pushed and an error span is appended to the
 * generated element.
 *
 * @inheritDoc
 */
public function sourceToDom(
	ParsoidExtensionAPI $extApi, string $txt, array $extArgs
): DocumentFragment {
	$parseOptions = [
		'parseOpts' => [ 'extTag' => 'references' ],
	];
	$domFragment = $extApi->extTagToDOM( $extArgs, $txt, $parseOptions );

	$refsOpts = $extApi->extArgsToArray( $extArgs ) + [
		'group' => null,
		'responsive' => null,
	];

	// Detect invalid parameters on the references tag; the first unknown
	// attribute is enough to report the error.
	$error = null;
	$knownAttributes = [ 'group', 'responsive' ];
	foreach ( array_keys( $refsOpts ) as $key ) {
		if ( in_array( strtolower( (string)$key ), $knownAttributes, true ) ) {
			continue;
		}
		$extApi->pushError( 'cite_error_references_invalid_parameters' );
		$error = new MessageValue( 'cite_error_references_invalid_parameters' );
		break;
	}

	$recordTagSource = static function ( $dp ) use ( $extApi ) {
		$dp->src = $extApi->extTag->getSource();
		// Setting redundant info on fragment.
		// $docBody->firstChild info feels cumbersome to use downstream.
		if ( $extApi->extTag->isSelfClosed() ) {
			$dp->selfClose = true;
		}
	};
	$frag = $this->createReferences( $extApi, $domFragment, $refsOpts, $recordTagSource );
	$domFragment->appendChild( $frag );

	if ( $error !== null ) {
		$errorFragment = ErrorUtils::renderParsoidErrorSpan( $extApi, $error );
		// we're pushing it after the reference block while it tends to be before in legacy (error + rerender)
		$extApi->addTrackingCategory( 'cite-tracking-category-cite-diffing-error' );
		$frag->appendChild( $errorFragment );
	}

	return $domFragment;
}
795 | |
/**
 * Run $proc over the HTML stored in the element's data-mw body, if there is any,
 * and store the result back in place.
 *
 * @inheritDoc
 */
public function processAttributeEmbeddedHTML(
	ParsoidExtensionAPI $extApi, Element $elt, Closure $proc
): void {
	$dataMw = DOMDataUtils::getDataMw( $elt );
	if ( !isset( $dataMw->body->html ) ) {
		// No embedded HTML on this element; nothing to process.
		return;
	}
	$dataMw->body->html = $proc( $dataMw->body->html );
}
805 | |
/**
 * Serialize a references node back to wikitext.
 *
 * Returns an empty string for autogenerated references that carry a group, and
 * when a non-empty body has no HTML to serialize (that case is also logged).
 *
 * @inheritDoc
 */
public function domToWikitext(
	ParsoidExtensionAPI $extApi, Element $node, bool $wrapperUnmodified
) {
	$dataMw = DOMDataUtils::getDataMw( $node );

	// Plain autogenerated references are not treated as errors (the legacy parser
	// extension produces them too) and are kept when serializing, because that is
	// apparently what Parsoid clients want. Autogenerated references *with a group
	// attribute* are errors (the legacy extension never generates them), so they
	// are dropped here rather than persisted in the content.
	$isAutoGenerated = !empty( $dataMw->autoGenerated );
	if ( $isAutoGenerated && ( $dataMw->attrs->group ?? '' ) !== '' ) {
		return '';
	}

	$startTagSrc = $extApi->extStartTagToWikitext( $node );

	if ( empty( $dataMw->body ) ) {
		// We self-closed this already.
		return $startTagSrc;
	}

	if ( !isset( $dataMw->body->html ) ) {
		$extApi->log( 'error',
			'References body unavailable for: ' . DOMCompat::getOuterHTML( $node )
		);
		// Drop it!
		return '';
	}

	$bodySrc = $extApi->htmlToWikitext(
		[ 'extName' => $dataMw->name ],
		$dataMw->body->html
	);
	return $startTagSrc . $bodySrc . '</' . $dataMw->name . '>';
}
839 | |
/**
 * Pass the references body, parsed from its data-mw HTML, to the default lint
 * handler. Always returns true, whether or not a body was present.
 *
 * @inheritDoc
 */
public function lintHandler(
	ParsoidExtensionAPI $extApi, Element $refs, callable $defaultHandler
): bool {
	$dataMw = DOMDataUtils::getDataMw( $refs );
	if ( !isset( $dataMw->body->html ) ) {
		// No body HTML, so there is nothing to pass to the default handler.
		return true;
	}

	$bodyFragment = $extApi->htmlToDom( $dataMw->body->html );
	$defaultHandler( $bodyFragment );
	return true;
}
851 | |
/**
 * Diff two references nodes by comparing the DOM parsed from their data-mw body
 * HTML. Returns false without diffing unless both nodes carry body HTML.
 *
 * @inheritDoc
 */
public function diffHandler(
	ParsoidExtensionAPI $extApi, callable $domDiff, Element $origNode,
	Element $editedNode
): bool {
	$origDataMw = DOMDataUtils::getDataMw( $origNode );
	$editedDataMw = DOMDataUtils::getDataMw( $editedNode );

	if ( !isset( $origDataMw->body->html, $editedDataMw->body->html ) ) {
		// FIXME: Similar to DOMDiff::subtreeDiffers, maybe $editNode should
		// be marked as inserted to avoid losing any edits, at the cost of
		// more normalization
		return false;
	}

	$parseOptions = [ 'markNew' => true ];
	$origFragment = $extApi->htmlToDom(
		$origDataMw->body->html, $origNode->ownerDocument, $parseOptions
	);
	$editedFragment = $extApi->htmlToDom(
		$editedDataMw->body->html, $editedNode->ownerDocument, $parseOptions
	);

	return call_user_func( $domDiff, $origFragment, $editedFragment );
}
878 | } |