Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.73% |
3 / 413 |
|
6.67% |
1 / 15 |
CRAP | |
0.00% |
0 / 1 |
References | |
0.73% |
3 / 413 |
|
6.67% |
1 / 15 |
15411.97 | |
0.00% |
0 / 1 |
hasRef | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
30 | |||
createReferences | |
0.00% |
0 / 35 |
|
0.00% |
0 / 1 |
56 | |||
extractRefFromNode | |
0.00% |
0 / 170 |
|
0.00% |
0 / 1 |
3080 | |||
setMisnested | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
addErrorsToNode | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
insertReferencesIntoDOM | |
0.00% |
0 / 47 |
|
0.00% |
0 / 1 |
380 | |||
insertMissingReferencesIntoDOM | |
0.00% |
0 / 20 |
|
0.00% |
0 / 1 |
6 | |||
processEmbeddedRefs | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
processRefs | |
0.00% |
0 / 27 |
|
0.00% |
0 / 1 |
90 | |||
addEmbeddedErrors | |
0.00% |
0 / 30 |
|
0.00% |
0 / 1 |
72 | |||
sourceToDom | |
0.00% |
0 / 28 |
|
0.00% |
0 / 1 |
20 | |||
processAttributeEmbeddedHTML | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
domToWikitext | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
30 | |||
lintHandler | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
diffHandler | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
12 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | // phpcs:disable MediaWiki.WhiteSpace.SpaceBeforeSingleLineComment.NewLineComment |
4 | // @phan-file-suppress MediaWikiNoEmptyIfDefined Phan thinks the @property docs in the DataParsoid |
5 | // class are a guarantee, but they aren't |
6 | |
7 | namespace Cite\Parsoid; |
8 | |
9 | use Closure; |
10 | use stdClass; |
11 | use Wikimedia\Parsoid\Core\DomSourceRange; |
12 | use Wikimedia\Parsoid\DOM\DocumentFragment; |
13 | use Wikimedia\Parsoid\DOM\Element; |
14 | use Wikimedia\Parsoid\DOM\Node; |
15 | use Wikimedia\Parsoid\Ext\DOMDataUtils; |
16 | use Wikimedia\Parsoid\Ext\DOMUtils; |
17 | use Wikimedia\Parsoid\Ext\ExtensionTagHandler; |
18 | use Wikimedia\Parsoid\Ext\ParsoidExtensionAPI; |
19 | use Wikimedia\Parsoid\Ext\PHPUtils; |
20 | use Wikimedia\Parsoid\Ext\WTUtils; |
21 | use Wikimedia\Parsoid\NodeData\DataMw; |
22 | use Wikimedia\Parsoid\NodeData\DataParsoid; |
23 | use Wikimedia\Parsoid\Utils\DOMCompat; |
24 | |
25 | /** |
26 | * @license GPL-2.0-or-later |
27 | */ |
28 | class References extends ExtensionTagHandler { |
29 | |
/**
 * Report whether any element nested below $node is a sealed `ref` fragment.
 *
 * Walks the element descendants depth-first; non-element children are
 * skipped and $node itself is never tested.
 *
 * @param Node $node Root of the subtree to search
 * @return bool True as soon as a sealed `ref` fragment is encountered
 */
private static function hasRef( Node $node ): bool {
	for ( $child = $node->firstChild; $child; $child = $child->nextSibling ) {
		if ( !( $child instanceof Element ) ) {
			continue;
		}
		// Either the child is itself a sealed ref, or one hides deeper inside it
		if ( WTUtils::isSealedFragmentOfType( $child, 'ref' ) || self::hasRef( $child ) ) {
			return true;
		}
	}
	return false;
}
45 | |
/**
 * Build the container element for a references list.
 *
 * All children of $domFragment are moved into a new
 * `<ol class="mw-references references">`. Depending on the `responsive`
 * option (or the CiteResponsiveReferences config value when the option is
 * null) the list is wrapped in a `<div class="mw-references-wrap">`.
 * The Cite modules and module styles are registered on the output metadata.
 *
 * @param ParsoidExtensionAPI $extApi
 * @param DocumentFragment $domFragment Source of the list's initial children
 * @param array $refsOpts Options with the keys 'group' and 'responsive'
 * @param callable|null $modifyDp Optional callback that receives the
 *   data-parsoid object of the returned element
 * @param bool $autoGenerated Whether to also mark the element up as a
 *   references extension tag (typeof, about and data-mw)
 * @return Element The wrapper <div> if responsive wrapping applies,
 *   otherwise the <ol>
 */
private static function createReferences(
	ParsoidExtensionAPI $extApi, DocumentFragment $domFragment,
	array $refsOpts, ?callable $modifyDp, bool $autoGenerated = false
): Element {
	$doc = $domFragment->ownerDocument;

	// Test the group against the empty string rather than relying on
	// truthiness: a group named "0" is falsy in PHP but is still a group.
	$group = (string)( $refsOpts['group'] ?? '' );
	$hasGroup = $group !== '';
	$responsive = $refsOpts['responsive'] ?? null;

	$ol = $doc->createElement( 'ol' );
	DOMCompat::getClassList( $ol )->add( 'mw-references', 'references' );

	DOMUtils::migrateChildren( $domFragment, $ol );

	// Support the `responsive` parameter
	if ( $responsive !== null ) {
		$responsiveWrap = $responsive !== '0';
	} else {
		$responsiveWrap = (bool)$extApi->getSiteConfig()->getMWConfigValue( 'CiteResponsiveReferences' );
	}

	if ( $responsiveWrap ) {
		$div = $doc->createElement( 'div' );
		DOMCompat::getClassList( $div )->add( 'mw-references-wrap' );
		$div->appendChild( $ol );
		$frag = $div;
	} else {
		$frag = $ol;
	}

	if ( $autoGenerated ) {
		// FIXME: This is very much trying to copy ExtensionHandler::onDocument
		DOMUtils::addAttributes( $frag, [
			'typeof' => 'mw:Extension/references',
			'about' => $extApi->newAboutId()
		] );
		$dataMw = new DataMw( [
			'name' => 'references',
			'attrs' => new stdClass
		] );
		// Don't emit empty keys
		if ( $hasGroup ) {
			$dataMw->attrs->group = $group;
		}
		DOMDataUtils::setDataMw( $frag, $dataMw );
	}

	$dp = DOMDataUtils::getDataParsoid( $frag );
	if ( $hasGroup ) { // No group for the empty string either
		$dp->group = $group;
		$ol->setAttribute( 'data-mw-group', $group );
	}
	if ( $modifyDp ) {
		$modifyDp( $dp );
	}

	// These module names are copied from Cite extension.
	// They are hardcoded there as well.
	$metadata = $extApi->getMetadata();
	$metadata->addModules( [ 'ext.cite.ux-enhancements' ] );
	$metadata->addModuleStyles( [ 'ext.cite.parsoid.styles', 'ext.cite.styles' ] );

	return $frag;
}
107 | |
/**
 * Convert one sealed `ref` fragment wrapper into its rendered linkback
 * `<sup>` and record the ref in $refsData.
 *
 * In order, this method:
 *  - reads the ref's name/follow/group/dir attributes from its data-mw,
 *  - looks up an already known ref by name (or by follow target) in the
 *    ref group, collecting errors for conflicting, missing or mismatched
 *    attributes,
 *  - processes refs nested inside this ref's content,
 *  - registers a new ref in $refsData when none was found,
 *  - builds the linkback `<sup>` with its `<a>` and `<span>` children,
 *    attaches data-parsoid, data-mw and any collected errors, and
 *  - replaces $node with the linkback, hoisting it out of an enclosing
 *    `<a>` if there is one.
 *
 * @param ParsoidExtensionAPI $extApi
 * @param Element $node The sealed fragment wrapper found in the DOM
 * @param ReferencesData $refsData Accumulated ref state; modified in place
 */
private static function extractRefFromNode(
	ParsoidExtensionAPI $extApi, Element $node, ReferencesData $refsData
): void {
	$doc = $node->ownerDocument;
	$errs = [];

	// This is data-parsoid from the dom fragment node that's gone through
	// dsr computation and template wrapping.
	$nodeDp = DOMDataUtils::getDataParsoid( $node );
	$isTplWrapper = DOMUtils::hasTypeOf( $node, 'mw:Transclusion' );
	$contentId = $nodeDp->html;
	$tplDmw = $isTplWrapper ? DOMDataUtils::getDataMw( $node ) : null;

	// This is the <sup> that's the meat of the sealed fragment
	$c = $extApi->getContentDOM( $contentId )->firstChild;
	DOMUtils::assertElt( $c );
	$cDp = DOMDataUtils::getDataParsoid( $c );
	$refDmw = DOMDataUtils::getDataMw( $c );

	// Use the about attribute on the wrapper with priority, since it's
	// only added when the wrapper is a template sibling.
	$about = DOMCompat::getAttribute( $node, 'about' ) ??
		DOMCompat::getAttribute( $c, 'about' );
	'@phan-var string $about'; // assert that $about is non-null

	// FIXME(SSS): Need to clarify semantics here.
	// If both the containing <references> elt as well as the nested <ref>
	// elt has a group attribute, what takes precedence?
	$groupName = $refDmw->attrs->group ?? $refsData->referencesGroup;
	$group = $refsData->getRefGroup( $groupName );

	if (
		$refsData->inReferencesContent() &&
		$groupName !== $refsData->referencesGroup
	) {
		$errs[] = [ 'key' => 'cite_error_references_group_mismatch',
			'params' => [ $refDmw->attrs->group ] ];
	}

	// NOTE: This will have been trimmed in Utils::getExtArgInfo()'s call
	// to TokenUtils::kvToHash() and ExtensionHandler::normalizeExtOptions()
	$refName = $refDmw->attrs->name ?? '';
	$followName = $refDmw->attrs->follow ?? '';
	$refDir = strtolower( $refDmw->attrs->dir ?? '' );

	// Add ref-index linkback
	$linkBack = $doc->createElement( 'sup' );

	$ref = null;

	$hasRefName = strlen( $refName ) > 0;
	$hasFollow = strlen( $followName ) > 0;

	$validFollow = false;

	if ( $hasFollow ) {
		// Always wrap follows content so that there's no ambiguity
		// where to find it when roundtripping
		$span = $doc->createElement( 'span' );
		DOMUtils::addTypeOf( $span, 'mw:Cite/Follow' );
		$span->setAttribute( 'about', $about );
		$span->appendChild(
			$doc->createTextNode( ' ' )
		);
		DOMUtils::migrateChildren( $c, $span );
		$c->appendChild( $span );
	}

	$html = '';
	$contentDiffers = false;

	if ( $hasRefName ) {
		if ( $hasFollow ) {
			// Presumably, "name" has higher precedence
			$errs[] = [ 'key' => 'cite_error_ref_follow_conflicts' ];
		}
		if ( isset( $group->indexByName[$refName] ) ) {
			$ref = $group->indexByName[$refName];
			// If there are multiple <ref>s with the same name, but different content,
			// the content of the first <ref> shows up in the <references> section.
			// in order to ensure lossless RT-ing for later <refs>, we have to record
			// HTML inline for all of them.
			if ( $ref->contentId ) {
				if ( $ref->cachedHtml === null ) {
					// @phan-suppress-next-line PhanTypeMismatchArgumentNullable False positive
					$refContent = $extApi->getContentDOM( $ref->contentId )->firstChild;
					$ref->cachedHtml = $extApi->domToHtml( $refContent, true, false );
				}
				// See the test, "Forward-referenced ref with magical follow edge case"
				// Ideally, we should strip the mw:Cite/Follow wrappers before comparing
				// But, we are going to ignore this edge case as not worth the complexity.
				$html = $extApi->domToHtml( $c, true, false );
				$contentDiffers = ( $html !== $ref->cachedHtml );
			}
		} else {
			if ( $refsData->inReferencesContent() ) {
				$errs[] = [
					'key' => 'cite_error_references_missing_key',
					'params' => [ $refDmw->attrs->name ]
				];
			}
		}
	} else {
		if ( $hasFollow ) {
			// This is a follows ref, so check that a named ref has already
			// been defined
			if ( isset( $group->indexByName[$followName] ) ) {
				$validFollow = true;
				$ref = $group->indexByName[$followName];
			} else {
				// FIXME: This key isn't exactly appropriate since this
				// is more general than just being in a <references>
				// section and it's the $followName we care about, but the
				// extension to the legacy parser doesn't have an
				// equivalent key and just outputs something wacky.
				$errs[] = [ 'key' => 'cite_error_references_missing_key',
					'params' => [ $refDmw->attrs->follow ] ];
			}
		} elseif ( $refsData->inReferencesContent() ) {
			$errs[] = [ 'key' => 'cite_error_references_no_key' ];
		}
	}

	// Process nested ref-in-ref
	//
	// Do this before possibly adding a ref below or
	// migrating contents out of $c if we have a valid follow
	if ( empty( $cDp->empty ) && self::hasRef( $c ) ) {
		if ( $contentDiffers ) {
			$refsData->pushEmbeddedContentFlag();
		}
		self::processRefs( $extApi, $refsData, $c );
		if ( $contentDiffers ) {
			$refsData->popEmbeddedContentFlag();
			// If we have refs and the content differs, we need to
			// reserialize now that we processed the refs. Unfortunately,
			// the cachedHtml we compared against already had its refs
			// processed so that would presumably never match and this will
			// always be considered a redefinition. The implementation for
			// the legacy parser also considers this a redefinition so
			// there is likely little content out there like this :)
			$html = $extApi->domToHtml( $c, true, true );
		}
	}

	if ( $validFollow ) {
		// Migrate content from the follow to the ref
		if ( $ref->contentId ) {
			// @phan-suppress-next-line PhanTypeMismatchArgumentNullable False positive
			$refContent = $extApi->getContentDOM( $ref->contentId )->firstChild;
			DOMUtils::migrateChildren( $c, $refContent );
		} else {
			// Otherwise, we have a follow that comes after a named
			// ref without content so use the follow fragment as
			// the content
			// This will be set below with `$ref->contentId = $contentId;`
		}
	} else {
		// If we have !$ref, one might have been added in the call to
		// processRefs, ie. a self-referential ref. We could try to look
		// it up again, but Parsoid is choosing not to support that.
		// Even worse would be if it tried to redefine itself!

		if ( !$ref ) {
			$ref = $refsData->add( $extApi, $groupName, $refName, $refDir );
		}

		// Handle linkbacks
		if ( $refsData->inEmbeddedContent() ) {
			$ref->embeddedNodes[] = $about;
		} else {
			$ref->nodes[] = $linkBack;
			$ref->linkbacks[] = $ref->key . '-' . count( $ref->linkbacks );
		}
	}

	if ( isset( $refDmw->attrs->dir ) ) {
		if ( $refDir !== 'rtl' && $refDir !== 'ltr' ) {
			$errs[] = [ 'key' => 'cite_error_ref_invalid_dir', 'params' => [ $refDir ] ];
		} elseif ( $ref->dir !== '' && $ref->dir !== $refDir ) {
			$errs[] = [ 'key' => 'cite_error_ref_conflicting_dir', 'params' => [ $ref->name ] ];
		}
	}

	// FIXME: At some point this error message can be changed to a warning, as Parsoid Cite now
	// supports numerals as a name without it being an actual error, but core Cite does not.
	// Follow refs do not duplicate the error which can be correlated with the original ref.
	if ( ctype_digit( $refName ) ) {
		$errs[] = [ 'key' => 'cite_error_ref_numeric_key' ];
	}

	// Check for missing content, added ?? '' to fix T259676 crasher
	// FIXME: See T260082 for a more complete description of cause and deeper fix
	$missingContent = ( !empty( $cDp->empty ) || trim( $refDmw->body->extsrc ?? '' ) === '' );

	if ( $missingContent ) {
		// Check for missing name and content to generate error code
		//
		// In references content, refs should be used for definition so missing content
		// is an error. It's possible that no name is present (!hasRefName), which also
		// gets the error "cite_error_references_no_key" above, so protect against that.
		if ( $refsData->inReferencesContent() ) {
			$errs[] = [ 'key' => 'cite_error_empty_references_define',
				'params' => [ $refDmw->attrs->name ?? '' ] ];
		} elseif ( !$hasRefName ) {
			if ( !empty( $cDp->selfClose ) ) {
				$errs[] = [ 'key' => 'cite_error_ref_no_key' ];
			} else {
				$errs[] = [ 'key' => 'cite_error_ref_no_input' ];
			}
		}

		if ( !empty( $cDp->selfClose ) ) {
			unset( $refDmw->body );
		} else {
			// Empty the <sup> since we've serialized its children and
			// removing it below asserts everything has been migrated out
			DOMCompat::replaceChildren( $c );
			$refDmw->body = (object)[ 'html' => $refDmw->body->extsrc ?? '' ];
		}
	} else {
		if ( $ref->contentId && !$validFollow ) {
			// Empty the <sup> since we've serialized its children and
			// removing it below asserts everything has been migrated out
			DOMCompat::replaceChildren( $c );
		}
		if ( $contentDiffers ) {
			// TODO: Since this error is being placed on the ref, the
			// key should arguably be "cite_error_ref_duplicate_key"
			$errs[] = [
				'key' => 'cite_error_references_duplicate_key',
				'params' => [ $refDmw->attrs->name ]
			];
			$refDmw->body = (object)[ 'html' => $html ];
		} else {
			$refDmw->body = (object)[ 'id' => 'mw-reference-text-' . $ref->target ];
		}
	}

	$class = 'mw-ref reference';
	if ( $validFollow ) {
		$class .= ' mw-ref-follow';
	}

	// Compare name and group against the empty string instead of relying
	// on truthiness: "0" is a legal name/group but is falsy in PHP, which
	// would otherwise give every reuse of a ref named "0" the same id and
	// drop the label of a group named "0".
	$isNamedRef = (string)$ref->name !== '';
	$refGroupName = (string)$ref->group;
	$hasRefGroup = $refGroupName !== '';

	$lastLinkback = $ref->linkbacks[count( $ref->linkbacks ) - 1] ?? null;
	DOMUtils::addAttributes( $linkBack, [
			'about' => $about,
			'class' => $class,
			'id' => ( $refsData->inEmbeddedContent() || $validFollow ) ?
				null : ( $isNamedRef ? $lastLinkback : $ref->id ),
			'rel' => 'dc:references',
			'typeof' => DOMCompat::getAttribute( $node, 'typeof' ),
		]
	);
	DOMUtils::removeTypeOf( $linkBack, 'mw:DOMFragment/sealed/ref' );
	DOMUtils::addTypeOf( $linkBack, 'mw:Extension/ref' );

	// Carry over only the source-related data-parsoid fields of the wrapper
	$dataParsoid = new DataParsoid;
	if ( isset( $nodeDp->src ) ) {
		$dataParsoid->src = $nodeDp->src;
	}
	if ( isset( $nodeDp->dsr ) ) {
		$dataParsoid->dsr = $nodeDp->dsr;
	}
	if ( isset( $nodeDp->pi ) ) {
		$dataParsoid->pi = $nodeDp->pi;
	}
	DOMDataUtils::setDataParsoid( $linkBack, $dataParsoid );

	$dmw = $isTplWrapper ? $tplDmw : $refDmw;
	DOMDataUtils::setDataMw( $linkBack, $dmw );

	// FIXME(T214241): Should the errors be added to data-mw if
	// $isTplWrapper? Here and other calls to addErrorsToNode.
	if ( $errs ) {
		self::addErrorsToNode( $linkBack, $errs );
	}

	// refLink is the link to the citation
	$refLink = $doc->createElement( 'a' );
	DOMUtils::addAttributes( $refLink, [
		'href' => $extApi->getPageUri() . '#' . $ref->target,
		'style' => 'counter-reset: mw-Ref ' . $ref->groupIndex . ';',
	] );
	if ( $hasRefGroup ) {
		$refLink->setAttribute( 'data-mw-group', $refGroupName );
	}

	// refLink-span which will contain a default rendering of the cite link
	// for browsers that don't support counters
	$refLinkSpan = $doc->createElement( 'span' );
	$refLinkSpan->setAttribute( 'class', 'mw-reflink-text' );
	$refLinkSpan->appendChild( $doc->createTextNode(
		'[' . ( $hasRefGroup ? $refGroupName . ' ' : '' ) . $ref->groupIndex . ']'
	) );

	$refLink->appendChild( $refLinkSpan );
	$linkBack->appendChild( $refLink );

	// Checking if the <ref> is nested in a link
	$aParent = DOMUtils::findAncestorOfName( $node, 'a' );
	if ( $aParent !== null ) {
		// If we find a parent link, we hoist the reference up, just after the link
		// But if there's multiple references in a single link, we want to insert in order -
		// so we look for other misnested references before inserting
		$insertionPoint = $aParent->nextSibling;
		while ( $insertionPoint instanceof Element &&
			DOMCompat::nodeName( $insertionPoint ) === 'sup' &&
			!empty( DOMDataUtils::getDataParsoid( $insertionPoint )->misnested )
		) {
			$insertionPoint = $insertionPoint->nextSibling;
		}
		$aParent->parentNode->insertBefore( $linkBack, $insertionPoint );
		// set misnested to true and DSR to zero-sized to avoid round-tripping issues
		$dsrOffset = DOMDataUtils::getDataParsoid( $aParent )->dsr->end ?? null;
		// we created that node hierarchy above, so we know that it only contains these nodes,
		// hence there's no need for a visitor
		self::setMisnested( $linkBack, $dsrOffset );
		self::setMisnested( $refLink, $dsrOffset );
		self::setMisnested( $refLinkSpan, $dsrOffset );
		$parentAbout = DOMCompat::getAttribute( $aParent, 'about' );
		if ( $parentAbout !== null ) {
			$linkBack->setAttribute( 'about', $parentAbout );
		}
		$node->parentNode->removeChild( $node );
	} else {
		// if not, we insert it where we planned in the first place
		$node->parentNode->replaceChild( $linkBack, $node );
	}

	// Keep the first content to compare multiple <ref>s with the same name.
	if ( $ref->contentId === null && !$missingContent ) {
		$ref->contentId = $contentId;
		// Use the dir parameter only from the full definition of a named ref tag
		$ref->dir = $refDir;
	} else {
		DOMCompat::remove( $c );
		$extApi->clearContentDOM( $contentId );
	}
}
448 | |
/**
 * Sets a node as misnested and its DSR as zero-width.
 *
 * The node's data-parsoid gets `misnested = true` and a DomSourceRange
 * whose start and end are both $offset and whose remaining two
 * constructor arguments are null.
 *
 * @param Element $node Node whose data-parsoid is updated in place
 * @param ?int $offset Source offset used for both ends of the range
 */
private static function setMisnested( Element $node, ?int $offset ): void {
	$dataParsoid = DOMDataUtils::getDataParsoid( $node );
	$dataParsoid->misnested = true;
	$dataParsoid->dsr = new DomSourceRange( $offset, $offset, null, null );
}
457 | |
/**
 * Flag a node as erroneous and record the given errors in its data-mw.
 *
 * Adds the `mw:Error` typeof and appends $errs to an existing `errors`
 * array in data-mw; if there is no such array yet, $errs becomes it.
 *
 * @param Element $node Node to mark up
 * @param array $errs List of error descriptors to attach
 */
private static function addErrorsToNode( Element $node, array $errs ): void {
	DOMUtils::addTypeOf( $node, 'mw:Error' );
	$dataMw = DOMDataUtils::getDataMw( $node );
	$existing = $dataMw->errors ?? null;
	if ( is_array( $existing ) ) {
		$dataMw->errors = array_merge( $existing, $errs );
	} else {
		$dataMw->errors = $errs;
	}
}
464 | |
/**
 * Fill a references container with the rendered refs of its group.
 *
 * Steps, in order:
 *  1. back-patch errors that are only known now onto the refs of the group,
 *  2. serialize the ref `<sup>`s currently nested in the container,
 *  3. update the container's data-mw (unless it is a transclusion wrapper),
 *  4. handle the responsive wrapper, then empty the list and render one
 *     line per ref,
 *  5. remove the group from $refsData.
 *
 * @param ParsoidExtensionAPI $extApi
 * @param Element $refsNode The references container (wrapper div or list)
 * @param ReferencesData $refsData Accumulated ref state; the group is removed from it
 * @param bool $autoGenerated Whether the container was generated implicitly
 */
private static function insertReferencesIntoDOM(
	ParsoidExtensionAPI $extApi, Element $refsNode,
	ReferencesData $refsData, bool $autoGenerated = false
): void {
	$wrappedByTemplate = DOMUtils::hasTypeOf( $refsNode, 'mw:Transclusion' );
	$refsDp = DOMDataUtils::getDataParsoid( $refsNode );
	$groupName = $refsDp->group ?? '';
	$refGroup = $refsData->getRefGroup( $groupName );

	// Iterate through the ref list to back-patch typeof and data-mw error
	// information into ref for errors only known at time of references
	// insertion. Refs in the top level dom will be processed immediately,
	// whereas embedded refs will be gathered for batch processing, since
	// we need to parse embedded content to find them.
	if ( $refGroup ) {
		// Every ref of an autogenerated list that has a group gets this error
		$missingReferencesErr = null;
		if ( $autoGenerated && $groupName !== '' ) {
			$missingReferencesErr = [ 'key' => 'cite_error_group_refs_without_references',
				'params' => [ $groupName ] ];
		}
		foreach ( $refGroup->refs as $ref ) {
			$refErrs = [];
			if ( $missingReferencesErr !== null ) {
				$refErrs[] = $missingReferencesErr;
			}
			// Mark all refs that are named without content
			if ( $ref->contentId === null && $ref->name !== '' ) {
				// TODO: Since this error is being placed on the ref,
				// the key should arguably be "cite_error_ref_no_text"
				$refErrs[] = [ 'key' => 'cite_error_references_no_text' ];
			}
			if ( !$refErrs ) {
				continue;
			}
			foreach ( $ref->nodes as $linkbackNode ) {
				self::addErrorsToNode( $linkbackNode, $refErrs );
			}
			// Embedded occurrences are only known by about id; they are
			// stored here and resolved later
			foreach ( $ref->embeddedNodes as $aboutId ) {
				$refsData->embeddedErrors[$aboutId] = $refErrs;
			}
		}
	}

	// Note that `$sup`s here are probably all we really need to check for
	// errors caught with `$refsData->inReferencesContent()` but it's
	// probably easier to just know that state while they're being
	// constructed.
	$nestedSups = PHPUtils::iterable_to_array( DOMCompat::querySelectorAll(
		$refsNode, 'sup[typeof~=\'mw:Extension/ref\']'
	) );
	$nestedRefsHTML = [];
	foreach ( $nestedSups as $sup ) {
		$nestedRefsHTML[] = $extApi->domToHtml( $sup, false, true ) . "\n";
	}

	if ( !$wrappedByTemplate ) {
		$dataMw = DOMDataUtils::getDataMw( $refsNode );
		if ( $autoGenerated ) {
			// Mark this auto-generated so that we can skip this during
			// html -> wt and so that clients can strip it if necessary.
			$dataMw->autoGenerated = true;
		} elseif ( $nestedRefsHTML ) {
			$dataMw->body = (object)[ 'html' => "\n" . implode( $nestedRefsHTML ) ];
		} elseif ( empty( $refsDp->selfClose ) ) {
			$dataMw->body = (object)[ 'html' => '' ];
		} else {
			unset( $dataMw->body );
		}
		unset( $refsDp->selfClose );
	}

	// Deal with responsive wrapper
	if ( DOMUtils::hasClass( $refsNode, 'mw-references-wrap' ) ) {
		// NOTE: The default Cite implementation hardcodes this threshold to 10.
		// We use a configurable parameter here primarily for test coverage purposes.
		// See citeParserTests.txt where we set a threshold of 1 or 2.
		$rrThreshold = $extApi->getSiteConfig()->getMWConfigValue( 'CiteResponsiveReferencesThreshold' ) ?? 10;
		if ( $refGroup && count( $refGroup->refs ) > $rrThreshold ) {
			DOMCompat::getClassList( $refsNode )->add( 'mw-references-columns' );
		}
		// From here on, work on the wrapper's first child
		$refsNode = $refsNode->firstChild;
	}
	DOMUtils::assertElt( $refsNode );

	// Remove all children from the references node
	//
	// Ex: When {{Reflist}} is reused from the cache, it comes with
	// a bunch of references as well. We have to remove all those cached
	// references before generating fresh references.
	DOMCompat::replaceChildren( $refsNode );

	if ( $refGroup ) {
		foreach ( $refGroup->refs as $ref ) {
			$refGroup->renderLine( $extApi, $refsNode, $ref );
		}
	}

	// Remove the group from refsData
	$refsData->removeRefGroup( $groupName );
}
563 | |
/**
 * Process `<ref>`s left behind after the DOM is fully processed.
 * We process them as if there was an implicit `<references />` tag at
 * the end of the DOM.
 *
 * For every ref group still present in $refsData, an autogenerated
 * references container is appended to $node (preceded by a newline text
 * node) and then filled.
 *
 * @param ParsoidExtensionAPI $extApi
 * @param ReferencesData $refsData
 * @param Node $node Node that receives the generated containers
 */
public static function insertMissingReferencesIntoDOM(
	ParsoidExtensionAPI $extApi, ReferencesData $refsData, Node $node
): void {
	$doc = $node->ownerDocument;

	// The new references come out of "nowhere", so to make selser work
	// properly, add a zero-sized DSR pointing to the end of the document.
	$pointDsrAtEndOfDoc = static function ( $dp ) use ( $extApi ) {
		$content = $extApi->getPageConfig()->getRevisionContent()->getContent( 'main' );
		$contentLength = strlen( $content );
		$dp->dsr = new DomSourceRange( $contentLength, $contentLength, 0, 0 );
	};

	foreach ( $refsData->getRefGroups() as $groupName => $refsGroup ) {
		$refsOpts = [
			// Force string cast here since in the foreach above, $groupName
			// is an array key. In that context, number-like strings are
			// silently converted to a numeric value!
			// Ex: In <ref group="2" />, the "2" becomes 2 in the foreach
			'group' => (string)$groupName,
			'responsive' => null,
		];
		$frag = self::createReferences(
			$extApi, $doc->createDocumentFragment(), $refsOpts, $pointDsrAtEndOfDoc, true
		);

		// Add a \n before the <ol> so that when serialized to wikitext,
		// each <references /> tag appears on its own line.
		$node->appendChild( $doc->createTextNode( "\n" ) );
		$node->appendChild( $frag );

		self::insertReferencesIntoDOM( $extApi, $frag, $refsData, true );
	}
}
608 | |
/**
 * Run ref processing over an HTML string and return the resulting HTML.
 *
 * The string is parsed into a fragment, handed to processRefs(), and the
 * fragment is serialized again.
 *
 * @param ParsoidExtensionAPI $extApi
 * @param ReferencesData $refsData
 * @param string $str HTML to process
 * @return string The processed HTML
 */
private static function processEmbeddedRefs(
	ParsoidExtensionAPI $extApi, ReferencesData $refsData, string $str
): string {
	$fragment = $extApi->htmlToDom( $str );
	self::processRefs( $extApi, $refsData, $fragment );
	$processedHtml = $extApi->domToHtml( $fragment, true, true );
	return $processedHtml;
}
616 | |
/**
 * Walk the children of $node and process every ref and references
 * container found, recursing into other elements.
 *
 * - Sealed `ref` fragments are handed to extractRefFromNode().
 * - `mw:Extension/references` elements have their children processed
 *   with the 'references' embedded-content flag pushed; the outermost one
 *   is then filled via insertReferencesIntoDOM().
 * - Any other element has its attribute-embedded HTML processed (with an
 *   embedded-content flag pushed) and is then recursed into.
 *
 * @param ParsoidExtensionAPI $extApi
 * @param ReferencesData $refsData
 * @param Node $node Node whose children are processed
 */
public static function processRefs(
	ParsoidExtensionAPI $extApi, ReferencesData $refsData, Node $node
): void {
	// Callback applied to HTML found in data attributes
	$processEmbeddedHtml = function ( string $html ) use ( $extApi, $refsData ) {
		return self::processEmbeddedRefs( $extApi, $refsData, $html );
	};

	// The next sibling is captured before processing, since processing
	// may replace or move the current child.
	for ( $child = $node->firstChild; $child !== null; $child = $next ) {
		$next = $child->nextSibling;
		if ( !( $child instanceof Element ) ) {
			continue;
		}

		if ( WTUtils::isSealedFragmentOfType( $child, 'ref' ) ) {
			self::extractRefFromNode( $extApi, $child, $refsData );
			continue;
		}

		if ( DOMUtils::hasTypeOf( $child, 'mw:Extension/references' ) ) {
			if ( !$refsData->inReferencesContent() ) {
				$refsData->referencesGroup =
					DOMDataUtils::getDataParsoid( $child )->group ?? '';
			}
			$refsData->pushEmbeddedContentFlag( 'references' );
			if ( $child->hasChildNodes() ) {
				self::processRefs( $extApi, $refsData, $child );
			}
			$refsData->popEmbeddedContentFlag();
			if ( !$refsData->inReferencesContent() ) {
				$refsData->referencesGroup = '';
				self::insertReferencesIntoDOM( $extApi, $child, $refsData, false );
			}
			continue;
		}

		// Look for <ref>s embedded in data attributes
		$refsData->pushEmbeddedContentFlag();
		$extApi->processAttributeEmbeddedHTML( $child, $processEmbeddedHtml );
		$refsData->popEmbeddedContentFlag();
		if ( $child->hasChildNodes() ) {
			self::processRefs( $extApi, $refsData, $child );
		}
	}
}
657 | |
/**
 * Traverse into all the embedded content and mark up the refs in there
 * that have errors that weren't known before the content was serialized.
 *
 * Some errors are only known at the time when we're inserting the
 * references lists, at which point, embedded content has already been
 * serialized and stored, so we no longer have live access to it. We
 * therefore map about ids to errors for a ref at that time, and then do
 * one final walk of the dom to peek into all the embedded content and
 * mark up the errors where necessary.
 *
 * @param ParsoidExtensionAPI $extApi
 * @param ReferencesData $refsData Source of the about id => errors map
 * @param Node $node Node whose descendants are visited
 */
public static function addEmbeddedErrors(
	ParsoidExtensionAPI $extApi, ReferencesData $refsData, Node $node
): void {
	// Similar to processEmbeddedRefs: parse, recurse, serialize again
	$reprocessHtml = function ( string $html ) use ( $extApi, $refsData ) {
		$fragment = $extApi->htmlToDom( $html );
		self::addEmbeddedErrors( $extApi, $refsData, $fragment );
		return $extApi->domToHtml( $fragment, true, true );
	};
	// Rewrites data-mw body.html of an element in place, if it has one
	$reprocessBodyHtml = static function ( Element $elt ) use ( $reprocessHtml ) {
		$dataMw = DOMDataUtils::getDataMw( $elt );
		if ( isset( $dataMw->body->html ) ) {
			$dataMw->body->html = $reprocessHtml(
				$dataMw->body->html
			);
		}
	};

	for ( $child = $node->firstChild; $child !== null; $child = $next ) {
		$next = $child->nextSibling;
		if ( !( $child instanceof Element ) ) {
			continue;
		}

		if ( DOMUtils::hasTypeOf( $child, 'mw:Extension/ref' ) ) {
			$reprocessBodyHtml( $child );
			$about = DOMCompat::getAttribute( $child, 'about' );
			'@phan-var string $about'; // assert $about is non-null
			$pendingErrs = $refsData->embeddedErrors[$about] ?? null;
			if ( $pendingErrs ) {
				self::addErrorsToNode( $child, $pendingErrs );
			}
		} elseif ( DOMUtils::hasTypeOf( $child, 'mw:Extension/references' ) ) {
			$reprocessBodyHtml( $child );
		} else {
			$extApi->processAttributeEmbeddedHTML(
				$child, $reprocessHtml
			);
		}

		if ( $child->hasChildNodes() ) {
			self::addEmbeddedErrors( $extApi, $refsData, $child );
		}
	}
}
716 | |
/**
 * @inheritDoc
 *
 * Parses the tag content as a `references` extension tag, pushes an error
 * if the tag carries an attribute other than `group` or `responsive`, and
 * wraps the parsed content in the container built by createReferences().
 */
public function sourceToDom(
	ParsoidExtensionAPI $extApi, string $txt, array $extArgs
): DocumentFragment {
	$parseOptions = [
		'parseOpts' => [ 'extTag' => 'references' ],
	];
	$domFragment = $extApi->extTagToDOM( $extArgs, $txt, $parseOptions );

	// Make sure both known options are always present
	$defaults = [
		'group' => null,
		'responsive' => null,
	];
	$refsOpts = $extApi->extArgsToArray( $extArgs ) + $defaults;

	// Detect invalid parameters on the references tag
	foreach ( array_keys( $refsOpts ) as $optName ) {
		$normalizedName = strtolower( (string)$optName );
		if ( $normalizedName !== 'group' && $normalizedName !== 'responsive' ) {
			// One error is enough, no matter how many unknown attributes there are
			$extApi->pushError( 'cite_error_references_invalid_parameters' );
			break;
		}
	}

	$recordTagSource = static function ( $dp ) use ( $extApi ) {
		$dp->src = $extApi->extTag->getSource();
		// Setting redundant info on fragment.
		// $docBody->firstChild info feels cumbersome to use downstream.
		if ( $extApi->extTag->isSelfClosed() ) {
			$dp->selfClose = true;
		}
	};

	$frag = self::createReferences( $extApi, $domFragment, $refsOpts, $recordTagSource );
	$domFragment->appendChild( $frag );
	return $domFragment;
}
759 | |
/**
 * @inheritDoc
 *
 * Applies $proc to the HTML stored in the element's data-mw `body.html`,
 * if present, and stores the result back in the same place.
 */
public function processAttributeEmbeddedHTML(
	ParsoidExtensionAPI $extApi, Element $elt, Closure $proc
): void {
	$dataMw = DOMDataUtils::getDataMw( $elt );
	if ( !isset( $dataMw->body->html ) ) {
		// Nothing embedded to process
		return;
	}
	$dataMw->body->html = $proc( $dataMw->body->html );
}
769 | |
/**
 * @inheritDoc
 *
 * Returns the empty string for autogenerated references with a group and
 * for references whose body has no HTML (the latter is also logged as an
 * error); otherwise the start tag, optionally followed by the serialized
 * body and the end tag.
 */
public function domToWikitext(
	ParsoidExtensionAPI $extApi, Element $node, bool $wrapperUnmodified
) {
	$dataMw = DOMDataUtils::getDataMw( $node );

	// Autogenerated references aren't considered erroneous (the extension to the legacy
	// parser also generates them) and are not suppressed when serializing because apparently
	// that's the behaviour Parsoid clients want. However, autogenerated references *with
	// group attributes* are errors (the legacy extension doesn't generate them at all) and
	// are suppressed when serialized since we considered them an error while parsing and
	// don't want them to persist in the content.
	$isAutoGenerated = !empty( $dataMw->autoGenerated );
	if ( $isAutoGenerated && ( $dataMw->attrs->group ?? '' ) !== '' ) {
		return '';
	}

	$startTagSrc = $extApi->extStartTagToWikitext( $node );
	if ( empty( $dataMw->body ) ) {
		return $startTagSrc; // We self-closed this already.
	}

	if ( !isset( $dataMw->body->html ) ) {
		$extApi->log( 'error',
			'References body unavailable for: ' . DOMCompat::getOuterHTML( $node )
		);
		return ''; // Drop it!
	}

	$bodySrc = $extApi->htmlToWikitext(
		[ 'extName' => $dataMw->name ],
		$dataMw->body->html
	);
	return $startTagSrc . $bodySrc . '</' . $dataMw->name . '>';
}
803 | |
/**
 * @inheritDoc
 *
 * Runs $defaultHandler over the parsed data-mw `body.html` of the
 * references node, if there is any, and always returns true.
 */
public function lintHandler(
	ParsoidExtensionAPI $extApi, Element $refs, callable $defaultHandler
): bool {
	$bodyHtml = DOMDataUtils::getDataMw( $refs )->body->html ?? null;
	if ( $bodyHtml !== null ) {
		$defaultHandler( $extApi->htmlToDom( $bodyHtml ) );
	}
	return true;
}
815 | |
/**
 * @inheritDoc
 *
 * Compares the data-mw `body.html` of both nodes by parsing each into a
 * fragment and passing the pair to $domDiff. If either node has no such
 * HTML, false is returned without calling $domDiff.
 */
public function diffHandler(
	ParsoidExtensionAPI $extApi, callable $domDiff, Element $origNode,
	Element $editedNode
): bool {
	$origHtml = DOMDataUtils::getDataMw( $origNode )->body->html ?? null;
	$editedHtml = DOMDataUtils::getDataMw( $editedNode )->body->html ?? null;

	// FIXME: Similar to DOMDiff::subtreeDiffers, maybe $editNode should
	// be marked as inserted to avoid losing any edits, at the cost of
	// more normalization
	if ( $origHtml === null || $editedHtml === null ) {
		return false;
	}

	$fragmentOpts = [ 'markNew' => true ];
	$origFragment = $extApi->htmlToDom( $origHtml, $origNode->ownerDocument, $fragmentOpts );
	$editedFragment = $extApi->htmlToDom( $editedHtml, $editedNode->ownerDocument, $fragmentOpts );
	return $domDiff( $origFragment, $editedFragment );
}
842 | } |