Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
79.36% |
296 / 373 |
|
27.78% |
5 / 18 |
CRAP | |
0.00% |
0 / 1 |
| WrapSectionsState | |
79.36% |
296 / 373 |
|
27.78% |
5 / 18 |
305.53 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
| computeSectionMetadata | |
76.32% |
29 / 38 |
|
0.00% |
0 / 1 |
11.33 | |||
| shouldOmitFromTOC | |
83.33% |
5 / 6 |
|
0.00% |
0 / 1 |
3.04 | |||
| createNewSection | |
78.26% |
18 / 23 |
|
0.00% |
0 / 1 |
12.24 | |||
| isEmptySpan | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
30 | |||
| wrapSectionsInDOM | |
88.00% |
66 / 75 |
|
0.00% |
0 / 1 |
32.66 | |||
| isParsoidSection | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
2 | |||
| findSectionAncestor | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
| getDSR | |
54.55% |
12 / 22 |
|
0.00% |
0 / 1 |
22.36 | |||
| fillDSRGap | |
75.00% |
3 / 4 |
|
0.00% |
0 / 1 |
4.25 | |||
| collapseWrappers | |
84.38% |
27 / 32 |
|
0.00% |
0 / 1 |
8.24 | |||
| resolveTplExtSectionConflicts | |
80.36% |
45 / 56 |
|
0.00% |
0 / 1 |
14.28 | |||
| convertTOCOffsets | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
3 | |||
| findTOCInsertionPoint | |
84.62% |
11 / 13 |
|
0.00% |
0 / 1 |
7.18 | |||
| insertSyntheticSection | |
94.12% |
16 / 17 |
|
0.00% |
0 / 1 |
7.01 | |||
| addSyntheticTOCMarker | |
83.78% |
31 / 37 |
|
0.00% |
0 / 1 |
15.96 | |||
| addSectionInfo | |
40.00% |
6 / 15 |
|
0.00% |
0 / 1 |
7.46 | |||
| run | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
1 | |||
| 1 | <?php |
| 2 | declare( strict_types = 1 ); |
| 3 | |
| 4 | namespace Wikimedia\Parsoid\Wt2Html\DOM\Processors; |
| 5 | |
| 6 | use Wikimedia\Assert\Assert; |
| 7 | use Wikimedia\Assert\UnreachableException; |
| 8 | use Wikimedia\Parsoid\Config\Env; |
| 9 | use Wikimedia\Parsoid\Core\DomSourceRange; |
| 10 | use Wikimedia\Parsoid\Core\InternalException; |
| 11 | use Wikimedia\Parsoid\Core\SectionMetadata; |
| 12 | use Wikimedia\Parsoid\DOM\Comment; |
| 13 | use Wikimedia\Parsoid\DOM\Document; |
| 14 | use Wikimedia\Parsoid\DOM\DocumentFragment; |
| 15 | use Wikimedia\Parsoid\DOM\Element; |
| 16 | use Wikimedia\Parsoid\DOM\Node; |
| 17 | use Wikimedia\Parsoid\DOM\Text; |
| 18 | use Wikimedia\Parsoid\NodeData\DataMw; |
| 19 | use Wikimedia\Parsoid\NodeData\DataParsoid; |
| 20 | use Wikimedia\Parsoid\NodeData\TemplateInfo; |
| 21 | use Wikimedia\Parsoid\Utils\DOMCompat; |
| 22 | use Wikimedia\Parsoid\Utils\DOMDataUtils; |
| 23 | use Wikimedia\Parsoid\Utils\DOMUtils; |
| 24 | use Wikimedia\Parsoid\Utils\PHPUtils; |
| 25 | use Wikimedia\Parsoid\Utils\TokenUtils; |
| 26 | use Wikimedia\Parsoid\Utils\Utils; |
| 27 | use Wikimedia\Parsoid\Utils\WTUtils; |
| 28 | use Wikimedia\Parsoid\Wt2Html\Frame; |
| 29 | |
| 30 | class WrapSectionsState { |
| 31 | private Env $env; |
| 32 | private Frame $frame; |
| 33 | |
| 34 | /** @var Element|DocumentFragment */ |
| 35 | private $rootNode; |
| 36 | |
| 37 | /** |
| 38 | * The next section debug ID |
| 39 | */ |
| 40 | private int $count = 1; |
| 41 | |
| 42 | /** |
| 43 | * Pseudo section count is needed to determine TOC rendering |
| 44 | */ |
| 45 | private int $pseudoSectionCount = 0; |
| 46 | private Document $doc; |
| 47 | |
| 48 | /** |
| 49 | * Map of about ID to first element |
| 50 | * @var Element[] |
| 51 | */ |
| 52 | private array $aboutIdMap = []; |
| 53 | private int $sectionNumber = 0; |
| 54 | private ?WrapSectionsTplInfo $tplInfo = null; |
| 55 | |
| 56 | /** @var WrapSectionsTplInfo[] */ |
| 57 | private array $tplsAndExtsToExamine = []; |
| 58 | private int $oldLevel = 0; |
| 59 | |
| 60 | public function __construct(
| 61 | Env $env,
| 62 | Frame $frame,
| 63 | Node $rootNode
| 64 | ) { // Only captures its arguments; no DOM processing happens here
| 65 | $this->env = $env;
| 66 | $this->frame = $frame;
| 67 | $this->rootNode = $rootNode; // documented above as Element|DocumentFragment
| 68 | $this->doc = $rootNode->ownerDocument; // cached; used when creating section elements
| 69 | }
| 70 | |
| 71 | /**
| 72 | * Fill in the TOC-related fields of $metadata for $heading.
| 73 | * Unless the page config suppresses the TOC, $metadata is also registered with the env's TOC data.
| 74 | * @param SectionMetadata $metadata Metadata object to populate (mutated in place)
| 75 | * @param Element $heading The heading element that starts the section
| 76 | * @param int $newLevel Level of $heading; remembered in $this->oldLevel for the next call
| 77 | */
| 78 | private function computeSectionMetadata(
| 79 | SectionMetadata $metadata, Element $heading, int $newLevel
| 80 | ): void {
| 81 | if ( !$this->env->getPageConfig()->getSuppressTOC() ) {
| 82 | $tocData = $this->env->getTOCData();
| 83 | $tocData->addSection( $metadata );
| 84 | $tocData->processHeading( $this->oldLevel, $newLevel, $metadata );
| 85 | }
| 86 | $this->oldLevel = $newLevel;
| 87 |
| 88 | if ( WTUtils::isLiteralHTMLNode( $heading ) ) {
| 89 | // Literal HTML tags in wikitext don't get section edit links
| 90 | $metadata->fromTitle = null;
| 91 | $metadata->index = '';
| 92 | $metadata->codepointOffset = null;
| 93 | } elseif ( $this->tplInfo !== null ) {
| 94 | $dmw = DOMDataUtils::getDataMw( $this->tplInfo->first );
| 95 | $metadata->index = ''; // Match legacy parser
| 96 | if ( !isset( $dmw->parts ) ) {
| 97 | // Extension or language-variant
| 98 | // Need to determine what the output should be here
| 99 | $metadata->fromTitle = null;
| 100 | } elseif ( count( $dmw->parts ) > 1 ) {
| 101 | // Multi-part content -- cannot pick a title
| 102 | $metadata->fromTitle = null;
| 103 | } else {
| 104 | $p0 = $dmw->parts[0];
| 105 | if ( !( $p0 instanceof TemplateInfo ) ) {
| 106 | throw new UnreachableException(
| 107 | "a single part will always be a TemplateInfo not a string"
| 108 | );
| 109 | }
| 110 | if ( $p0->type === 'templatearg' ) {
| 111 | // Since we currently don't process templates in Parsoid,
| 112 | // this has to be a top-level {{{...}}} and so the content
| 113 | // comes from the current page. But, legacy parser returns 'false'
| 114 | // for this, so we'll return null as well instead of current title.
| 115 | $metadata->fromTitle = null;
| 116 | } elseif ( !empty( $p0->href ) ) {
| 117 | // Pick template title, but strip leading "./" prefix
| 118 | $tplHref = Utils::decodeURIComponent( $p0->href );
| 119 | $metadata->fromTitle = PHPUtils::stripPrefix( $tplHref, './' );
| 120 | if ( $this->sectionNumber >= 0 ) {
| 121 | // Legacy parser sets this to '' in some cases
| 122 | // See "Templated sections (heading from template arg)" parser test
| 123 | $metadata->index = 'T-' . $this->sectionNumber;
| 124 | }
| 125 | } else {
| 126 | // Legacy parser returns null here
| 127 | $metadata->fromTitle = null;
| 128 | }
| 129 | }
| 130 | $metadata->codepointOffset = null;
| 131 | } else {
| 132 | $title = $this->env->getContextTitle();
| 133 | // Use the dbkey (underscores) instead of text (spaces)
| 134 | $metadata->fromTitle = $title->getPrefixedDBKey();
| 135 | $metadata->index = (string)$this->sectionNumber;
| 136 | // Note that our DSR counts *are* byte counts, while this core
| 137 | // interface expects *codepoint* counts. We are going to convert
| 138 | // these in a batch (for efficiency) in ::convertTOCOffsets() below
| 139 | $metadata->codepointOffset = DOMDataUtils::getDataParsoid( $heading )->dsr->start ?? -1;
| 140 | }
| 141 |
| 142 | $metadata->anchor = DOMCompat::getAttribute( $heading, 'id' );
| 143 | $section = DOMDataUtils::getDataParsoid( $heading )->getTemp()->section;
| 144 | $metadata->line = $section['line'];
| 145 | $metadata->linkAnchor = $section['linkAnchor'];
| 146 | }
| 147 | |
| 148 | /**
| 149 | * Decide whether $heading must be left out of the TOC.
| 150 | * That is the case when $heading is:
| 151 | * - nested inside the first wrapper node of an extension's output
| 152 | */
| 153 | private function shouldOmitFromTOC( Element $heading ): bool {
| 154 | // Climb from the heading's parent towards the root and stop at the
| 155 | // first ancestor that is the first wrapper node of extension output.
| 156 | for ( $ancestor = $heading->parentNode; $ancestor; $ancestor = $ancestor->parentNode ) {
| 157 | // NOTE: Here, we are making the assumption that extensions never
| 158 | // emit a DOM forest and only ever have a single wrapper node.
| 159 | // While ExtensionHandler doesn't assume that, this seems to be borne out
| 160 | // in reality. But, if this assumption were not true, we would be adding
| 161 | // TOC entries from extension-generated about siblings into the TOC.
| 162 | // In scenarios where templates generated the extension and the extension
| 163 | // is part of the template's wrapper, we cannot reliably determine what
| 164 | // part of the output came from extensions in that case (because the
| 165 | // template wrapping clobbers that information). So, for now, we ignore
| 166 | // this edge case where extensions generate multiple DOM nodes (that also
| 167 | // have headings). Later on, we may enforce a single-wrapper-node
| 168 | // requirement for extensions.
| 169 | if ( WTUtils::isFirstExtensionWrapperNode( $ancestor ) ) {
| 170 | return true;
| 171 | }
| 172 | }
| 173 |
| 174 | return false;
| 175 | }
| 176 | |
| 177 | /**
| 178 | * Create a new section, attach it to its parent section (or $rootNode) and move $heading into it
| 179 | *
| 180 | * @param Element|DocumentFragment $rootNode Receives the section when no parent section is on the stack
| 181 | * @param array<Section> &$sectionStack Stack of enclosing sections; popped and pushed in place
| 182 | * @param ?Section $currSection Section being filled so far, if any
| 183 | * @param Element $heading the heading node (for pseudo sections: the node being wrapped)
| 184 | * @param int $newLevel Level of the new section
| 185 | * @param bool $pseudoSection True for pseudo sections (id -2, no TOC metadata computed)
| 186 | * @return Section The newly created section
| 187 | */
| 188 | private function createNewSection(
| 189 | Node $rootNode, array &$sectionStack,
| 190 | ?Section $currSection, Element $heading, int $newLevel,
| 191 | bool $pseudoSection
| 192 | ): Section {
| 193 | /* Structure for regular (editable or not) sections
| 194 | * <section data-mw-section-id="..">
| 195 | * <h*>..</h*>
| 196 | * ..
| 197 | * </section>
| 198 | *
| 199 | * Lead sections and pseudo-sections won't have <h*> or <div> tags
| 200 | */
| 201 | $section = new Section( $newLevel, $this->count++, $this->doc );
| 202 |
| 203 | /* Step 1. Get section stack to the right nesting level
| 204 | * 1a. Pop stack till we have a higher-level section.
| 205 | */
| 206 | $stack = &$sectionStack;
| 207 | $sc = count( $stack );
| 208 | while ( $sc > 0 && !( $stack[$sc - 1]->hasNestedLevel( $newLevel ) ) ) {
| 209 | array_pop( $stack );
| 210 | $sc--;
| 211 | }
| 212 |
| 213 | /* 1b. Push current section onto stack if it is a higher-level section */
| 214 | if ( $currSection && $currSection->hasNestedLevel( $newLevel ) ) {
| 215 | $stack[] = $currSection;
| 216 | $sc++;
| 217 | }
| 218 |
| 219 | /* Step 2: Add new section where it belongs: a parent section OR body */
| 220 | $parentSection = $sc > 0 ? $stack[$sc - 1] : null;
| 221 | if ( $parentSection ) {
| 222 | $parentSection->addSection( $section );
| 223 | } else {
| 224 | $rootNode->insertBefore( $section->container, $heading );
| 225 | }
| 226 |
| 227 | /* Step 3: Add <h*> to the <section> */
| 228 | $section->addNode( $heading );
| 229 |
| 230 | /* Step 4: Assign data-mw-section-id attribute
| 231 | *
| 232 | * CX wants <section> tags with a distinguishing attribute so that
| 233 | * it can differentiate between its internal use of <section> tags
| 234 | * with what Parsoid adds. So, we will add a data-mw-section-id
| 235 | * attribute always.
| 236 | *
| 237 | * data-mw-section-id = 0 for the lead section
| 238 | * data-mw-section-id = -1 for non-editable sections
| 239 | * Note that templated content cannot be edited directly.
| 240 | * data-mw-section-id = -2 for pseudo sections
| 241 | * data-mw-section-id > 0 for everything else and this number
| 242 | * matches PHP parser / MediaWiki's notion of that section.
| 243 | *
| 244 | * The code here handles uneditable sections because of templating.
| 245 | */
| 246 | if ( $pseudoSection ) {
| 247 | $this->pseudoSectionCount++;
| 248 | $section->setId( -2 );
| 249 | } elseif ( $this->tplInfo !== null ) {
| 250 | $section->setId( -1 );
| 251 | } else {
| 252 | $section->setId( $this->sectionNumber );
| 253 | }
| 254 |
| 255 | // Sections from extensions shouldn't show up in TOC
| 256 | if ( !$pseudoSection && !$this->shouldOmitFromTOC( $heading ) ) {
| 257 | $this->computeSectionMetadata( $section->metadata, $heading, $newLevel );
| 258 | }
| 259 |
| 260 | return $section;
| 261 | }
| 262 | |
| 263 | private function isEmptySpan( Element $span ): bool {
| 264 | // "Empty" = no element children and no text child with non-whitespace content
| 265 | for ( $child = $span->firstChild; $child; $child = $child->nextSibling ) {
| 266 | if ( $child instanceof Element ) {
| 267 | return false;
| 268 | }
| 269 | if ( $child instanceof Text && !preg_match( '/^\s*$/D', $child->nodeValue ) ) {
| 270 | return false;
| 271 | }
| 272 | }
| 273 | return true;
| 274 | }
| 275 | |
| 276 | /**
| 277 | * Walk the DOM and add <section> wrappers where required.
| 278 | * This is the workhorse code that wrapSections relies on.
| 279 | *
| 280 | * @param ?Section $currSection Section that the children of $rootNode are initially added to, if any
| 281 | * @param Element|DocumentFragment $rootNode Node whose children are walked (recursively for elements)
| 282 | * @return int Smallest heading level number seen among $rootNode's direct children (7 if none)
| 283 | */
| 284 | private function wrapSectionsInDOM(
| 285 | ?Section $currSection, Node $rootNode
| 286 | ): int {
| 287 | // Since template wrapping is done and template wrappers are well-nested,
| 288 | // we can reset template state for every subtree.
| 289 | $tplInfo = null;
| 290 | $sectionStack = [];
| 291 | $highestSectionLevel = 7;
| 292 | $node = $rootNode->firstChild;
| 293 | while ( $node ) {
| 294 | $next = $node->nextSibling;
| 295 | $addedNode = false;
| 296 | $expandSectionBoundary = false;
| 297 |
| 298 | // Track entry into templated and extension output
| 299 | if ( !$this->tplInfo && WTUtils::isFirstEncapsulationWrapperNode( $node ) ) {
| 300 | DOMUtils::assertElt( $node );
| 301 | $this->tplInfo = $tplInfo = new WrapSectionsTplInfo;
| 302 | $tplInfo->first = $node;
| 303 | $about = DOMCompat::getAttribute( $node, 'about' );
| 304 | // NOTE: could be null because of language variant markup!
| 305 | $tplInfo->about = $about;
| 306 | $aboutSiblings = WTUtils::getAboutSiblings( $node, $about );
| 307 | $tplInfo->last = end( $aboutSiblings );
| 308 | if ( $about !== null ) { $this->aboutIdMap[$about] = $node; } // null must not be used as an array key
| 309 |
| 310 | // Collect a sequence of rendering transparent nodes starting at $node.
| 311 | // This could be while ( true ), but being defensive.
| 312 | while ( $node ) {
| 313 | // If we hit the end of the template, we are done!
| 314 | // - If this is a heading, we'll process it below.
| 315 | // - If not, the template never had a heading, so
| 316 | // we can continue default section wrapping behavior.
| 317 | if ( $tplInfo->last === $node ) {
| 318 | break;
| 319 | }
| 320 |
| 321 | // If we hit a non-rendering-transparent node or a non-empty span,
| 322 | // we are done! We cannot expand the section boundary any further.
| 323 | if ( !WTUtils::isRenderingTransparentNode( $node ) &&
| 324 | !(
| 325 | DOMCompat::nodeName( $node ) === 'span' &&
| 326 | !WTUtils::isLiteralHTMLNode( $node ) &&
| 327 | $this->isEmptySpan( $node )
| 328 | )
| 329 | ) {
| 330 | break;
| 331 | }
| 332 |
| 333 | // Accumulate the rendering-transparent node and loop
| 334 | $tplInfo->rtContentNodes[] = $node;
| 335 | $node = $node->nextSibling;
| 336 | }
| 337 |
| 338 | if ( count( $tplInfo->rtContentNodes ) > 0 && DOMUtils::isHeading( $node ) ) {
| 339 | // In this scenario, we can expand the section boundary to include these nodes
| 340 | // rather than start with the heading. This eliminates unnecessary conflicts
| 341 | // between section & template boundaries.
| 342 | $expandSectionBoundary = true;
| 343 | $next = $node->nextSibling;
| 344 | } else {
| 345 | // Reset to normal sectioning behavior!
| 346 | $node = $tplInfo->first;
| 347 | $tplInfo->rtContentNodes = [];
| 348 | }
| 349 | }
| 350 |
| 351 | if ( DOMUtils::isHeading( $node ) ) {
| 352 | DOMUtils::assertElt( $node ); // headings are elements
| 353 | $level = (int)DOMCompat::nodeName( $node )[1];
| 354 |
| 355 | $dp = DOMDataUtils::getDataParsoid( $node );
| 356 | if ( WTUtils::isLiteralHTMLNode( $node ) ) {
| 357 | // HTML <h*> tags get section wrappers, but the sections are uneditable
| 358 | // via the section editing API.
| 359 | $this->sectionNumber = -1;
| 360 | } elseif ( isset( $dp->tmp->headingIndex ) ) {
| 361 | // This could be just `$this->sectionNumber++` without the
| 362 | // complicated if-guard if T214538 were fixed in core;
| 363 | // see T213468 where this more-complicated behavior was
| 364 | // added to match core's eccentricities.
| 365 | $this->sectionNumber = $dp->tmp->headingIndex;
| 366 | }
| 367 | if ( $level < $highestSectionLevel ) {
| 368 | $highestSectionLevel = $level;
| 369 | }
| 370 | $currSection = $this->createNewSection(
| 371 | $rootNode, $sectionStack,
| 372 | $currSection, $node, $level, false
| 373 | );
| 374 | if ( $tplInfo && $expandSectionBoundary ) {
| 375 | foreach ( $tplInfo->rtContentNodes as $rtn ) {
| 376 | $currSection->container->insertBefore( $rtn, $node );
| 377 | }
| 378 | $tplInfo->firstSection = $currSection;
| 379 | }
| 380 | $addedNode = true;
| 381 | } elseif ( $node instanceof Element ) {
| 382 | $nestedHighestSectionLevel = $this->wrapSectionsInDOM( null, $node );
| 383 | if ( $currSection && !$currSection->hasNestedLevel( $nestedHighestSectionLevel ) ) {
| 384 | // If we find a higher level nested section,
| 385 | // (a) Make current section non-editable
| 386 | // (b) There are 2 options here best illustrated with an example.
| 387 | // Consider the wikitext below.
| 388 | // <div>
| 389 | // =1=
| 390 | // b
| 391 | // </div>
| 392 | // c
| 393 | // =2=
| 394 | // 1. Create a new pseudo-section to wrap '$node'
| 395 | // There will be a <section> around the <div> which includes 'c'.
| 396 | // 2. Don't create the pseudo-section by setting '$currSection = null'
| 397 | // But, this can leave some content outside any top-level section.
| 398 | // 'c' will not be in any section.
| 399 | // The code below implements strategy 1.
| 400 | $currSection->setId( -1 );
| 401 | $currSection = $this->createNewSection(
| 402 | $rootNode, $sectionStack,
| 403 | $currSection, $node, $nestedHighestSectionLevel, true
| 404 | );
| 405 | $addedNode = true;
| 406 | }
| 407 | }
| 408 |
| 409 | if ( $currSection && !$addedNode ) {
| 410 | $currSection->addNode( $node );
| 411 | }
| 412 |
| 413 | if ( $tplInfo && $tplInfo->first === $node ) {
| 414 | $tplInfo->firstSection = $currSection;
| 415 | }
| 416 |
| 417 | // Track exit from templated output
| 418 | if ( $tplInfo && $tplInfo->last === $node ) {
| 419 | if ( $currSection !== $tplInfo->firstSection ) {
| 420 | // The opening $node and closing $node of the template
| 421 | // are in different sections! This might require resolution.
| 422 | // While 'firstSection' could be null, if we get here,
| 423 | // 'lastSection' is guaranteed to always be non-null.
| 424 | $tplInfo->lastSection = $currSection;
| 425 | $this->tplsAndExtsToExamine[] = $tplInfo;
| 426 | }
| 427 |
| 428 | $this->tplInfo = $tplInfo = null;
| 429 | }
| 430 |
| 431 | $node = $next;
| 432 | }
| 433 |
| 434 | // The last section embedded in a non-body DOM element
| 435 | // should always be marked non-editable since it will have
| 436 | // the closing tag (ex: </div>) showing up in the source editor
| 437 | // which we cannot support in a visual editing environment.
| 438 | if ( $currSection && !DOMUtils::atTheTop( $rootNode ) ) {
| 439 | $currSection->setId( -1 );
| 440 | }
| 441 |
| 442 | return $highestSectionLevel;
| 443 | }
| 444 | |
| 445 | /**
| 446 | * Is this a Parsoid-inserted section (vs. a section node generated by
| 447 | * other page-components / content-generators like extensions)?
| 448 | *
| 449 | * @param Element $n Element to test
| 450 | * @return bool True iff $n is a <section> element that has a data-mw-section-id attribute
| 451 | */
| 452 | private static function isParsoidSection( Element $n ): bool {
| 453 | return DOMCompat::nodeName( $n ) === 'section' && $n->hasAttribute( 'data-mw-section-id' );
| 454 | }
| 455 | |
| 456 | /**
| 457 | * Find the nearest ancestor that is a Parsoid-inserted section
| 458 | *
| 459 | * @param Node $n Node whose ancestors are searched
| 460 | * @return Element The section found; the invariant below fails if there is none
| 461 | */
| 462 | private static function findSectionAncestor( Node $n ): Element {
| 463 | do { // skip over <section> ancestors that are not Parsoid sections
| 464 | $n = DOMUtils::findAncestorOfName( $n, 'section' );
| 465 | } while ( $n && !self::isParsoidSection( $n ) );
| 466 |
| 467 | Assert::invariant( $n instanceof Element, "Expected to find Parsoid-section ancestor" );
| 468 | return $n;
| 469 | }
| 470 | |
| 471 | /**
| 472 | * Get opening/closing DSR offset for the subtree rooted at $node.
| 473 | * This handles scenarios where $node is a section or template wrapper
| 474 | * and if a section, when it has leading/trailing non-element nodes
| 475 | * that don't have recorded DSR values.
| 476 | *
| 477 | * @param Element $node Section or other element to inspect
| 478 | * @param bool $start True for the opening offset, false for the closing offset
| 479 | * @return ?int The offset; -1 for a Parsoid section without any element child
| 480 | */
| 481 | private function getDSR( Element $node, bool $start ): ?int {
| 482 | if ( !self::isParsoidSection( $node ) ) {
| 483 | $dsr = DOMDataUtils::getDataParsoid( $node )->dsr ?? null;
| 484 | if ( !$dsr ) {
| 485 | Assert::invariant(
| 486 | $node->hasAttribute( 'about' ),
| 487 | 'Expected an about id'
| 488 | );
| 489 | $about = DOMCompat::getAttribute( $node, 'about' );
| 490 | $dsr = DOMDataUtils::getDataParsoid( $this->aboutIdMap[$about] )->dsr; // NOTE(review): assumes the about id was recorded and its first node has DSR - confirm
| 491 | }
| 492 |
| 493 | return $start ? $dsr->start : $dsr->end;
| 494 | }
| 495 |
| 496 | $offset = 0; // combined length of text/comment nodes skipped before the first element
| 497 | $c = $start ? $node->firstChild : $node->lastChild;
| 498 | while ( $c ) {
| 499 | if ( $c instanceof Text ) {
| 500 | $offset += strlen( $c->textContent );
| 501 | } elseif ( $c instanceof Comment ) {
| 502 | $offset += WTUtils::decodedCommentLength( $c );
| 503 | } else {
| 504 | DOMUtils::assertElt( $c );
| 505 | $ret = $this->getDSR( $c, $start );
| 506 | return $ret === null ? null : $ret + ( $start ? -$offset : $offset );
| 507 | }
| 508 | $c = $start ? $c->nextSibling : $c->previousSibling;
| 509 | }
| 510 |
| 511 | return -1; // every child was a text or comment node (or there were none)
| 512 | }
| 513 | |
| 514 | /**
| 515 | * Append the frame source text in [$offset1, $offset2) to $parts. FIXME: Duplicated with TableFixups code.
| 516 | * @param list<string|TemplateInfo> &$parts List that the gap text is appended to
| 517 | * @param ?int $offset1 Start offset of the gap
| 518 | * @param ?int $offset2 End offset of the gap
| 519 | * @throws InternalException When either offset is null
| 520 | */
| 521 | private function fillDSRGap( array &$parts, ?int $offset1, ?int $offset2 ): void {
| 522 | if ( $offset1 === null || $offset2 === null ) {
| 523 | throw new InternalException();
| 524 | }
| 525 | if ( $offset1 < $offset2 ) { // empty or inverted ranges add nothing
| 526 | $parts[] = PHPUtils::safeSubstr( $this->frame->getSrcText(), $offset1, $offset2 - $offset1 );
| 527 | }
| 528 | }
| 529 | |
| 530 | /**
| 531 | * FIXME: There is strong overlap with TableFixups code.
| 532 | *
| 533 | * $wrapper will hold tpl/ext encap info for the array of tpls/exts as well as
| 534 | * content before, after and in between them. Right now, this will always be a
| 535 | * <section> node, but not asserting this since code doesn't depend on it being so.
| 536 | *
| 537 | * @param Element $wrapper Element that receives the combined typeof, data-parsoid pi and data-mw parts
| 538 | * @param array $encapWrappers Encapsulation wrapper nodes to merge (presumably in source order - confirm)
| 539 | */
| 540 | private function collapseWrappers( Element $wrapper, array $encapWrappers ): void {
| 541 | $wrapperDp = DOMDataUtils::getDataParsoid( $wrapper );
| 542 |
| 543 | // Build up $parts, $pi to set up the combined transclusion info on $wrapper
| 544 | $parts = [];
| 545 | $pi = [];
| 546 | $index = 0;
| 547 | $prevDp = null;
| 548 | $haveTemplate = false;
| 549 | try {
| 550 | foreach ( $encapWrappers as $encapNode ) {
| 551 | $dp = DOMDataUtils::getDataParsoid( $encapNode );
| 552 |
| 553 | // Plug DSR gaps between encapWrappers
| 554 | if ( !$prevDp ) {
| 555 | $this->fillDSRGap( $parts, $wrapperDp->dsr->start, $dp->dsr->start );
| 556 | } else {
| 557 | $this->fillDSRGap( $parts, $prevDp->dsr->end, $dp->dsr->start );
| 558 | }
| 559 |
| 560 | if ( DOMUtils::hasTypeOf( $encapNode, "mw:Transclusion" ) ) {
| 561 | $haveTemplate = true;
| 562 | // Assimilate $encapNode's data-mw and data-parsoid pi info
| 563 | $dmw = DOMDataUtils::getDataMw( $encapNode );
| 564 | foreach ( $dmw->parts ?? [] as $part ) {
| 565 | // Template index is relative to other transclusions.
| 566 | // This index is used to extract whitespace information from
| 567 | // data-parsoid and that array only includes info for templates.
| 568 | // So skip over strings here.
| 569 | if ( !is_string( $part ) ) {
| 570 | $part = clone $part;
| 571 | $part->i = $index++;
| 572 | }
| 573 | $parts[] = $part;
| 574 | }
| 575 | PHPUtils::pushArray( $pi, $dp->pi ?? [ [] ] );
| 576 | } else {
| 577 | // Where a non-template type is present, we are going to treat that
| 578 | // segment as a "string" in the parts array. So, we effectively treat
| 579 | // "mw:Transclusion" as a generic type that covers a single template
| 580 | // as well as a run of segments where at least one segment comes from
| 581 | // a template but others may be from other generators (ex: extensions).
| 582 | $this->fillDSRGap( $parts, $dp->dsr->start, $dp->dsr->end );
| 583 | }
| 584 |
| 585 | $prevDp = $dp;
| 586 | }
| 587 |
| 588 | if ( !$haveTemplate ) {
| 589 | throw new InternalException(); // handled by the catch below
| 590 | }
| 591 |
| 592 | DOMUtils::addTypeOf( $wrapper, "mw:Transclusion" );
| 593 | $wrapperDp->pi = $pi;
| 594 | $this->fillDSRGap( $parts, $prevDp->dsr->end, $wrapperDp->dsr->end );
| 595 | $dataMw = new DataMw( [] );
| 596 | $dataMw->parts = $parts;
| 597 | DOMDataUtils::setDataMw( $wrapper, $dataMw );
| 598 | } catch ( InternalException $e ) {
| 599 | // We don't have accurate template wrapping information.
| 600 | // Set typeof to 'mw:Placeholder' since 'mw:Transclusion'
| 601 | // typeof is not actionable without valid data-mw.
| 602 | //
| 603 | // FIXME:
| 604 | // 1. If we stop stripping section wrappers in the html->wt direction,
| 605 | // we will need to add a DOMHandler for <section> or mw:Placeholder typeof
| 606 | // on arbitrary Elements to traverse into children and serialize and
| 607 | // prevent page corruption.
| 608 | // 2. This may be a good place to collect stats for T191641#6357136
| 609 | // 3. Maybe we need a special error typeof rather than mw:Placeholder
| 610 | $wrapper->setAttribute( 'typeof', 'mw:Placeholder' );
| 611 | }
| 612 | }
| 613 | |
| 614 | /**
| 615 | * Section wrappers and encapsulation wrappers can conflict because of
| 616 | * partial overlaps. This method identifies those conflicts and fixes up
| 617 | * the encapsulation by expanding those ranges as necessary.
| 618 | */
| 619 | private function resolveTplExtSectionConflicts(): void {
| 620 | $secRanges = [];
| 621 | '@phan-var array[] $secRanges';
| 622 | foreach ( $this->tplsAndExtsToExamine as $tplInfo ) {
| 623 | $s1 = $tplInfo->firstSection->container ??
| 624 | self::findSectionAncestor( $tplInfo->first );
| 625 |
| 626 | // guaranteed to be non-null
| 627 | $s2 = $tplInfo->lastSection->container;
| 628 |
| 629 | // Find a common ancestor of s1 and s2 (could be s1 or s2)
| 630 | $s2Ancestors = DOMUtils::pathToRoot( $s2 );
| 631 | $s1Ancestors = [];
| 632 | $n = 0;
| 633 | $ancestor = $s1;
| 634 | while ( !in_array( $ancestor, $s2Ancestors, true ) ) {
| 635 | $s1Ancestors[] = $ancestor;
| 636 | $ancestor = $ancestor->parentNode;
| 637 | $n++;
| 638 | }
| 639 |
| 640 | // ancestor is now the common ancestor of s1 and s2
| 641 | $s1Ancestors[] = $ancestor;
| 642 | $n++;
| 643 |
| 644 | // Set up start/end of the new encapsulation range
| 645 | if ( $ancestor === $s1 || $ancestor === $s2 ) {
| 646 | $start = $ancestor;
| 647 | $end = $ancestor;
| 648 | } else {
| 649 | // While creating a new section (see createNewSection), it only
| 650 | // gets added where its parent is either another section,
| 651 | // or body, so all ancestors are themselves sections, or body.
| 652 | $start = $s1Ancestors[$n - 2];
| 653 | $i = array_search( $ancestor, $s2Ancestors, true );
| 654 | $end = $s2Ancestors[$i - 1];
| 655 | }
| 656 |
| 657 | '@phan-var Element $start'; // @var Element $start
| 658 | '@phan-var Element $end'; // @var Element $end
| 659 |
| 660 | // Add new OR update existing range
| 661 | if ( $start->hasAttribute( 'about' ) ) {
| 662 | // Overlaps with an existing range.
| 663 | $about = DOMCompat::getAttribute( $start, 'about' );
| 664 | if ( !$end->hasAttribute( 'about' ) ) {
| 665 | // Extend existing range till $end
| 666 | $secRanges[$about]['end'] = $end;
| 667 | $end->setAttribute( 'about', $about );
| 668 | } else {
| 669 | Assert::invariant( DOMCompat::getAttribute( $end, 'about' ) === $about,
| 670 | "Expected end-range about id to be $about instead of " .
| 671 | DOMCompat::getAttribute( $end, 'about' ) . " in the overlap scenario." );
| 672 | }
| 673 | } else {
| 674 | // Check for nesting in another range. Since $start and $end
| 675 | // are siblings, this is sufficient to know the entire range
| 676 | // is nested. Stop at <body>, or once we run out of element ancestors (fragment root).
| 677 | $about = null;
| 678 | $n = $start->parentNode;
| 679 | $body = DOMCompat::getBody( $start->ownerDocument );
| 680 | while ( $n instanceof Element && $n !== $body ) {
| 681 | '@phan-var Element $n'; // @var Element $n
| 682 | if ( self::isParsoidSection( $n ) && $n->hasAttribute( 'about' ) ) {
| 683 | $about = DOMCompat::getAttribute( $n, 'about' );
| 684 | break;
| 685 | }
| 686 | $n = $n->parentNode;
| 687 | }
| 688 |
| 689 | if ( !$about ) {
| 690 | // Not overlapping, not nested => new range
| 691 | $about = $this->env->newAboutId();
| 692 | $start->setAttribute( 'about', $about );
| 693 | $end->setAttribute( 'about', $about );
| 694 | $secRanges[$about] = [ 'start' => $start, 'end' => $end, 'encapWrappers' => [] ];
| 695 | }
| 696 | }
| 697 | $secRanges[$about]['encapWrappers'][] = $tplInfo->first;
| 698 | }
| 699 |
| 700 | // Process recorded ranges into new encapsulation information
| 701 | // that spans all content in that range.
| 702 | foreach ( $secRanges as $about => $range ) {
| 703 | // Ensure that all top level nodes of the range have the same about id
| 704 | for ( $n = $range['start']; $n !== $range['end']->nextSibling; $n = $n->nextSibling ) {
| 705 | Assert::invariant( self::isParsoidSection( $n ),
| 706 | "Encountered non-Parsoid-section node (" .
| 707 | DOMCompat::nodeName( $n ) .
| 708 | ") while updating template wrappers" );
| 709 | $n->setAttribute( 'about', $about );
| 710 | }
| 711 |
| 712 | $dsr1 = $this->getDSR( $range['start'], true ); // Traverses non-tpl content => will succeed
| 713 | $dsr2 = $this->getDSR( $range['end'], false ); // Traverses non-tpl content => will succeed
| 714 | $dp = new DataParsoid;
| 715 | $dp->dsr = new DomSourceRange( $dsr1, $dsr2, null, null );
| 716 | DOMDataUtils::setDataParsoid( $range['start'], $dp );
| 717 |
| 718 | $this->collapseWrappers( $range['start'], $range['encapWrappers'] );
| 719 | }
| 720 | }
| 721 | |
| 722 | private function convertTOCOffsets(): void {
| 723 | // Gather references to every non-null codepointOffset in the TOC data for one batch conversion
| 724 | $offsets = [];
| 725 | foreach ( $this->env->getTOCData()->getSections() as $section ) {
| 726 | if ( $section->codepointOffset !== null ) {
| 727 | $offsets[] = &$section->codepointOffset; // by reference; presumably convertOffsets() updates it in place
| 728 | }
| 729 | }
| 730 | TokenUtils::convertOffsets(
| 731 | $this->env->topFrame->getSrcText(),
| 732 | $this->env->getCurrentOffsetType(),
| 733 | 'char',
| 734 | $offsets
| 735 | );
| 736 | }
| 737 | |
| 738 | /**
| 739 | * In core, Parser.php adds a TOC marker before the *first* heading element
| 740 | * independent of how that heading element is nested. In the common case,
| 741 | * that insertion point corresponds to the last element of the lead section
| 742 | * as computed by section wrapping code in this file. In the edge case, when
| 743 | * a <div> wraps the heading, the insertion point lies inside the <div> and
| 744 | * has no relation to the lead section.
| 745 | */
| 746 | private static function findTOCInsertionPoint( Node $elt ): ?Element {
| 747 | while ( $elt ) { // scan $elt and its following siblings, descending into elements first
| 748 | // Ignore extension content while finding TOC insertion point
| 749 | if ( WTUtils::isFirstExtensionWrapperNode( $elt ) ) {
| 750 | $elt = WTUtils::skipOverEncapsulatedContent( $elt );
| 751 | continue;
| 752 | }
| 753 | if ( $elt instanceof Element ) {
| 754 | if ( DOMUtils::isHeading( $elt ) ) {
| 755 | return $elt; // first heading in document order
| 756 | } elseif ( $elt->firstChild ) {
| 757 | $tocIP = self::findTOCInsertionPoint( $elt->firstChild );
| 758 | if ( $tocIP ) {
| 759 | return $tocIP;
| 760 | }
| 761 | }
| 762 | }
| 763 | $elt = $elt->nextSibling;
| 764 | }
| 765 | return null; // no heading among $elt, its following siblings, or their descendants
| 766 | }
| 767 | |
/**
 * Insert a synthetic section in which to place the TOC.
 *
 * A new <section data-mw-section-id="-2"> is inserted immediately before
 * $insertionPoint and $syntheticTocMeta is appended to it. If the node that
 * preceded $insertionPoint is an encapsulation wrapper, its about id is
 * copied onto the new nodes wherever that keeps the wrapped range contiguous.
 *
 * @param Element $syntheticTocMeta the TOC marker to place in the new section
 * @param Element $insertionPoint node before which the new section is inserted
 * @return Element the newly created section
 */
private function insertSyntheticSection(
	Element $syntheticTocMeta, Element $insertionPoint
): Element {
	// Capture the neighbour *before* inserting, since the new section
	// becomes $insertionPoint's previous sibling afterwards.
	$prev = $insertionPoint->previousSibling;

	// Create a pseudo-section containing the TOC
	$syntheticTocSection = $this->doc->createElement( 'section' );
	$syntheticTocSection->setAttribute( 'data-mw-section-id', '-2' );
	$insertionPoint->parentNode->insertBefore( $syntheticTocSection, $insertionPoint );
	$this->pseudoSectionCount++;
	$syntheticTocSection->appendChild( $syntheticTocMeta );

	// Ensure template continuity is not broken!
	// If $prev is not an encapsulation wrapper, nothing to do!
	if ( !$prev || !WTUtils::isEncapsulationWrapper( $prev ) ) {
		return $syntheticTocSection;
	}
	'@phan-var Element $prev';
	$prevAbout = DOMCompat::getAttribute( $prev, 'about' );
	if ( $prevAbout === null ) {
		// A wrapper without an about id has no continuity to preserve.
		// Without this guard, a null about on both sides would compare
		// equal below and null would be passed to setAttribute().
		return $syntheticTocSection;
	}

	// First, handle the case of section-tag-stripping that VE does:
	// compare against the leftmost node inside $insertionPoint, i.e. the
	// node that follows $prev once the <section> tag itself is ignored.
	// If the about ids are different, that node & $prev belong to
	// different transclusions and the TOC meta can be left alone.
	$firstChild = $insertionPoint->firstChild;
	$firstChildAbout = $firstChild instanceof Element
		? DOMCompat::getAttribute( $firstChild, 'about' )
		: null;
	if ( $prevAbout === $firstChildAbout ) {
		$syntheticTocMeta->setAttribute( 'about', $prevAbout );
	}

	// Now handle the case of section tags not being stripped.
	// NOTE that the synthetic section sits right before $insertionPoint,
	// so $insertionPoint itself is the node to compare against.
	$sectionAbout = DOMCompat::getAttribute( $insertionPoint, 'about' );
	if ( $prevAbout === $sectionAbout ) {
		$syntheticTocSection->setAttribute( 'about', $prevAbout );
	}

	return $syntheticTocSection;
}
| 812 | |
/**
 * Set the TOC-related output flags and, when a TOC should be shown but no
 * 'toc' behavior switch is present, add an auto-generated TOC marker <meta>
 * to the section preceding the first heading's section.
 *
 * Nothing is done when the page config asks for the TOC to be suppressed.
 */
private function addSyntheticTOCMarker(): void {
	// Add a synthetic TOC at the end of the first section, if necessary
	$tocBS = $this->env->getBehaviorSwitch( 'toc' );
	$noTocBS = $this->env->getBehaviorSwitch( 'notoc' );
	$forceTocBS = $this->env->getBehaviorSwitch( 'forcetoc' );

	// 'notoc' only hides the TOC when 'toc' is not present as well
	$showToc = !( $noTocBS && !$tocBS );
	$numHeadings = $this->count - 1 - $this->pseudoSectionCount; // $this->count is initialized to 1
	$enoughToc = $showToc && ( $numHeadings >= 4 || $tocBS );
	if ( $forceTocBS ) {
		$showToc = true;
		$enoughToc = true;
	}
	if ( $numHeadings === 0 ) {
		$enoughToc = false;
	}

	if ( $this->env->getPageConfig()->getSuppressTOC() ) {
		return;
	}

	if ( !$showToc ) {
		// ParserOutputFlags::NO_TOC
		$this->env->getMetadata()->setOutputFlag( 'no-toc' );
		// $enoughToc can only be true when $showToc is, so we are done.
		return;
	}
	if ( !$enoughToc ) {
		return;
	}

	// ParserOutputFlags::SHOW_TOC
	$this->env->getMetadata()->setOutputFlag( 'show-toc' );
	if ( $tocBS ) {
		// A 'toc' switch is present: no synthetic marker is added
		return;
	}

	$syntheticTocMeta = $this->doc->createElement( 'meta' );
	$syntheticTocMeta->setAttribute( 'property', 'mw:PageProp/toc' );
	$dmw = DOMDataUtils::getDataMw( $syntheticTocMeta );
	$dmw->autoGenerated = true;
	$tocIP = self::findTOCInsertionPoint( DOMCompat::getBody( $this->doc ) );
	if ( $tocIP === null ) {
		// should not happen, but nothing to do here!
		return;
	}

	// NOTE: Given how <section>s are computed in this file, headings
	// will never have previous siblings. So, we always look at the
	// previous sibling of the heading's section ($insertionPoint).
	$insertionPoint = self::findSectionAncestor( $tocIP );

	$insertionContainer = $insertionPoint->previousSibling;
	if ( !$insertionContainer || DOMCompat::nodeName( $insertionContainer ) !== 'section' ) {
		// No preceding <section> to host the marker: create one
		$insertionContainer = $this->insertSyntheticSection(
			$syntheticTocMeta, $insertionPoint
		);
	}
	$insertionContainer->appendChild( $syntheticTocMeta );

	// Set a synthetic zero-length dsr to suppress noisy warnings
	// from the round trip testing script.
	$syntheticOffset = DOMDataUtils::getDataParsoid( $tocIP )->dsr->start ?? null;
	if ( $syntheticOffset !== null ) {
		$dp = DOMDataUtils::getDataParsoid( $syntheticTocMeta );
		$dp->dsr = new DomSourceRange( $syntheticOffset, $syntheticOffset, 0, 0 );
	}
}
| 876 | |
/**
 * Transfer information about section links from behaviour switches to CMC.
 *
 * Each behaviour switch that is set (non-null) is forwarded as the value
 * of its corresponding output flag.
 */
private function addSectionInfo() {
	// behaviour switch name => output flag name
	$outputFlagForSwitch = [
		// ParserOutputFlags::NEW_SECTION
		'newsectionlink' => 'mw-NewSection',
		// ParserOutputFlags::HIDE_NEW_SECTION
		'nonewsectionlink' => 'mw-HideNewSection',
		// ParserOutputFlags::NO_SECTION_EDIT_LINKS
		'noeditsection' => 'no-section-edit-links',
	];

	foreach ( $outputFlagForSwitch as $switchName => $flagName ) {
		$switchValue = $this->env->getBehaviorSwitch( $switchName );
		if ( $switchValue === null ) {
			continue;
		}
		$this->env->getMetadata()->setOutputFlag( $flagName, $switchValue );
	}
}
| 901 | |
/**
 * Entry point of this DOM postprocessor: walks the DOM rooted at the
 * root node and adds <section> wrappers as necessary, then runs the
 * follow-up passes that depend on the sections being in place.
 * Implements the algorithm documented @ mw:Parsing/Notes/Section_Wrapping
 */
public function run(): void {
	$root = $this->rootNode;

	// The lead section uses level 6, the lowest possible level, since
	// we don't want any nesting of h-tags in the lead section.
	$lead = new Section( 6, 0, $this->doc );
	$lead->setId( 0 );

	$this->wrapSectionsInDOM( $lead, $root );

	// A lead section always exists, even if sometimes it only contains
	// whitespace + comments; it goes in front of everything else.
	$root->insertBefore( $lead->container, $root->firstChild );

	// Template conflicts can only be resolved once all sections
	// have been added to the DOM.
	$this->resolveTplExtSectionConflicts();

	// Convert byte offsets to codepoint offsets in TOCData
	// (done in a batch to avoid O(N^2) string traversals)
	$this->convertTOCOffsets();

	$this->addSyntheticTOCMarker();

	$this->addSectionInfo();
}
| 930 | } |