Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 125 |
|
0.00% |
0 / 12 |
CRAP | |
0.00% |
0 / 1 |
AnnotationDOMRangeBuilder | |
0.00% |
0 / 125 |
|
0.00% |
0 / 12 |
2550 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
wrapAnnotationsInTree | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
56 | |||
makeUneditable | |
0.00% |
0 / 40 |
|
0.00% |
0 / 1 |
272 | |||
moveRangeStart | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
42 | |||
moveRangeEnd | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
42 | |||
isExtended | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
setMetaDataMwForRange | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
matchMetaType | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
verifyTplInfoExpectation | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getRangeId | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
updateDSRForFirstRangeNode | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
execute | |
0.00% |
0 / 21 |
|
0.00% |
0 / 1 |
42 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Wt2Html\PP\Processors; |
5 | |
6 | use SplObjectStorage; |
7 | use Wikimedia\Parsoid\Core\DomSourceRange; |
8 | use Wikimedia\Parsoid\DOM\Document; |
9 | use Wikimedia\Parsoid\DOM\Element; |
10 | use Wikimedia\Parsoid\DOM\Node; |
11 | use Wikimedia\Parsoid\NodeData\DataParsoid; |
12 | use Wikimedia\Parsoid\NodeData\TempData; |
13 | use Wikimedia\Parsoid\NodeData\TemplateInfo; |
14 | use Wikimedia\Parsoid\Utils\DOMCompat; |
15 | use Wikimedia\Parsoid\Utils\DOMDataUtils; |
16 | use Wikimedia\Parsoid\Utils\DOMUtils; |
17 | use Wikimedia\Parsoid\Utils\WTUtils; |
18 | use Wikimedia\Parsoid\Wt2Html\Frame; |
19 | |
20 | /** |
21 | * Annotation ranges and transclusion ranges are handled somewhat differently, for a number of reasons.
22 | * - Annotation ranges can be (and typically are) nested: we want to handle a <tvar> range inside a <translate> |
23 | * range (whereas non-top-level transclusions are ignored). That said, this only applies to annotations of |
24 | * different types, so finding/handling top-level ranges of a given type is useful (hence extending the |
25 | * DOMRangeBuilder, still.) |
26 | * |
27 | * - Annotation ranges are not represented in the final document in the same way as transclusions. In an ideal |
28 | * world, annotations are well-nested and the corresponding range is not extended; in this case, the annotation |
29 | * range is only delimited by a pair of <meta> tags (that can then be displayed by VE, or ignored by |
30 | * read-views). The annotated content stays editable; whereas editing of templated content is always prevented. |
31 | * |
32 | * - Relatedly: annotation meta tags are NOT removed from the output (whereas transclusion meta tags are an |
33 | * intermediary state). This has an impact on fostering. It is safe to bypass the fostering of meta tags in the |
34 | * template case, because the meta tags will disappear anyway, and their presence in a fostering position only |
35 | * marks the whole table as template content. Annotation tags do not benefit from the same leeway: they will need |
36 | * to be moved in the right place (and, for end tags, "the right place" means the end of the table, not the start |
37 | * of the table - which we can handle more consistently if the meta tag ends up in the FosterBox). Hence, |
38 | * there is little reason to not use the general fostering pass for annotation meta tags as well (except for |
39 | * the consistency with transclusion meta tags). |
40 | * |
41 | * The assumptions here are consequently as follows: |
42 | * - annotation <meta> tags are not in a fosterable position (they have been moved out of it in the |
43 | * TreeBuilderStage) |
44 | * - during the MarkFosteredContent pass, end annotation meta tags are moved from the foster box to after the |
45 | * table. |
46 | * This should guarantee that no range is reversed (so that's a case we do not have to worry about). |
47 | */ |
class AnnotationDOMRangeBuilder extends DOMRangeBuilder {
	/** Helper used by moveRangeEnd() to migrate trailing newlines out of a paragraph. */
	private MigrateTrailingNLs $migrateTrailingNls;

	/**
	 * AnnotationDOMRangeBuilder constructor.
	 * @param Document $document
	 * @param Frame $frame
	 */
	public function __construct( Document $document, Frame $frame ) {
		parent::__construct( $document, $frame );
		$this->traceType = "annwrap";
		$this->migrateTrailingNls = new MigrateTrailingNLs();
	}

	/**
	 * Moves the start/end meta tags of each range to the range boundaries.
	 *
	 * For every range, the start meta is moved to $range->start and the end meta to
	 * $range->end when they are not already there. If the resulting boundaries do not share
	 * the same parent element, an enclosing range is computed and the metas are moved again.
	 *
	 * @param DOMRangeInfo[] $annRanges ranges to process
	 */
	private function wrapAnnotationsInTree( array $annRanges ): void {
		foreach ( $annRanges as $range ) {
			if ( $range->startElem !== $range->start ) {
				$this->moveRangeStart( $range, $range->start );
			}
			if ( $range->endElem !== $range->end ) {
				$this->moveRangeEnd( $range, $range->end );
			}

			// It can happen that marking range uneditable adds another layer of nesting that is not captured
			// by the initial range detection (since it's not there at that time). To avoid that, we check whether
			// both nodes have the same parent and, if not, we hoist them to a common ancestor.
			$startParent = DOMCompat::getParentElement( $range->start );
			$endParent = DOMCompat::getParentElement( $range->end );
			if ( $startParent !== $endParent ) {
				$correctedRange = self::findEnclosingRange( $range->start, $range->end );
				if ( $range->start !== $correctedRange->start ) {
					$this->moveRangeStart( $range, $correctedRange->start );
				}
				if ( $range->end !== $correctedRange->end ) {
					$this->moveRangeEnd( $range, $correctedRange->end );
				}
			}
		}
	}

	/**
	 * Makes the DOM range between $range->startElem and $range->endElem uneditable by wrapping
	 * it into a <div> (for block ranges) or <span> (for inline ranges) with the mw:ExtendedAnnRange
	 * type.
	 *
	 * If $range->endElem is not found among the following siblings of $range->startElem, every
	 * remaining sibling is wrapped and a warning is logged.
	 *
	 * @param DOMRangeInfo $range
	 * @param int|null $actualRangeStart start offset stored in the wrapper's dsr (also used in the warning)
	 * @param int|null $actualRangeEnd end offset stored in the wrapper's dsr (also used in the warning)
	 */
	private function makeUneditable( DOMRangeInfo $range, ?int $actualRangeStart, ?int $actualRangeEnd ): void {
		$parent = $range->startElem->parentNode;

		// The wrapper is a <span> unless a node of the range (end meta included) has a block tag.
		$node = $range->startElem;
		$inline = true;
		while ( $node !== $range->endElem && $node !== null ) {
			if ( DOMUtils::hasBlockTag( $node ) ) {
				$inline = false;
				break;
			}
			$node = $node->nextSibling;
		}
		if ( $inline && $node !== null && DOMUtils::hasBlockTag( $node ) ) {
			$inline = false;
		}

		$wrap = $parent->ownerDocument->createElement( $inline ? 'span' : 'div' );
		$parent->insertBefore( $wrap, $range->startElem );

		// Move every sibling from the start meta up to (but excluding) the end meta into the wrapper.
		$toMove = $range->startElem;
		while ( $toMove !== $range->endElem && $toMove !== null ) {
			$nextToMove = $toMove->nextSibling;
			$wrap->appendChild( $toMove );
			$toMove = $nextToMove;
		}

		if ( $toMove !== null ) {
			// $toMove is the end meta here: it belongs inside the wrapper too.
			$wrap->appendChild( $toMove );
		} else {
			$this->env->log( 'warn', "End of annotation range [$actualRangeStart, $actualRangeEnd] not found. " .
				"Document marked uneditable until its end." );
		}

		$wrap->setAttribute( "typeof", "mw:ExtendedAnnRange" );

		// Ensure template continuity is not broken
		// NOTE(review): at this point the start meta (and the end meta, when found) has already
		// been moved into $wrap, so these sibling lookups are relative to the wrapper's children
		// rather than to the wrapper's own neighbours - confirm that this is intended.
		$about = DOMCompat::getAttribute( $range->startElem, "about" );
		$prevSibling = DOMCompat::getPreviousElementSibling( $range->startElem );
		$nextSibling = DOMCompat::getNextElementSibling( $range->endElem );
		$continuity = ( $prevSibling && $prevSibling->hasAttribute( "about" ) ) ||
			( $nextSibling && $nextSibling->hasAttribute( "about" ) );
		if ( $about && $continuity ) {
			$wrap->setAttribute( "about", $about );
		}

		$dp = new DataParsoid();
		$dp->autoInsertedStart = true;
		$dp->autoInsertedEnd = true;
		$dp->dsr = new DomSourceRange( $actualRangeStart, $actualRangeEnd, 0, 0 );
		DOMDataUtils::setDataParsoid( $wrap, $dp );
	}

	/**
	 * Moves the start of the range to the designated node
	 * @param DOMRangeInfo $range the range to modify
	 * @param Node $node the new start of the range
	 */
	private function moveRangeStart( DOMRangeInfo $range, Node $node ): void {
		$startMeta = $range->startElem;
		$startDataParsoid = DOMDataUtils::getDataParsoid( $startMeta );
		if ( $node instanceof Element ) {
			if ( DOMCompat::nodeName( $node ) === "p" && $node->firstChild === $startMeta ) {
				// If the first child of "p" is the meta, and it gets moved, then it got mistakenly
				// pulled inside the paragraph, and the paragraph dsr that gets computed includes
				// it - which may lead to the tag getting duplicated on roundtrip. Hence, we
				// adjust the dsr of the paragraph in that case. We also don't consider the meta
				// tag to have been moved in that case.
				$pDataParsoid = DOMDataUtils::getDataParsoid( $node );
				$pDataParsoid->dsr->start = $startDataParsoid->dsr->end;
			} else {
				$startDataParsoid->wasMoved = true;
			}
		}
		$node = $this->getStartConsideringFosteredContent( $node );
		$node->parentNode->insertBefore( $startMeta, $node );
		if ( $node instanceof Element ) {
			// Ensure template continuity is not broken
			$about = DOMCompat::getAttribute( $node, "about" );
			if ( $about !== null ) {
				$startMeta->setAttribute( "about", $about );
			}
		}
		$range->start = $startMeta;
	}

	/**
	 * Moves the end of the range to the designated node
	 * @param DOMRangeInfo $range the range to modify
	 * @param Node $node the new end of the range
	 */
	private function moveRangeEnd( DOMRangeInfo $range, Node $node ): void {
		$endMeta = $range->endElem;
		$endDataParsoid = DOMDataUtils::getDataParsoid( $endMeta );

		if ( $node instanceof Element ) {
			// Must be recorded before the meta is migrated out of $node.
			$endMetaWasLastChild = $node->lastChild === $endMeta;

			// Migrate $endMeta and ensure template continuity is not broken
			$node->parentNode->insertBefore( $endMeta, $node->nextSibling );
			$about = DOMCompat::getAttribute( $node, "about" );
			if ( $about !== null ) {
				$endMeta->setAttribute( "about", $about );
			}

			if ( ( DOMCompat::nodeName( $node ) === "p" ) && $endMetaWasLastChild ) {
				// If the last child of "p" is the meta, and it gets moved, then it got mistakenly
				// pulled inside the paragraph, and the paragraph dsr that gets computed includes
				// it - which may lead to the tag getting duplicated on roundtrip. Hence, we
				// adjust the dsr of the paragraph in that case. We also don't consider the meta
				// tag to have been moved in that case.
				$pDataParsoid = DOMDataUtils::getDataParsoid( $node );
				$pDataParsoid->dsr->end = $endDataParsoid->dsr->start;
				// Shrink the paragraph's dsr by however much text the newline migration removed.
				$prevLength = strlen( $node->textContent ?? '' );
				$this->migrateTrailingNls->doMigrateTrailingNLs( $node, $this->env );
				$newLength = strlen( $node->textContent ?? '' );
				if ( $prevLength !== $newLength ) {
					$pDataParsoid->dsr->end -= ( $prevLength - $newLength );
				}
			} else {
				$endDataParsoid->wasMoved = true;
				DOMDataUtils::setDataParsoid( $endMeta, $endDataParsoid );
			}
		}
		$range->end = $endMeta;
	}

	/**
	 * Returns whether one of the ends of the range has been moved, which corresponds to an extended
	 * range.
	 * @param DOMRangeInfo $range
	 * @return bool
	 */
	private function isExtended( DOMRangeInfo $range ): bool {
		if ( $range->extendedByOverlapMerge ) {
			return true;
		}

		$startDataParsoid = DOMDataUtils::getDataParsoid( $range->startElem );
		$endDataParsoid = DOMDataUtils::getDataParsoid( $range->endElem );

		return ( $startDataParsoid->wasMoved ?? false ) || ( $endDataParsoid->wasMoved ?? false );
	}

	/**
	 * Sets the data-mw attribute for meta tags of the provided range
	 * @param DOMRangeInfo $range range whose start and end element needs to be to modified
	 * @param bool $isExtended whether the range got extended
	 */
	private function setMetaDataMwForRange( DOMRangeInfo $range, bool $isExtended ): void {
		$startDataMw = DOMDataUtils::getDataMw( $range->startElem );
		$endDataMw = DOMDataUtils::getDataMw( $range->endElem );

		$startDataMw->extendedRange = $isExtended;
		$startDataMw->wtOffsets = DOMDataUtils::getDataParsoid( $range->startElem )->tsr;
		$endDataMw->wtOffsets = DOMDataUtils::getDataParsoid( $range->endElem )->tsr;
		unset( $endDataMw->rangeId );
	}

	/**
	 * Returns the meta type of the element if it exists and matches the type expected by the
	 * current class, null otherwise
	 * @param Element $elem the element to check
	 * @return string|null
	 */
	protected function matchMetaType( Element $elem ): ?string {
		// for this class we're interested in the annotation type
		return WTUtils::matchAnnotationMeta( $elem );
	}

	/** @inheritDoc */
	protected function verifyTplInfoExpectation( ?TemplateInfo $templateInfo, TempData $tmp ): void {
		// Annotations aren't templates. Nothing to do.
	}

	/**
	 * Returns the range ID of a node - in the case of annotations, the "rangeId" property
	 * of its "data-mw" attribute.
	 * @param Element $node
	 * @return string
	 */
	protected function getRangeId( Element $node ): string {
		return DOMDataUtils::getDataMw( $node )->rangeId ?? '';
	}

	/**
	 * @inheritDoc
	 */
	protected function updateDSRForFirstRangeNode( Element $target, Element $source ): void {
		// nop
	}

	/**
	 * Detects the annotation ranges under $root and processes them one annotation type at a time.
	 *
	 * For each type, the top-level non-overlapping ranges are computed, their meta tags are moved
	 * to the range boundaries, extended ranges are wrapped to make them uneditable, and the
	 * data-mw of the range's meta tags is updated. If range detection throws a
	 * RangeBuilderException, a warning is logged and nothing is processed.
	 *
	 * @param Node $root root of the tree to process
	 */
	public function execute( Node $root ): void {
		try {
			$annRanges = $this->findWrappableMetaRanges( $root );
		} catch ( RangeBuilderException $e ) {
			$this->env->log( 'warn', 'The annotation ranges could not be fully detected. ' .
				' Annotation processing cancelled. ' );
			return;
		}

		// Group the ranges by annotation type: each type is processed independently.
		$rangesByType = [];
		foreach ( $annRanges as $range ) {
			$annType = WTUtils::extractAnnotationType( $range->startElem );
			$rangesByType[$annType] ??= [];
			$rangesByType[$annType][] = $range;
		}

		foreach ( $rangesByType as $singleTypeRange ) {
			// Reset the per-node range bookkeeping before handling the next type.
			$this->nodeRanges = new SplObjectStorage;
			$topRanges = $this->findTopLevelNonOverlappingRanges( $root, $singleTypeRange );
			$this->wrapAnnotationsInTree( $topRanges );
			foreach ( $topRanges as $range ) {
				$actualRangeStart = DOMDataUtils::getDataParsoid( $range->start )->dsr->start;
				$actualRangeEnd = DOMDataUtils::getDataParsoid( $range->end )->dsr->end;
				$isExtended = $this->isExtended( $range );
				if ( $isExtended ) {
					$this->makeUneditable( $range, $actualRangeStart, $actualRangeEnd );
				}
				$this->setMetaDataMwForRange( $range, $isExtended );
			}
		}
	}
}