Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 129 |
|
0.00% |
0 / 12 |
CRAP | |
0.00% |
0 / 1 |
AnnotationDOMRangeBuilder | |
0.00% |
0 / 129 |
|
0.00% |
0 / 12 |
2352 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
wrapAnnotationsInTree | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
56 | |||
makeUneditable | |
0.00% |
0 / 46 |
|
0.00% |
0 / 1 |
210 | |||
moveRangeStart | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
42 | |||
moveRangeEnd | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
42 | |||
isExtended | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
setMetaDataMwForRange | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
matchMetaType | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
verifyTplInfoExpectation | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getRangeId | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
updateDSRForFirstRangeNode | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
execute | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
42 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Wt2Html\DOM\Processors; |
5 | |
6 | use SplObjectStorage; |
7 | use Wikimedia\Parsoid\Core\DomSourceRange; |
8 | use Wikimedia\Parsoid\DOM\Document; |
9 | use Wikimedia\Parsoid\DOM\Element; |
10 | use Wikimedia\Parsoid\DOM\Node; |
11 | use Wikimedia\Parsoid\NodeData\DataParsoid; |
12 | use Wikimedia\Parsoid\NodeData\TempData; |
13 | use Wikimedia\Parsoid\NodeData\TemplateInfo; |
14 | use Wikimedia\Parsoid\Utils\DOMCompat; |
15 | use Wikimedia\Parsoid\Utils\DOMDataUtils; |
16 | use Wikimedia\Parsoid\Utils\DOMUtils; |
17 | use Wikimedia\Parsoid\Utils\WTUtils; |
18 | use Wikimedia\Parsoid\Wt2Html\Frame; |
19 | |
20 | /** |
21 | * The handling of annotation ranges and transclusion ranges is somewhat different for a number of reasons.
22 | * - Annotation ranges can be (and typically are) nested: we want to handle a <tvar> range inside a <translate> |
23 | * range (whereas non-top-level transclusions are ignored). That said, this only applies to annotations of |
24 | * different types, so finding/handling top-level ranges of a given type is useful (hence extending the |
25 | * DOMRangeBuilder, still.) |
26 | * |
27 | * - Annotation ranges are not represented in the final document in the same way as transclusions. In an ideal |
28 | * world, annotations are well-nested and the corresponding range is not extended; in this case, the annotation |
29 | * range is only delimited by a pair of <meta> tags (that can then be displayed by VE, or ignored by |
30 | * read-views). The annotated content stays editable; whereas editing of templated content is always prevented. |
31 | * |
32 | * - Relatedly: annotation meta tags are NOT removed from the output (whereas transclusion meta tags are an |
33 | * intermediary state). This has an impact on fostering. It is safe to bypass the fostering of meta tags in the |
34 | * template case, because the meta tags will disappear anyway, and their presence in a fostering position only |
35 | * marks the whole table as template content. Annotation tags do not benefit from the same leeway: they will need |
36 | * to be moved in the right place (and, for end tags, "the right place" means the end of the table, not the start |
37 | * of the table - which we can handle more consistently if the meta tag ends up in the FosterBox). Hence, |
38 | * there is little reason to not use the general fostering pass for annotation meta tags as well (except for |
39 | * the consistency with transclusion meta tags). |
40 | * |
41 | * The assumptions here are consequently as follows: |
42 | * - annotation <meta> tags are not in a fosterable position (they have been moved out of it in the |
43 | * TreeBuilderStage) |
44 | * - during the MarkFosteredContent pass, end annotation meta tags are moved from the foster box to after the |
45 | * table. |
46 | * This should guarantee that no range is reversed (so that's a case we do not have to worry about). |
47 | */ |
48 | class AnnotationDOMRangeBuilder extends DOMRangeBuilder { |
49 | /** @var MigrateTrailingNLs */ |
50 | private $migrateTrailingNls; |
51 | |
/**
 * Sets up a range builder dedicated to annotation ranges.
 *
 * On top of the parent's initialisation, this switches the trace type to "annwrap" and
 * creates the MigrateTrailingNLs helper that moveRangeEnd() relies on.
 *
 * @param Document $document forwarded to the parent constructor
 * @param Frame $frame forwarded to the parent constructor
 */
public function __construct( Document $document, Frame $frame ) {
	parent::__construct( $document, $frame );
	$this->migrateTrailingNls = new MigrateTrailingNLs();
	$this->traceType = 'annwrap';
}
62 | |
/**
 * Moves the start/end metas of every given range onto the boundaries of that range, then
 * makes sure both metas end up under the same parent element.
 *
 * @param DOMRangeInfo[] $annRanges ranges whose metas should be repositioned
 */
private function wrapAnnotationsInTree( array $annRanges ): void {
	foreach ( $annRanges as $annRange ) {
		if ( $annRange->start !== $annRange->startElem ) {
			$this->moveRangeStart( $annRange, $annRange->start );
		}
		if ( $annRange->end !== $annRange->endElem ) {
			$this->moveRangeEnd( $annRange, $annRange->end );
		}

		// Marking a range uneditable can add a layer of nesting that the initial range
		// detection could not see (it did not exist yet). Ranges whose two ends already
		// share a parent need no further work; the others get hoisted to a common ancestor.
		$parentOfStart = DOMCompat::getParentElement( $annRange->start );
		$parentOfEnd = DOMCompat::getParentElement( $annRange->end );
		if ( $parentOfStart === $parentOfEnd ) {
			continue;
		}

		// After the moves above, start/end point at the respective metas
		$enclosing = self::findEnclosingRange( $annRange->start, $annRange->end );
		if ( $enclosing->start !== $annRange->start ) {
			$this->moveRangeStart( $annRange, $enclosing->start );
		}
		if ( $enclosing->end !== $annRange->end ) {
			$this->moveRangeEnd( $annRange, $enclosing->end );
		}
	}
}
89 | |
/**
 * Makes the DOM range between $range->startElem and $range->endElem uneditable by wrapping
 * it into a <div> (for block ranges) or <span> (for inline ranges) with the mw:ExtendedAnnRange
 * type.
 *
 * The wrapper receives a fresh data-parsoid flagged as auto-inserted on both ends, with a dsr
 * going from the start of the start meta's dsr to the end of the end meta's dsr. If the end
 * meta is not found among the following siblings of the start meta, a warning is logged and
 * every remaining sibling is wrapped.
 *
 * @param DOMRangeInfo $range
 */
private function makeUneditable( DOMRangeInfo $range ): void {
	$startMeta = $range->startElem;
	$endMeta = $range->endElem;

	$actualRangeStart = DOMDataUtils::getDataParsoid( $startMeta )->dsr->start;
	$actualRangeEnd = DOMDataUtils::getDataParsoid( $endMeta )->dsr->end;

	// Template continuity has to be assessed while the metas are still at their original
	// position: once they are moved into the wrapper they become its first and last child,
	// so they have no element siblings left and the check could never succeed.
	// FIXME: What about if the endMeta has an about id? Even though
	// annotations don't come from template, template ranges can subsume
	// them by adding strings to their "parts".
	$about = DOMCompat::getAttribute( $startMeta, "about" );
	$previousElt = DOMCompat::getPreviousElementSibling( $startMeta );
	$nextElt = DOMCompat::getNextElementSibling( $endMeta );
	$continuity = (
		( $previousElt && $previousElt->hasAttribute( "about" ) ) ||
		( $nextElt && $nextElt->hasAttribute( "about" ) )
	);

	// The range is inline unless one of the siblings from the start meta up to the end
	// meta carries a block tag.
	$inline = true;
	$node = $startMeta;
	while ( true ) {
		if ( $node === null ) {
			// Start and end aren't siblings, we'll log an error below
			break;
		}
		if ( DOMUtils::hasBlockTag( $node ) ) {
			$inline = false;
			break;
		}
		if ( $node === $endMeta ) {
			break;
		}
		$node = $node->nextSibling;
	}

	$wrap = $startMeta->ownerDocument->createElement( $inline ? 'span' : 'div' );
	$wrap->setAttribute( "typeof", "mw:ExtendedAnnRange" );
	$startMeta->parentNode->insertBefore( $wrap, $startMeta );

	// Move the start meta, everything after it, and the end meta into the wrapper.
	$node = $startMeta;
	while ( true ) {
		if ( $node === null ) {
			$this->env->log(
				'warn',
				"End of annotation range [$actualRangeStart, $actualRangeEnd] not found. " .
				"Document marked uneditable until its end."
			);
			break;
		}
		// Remember the sibling before appendChild() detaches $node from its parent
		$next = $node->nextSibling;
		$wrap->appendChild( $node );
		if ( $node === $endMeta ) {
			break;
		}
		$node = $next;
	}

	// Ensure template continuity is not broken
	if ( $about && $continuity ) {
		$wrap->setAttribute( "about", $about );
	}

	// FIXME: If we're adding an about id, we need to fixup the dsr
	// on the template to include any range we may be adding.
	$dp = new DataParsoid();
	$dp->autoInsertedStart = true;
	$dp->autoInsertedEnd = true;
	$dp->dsr = new DomSourceRange( $actualRangeStart, $actualRangeEnd, 0, 0 );
	DOMDataUtils::setDataParsoid( $wrap, $dp );
}
165 | |
/**
 * Relocates the start meta of a range in front of the designated node and records it as
 * the new start of the range.
 *
 * @param DOMRangeInfo $range the range to modify
 * @param Node $node the new start of the range; the actual insertion point is the node
 *   returned by getStartConsideringFosteredContent() for it
 */
private function moveRangeStart( DOMRangeInfo $range, Node $node ): void {
	$meta = $range->startElem;
	$metaDp = DOMDataUtils::getDataParsoid( $meta );

	if ( $node instanceof Element ) {
		$metaLeadsParagraph = DOMCompat::nodeName( $node ) === "p" && $node->firstChild === $meta;
		if ( !$metaLeadsParagraph ) {
			$metaDp->wasMoved = true;
		} else {
			// A meta sitting first in a "p" was mistakenly pulled into that paragraph, so the
			// computed paragraph dsr covers it, which may duplicate the tag on roundtrip.
			// Make the paragraph dsr begin where the meta ends instead; the meta is not
			// flagged as moved in this situation.
			$paragraphDp = DOMDataUtils::getDataParsoid( $node );
			$paragraphDp->dsr->start = $metaDp->dsr->end;
		}
	}

	$insertionPoint = $this->getStartConsideringFosteredContent( $node );
	$insertionPoint->parentNode->insertBefore( $meta, $insertionPoint );

	if ( $insertionPoint instanceof Element ) {
		// Ensure template continuity is not broken
		$about = DOMCompat::getAttribute( $insertionPoint, "about" );
		if ( $about !== null ) {
			$meta->setAttribute( "about", $about );
		}
	}

	$range->start = $meta;
}
198 | |
/**
 * Moves the end of the range to the designated node.
 *
 * When $node is an Element, the end meta is re-inserted right after it and inherits its
 * "about" attribute, if any. When $node is not an Element, the meta stays where it is and
 * only $range->end is updated.
 *
 * @param DOMRangeInfo $range the range to modify
 * @param Node $node the new end of the range
 */
private function moveRangeEnd( DOMRangeInfo $range, Node $node ): void {
	$endMeta = $range->endElem;
	$endDataParsoid = DOMDataUtils::getDataParsoid( $endMeta );

	if ( $node instanceof Element ) {
		// Has to be captured before the meta is migrated out of $node
		$endMetaWasLastChild = $node->lastChild === $endMeta;

		// Migrate $endMeta and ensure template continuity is not broken
		$node->parentNode->insertBefore( $endMeta, $node->nextSibling );
		$about = DOMCompat::getAttribute( $node, "about" );
		if ( $about !== null ) {
			$endMeta->setAttribute( "about", $about );
		}

		if ( ( DOMCompat::nodeName( $node ) === "p" ) && $endMetaWasLastChild ) {
			// If the last child of "p" is the meta, and it gets moved, then it got mistakenly
			// pulled inside the paragraph, and the paragraph dsr that gets computed includes
			// it - which may lead to the tag getting duplicated on roundtrip. Hence, we
			// adjust the dsr of the paragraph in that case. We also don't consider the meta
			// tag to have been moved in that case.
			$pDataParsoid = DOMDataUtils::getDataParsoid( $node );
			$pDataParsoid->dsr->end = $endDataParsoid->dsr->start;

			// Migrating trailing newlines may shorten the paragraph's text; shrink the
			// paragraph dsr by the same amount.
			$prevLength = strlen( $node->textContent ?? '' );
			$this->migrateTrailingNls->doMigrateTrailingNLs( $node, $this->env );
			$newLength = strlen( $node->textContent ?? '' );
			if ( $prevLength !== $newLength ) {
				$pDataParsoid->dsr->end -= ( $prevLength - $newLength );
			}
		} else {
			$endDataParsoid->wasMoved = true;
			DOMDataUtils::setDataParsoid( $endMeta, $endDataParsoid );
		}
	}
	$range->end = $endMeta;
}
239 | |
/**
 * Tells whether a range counts as extended: either it was flagged as extended by an overlap
 * merge, or at least one of its two metas carries the wasMoved marker in its data-parsoid.
 *
 * @param DOMRangeInfo $range
 * @return bool
 */
private function isExtended( DOMRangeInfo $range ): bool {
	if ( !$range->extendedByOverlapMerge ) {
		$startDp = DOMDataUtils::getDataParsoid( $range->startElem );
		$endDp = DOMDataUtils::getDataParsoid( $range->endElem );

		$startMoved = $startDp->wasMoved ?? false;
		return $startMoved || ( $endDp->wasMoved ?? false );
	}

	return true;
}
256 | |
/**
 * Fills in the data-mw of both metas of a range: the start meta records whether the range
 * was extended, both metas get their tsr stored as wtOffsets, and the rangeId is dropped
 * from the end meta.
 *
 * @param DOMRangeInfo $range range whose start and end elements get updated
 * @param bool $isExtended whether the range got extended
 */
private function setMetaDataMwForRange( DOMRangeInfo $range, bool $isExtended ): void {
	$startMeta = $range->startElem;
	$endMeta = $range->endElem;

	$startMw = DOMDataUtils::getDataMw( $startMeta );
	$endMw = DOMDataUtils::getDataMw( $endMeta );

	$startMw->extendedRange = $isExtended;

	$startTsr = DOMDataUtils::getDataParsoid( $startMeta )->tsr;
	$startMw->wtOffsets = $startTsr;

	$endTsr = DOMDataUtils::getDataParsoid( $endMeta )->tsr;
	$endMw->wtOffsets = $endTsr;

	unset( $endMw->rangeId );
}
271 | |
/**
 * Delegates to WTUtils::matchAnnotationMeta(): this builder only cares about annotation
 * metas, so the result of that check is returned unchanged.
 *
 * @param Element $elem the element to check
 * @return string|null
 */
protected function matchMetaType( Element $elem ): ?string {
	$annotationMetaType = WTUtils::matchAnnotationMeta( $elem );
	return $annotationMetaType;
}
282 | |
/**
 * @inheritDoc
 *
 * Intentionally empty in this class.
 */
protected function verifyTplInfoExpectation( ?TemplateInfo $templateInfo, TempData $tmp ): void {
	// Nothing to verify: annotations are not templates.
}
287 | |
/**
 * Looks up the range ID of an annotation node, i.e. the "rangeId" property stored in its
 * "data-mw"; an empty string is returned when that property is absent.
 *
 * @param Element $node
 * @return string
 */
protected function getRangeId( Element $node ): string {
	$dataMw = DOMDataUtils::getDataMw( $node );
	return $dataMw->rangeId ?? '';
}
297 | |
/**
 * @inheritDoc
 *
 * Intentionally empty in this class.
 */
protected function updateDSRForFirstRangeNode( Element $target, Element $source ): void {
	// Deliberately a no-op
}
304 | |
/**
 * Detects the annotation ranges below $root and processes them one annotation type at a
 * time: the top-level non-overlapping ranges of that type get their metas repositioned,
 * extended ranges are wrapped to make them uneditable, and the data-mw of the metas is set.
 *
 * If range detection throws a RangeBuilderException, a warning is logged and nothing else
 * is done.
 *
 * @param Node $root
 */
public function execute( Node $root ): void {
	try {
		$annRanges = $this->findWrappableMetaRanges( $root );
	} catch ( RangeBuilderException $e ) {
		$this->env->log( 'warn', 'The annotation ranges could not be fully detected. ' .
			' Annotation processing cancelled. ' );
		return;
	}

	// Bucket the ranges by the annotation type of their start meta
	$rangesByType = [];
	foreach ( $annRanges as $annRange ) {
		$type = WTUtils::extractAnnotationType( $annRange->startElem );
		$rangesByType[$type][] = $annRange;
	}

	foreach ( $rangesByType as $sameTypeRanges ) {
		// FIXME: The ranges in $sameTypeRanges may have start/end that
		// are no longer siblings because of the wrapping in makeUneditable.
		// wrapAnnotationsInTree tries to account for that by redoing
		// findEnclosingRange, but that happens after
		// findTopLevelNonOverlappingRanges, which may rely on the assumption
		// of a linear range; further analysis is needed.
		//
		// Furthermore, makeUneditable may be messing up any ranges we've
		// already processed of other types since those aren't guaranteed
		// to be non-overlapping of the current type.
		$this->nodeRanges = new SplObjectStorage();
		$topLevelRanges = $this->findTopLevelNonOverlappingRanges( $root, $sameTypeRanges );
		$this->wrapAnnotationsInTree( $topLevelRanges );

		foreach ( $topLevelRanges as $topRange ) {
			$extended = $this->isExtended( $topRange );
			if ( $extended ) {
				$this->makeUneditable( $topRange );
			}
			$this->setMetaDataMwForRange( $topRange, $extended );
		}
	}
}
344 | } |