Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 36 |
|
0.00% |
0 / 13 |
CRAP | |
0.00% |
0 / 1 |
StripState | |
0.00% |
0 / 36 |
|
0.00% |
0 / 13 |
210 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
__clone | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
newKey | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
isEmpty | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
addWtItem | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
containsStripMarker | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
startsWithStripMarker | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
endsWithStripMarker | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
addWtItemKey | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
2 | |||
splitWt | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
new | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
addAllFrom | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
merge | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Fragments; |
5 | |
6 | use Wikimedia\Assert\Assert; |
7 | use Wikimedia\Parsoid\Utils\PHPUtils; |
8 | |
/**
 * An abstraction/generalization of "strip state" from mediawiki core.
 *
 * The basic idea is that a special "strip marker" can be added to a text
 * string to represent the insertion of a fragment, here represented as a
 * PFragment (in core, represented as HTML).  This allows us to tunnel
 * rich content through interfaces which only allow strings, as long as
 * (a) we can maintain a strip state on the side, and (b) we guarantee
 * that the "strip markers" can never be forged in the string.  For
 * strip markers in wikitext and HTML this is guaranteed by using
 * a character (\x7f) which is invalid in both wikitext and HTML.
 *
 * The StripState object is not serializable because we can't easily
 * enforce the uniqueness of strip state keys on deserialization.
 * It is recommended that wikitext+strip state be serialized using
 * ::splitWt() (ie, as an array alternating between wikitext strings
 * and serialized PFragments) which both avoids the need to serialize
 * the strip state itself and also avoids exposing the internal keys
 * in the serialized representation.
 *
 * StripState should generally be considered an opaque type internal
 * to Parsoid; most external clients should use the appropriate
 * `split` methods to yield a list of Fragments rather than directly
 * interact with strip markers.
 */
class StripState {

	/**
	 * See Parser.php::MARKER_SUFFIX in core for an explanation of the
	 * special characters used in the marker.  Note that this marker is
	 * only valid in strings!  We would need to use an alternate marker for
	 * HTML/DOM since \x7f is (deliberately) not a valid HTML character.
	 */
	private const MARKER_PREFIX = "\x7f'\"`PUNIQ-";
	private const MARKER_SUFFIX = "-QINUP`\"'\x7f";

	/**
	 * The global strip state counter is guaranteed to be greater than
	 * the major counters in any created strip state.
	 */
	private static int $stripStateCounter = 0;

	/**
	 * Identifier of this strip state, taken from the global counter when
	 * the object is constructed or cloned.  It is the first component of
	 * every key this object mints.
	 */
	private int $majorCounter;

	/**
	 * The minor counter for a strip state is guaranteed to be greater than the
	 * minor counter for all items in the strip state *with the same major
	 * counter*.
	 */
	private int $minorCounter;

	/**
	 * @var array<string,PFragment> A mapping from strip state keys to
	 *  PFragments.  Keys have the form "<major>-<minor>" (see ::newKey()).
	 */
	private array $items = [];

	/**
	 * Private: instances are obtained through ::new(), ::merge() or `clone`.
	 * Claims the next value of the global counter as this object's major
	 * counter.
	 */
	private function __construct() {
		$this->majorCounter = self::$stripStateCounter++;
		$this->minorCounter = 0;
	}

	/**
	 * A clone keeps the items of the original (with their original keys)
	 * but gets a fresh major counter, so keys minted by the clone can never
	 * collide with keys minted later by the original.
	 */
	public function __clone() {
		// Ensure no two strip states have the same major counter
		$this->majorCounter = self::$stripStateCounter++;
		$this->minorCounter = 0;
	}

	/**
	 * Create a new internal key, guaranteed not to conflict with any other
	 * key.
	 *
	 * @return string A key of the form "<major>-<minor>"
	 */
	private function newKey(): string {
		$major = $this->majorCounter;
		$minor = $this->minorCounter++;
		return "$major-$minor";
	}

	/**
	 * Return true if there are no items in this strip state.
	 *
	 * @return bool
	 */
	public function isEmpty(): bool {
		return !$this->items;
	}

	/**
	 * Add the given fragment to the strip state, returning a wikitext
	 * string that can be used as a placeholder for it.
	 *
	 * @param PFragment $fragment Any fragment other than a
	 *  WikitextPFragment (enforced with Assert::invariant)
	 * @return string The strip marker: MARKER_PREFIX . key . MARKER_SUFFIX
	 */
	public function addWtItem( PFragment $fragment ): string {
		Assert::invariant(
			!( $fragment instanceof WikitextPFragment ),
			"strip state items should not be wikitext"
		);
		$key = $this->addWtItemKey( $fragment );
		return self::MARKER_PREFIX . $key . self::MARKER_SUFFIX;
	}

	/**
	 * Return true if the given wikitext string contains a strip marker,
	 * or false otherwise.
	 *
	 * Only the marker prefix is searched for; the key and suffix are not
	 * validated.
	 *
	 * @param string $s
	 * @return bool
	 */
	public static function containsStripMarker( string $s ): bool {
		return str_contains( $s, self::MARKER_PREFIX );
	}

	/**
	 * Return true if the given wikitext string starts with a strip marker,
	 * or false otherwise.
	 *
	 * Only the marker prefix is tested.
	 *
	 * @param string $s
	 * @return bool
	 */
	public static function startsWithStripMarker( string $s ): bool {
		return str_starts_with( $s, self::MARKER_PREFIX );
	}

	/**
	 * Return true if the given wikitext string ends with a strip marker,
	 * or false otherwise.
	 *
	 * Only the marker suffix is tested.
	 *
	 * @param string $s
	 * @return bool
	 */
	public static function endsWithStripMarker( string $s ): bool {
		return str_ends_with( $s, self::MARKER_SUFFIX );
	}

	/**
	 * Add the given fragment to the strip state, returning the internal
	 * strip state key used for it.
	 *
	 * @param PFragment $fragment Any fragment other than a
	 *  WikitextPFragment (enforced with Assert::invariant)
	 * @return string The freshly minted key under which the fragment is
	 *  stored
	 */
	private function addWtItemKey( PFragment $fragment ): string {
		Assert::invariant(
			!( $fragment instanceof WikitextPFragment ),
			"wikitext fragments shouldn't be buried in strip state"
		);
		$key = $this->newKey();
		$this->items[$key] = $fragment;
		return $key;
	}

	/**
	 * Split the given wikitext string at its strip markers and return an array
	 * which alternates between string items and PFragment items.
	 * The first and last items are guaranteed to be strings, and the
	 * array length is guaranteed to be odd and at least 1.
	 *
	 * Every marker in $wikitext is expected to have been minted by this
	 * strip state or by one merged into it; a marker whose key is not
	 * present here triggers PHP's "undefined array key" warning and leaves
	 * null in the corresponding slot.
	 *
	 * @param string $wikitext
	 * @return list<string|PFragment>
	 */
	public function splitWt( string $wikitext ): array {
		// Built once per process; the capture group is the internal key.
		static $regex = '/' . self::MARKER_PREFIX . "([^\x7f<>&'\"]+)" . self::MARKER_SUFFIX . '/';
		// With PREG_SPLIT_DELIM_CAPTURE the captured keys are interleaved
		// with the surrounding text, so they sit at the odd indexes.
		$pieces = preg_split( $regex, $wikitext, -1, PREG_SPLIT_DELIM_CAPTURE );
		$numPieces = count( $pieces );
		for ( $i = 1; $i < $numPieces; $i += 2 ) {
			$pieces[$i] = $this->items[$pieces[$i]];
		}
		return $pieces;
	}

	/**
	 * Create a new empty strip state.
	 *
	 * @return StripState
	 */
	public static function new(): StripState {
		return new StripState();
	}

	/**
	 * Add all mappings from the given strip states to this one.
	 *
	 * The entries are copied key by key: $items is a map from strip state
	 * key to fragment, and ::splitWt() looks fragments up by that key, so a
	 * list-style append (which would file the incoming fragments under
	 * fresh integer keys) would make their markers unresolvable.
	 *
	 * @param StripState ...$others Strip states whose items are copied;
	 *  they are not modified
	 */
	public function addAllFrom( StripState ...$others ): void {
		foreach ( $others as $other ) {
			foreach ( $other->items as $key => $fragment ) {
				$this->items[$key] = $fragment;
			}
		}
	}

	/**
	 * Create a new strip state which contains the mappings of all of the
	 * given strip states.
	 *
	 * @param StripState $first Cloned to form the result; not modified
	 * @param StripState ...$others Additional strip states whose items are
	 *  copied into the result; not modified
	 * @return StripState
	 */
	public static function merge( StripState $first, StripState ...$others ): StripState {
		$ss = clone $first;
		$ss->addAllFrom( ...$others );
		return $ss;
	}
}