Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
100.00% |
83 / 83 |
|
100.00% |
11 / 11 |
CRAP | |
100.00% |
1 / 1 |
HashtagCommentParser | |
100.00% |
83 / 83 |
|
100.00% |
11 / 11 |
20 | |
100.00% |
1 / 1 |
__construct | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
1 | |||
preprocess | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
1 | |||
preprocessUnsafe | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
finalize | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
newMarker | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
addMarker | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
extractTags | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
2 | |||
isValidTag | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
3 | |||
checkExtractedTags | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
3 | |||
replaceMarkers | |
100.00% |
23 / 23 |
|
100.00% |
1 / 1 |
4 | |||
generateTagLink | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Extension\Hashtags; |
4 | |
5 | use MediaWiki\ChangeTags\ChangeTagsStore; |
6 | use MediaWiki\CommentFormatter\CommentParser; |
7 | use MediaWiki\Linker\LinkRenderer; |
8 | use MediaWiki\Linker\LinkTarget; |
9 | use MediaWiki\Parser\Sanitizer; |
10 | use RuntimeException; |
11 | |
/**
 * This is our own version of Core's CommentParser.
 *
 * It works by wrapping around an existing CommentParser. We
 * find all the tags, replace them with markers, and at the end
 * replace the markers with links to the configured link target
 * (e.g. Special:RecentChanges) filtered by that tag.
 *
 * This extends core's CommentParser in order to pass type checks. We
 * don't actually want to inherit anything. We implement all the
 * public methods to intercept and wrap around them.
 */
class HashtagCommentParser extends CommentParser {

	// Alternatively, we could just make it be '#'.
	public const HASHTAG_PREFIX = 'hashtag-';
	/** Number of zero-padded digits in a marker id. Must agree with the {7} in the regexes below. */
	private const MARKER_DIGITS = 7;
	/**
	 * Markers start with a control character followed by both quote characters.
	 * If a marker ends up somewhere that gets HTML-escaped, the quotes are
	 * turned into entities and MARKER_REGEX no longer matches it.
	 */
	private const MARKER_PREFIX = "\x0F'\"";
	/** Matches an intact marker; capture group 1 is the marker id. */
	private const MARKER_REGEX = "/\x0F'\"([0-9]{7})/";
	/**
	 * Matches a marker whose quote characters may have been replaced by their
	 * named or decimal HTML entities; capture group 1 is the marker id.
	 */
	private const MARKER_REGEX_ESCAPED = "/\x0F(?:'|&apos;|&#0?39;)(?:\"|&quot;|&#0?34;)([0-9]{7})/";

	private CommentParser $commentParser;
	private LinkRenderer $linkRenderer;
	private ChangeTagsStore $changeTagsStore;
	private bool $requireActivation;
	private array $invalidList;
	private LinkTarget $targetOfTagLinks;
	private TagCollector $tagCollector;

	/** @var array Map of marker ids -> tag names */
	private $markerMap = [];
	/** @var array Map of marker ids -> true for markers that survived preprocessing and should become links */
	private $markersToReplace = [];
	/** @var int How many markers so far */
	private $markerCount = 0;

	/**
	 * @param CommentParser $commentParser The parser being wrapped
	 * @param LinkRenderer $linkRenderer Used to build the hashtag links
	 * @param ChangeTagsStore $changeTagsStore Consulted when $requireActivation is true
	 * @param bool $requireActivation If true, only explicitly defined change tags count as hashtags
	 * @param array $invalidList Map of tag name -> true for tags that must never be treated as hashtags
	 * @param LinkTarget $targetOfTagLinks Page the hashtag links point at
	 * @param TagCollector $tagCollector Receives every tag found in a parsed comment
	 */
	public function __construct(
		CommentParser $commentParser,
		LinkRenderer $linkRenderer,
		ChangeTagsStore $changeTagsStore,
		bool $requireActivation,
		array $invalidList,
		LinkTarget $targetOfTagLinks,
		TagCollector $tagCollector
	) {
		// CommentParser is technically marked @internal... but meh.
		$this->commentParser = $commentParser;
		$this->linkRenderer = $linkRenderer;
		$this->requireActivation = $requireActivation;
		$this->changeTagsStore = $changeTagsStore;
		$this->invalidList = $invalidList;
		$this->targetOfTagLinks = $targetOfTagLinks;
		$this->tagCollector = $tagCollector;
		// Intentionally do not call parent::__construct
	}

	/**
	 * Convert a comment to HTML, but replace links with markers which are
	 * resolved later.
	 *
	 * The comment is HTML-escaped here, hashtags are swapped for markers, and
	 * the result is handed to the wrapped parser's preprocessUnsafe().
	 *
	 * @param string $comment
	 * @param LinkTarget|null $selfLinkTarget
	 * @param bool $samePage
	 * @param string|false|null $wikiId
	 * @param bool $enableSectionLinks
	 * @return string
	 */
	public function preprocess( string $comment, ?LinkTarget $selfLinkTarget = null,
		$samePage = false, $wikiId = false, $enableSectionLinks = true
	) {
		$this->tagCollector->startParse( $this );
		$comment = Sanitizer::escapeHtmlAllowEntities( $comment );
		$comment = $this->extractTags( $comment );
		// We escape ourselves before processing tags, so call unsafe variant.
		$res = $this->commentParser->preprocessUnsafe(
			$comment, $selfLinkTarget, $samePage, $wikiId, $enableSectionLinks
		);
		$this->checkExtractedTags( $res );
		return $res;
	}

	/**
	 * Convert a comment in pseudo-HTML format to HTML, without escaping HTML.
	 *
	 * Unlike preprocess(), this neither escapes the comment nor calls
	 * TagCollector::startParse().
	 *
	 * @param string $comment
	 * @param LinkTarget|null $selfLinkTarget
	 * @param bool $samePage
	 * @param string|false|null $wikiId
	 * @param bool $enableSectionLinks
	 * @return string
	 */
	public function preprocessUnsafe( $comment, ?LinkTarget $selfLinkTarget = null,
		$samePage = false, $wikiId = false, $enableSectionLinks = true
	) {
		$comment = $this->extractTags( $comment );
		$res = $this->commentParser->preprocessUnsafe(
			$comment, $selfLinkTarget, $samePage, $wikiId, $enableSectionLinks
		);
		$this->checkExtractedTags( $res );
		return $res;
	}

	/**
	 * Execute pending batch queries and replace markers in the specified
	 * string(s) with actual links.
	 *
	 * The wrapped parser finalizes first; our own markers are resolved on
	 * its output.
	 *
	 * @param string|string[] $comments
	 * @return string|string[]
	 */
	public function finalize( $comments ) {
		$finalized = $this->commentParser->finalize( $comments );
		return $this->replaceMarkers( $finalized );
	}

	/**
	 * Get a new marker to insert into comment as a placeholder
	 *
	 * @return string MARKER_PREFIX followed by a zero-padded counter
	 * @throws RuntimeException If the counter no longer fits in MARKER_DIGITS digits
	 */
	private function newMarker(): string {
		$digits = sprintf( '%0' . self::MARKER_DIGITS . 'd', $this->markerCount++ );
		// sprintf pads but never truncates, so an overlong id means the counter overflowed
		// and the marker regexes would no longer match it in full.
		if ( strlen( $digits ) > self::MARKER_DIGITS ) {
			throw new RuntimeException( "Too many markers" );
		}
		return self::MARKER_PREFIX . $digits;
	}

	/**
	 * Get a marker for a specific tag
	 *
	 * @param string $tag Tag name
	 * @return string Marker for that tag
	 */
	private function addMarker( string $tag ): string {
		$marker = $this->newMarker();
		// The map is keyed by the numeric part only, which is what the regexes capture.
		$this->markerMap[substr( $marker, strlen( self::MARKER_PREFIX ) )] = $tag;
		return $marker;
	}

	/**
	 * Find all the tags and replace them with markers to be replaced later
	 *
	 * @param string $comment Comment text before preprocessing
	 * @return string Comment after markers inserted
	 */
	private function extractTags( $comment ): string {
		$replaced = preg_replace_callback(
			// It is a bit unclear how to define a hashtag in an i18n context
			// I am going with a unicode letter followed by stuff that is
			// letter, number, connecting punctuation, dash punctuation,
			// mark or modifier symbol.
			// I am unsure if all symbols should be included or if marks should
			// be allowed as first letter.
			// We allow a formatting character to come before # for bidi control chars
			'/(\p{Zs}|^|\p{Zs}\p{Cf})#(\p{L}[\p{L}\p{N}\p{M}\p{Pc}\p{Pd}\p{Sk}]*)/u',
			function ( $m ) {
				$prefix = $m[1];
				$tag = $m[2];
				if ( $this->isValidTag( $tag ) ) {
					return $prefix . $this->addMarker( $tag );
				}
				// Not a hashtag we recognise: put the text back exactly as matched.
				return $prefix . '#' . $tag;
			},
			$comment
		);
		// preg_replace_callback() returns null on failure (for example when the
		// subject is not valid UTF-8 and the pattern uses /u). Returning null here
		// would be a TypeError, so fall back to the comment without any markers.
		return $replaced ?? (string)$comment;
	}

	/**
	 * Decide whether a candidate tag name should be treated as a hashtag
	 *
	 * @param string $tag Tag name without the leading '#'
	 * @return bool
	 */
	private function isValidTag( string $tag ): bool {
		if ( ( $this->invalidList[$tag] ?? false ) === true ) {
			return false;
		}
		if ( $this->requireActivation ) {
			// This does not include software activated tags, only user activated.
			// No hashtags should meet that criteria in this case, but unclear if we
			// should still check.
			$tags = $this->changeTagsStore->listExplicitlyDefinedTags();
			return in_array( self::HASHTAG_PREFIX . $tag, $tags, true );
		}
		return true;
	}

	/**
	 * Detect which markers were eaten by links.
	 *
	 * If a marker disappears after preprocessing, that means it is
	 * probably in the body of a link tag. Make a list as we don't
	 * want to make a double link. (This is purely for UI and does
	 * not matter from a security perspective).
	 *
	 * For titles that are isAlwaysKnown(), the replacement happens too
	 * fast and we end up with a double nested link.
	 * e.g. [[Special:RecentChanges|foo #bar]] has unfortunate (but still secure) output.
	 *
	 * Every surviving marker is also reported to the TagCollector.
	 *
	 * @param string $comment Comment text after preprocessing
	 */
	private function checkExtractedTags( string $comment ): void {
		if ( !preg_match_all( self::MARKER_REGEX, $comment, $m ) ) {
			return;
		}
		foreach ( $m[1] as $marker ) {
			if ( !isset( $this->markerMap[$marker] ) ) {
				// Marker-shaped text that we never issued (it can be present in
				// input that was not escaped). There is no tag to report;
				// replaceMarkers() warns about it and leaves it untouched.
				continue;
			}
			$this->tagCollector->submitTag(
				$this,
				self::HASHTAG_PREFIX . $this->markerMap[$marker]
			);
			$this->markersToReplace[$marker] = true;
		}
	}

	/**
	 * Replace our markers to give the final result text
	 *
	 * Intact markers are handled first, then markers whose quote characters
	 * were turned into HTML entities.
	 *
	 * @param string|string[] $comment Final comment text, after finalize
	 * @return string|string[]
	 */
	private function replaceMarkers( $comment ) {
		$comment = preg_replace_callback(
			self::MARKER_REGEX,
			function ( $marker ) {
				if ( !isset( $this->markerMap[$marker[1]] ) ) {
					// This should probably not happen.
					wfWarn( "Marker '$marker[1]' is missing in HashtagCommentParser" );
					return $marker[0];
				}
				if ( ( $this->markersToReplace[$marker[1]] ?? false ) === true ) {
					return $this->generateTagLink( $this->markerMap[$marker[1]] );
				} else {
					// Inside the text of a link tag. Do not make a second link
					return htmlspecialchars( '#' . $this->markerMap[$marker[1]] );
				}
			},
			$comment
		);
		$comment = preg_replace_callback(
			self::MARKER_REGEX_ESCAPED,
			function ( $marker ) {
				if ( !isset( $this->markerMap[$marker[1]] ) ) {
					// This should probably not happen.
					wfWarn( "Marker '$marker[1]' is missing in HashtagCommentParser" );
					return $marker[0];
				}
				// Our marker got escaped, replace it with just the plaintext tag
				// This likely means the tag is in an attribute. For security reasons
				// it is important we do not insert a link tag.
				// It is not actually clear if this code is ever actually reachable
				return htmlspecialchars( '#' . $this->markerMap[$marker[1]] );
			},
			$comment
		);
		return $comment;
	}

	/**
	 * Make a link for the hashtag
	 *
	 * @param string $tag The name of the tag
	 * @return string HTML link to the configured target, filtered by that tag
	 */
	private function generateTagLink( $tag ) {
		// This always links to the configured target (RC). An argument could be
		// made that instead we should filter the current page
		// (e.g. History page or log list) instead of RC.
		return $this->linkRenderer->makeLink(
			$this->targetOfTagLinks,
			'#' . $tag,
			[ 'class' => 'mw-hashtag' ],
			[ 'tagfilter' => self::HASHTAG_PREFIX . $tag ]
		);
	}
}