Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
69.55% |
153 / 220 |
|
81.82% |
9 / 11 |
CRAP | |
0.00% |
0 / 1 |
LinkHolderArray | |
69.55% |
153 / 220 |
|
81.82% |
9 / 11 |
170.58 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
__destruct | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
merge | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
3 | |||
isBig | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
clear | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
makeHolder | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
2 | |||
replace | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
replaceInternal | |
96.10% |
74 / 77 |
|
0.00% |
0 / 1 |
18 | |||
replaceInterwiki | |
100.00% |
15 / 15 |
|
100.00% |
1 / 1 |
3 | |||
doVariants | |
23.81% |
20 / 84 |
|
0.00% |
0 / 1 |
374.75 | |||
replaceText | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
2 |
1 | <?php |
2 | /** |
3 | * Holder of replacement pairs for wiki links |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License along |
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
18 | * http://www.gnu.org/copyleft/gpl.html |
19 | * |
20 | * @file |
21 | * @ingroup Parser |
22 | */ |
23 | |
24 | use MediaWiki\Cache\LinkCache; |
25 | use MediaWiki\HookContainer\HookContainer; |
26 | use MediaWiki\HookContainer\HookRunner; |
27 | use MediaWiki\Linker\Linker; |
28 | use MediaWiki\MainConfigNames; |
29 | use MediaWiki\MediaWikiServices; |
30 | use MediaWiki\Parser\Parser; |
31 | use MediaWiki\Title\Title; |
32 | |
33 | /** |
34 | * @internal for using in Parser only. |
35 | * |
36 | * @ingroup Parser |
37 | */ |
38 | class LinkHolderArray { |
39 | /** @var array<int,array<int,array>> Indexed by numeric namespace and link ids, {@see Parser::nextLinkID} */ |
40 | private $internals = []; |
41 | /** @var array<int,array> Indexed by numeric link id */ |
42 | private $interwikis = []; |
43 | /** @var int */ |
44 | private $size = 0; |
45 | /** @var Parser */ |
46 | private $parent; |
47 | /** @var ILanguageConverter */ |
48 | private $languageConverter; |
49 | /** @var HookRunner */ |
50 | private $hookRunner; |
51 | |
/**
 * Create an empty holder bound to the parser that owns it.
 *
 * @param Parser $parent Parser that issues link ids and provides the output,
 *   title and link renderer used by this class
 * @param ILanguageConverter $languageConverter Converter consulted when
 *   resolving language variants of links
 * @param HookContainer $hookContainer Container that gets wrapped in a HookRunner
 */
public function __construct(
	Parser $parent,
	ILanguageConverter $languageConverter,
	HookContainer $hookContainer
) {
	$this->hookRunner = new HookRunner( $hookContainer );
	$this->languageConverter = $languageConverter;
	$this->parent = $parent;
}
64 | |
/**
 * Unset every property of this object on destruction, so that circular
 * references involving it keep less memory alive.
 */
public function __destruct() {
	// Work from a snapshot of the property names; the properties themselves
	// are removed one by one below.
	$propertyNames = array_keys( get_object_vars( $this ) );
	foreach ( $propertyNames as $name ) {
		unset( $this->$name );
	}
}
74 | |
/**
 * Merge another LinkHolderArray into this one.
 *
 * Entries are combined with the array union operator, so an entry already
 * present here under the same key wins over the one in $other.
 *
 * The size counter is increased by the number of entries that were actually
 * added, for internal and interwiki links alike. This keeps it consistent
 * with makeHolder(), which counts both kinds of link.
 *
 * @param LinkHolderArray $other
 */
public function merge( $other ) {
	foreach ( $other->internals as $ns => $entries ) {
		$existing = $this->internals[$ns] ?? [];
		$merged = $existing + $entries;
		$this->internals[$ns] = $merged;
		// Count only new keys; the union above drops duplicates.
		$this->size += count( $merged ) - count( $existing );
	}

	$interwikisBefore = count( $this->interwikis );
	$this->interwikis += $other->interwikis;
	$this->size += count( $this->interwikis ) - $interwikisBefore;
}
90 | |
/**
 * Tell whether this object holds more link holders than the configured
 * batch size (MainConfigNames::LinkHolderBatchSize).
 *
 * @return bool True when the number of held links exceeds the limit
 */
public function isBig() {
	$config = MediaWikiServices::getInstance()->getMainConfig();
	$batchLimit = $config->get( MainConfigNames::LinkHolderBatchSize );

	return $this->size > $batchLimit;
}
100 | |
/**
 * Forget all stored link holders and reset the size counter.
 *
 * Call this only once no text still contains placeholders issued by this
 * object, because they can no longer be resolved afterwards.
 */
public function clear() {
	$this->size = 0;
	$this->interwikis = [];
	$this->internals = [];
}
110 | |
/**
 * Register a link and return a placeholder comment standing in for it.
 *
 * The placeholder is turned into a real link later by replace(). Deferring
 * the rendering keeps interwiki links from being parsed further and lets
 * the existence checks for all internal links be done in one batch.
 *
 * @param Title $nt Link target
 * @param string $text Link text
 * @param string $trail [optional] Text following the link; the part that
 *   Linker::splitTrail() assigns to the link is appended to the link text
 * @param string $prefix [optional] Text prepended to the link text
 * @return string Placeholder followed by the remaining trail
 */
public function makeHolder( Title $nt, $text = '', $trail = '', $prefix = '' ) {
	// Split the trail into the part shown inside the link and the remainder.
	[ $inside, $trail ] = Linker::splitTrail( $trail );

	$key = $this->parent->nextLinkID();
	$holder = [
		'title' => $nt,
		'text' => $prefix . $text . $inside,
		'pdbk' => $nt->getPrefixedDBkey(),
	];
	$this->size++;

	if ( !$nt->isExternal() ) {
		// Internal links are grouped by namespace, then by link id.
		$ns = $nt->getNamespace();
		$this->internals[$ns][$key] = $holder;
		return "<!--LINK'\" $ns:$key-->{$trail}";
	}

	// Interwiki links are keyed by the parser-issued link id alone, which
	// keeps separate holder objects mergeable.
	$this->interwikis[$key] = $holder;
	return "<!--IWLINK'\" $key-->{$trail}";
}
145 | |
/**
 * Resolve the link placeholders in the given buffer to actual links.
 *
 * Internal link placeholders are handled first, interwiki ones second.
 *
 * @param string &$text Buffer that is modified in place
 */
public function replace( &$text ) {
	// Step 1: <!--LINK'" ns:id--> placeholders
	$this->replaceInternal( $text );

	// Step 2: <!--IWLINK'" id--> placeholders
	$this->replaceInterwiki( $text );
}
155 | |
/**
 * Replace internal link placeholders in $text with rendered links.
 *
 * For every held link a CSS class string is determined:
 *  - always-known titles get '', other special pages get 'new';
 *  - titles known to the link cache are classified from the cache;
 *  - all remaining titles are looked up in the page table in one query.
 * The GetLinkColours hook may then adjust the classes, and, if the language
 * converter has variants, doVariants() may retarget links that were not found.
 * Links whose class is 'new' (or that have no class at all) are rendered as
 * broken links, entries flagged 'selflink' as self links, and everything else
 * as preloaded links.
 *
 * Placeholders in $text that this object has no entry for are left untouched.
 *
 * @param string &$text Buffer that is modified in place
 */
protected function replaceInternal( &$text ) {
	if ( !$this->internals ) {
		return;
	}

	$classes = [];
	$services = MediaWikiServices::getInstance();
	$linkCache = $services->getLinkCache();
	$output = $this->parent->getOutput();
	$linkRenderer = $this->parent->getLinkRenderer();

	$dbr = $services->getConnectionProvider()->getReplicaDatabase();

	# Sort by namespace
	ksort( $this->internals );

	// Maps page id => prefixed DB key for every page found to exist
	$pagemap = [];

	# Collect the titles that still need a database lookup
	$linkBatchFactory = $services->getLinkBatchFactory();
	$lb = $linkBatchFactory->newLinkBatch();
	$lb->setCaller( __METHOD__ );

	foreach ( $this->internals as $ns => $entries ) {
		foreach ( $entries as [ 'title' => $title, 'pdbk' => $pdbk ] ) {
			/** @var Title $title */
			# Check if it's a static known link, e.g. interwiki
			if ( $title->isAlwaysKnown() ) {
				$classes[$pdbk] = '';
			} elseif ( $ns === NS_SPECIAL ) {
				$classes[$pdbk] = 'new';
			} else {
				$id = $linkCache->getGoodLinkID( $pdbk );
				if ( $id ) {
					$classes[$pdbk] = $linkRenderer->getLinkClasses( $title );
					$output->addLink( $title, $id );
					$pagemap[$id] = $pdbk;
				} elseif ( $linkCache->isBadLink( $pdbk ) ) {
					$classes[$pdbk] = 'new';
				} else {
					# Not in the link cache, add it to the query
					$lb->addObj( $title );
				}
			}
		}
	}

	if ( !$lb->isEmpty() ) {
		$res = $dbr->newSelectQueryBuilder()
			->select( LinkCache::getSelectFields() )
			->from( 'page' )
			->where( [ $lb->constructSet( 'page', $dbr ) ] )
			->caller( __METHOD__ )
			->fetchResultSet();

		# Every returned row is an existing page; titles without a row
		# stay unclassified and are treated as broken further down.
		foreach ( $res as $s ) {
			$title = Title::makeTitle( $s->page_namespace, $s->page_title );
			$pdbk = $title->getPrefixedDBkey();
			$linkCache->addGoodLinkObjFromRow( $title, $s );
			$output->addLink( $title, $s->page_id );
			$classes[$pdbk] = $linkRenderer->getLinkClasses( $title );
			// add id to the extension todolist
			$pagemap[$s->page_id] = $pdbk;
		}
	}

	if ( $pagemap !== [] ) {
		// pass an array of page_ids to an extension
		$this->hookRunner->onGetLinkColours( $pagemap, $classes, $this->parent->getTitle() );
	}

	# Do a second query for different language variants of links and categories
	if ( $this->languageConverter->hasVariants() ) {
		$this->doVariants( $classes );
	}

	# Build the map from placeholder key ("ns:id") to link HTML
	$replacePairs = [];
	foreach ( $this->internals as $ns => $entries ) {
		foreach ( $entries as $index => $entry ) {
			$pdbk = $entry['pdbk'];
			$title = $entry['title'];
			$query = $entry['query'] ?? [];
			$searchkey = "$ns:$index";
			$displayTextHtml = $entry['text'];
			if ( isset( $entry['selflink'] ) ) {
				$replacePairs[$searchkey] = Linker::makeSelfLinkObj( $title, $displayTextHtml, $query );
				continue;
			}

			$displayText = $displayTextHtml === '' ? null : new HtmlArmor( $displayTextHtml );
			if ( !isset( $classes[$pdbk] ) ) {
				$classes[$pdbk] = 'new';
			}
			if ( $classes[$pdbk] === 'new' ) {
				$linkCache->addBadLinkObj( $title );
				$output->addLink( $title, 0 );
				$link = $linkRenderer->makeBrokenLink(
					$title, $displayText, [], $query
				);
			} else {
				$link = $linkRenderer->makePreloadedLink(
					$title, $displayText, $classes[$pdbk], [], $query
				);
			}

			$replacePairs[$searchkey] = $link;
		}
	}

	# Do the thing
	$text = preg_replace_callback(
		'/<!--LINK\'" (-?[\d:]+)-->/',
		static function ( array $matches ) use ( $replacePairs ) {
			// A placeholder without an entry here is kept verbatim instead of
			// raising an undefined-key warning and being replaced by nothing.
			return $replacePairs[$matches[1]] ?? $matches[0];
		},
		$text
	);
}
280 | |
/**
 * Replace interwiki link placeholders in $text with rendered links.
 *
 * Every held interwiki link is rendered with the parser's link renderer and
 * registered on the parser output. Placeholders in $text that this object
 * has no entry for are left untouched.
 *
 * @param string &$text Buffer that is modified in place
 */
protected function replaceInterwiki( &$text ) {
	if ( !$this->interwikis ) {
		return;
	}

	# Make interwiki link HTML
	$output = $this->parent->getOutput();
	$replacePairs = [];
	$linkRenderer = $this->parent->getLinkRenderer();
	foreach ( $this->interwikis as $key => [ 'title' => $title, 'text' => $linkText ] ) {
		$replacePairs[$key] = $linkRenderer->makeLink( $title, new HtmlArmor( $linkText ) );
		$output->addInterwikiLink( $title );
	}

	$text = preg_replace_callback(
		'/<!--IWLINK\'" (\d+)-->/',
		static function ( array $matches ) use ( $replacePairs ) {
			// A placeholder without an entry here is kept verbatim instead of
			// raising an undefined-key warning and being replaced by nothing.
			return $replacePairs[$matches[1]] ?? $matches[0];
		},
		$text
	);
}
307 | |
/**
 * Modify $this->internals and $classes according to language variant linking rules.
 *
 * For every held link that was not found under its exact title, the title is
 * converted to all language variants. A link whose variant equals the page
 * being parsed is flagged as a self link. All other variants are looked up in
 * one batched query; if one exists, the holder is retargeted to it.
 *
 * Categories of the parser output get the same treatment: a category that
 * does not exist but has an existing variant is replaced by that variant,
 * keeping the original order and sort keys.
 *
 * @param string[] &$classes Link classes keyed by prefixed DB key; entries
 *   for newly found variant titles are added
 */
protected function doVariants( &$classes ) {
	$linkBatchFactory = MediaWikiServices::getInstance()->getLinkBatchFactory();
	$linkBatch = $linkBatchFactory->newLinkBatch();
	$variantMap = []; // maps $pdbkey_Variant => $keys (of link holders)
	$output = $this->parent->getOutput();
	$titlesToBeConverted = '';
	$titlesAttrs = [];

	// Concatenate the titles into a single string, so that the converter only
	// has to be run once over everything instead of once per title.
	foreach ( $this->internals as $ns => $entries ) {
		if ( $ns === NS_SPECIAL ) {
			continue;
		}
		foreach ( $entries as $index => [ 'title' => $title, 'pdbk' => $pdbk ] ) {
			// we only deal with new links (in its first query)
			if ( !isset( $classes[$pdbk] ) || $classes[$pdbk] === 'new' ) {
				$titlesAttrs[] = [ $index, $title ];
				// separate titles with \0 because it never appears
				// in a valid title
				$titlesToBeConverted .= $title->getText() . "\0";
			}
		}
	}

	// Now do the conversion and explode string to text of titles
	$titlesAllVariants = $this->languageConverter->
		autoConvertToAllVariants( rtrim( $titlesToBeConverted, "\0" ) );
	foreach ( $titlesAllVariants as &$titlesVariant ) {
		$titlesVariant = explode( "\0", $titlesVariant );
	}
	// Break the reference so that later code cannot write through it by accident
	unset( $titlesVariant );

	// Then add variants of links to link batch
	$parentTitle = $this->parent->getTitle();
	foreach ( $titlesAttrs as $i => [ $index, $title ] ) {
		/** @var Title $title */
		$ns = $title->getNamespace();
		$text = $title->getText();

		foreach ( $titlesAllVariants as $textVariants ) {
			$textVariant = $textVariants[$i];
			if ( $textVariant === $text ) {
				continue;
			}

			$variantTitle = Title::makeTitle( $ns, $textVariant );

			// Self-link checking for mixed/different variant titles. At this point, we
			// already know the exact title does not exist, so the link cannot be to a
			// variant of the current title that exists as a separate page.
			if ( $variantTitle->equals( $parentTitle ) && !$title->hasFragment() ) {
				$this->internals[$ns][$index]['selflink'] = true;
				continue 2;
			}

			$linkBatch->addObj( $variantTitle );
			$variantMap[$variantTitle->getPrefixedDBkey()][] = "$ns:$index";
		}
	}

	// process categories, check if a category exists in some variant
	$categoryMap = []; // maps DB key of a category variant => [ $category, $categoryTitle ]
	foreach ( $output->getCategoryNames() as $category ) {
		$categoryTitle = Title::makeTitleSafe( NS_CATEGORY, $category );
		if ( !$categoryTitle ) {
			// Without a title there is nothing to look up, and exists()
			// is called on this object further down.
			continue;
		}
		$linkBatch->addObj( $categoryTitle );
		$variants = $this->languageConverter->autoConvertToAllVariants( $category );
		foreach ( $variants as $variant ) {
			if ( $variant !== $category ) {
				$variantTitle = Title::makeTitleSafe( NS_CATEGORY, $variant );
				if ( $variantTitle ) {
					$linkBatch->addObj( $variantTitle );
					// Key by the title's DB key, because that is what the
					// lookup on the query result below uses.
					$categoryMap[$variantTitle->getDBkey()] = [ $category, $categoryTitle ];
				}
			}
		}
	}

	if ( $linkBatch->isEmpty() ) {
		return;
	}

	// construct query
	$dbr = MediaWikiServices::getInstance()->getConnectionProvider()->getReplicaDatabase();

	$varRes = $dbr->newSelectQueryBuilder()
		->select( LinkCache::getSelectFields() )
		->from( 'page' )
		->where( [ $linkBatch->constructSet( 'page', $dbr ) ] )
		->caller( __METHOD__ )
		->fetchResultSet();

	$pagemap = [];
	$varCategories = [];
	$linkCache = MediaWikiServices::getInstance()->getLinkCache();
	$linkRenderer = $this->parent->getLinkRenderer();

	// for each found variants, figure out link holders and replace
	foreach ( $varRes as $s ) {
		$variantTitle = Title::makeTitle( $s->page_namespace, $s->page_title );
		$varPdbk = $variantTitle->getPrefixedDBkey();

		if ( !isset( $variantMap[$varPdbk] ) ) {
			continue;
		}

		$linkCache->addGoodLinkObjFromRow( $variantTitle, $s );
		$output->addLink( $variantTitle, $s->page_id );

		// loop over link holders
		foreach ( $variantMap[$varPdbk] as $key ) {
			[ $ns, $index ] = explode( ':', $key, 2 );
			$entry =& $this->internals[(int)$ns][(int)$index];
			$pdbk = $entry['pdbk'];

			if ( !isset( $classes[$pdbk] ) || $classes[$pdbk] === 'new' ) {
				// found link in some of the variants, replace the link holder data
				$entry['title'] = $variantTitle;
				$entry['pdbk'] = $varPdbk;

				// set pdbk and colour if we haven't checked this title yet.
				if ( !isset( $classes[$varPdbk] ) ) {
					$classes[$varPdbk] = $linkRenderer->getLinkClasses( $variantTitle );
					$pagemap[$s->page_id] = $varPdbk;
				}
			}
		}
		// Break the reference into $this->internals before the next row
		unset( $entry );

		// check if the object is a variant of a category
		$vardbk = $variantTitle->getDBkey();
		if ( isset( $categoryMap[$vardbk] ) ) {
			[ $oldkey, $oldtitle ] = $categoryMap[$vardbk];
			if ( !isset( $varCategories[$oldkey] ) && !$oldtitle->exists() ) {
				$varCategories[$oldkey] = $vardbk;
			}
		}
	}
	$this->hookRunner->onGetLinkColours( $pagemap, $classes, $this->parent->getTitle() );

	// rebuild the categories in original order (if there are replacements)
	if ( $varCategories !== [] ) {
		$newCats = [];
		foreach ( $output->getCategoryNames() as $cat ) {
			$sortkey = $output->getCategorySortKey( $cat );
			// make the replacement
			$newCats[$varCategories[$cat] ?? $cat] = $sortkey;
		}
		$output->setCategories( $newCats );
	}
}
462 | |
/**
 * Replace <!--LINK'" …--> and <!--IWLINK'" …--> link placeholders with plain text of links
 * (not HTML-formatted).
 *
 * A placeholder is returned unchanged when this object holds no entry for
 * it, or when an internal placeholder's key is not of the form "ns:id".
 *
 * @param string $text
 * @return string
 */
public function replaceText( $text ) {
	return preg_replace_callback(
		'/<!--(IW)?LINK\'" (-?[\d:]+)-->/',
		function ( $matches ) {
			[ $unchanged, $isInterwiki, $key ] = $matches;

			if ( $isInterwiki ) {
				return $this->interwikis[$key]['text'] ?? $unchanged;
			}

			// The pattern also accepts keys without a colon. Those cannot
			// name an internal link, so do not try to split them.
			$parts = explode( ':', $key, 2 );
			if ( count( $parts ) !== 2 ) {
				return $unchanged;
			}
			[ $ns, $index ] = $parts;

			return $this->internals[(int)$ns][(int)$index]['text'] ?? $unchanged;
		},
		$text
	);
}
486 | } |