Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
2.59% |
8 / 309 |
|
0.00% |
0 / 24 |
CRAP | |
0.00% |
0 / 1 |
XmlDumpWriter | |
2.59% |
8 / 309 |
|
0.00% |
0 / 24 |
5995.67 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
2 | |||
openStream | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
2 | |||
siteInfo | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
2 | |||
sitename | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
dbname | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
generator | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
homelink | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
caseSetting | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
namespaces | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
12 | |||
closeStream | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
openPage | |
0.00% |
0 / 23 |
|
0.00% |
0 / 1 |
20 | |||
closePage | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
getRevisionStore | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getBlobStore | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
invokeLenient | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
writeRevision | |
0.00% |
0 / 56 |
|
0.00% |
0 / 1 |
210 | |||
writeSlot | |
0.00% |
0 / 67 |
|
0.00% |
0 / 1 |
306 | |||
writeText | |
88.89% |
8 / 9 |
|
0.00% |
0 / 1 |
2.01 | |||
writeLogItem | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
30 | |||
writeTimestamp | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
writeContributor | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
12 | |||
writeUploads | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
30 | |||
writeUpload | |
0.00% |
0 / 31 |
|
0.00% |
0 / 1 |
42 | |||
canonicalTitle | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
12 |
1 | <?php |
2 | /** |
3 | * XmlDumpWriter |
4 | * |
5 | * Copyright © 2003, 2005, 2006 Brooke Vibber <bvibber@wikimedia.org> |
6 | * https://www.mediawiki.org/ |
7 | * |
8 | * This program is free software; you can redistribute it and/or modify |
9 | * it under the terms of the GNU General Public License as published by |
10 | * the Free Software Foundation; either version 2 of the License, or |
11 | * (at your option) any later version. |
12 | * |
13 | * This program is distributed in the hope that it will be useful, |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | * GNU General Public License for more details. |
17 | * |
18 | * You should have received a copy of the GNU General Public License along |
19 | * with this program; if not, write to the Free Software Foundation, Inc., |
20 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
21 | * http://www.gnu.org/copyleft/gpl.html |
22 | * |
23 | * @file |
24 | */ |
25 | |
26 | use MediaWiki\CommentStore\CommentStore; |
27 | use MediaWiki\Content\Content; |
28 | use MediaWiki\Content\TextContent; |
29 | use MediaWiki\Debug\MWDebug; |
30 | use MediaWiki\HookContainer\HookContainer; |
31 | use MediaWiki\HookContainer\HookRunner; |
32 | use MediaWiki\MainConfigNames; |
33 | use MediaWiki\MediaWikiServices; |
34 | use MediaWiki\Revision\RevisionAccessException; |
35 | use MediaWiki\Revision\RevisionRecord; |
36 | use MediaWiki\Revision\RevisionStore; |
37 | use MediaWiki\Revision\SlotRecord; |
38 | use MediaWiki\Revision\SuppressedDataException; |
39 | use MediaWiki\Storage\SqlBlobStore; |
40 | use MediaWiki\Title\Title; |
41 | use MediaWiki\Xml\Xml; |
42 | use Wikimedia\Assert\Assert; |
43 | use Wikimedia\IPUtils; |
44 | |
45 | /** |
46 | * @ingroup Dump |
47 | */ |
48 | class XmlDumpWriter { |
49 | |
50 | /** Output serialized revision content. */ |
51 | public const WRITE_CONTENT = 0; |
52 | |
53 | /** Only output stubs for revision content. */ |
54 | public const WRITE_STUB = 1; |
55 | |
56 | /** |
57 | * Only output stubs for revision content, indicating that the content has been |
58 | * deleted/suppressed. |
59 | */ |
60 | private const WRITE_STUB_DELETED = 2; |
61 | |
62 | /** |
63 | * @var string[] the schema versions supported for output |
64 | * @final |
65 | */ |
66 | public static $supportedSchemas = [ |
67 | XML_DUMP_SCHEMA_VERSION_10, |
68 | XML_DUMP_SCHEMA_VERSION_11 |
69 | ]; |
70 | |
71 | /** |
72 | * @var string which schema version the generated XML should comply to. |
73 | * One of the values from self::$supportedSchemas, using the XML_DUMP_SCHEMA_VERSION_XX |
74 | * constants. |
75 | */ |
76 | private $schemaVersion; |
77 | |
78 | /** |
79 | * Title of the currently processed page |
80 | * |
81 | * @var Title|null |
82 | */ |
83 | private $currentTitle = null; |
84 | |
85 | /** |
86 | * @var int Whether to output revision content or just stubs. WRITE_CONTENT or WRITE_STUB. |
87 | */ |
88 | private $contentMode; |
89 | |
90 | /** @var HookRunner */ |
91 | private $hookRunner; |
92 | |
93 | /** @var CommentStore */ |
94 | private $commentStore; |
95 | |
/**
 * Checks the requested content mode and schema version, then stores them
 * together with the hook runner and comment store used while dumping.
 *
 * @param int $contentMode WRITE_CONTENT or WRITE_STUB.
 * @param string $schemaVersion Schema version the generated XML should comply with.
 *   One of the values from self::$supportedSchemas, using the XML_DUMP_SCHEMA_VERSION_XX
 *   constants.
 * @param HookContainer|null $hookContainer When null, the hook container is
 *   taken from MediaWikiServices.
 * @param CommentStore|null $commentStore When null, the comment store is
 *   taken from MediaWikiServices.
 */
public function __construct(
	$contentMode = self::WRITE_CONTENT,
	$schemaVersion = XML_DUMP_SCHEMA_VERSION_11,
	?HookContainer $hookContainer = null,
	?CommentStore $commentStore = null
) {
	// WRITE_STUB_DELETED is deliberately not accepted here; it is only
	// selected internally by writeRevision().
	$allowedModes = [ self::WRITE_CONTENT, self::WRITE_STUB ];
	$modeIsValid = in_array( $contentMode, $allowedModes, true );
	Assert::parameter(
		$modeIsValid,
		'$contentMode',
		'must be one of the following constants: WRITE_CONTENT or WRITE_STUB.'
	);

	$schemaIsValid = in_array( $schemaVersion, self::$supportedSchemas, true );
	$schemaList = implode( ',', self::$supportedSchemas );
	Assert::parameter(
		$schemaIsValid,
		'$schemaVersion',
		'must be one of the following schema versions: ' . $schemaList
	);

	// Only touch the global service container for dependencies that were not injected.
	if ( $hookContainer === null ) {
		$hookContainer = MediaWikiServices::getInstance()->getHookContainer();
	}
	if ( $commentStore === null ) {
		$commentStore = MediaWikiServices::getInstance()->getCommentStore();
	}

	$this->contentMode = $contentMode;
	$this->schemaVersion = $schemaVersion;
	$this->hookRunner = new HookRunner( $hookContainer );
	$this->commentStore = $commentStore;
}
130 | |
/**
 * Builds the start of the XML output: the root "<mediawiki>" element
 * followed by the "<siteinfo>" section.
 *
 * No xml directive is emitted, so the result is safe to include as a
 * subelement in a larger XML stream. Namespace and XML Schema references
 * are written as attributes of the root element.
 *
 * Output will be encoded in UTF-8.
 *
 * @return string
 */
public function openStream() {
	$ver = $this->schemaVersion;

	/*
	 * When a new version of the schema is created, it needs staging on mediawiki.org.
	 * This requires a change in the operations/mediawiki-config git repo.
	 *
	 * Create a changeset like https://gerrit.wikimedia.org/r/#/c/149643/ in which
	 * you copy in the new xsd file.
	 *
	 * After it is reviewed, merged and deployed (sync-docroot), the index.html needs purging.
	 * echo "https://www.mediawiki.org/xml/index.html" | mwscript purgeList.php --wiki=aawiki
	 */
	$schemaLocation = "http://www.mediawiki.org/xml/export-$ver/ " .
		"http://www.mediawiki.org/xml/export-$ver.xsd";

	$rootAttribs = [
		'xmlns' => "http://www.mediawiki.org/xml/export-$ver/",
		'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance",
		'xsi:schemaLocation' => $schemaLocation,
		'version' => $ver,
		'xml:lang' => MediaWikiServices::getInstance()->getContentLanguage()->getHtmlCode(),
	];

	// NOTE(review): null contents presumably yields only the start tag; the
	// matching end tag is what closeStream() returns.
	$rootTag = Xml::element( 'mediawiki', $rootAttribs, null );

	return $rootTag . "\n" . $this->siteInfo();
}
164 | |
/**
 * Assembles the "<siteinfo>" section from the individual site metadata
 * elements, one per line.
 *
 * @return string
 */
private function siteInfo() {
	$parts = [];
	$parts[] = $this->sitename();
	$parts[] = $this->dbname();
	$parts[] = $this->homelink();
	$parts[] = $this->generator();
	$parts[] = $this->caseSetting();
	$parts[] = $this->namespaces();

	$body = implode( "\n ", $parts );

	return " <siteinfo>\n " . $body . "\n </siteinfo>\n";
}
180 | |
/**
 * Builds the "<sitename>" element from the Sitename main config setting.
 *
 * @return string
 */
private function sitename() {
	$config = MediaWikiServices::getInstance()->getMainConfig();
	$name = $config->get( MainConfigNames::Sitename );

	return Xml::element( 'sitename', [], $name );
}
189 | |
/**
 * Builds the "<dbname>" element from the DBname main config setting.
 *
 * @return string
 */
private function dbname() {
	$config = MediaWikiServices::getInstance()->getMainConfig();
	$name = $config->get( MainConfigNames::DBname );

	return Xml::element( 'dbname', [], $name );
}
197 | |
/**
 * Builds the "<generator>" element, naming MediaWiki and its MW_VERSION.
 *
 * @return string
 */
private function generator() {
	$label = 'MediaWiki ' . MW_VERSION;

	return Xml::element( 'generator', [], $label );
}
204 | |
/**
 * Builds the "<base>" element holding the canonical URL of the main page.
 *
 * @return string
 */
private function homelink() {
	$mainPageUrl = Title::newMainPage()->getCanonicalURL();

	return Xml::element( 'base', [], $mainPageUrl );
}
211 | |
/**
 * Builds the "<case>" element describing the wiki-wide title case rule,
 * derived from the CapitalLinks main config setting.
 *
 * @return string
 */
private function caseSetting() {
	$config = MediaWikiServices::getInstance()->getMainConfig();

	// "case-insensitive" option is reserved for future
	if ( $config->get( MainConfigNames::CapitalLinks ) ) {
		$sensitivity = 'first-letter';
	} else {
		$sensitivity = 'case-sensitive';
	}

	return Xml::element( 'case', [], $sensitivity );
}
222 | |
/**
 * Builds the "<namespaces>" section: one "<namespace>" element per
 * formatted namespace of the content language, carrying the namespace key
 * and its case rule as attributes and the namespace name as content.
 *
 * @return string
 */
private function namespaces() {
	$services = MediaWikiServices::getInstance();
	$nsInfo = $services->getNamespaceInfo();
	$formatted = $services->getContentLanguage()->getFormattedNamespaces();

	$rows = [];
	foreach ( $formatted as $ns => $title ) {
		if ( $nsInfo->isCapitalized( $ns ) ) {
			$case = 'first-letter';
		} else {
			$case = 'case-sensitive';
		}
		$attribs = [
			'key' => $ns,
			'case' => $case,
		];
		$rows[] = ' ' . Xml::element( 'namespace', $attribs, $title ) . "\n";
	}

	return "<namespaces>\n" . implode( '', $rows ) . " </namespaces>";
}
244 | |
/**
 * Returns the closing tag of the root "<mediawiki>" element.
 * Call once everything else has been dumped.
 *
 * @return string
 */
public function closeStream() {
	$closingTag = "</mediawiki>\n";

	return $closingTag;
}
254 | |
/**
 * Starts a "<page>" section for the given database row and remembers the
 * page's title as the currently processed title.
 *
 * Writes the title, namespace and page id. For rows flagged as redirects a
 * "<redirect>" element is added when a valid redirect target can be resolved.
 *
 * @param stdClass $row
 * @return string
 */
public function openPage( $row ) {
	$title = Title::newFromRow( $row );
	$this->currentTitle = $title;

	$out = " <page>\n";
	$out .= ' ' . Xml::elementClean( 'title', [], self::canonicalTitle( $title ) ) . "\n";
	$out .= ' ' . Xml::element( 'ns', [], strval( $row->page_namespace ) ) . "\n";
	$out .= ' ' . Xml::element( 'id', [], strval( $row->page_id ) ) . "\n";

	if ( $row->page_is_redirect ) {
		$services = MediaWikiServices::getInstance();
		$wikiPage = $services->getWikiPageFactory()->newFromTitle( $title );
		$store = $services->getRedirectStore();

		// A failed lookup yields null, which simply omits the element below.
		$target = Title::castFromLinkTarget(
			$this->invokeLenient(
				static function () use ( $wikiPage, $store ) {
					return $store->getRedirectTarget( $wikiPage );
				},
				'Failed to get redirect target of page ' . $wikiPage->getId()
			)
		);

		if ( $target instanceof Title && $target->isValidRedirectTarget() ) {
			$redirectAttribs = [ 'title' => self::canonicalTitle( $target ) ];
			$out .= ' ' . Xml::element( 'redirect', $redirectAttribs ) . "\n";
		}
	}

	// NOTE(review): handlers presumably receive $out by reference so they can
	// extend the page header; confirm against the hook interface.
	$this->hookRunner->onXmlDumpWriterOpenPage( $this, $out, $row, $this->currentTitle );

	return $out;
}
290 | |
/**
 * Ends the current "<page>" section and drops the current title from the
 * link cache, if a page has been opened.
 *
 * @internal
 * @return string
 */
public function closePage() {
	$title = $this->currentTitle;

	if ( $title !== null ) {
		// In rare cases, link cache has the same key for some pages which
		// might be read as part of the same batch. T220424 and T220316
		MediaWikiServices::getInstance()->getLinkCache()->clearLink( $title );
	}

	return " </page>\n";
}
306 | |
/**
 * Fetches the revision store from the global service container.
 *
 * @return RevisionStore
 */
private function getRevisionStore() {
	$services = MediaWikiServices::getInstance();

	return $services->getRevisionStore();
}
313 | |
/**
 * Fetches the blob store from the global service container.
 *
 * @return SqlBlobStore
 */
private function getBlobStore() {
	$services = MediaWikiServices::getInstance();

	// @phan-suppress-next-line PhanTypeMismatchReturnSuperType
	return $services->getBlobStore();
}
321 | |
/**
 * Invokes the given callback and turns a fixed set of failures into a null
 * result instead of letting them propagate.
 *
 * A SuppressedDataException is swallowed silently. MWException,
 * RuntimeException, InvalidArgumentException and ErrorException are
 * reported through MWDebug::warning() with $warning as the message prefix.
 * Any other exception type is not caught here.
 *
 * @param callable $callback
 * @param string $warning The warning to output in case of a storage related exception.
 *
 * @return mixed Returns the callback's return value, or null in case of a handled exception.
 * @throws Exception
 */
private function invokeLenient( $callback, $warning ) {
	$result = null;

	try {
		$result = $callback();
	} catch ( SuppressedDataException $ex ) {
		// Suppressed data: leave the result at null without a warning.
		// This must stay ahead of the broader catch below.
	} catch ( MWException | RuntimeException | InvalidArgumentException | ErrorException $ex ) {
		MWDebug::warning( $warning . ': ' . $ex->getMessage() );
	}

	return $result;
}
341 | |
/**
 * Produces a "<revision>" section from the given database row.
 *
 * The section holds the revision id, optional parent id, timestamp,
 * contributor, minor flag, comment, one entry per slot (main slot first)
 * and the revision's sha1. Parts flagged as deleted on the revision are
 * replaced by placeholder elements carrying deleted="deleted".
 *
 * @param stdClass $row
 * @param null|stdClass[] $slotRows
 *
 * @return string
 * @throws RevisionAccessException
 */
public function writeRevision( $row, $slotRows = null ) {
	$rev = $this->getRevisionStore()->newRevisionFromRowAndSlots(
		$row,
		$slotRows,
		0,
		$this->currentTitle
	);

	$out = " <revision>\n";
	$out .= " " . Xml::element( 'id', null, strval( $rev->getId() ) ) . "\n";

	if ( $rev->getParentId() ) {
		$out .= " " . Xml::element( 'parentid', null, strval( $rev->getParentId() ) ) . "\n";
	}

	$out .= $this->writeTimestamp( $rev->getTimestamp() );

	if ( !$rev->isDeleted( RevisionRecord::DELETED_USER ) ) {
		// empty values get written out as uid 0, see T224221
		$user = $rev->getUser();
		if ( $user ) {
			$userId = $user->getId();
			$userName = $user->getName();
		} else {
			$userId = 0;
			$userName = '';
		}
		$out .= $this->writeContributor( $userId, $userName );
	} else {
		$out .= " " . Xml::element( 'contributor', [ 'deleted' => 'deleted' ] ) . "\n";
	}

	if ( $rev->isMinor() ) {
		$out .= " <minor/>\n";
	}

	if ( $rev->isDeleted( RevisionRecord::DELETED_COMMENT ) ) {
		$out .= " " . Xml::element( 'comment', [ 'deleted' => 'deleted' ] ) . "\n";
	} elseif ( $rev->getComment()->text != '' ) {
		// Empty comments are left out entirely.
		$out .= " "
			. Xml::elementClean( 'comment', [], strval( $rev->getComment()->text ) )
			. "\n";
	}

	// Deleted text overrides the configured mode for every slot of this revision.
	$contentMode = $this->contentMode;
	if ( $rev->isDeleted( RevisionRecord::DELETED_TEXT ) ) {
		$contentMode = self::WRITE_STUB_DELETED;
	}

	$slots = $rev->getSlots()->getSlots();

	// use predictable order, put main slot first
	ksort( $slots );
	$out .= $this->writeSlot( $slots[SlotRecord::MAIN], $contentMode );

	foreach ( $slots as $role => $slot ) {
		if ( $role !== SlotRecord::MAIN ) {
			$out .= $this->writeSlot( $slot, $contentMode );
		}
	}

	if ( $rev->isDeleted( RevisionRecord::DELETED_TEXT ) ) {
		$out .= " <sha1/>\n";
	} else {
		$sha1 = $this->invokeLenient(
			static function () use ( $rev ) {
				return $rev->getSha1();
			},
			'failed to determine sha1 for revision ' . $rev->getId()
		);
		$out .= " " . Xml::element( 'sha1', null, strval( $sha1 ) ) . "\n";
	}

	// The serialized main content is only handed to the hook below; it is
	// not appended to $out here.
	$text = '';
	if ( $contentMode === self::WRITE_CONTENT ) {
		/** @var Content $content */
		$content = $this->invokeLenient(
			static function () use ( $rev ) {
				return $rev->getMainContentRaw();
			},
			'Failed to load main slot content of revision ' . $rev->getId()
		);

		if ( $content ) {
			$text = $content->serialize();
		}
	}
	$this->hookRunner->onXmlDumpWriterWriteRevision( $this, $out, $row, $text, $rev );

	$out .= " </revision>\n";

	return $out;
}
439 | |
/**
 * Produces the XML for a single slot of a revision.
 *
 * The main slot is written inline; other slots are wrapped in a "<content>"
 * element together with their role, and are skipped entirely for schema
 * versions below XML_DUMP_SCHEMA_VERSION_11. Depending on $contentMode the
 * "<text>" element carries the content, a deleted placeholder, or a stub.
 *
 * @param SlotRecord $slot
 * @param int $contentMode see the WRITE_XXX constants
 *
 * @return string
 */
private function writeSlot( SlotRecord $slot, $contentMode ) {
	$role = $slot->getRole();
	$isMain = $role === SlotRecord::MAIN;
	$isV11 = $this->schemaVersion >= XML_DUMP_SCHEMA_VERSION_11;

	if ( !$isMain && !$isV11 ) {
		// ignore extra slots
		return '';
	}

	$out = '';
	$indent = ' ';

	if ( !$isMain ) {
		// non-main slots are wrapped into an additional element.
		$out .= ' ' . Xml::openElement( 'content' ) . "\n";
		$indent .= ' ';
		$out .= $indent . Xml::element( 'role', null, strval( $role ) ) . "\n";
	}

	if ( $isV11 ) {
		$out .= $indent . Xml::element( 'origin', null, strval( $slot->getOrigin() ) ) . "\n";
	}

	$contentModel = $slot->getModel();
	$contentFormat = MediaWikiServices::getInstance()
		->getContentHandlerFactory()
		->getContentHandler( $contentModel )
		->getDefaultFormat();

	// XXX: The content format is only relevant when actually outputting serialized content.
	// It should probably be an attribute on the text tag.
	$out .= $indent . Xml::element( 'model', null, strval( $contentModel ) ) . "\n";
	$out .= $indent . Xml::element( 'format', null, strval( $contentFormat ) ) . "\n";

	// Shared tail of every warning message emitted for this slot.
	$slotLabel = $role . ' of revision ' . $slot->getRevision();

	$size = $this->invokeLenient(
		static function () use ( $slot ) {
			return $slot->getSize();
		},
		'failed to determine size for slot ' . $slotLabel
	);
	$textAttributes = [
		'bytes' => $size ?: '0'
	];

	if ( $isV11 ) {
		$slotSha1 = $this->invokeLenient(
			static function () use ( $slot ) {
				return $slot->getSha1();
			},
			'failed to determine sha1 for slot ' . $slotLabel
		);
		$textAttributes['sha1'] = $slotSha1 ?: '';
	}

	if ( $contentMode === self::WRITE_STUB_DELETED ) {
		// write <text> placeholder tag
		$textAttributes['deleted'] = 'deleted';
		$out .= $indent . Xml::element( 'text', $textAttributes ) . "\n";
	} elseif ( $contentMode === self::WRITE_CONTENT ) {
		$content = $this->invokeLenient(
			static function () use ( $slot ) {
				return $slot->getContent();
			},
			'failed to load content for slot ' . $slotLabel
		);

		if ( $content !== null ) {
			$out .= $this->writeText( $content, $textAttributes, $indent );
		} else {
			// Content could not be loaded: emit an empty <text> with the known attributes.
			$out .= $indent . Xml::element( 'text', $textAttributes ) . "\n";
		}
	} else {
		// write <text> stub tag
		if ( $isV11 ) {
			$textAttributes['location'] = $slot->getAddress();
		}
		$schema = null;

		if ( $isMain ) {
			// Output the numerical text ID if possible, for backwards compatibility.
			// Note that this is currently the ONLY reason we have a BlobStore here at all.
			// When removing this line, check whether the BlobStore has become unused.
			try {
				// NOTE: this will only work for addresses of the form "tt:12345" or "es:DB://cluster1/1234".
				// If we want to support other kinds of addresses in the future,
				// we will have to silently ignore failures here.
				// For now, this fails for "tt:0", which is present in the WMF production
				// database as of July 2019, due to data corruption.
				[ $schema, $textId ] = $this->getBlobStore()->splitBlobAddress( $slot->getAddress() );
			} catch ( InvalidArgumentException $ex ) {
				MWDebug::warning( 'Bad content address for slot ' . $slotLabel
					. ': ' . $ex->getMessage() );
				$textId = 0;
			}

			if ( $schema === 'tt' ) {
				$textAttributes['id'] = $textId;
			} elseif ( $schema === 'es' ) {
				$textAttributes['id'] = bin2hex( $textId );
			}
		}

		$out .= $indent . Xml::element( 'text', $textAttributes ) . "\n";
	}

	if ( !$isMain ) {
		$out .= ' ' . Xml::closeElement( 'content' ) . "\n";
	}

	return $out;
}
558 | |
/**
 * Produces a "<text>" element holding the exported form of the given content.
 *
 * The 'bytes' attribute is overwritten with the length of the exported
 * data, and xml:space="preserve" is added.
 *
 * @param Content $content
 * @param string[] $textAttributes
 * @param string $indent
 *
 * @return string
 */
private function writeText( Content $content, $textAttributes, $indent ) {
	$handler = $content->getContentHandler();
	$format = $handler->getDefaultFormat();

	// HACK: For text based models, bypass the serialization step. This allows extensions (like Flow)
	// that use incompatible combinations of serialization format and content model.
	$raw = $content instanceof TextContent
		? $content->getText()
		: $content->serialize( $format );

	$exported = $handler->exportTransform( $raw, $format );

	// make sure to use the actual size
	$textAttributes['bytes'] = strlen( $exported );
	$textAttributes['xml:space'] = 'preserve';

	$element = Xml::elementClean( 'text', $textAttributes, strval( $exported ) );

	return $indent . $element . "\n";
}
584 | |
/**
 * Produces a "<logitem>" section from the given logging table row.
 *
 * Contributor, comment and action details are each replaced by a
 * placeholder with deleted="deleted" when the matching LogPage::DELETED_*
 * bit is set in the row's log_deleted field.
 *
 * @param stdClass $row
 * @return string
 */
public function writeLogItem( $row ) {
	$deletedBits = $row->log_deleted;
	$deletedMarker = [ 'deleted' => 'deleted' ];

	$out = " <logitem>\n";
	$out .= " " . Xml::element( 'id', null, strval( $row->log_id ) ) . "\n";

	$out .= $this->writeTimestamp( $row->log_timestamp, " " );

	if ( !( $deletedBits & LogPage::DELETED_USER ) ) {
		$out .= $this->writeContributor( $row->actor_user, $row->actor_name, " " );
	} else {
		$out .= " " . Xml::element( 'contributor', $deletedMarker ) . "\n";
	}

	if ( !( $deletedBits & LogPage::DELETED_COMMENT ) ) {
		$comment = $this->commentStore->getComment( 'log_comment', $row )->text;
		// Empty comments are left out entirely.
		if ( $comment != '' ) {
			$out .= " " . Xml::elementClean( 'comment', null, strval( $comment ) ) . "\n";
		}
	} else {
		$out .= " " . Xml::element( 'comment', $deletedMarker ) . "\n";
	}

	$out .= " " . Xml::element( 'type', null, strval( $row->log_type ) ) . "\n";
	$out .= " " . Xml::element( 'action', null, strval( $row->log_action ) ) . "\n";

	if ( !( $deletedBits & LogPage::DELETED_ACTION ) ) {
		$title = Title::makeTitle( $row->log_namespace, $row->log_title );
		$out .= " " . Xml::elementClean( 'logtitle', null, self::canonicalTitle( $title ) ) . "\n";
		$out .= " " . Xml::elementClean(
			'params',
			[ 'xml:space' => 'preserve' ],
			strval( $row->log_params )
		) . "\n";
	} else {
		$out .= " " . Xml::element( 'text', $deletedMarker ) . "\n";
	}

	$out .= " </logitem>\n";

	return $out;
}
630 | |
/**
 * Produces a "<timestamp>" element line for the given timestamp, converted
 * with wfTimestamp( TS_ISO_8601, ... ).
 *
 * @param string $timestamp
 * @param string $indent Whitespace placed in front of the element.
 *   Defaults to six spaces.
 * @return string
 */
public function writeTimestamp( $timestamp, $indent = "      " ) {
	// The default indent is spelled out as six spaces so that it matches the
	// documented contract of this method.
	$ts = wfTimestamp( TS_ISO_8601, $timestamp );

	return $indent . Xml::element( 'timestamp', null, $ts ) . "\n";
}
640 | |
/**
 * Produces a "<contributor>" section.
 *
 * A "<username>"/"<id>" pair is written when $id is non-zero or when $text
 * is not a valid IP address; otherwise $text is written as an "<ip>" element.
 *
 * @param int $id
 * @param string $text
 * @param string $indent Whitespace placed in front of the section's opening
 *   and closing tags. Defaults to six spaces.
 * @return string
 */
public function writeContributor( $id, $text, $indent = "      " ) {
	// The default indent is spelled out as six spaces so that it matches the
	// documented contract of this method.
	$out = $indent . "<contributor>\n";

	if ( $id || !IPUtils::isValid( $text ) ) {
		$out .= $indent . " " . Xml::elementClean( 'username', null, strval( $text ) ) . "\n";
		$out .= $indent . " " . Xml::element( 'id', null, strval( $id ) ) . "\n";
	} else {
		$out .= $indent . " " . Xml::elementClean( 'ip', null, strval( $text ) ) . "\n";
	}

	$out .= $indent . "</contributor>\n";

	return $out;
}
658 | |
/**
 * Produces "<upload>" sections for every version of the file behind the
 * given page row: the versions from the file's history in reverse of the
 * order getHistory() returns them, followed by the file itself.
 *
 * Returns an empty string for rows outside NS_FILE or when the local repo
 * has no existing file for the title.
 *
 * Warning! This data is potentially inconsistent. :(
 *
 * @param stdClass $row
 * @param bool $dumpContents
 * @return string
 */
public function writeUploads( $row, $dumpContents = false ) {
	if ( $row->page_namespace != NS_FILE ) {
		return '';
	}

	$localRepo = MediaWikiServices::getInstance()->getRepoGroup()->getLocalRepo();
	$img = $localRepo->newFile( $row->page_title );
	if ( !$img || !$img->exists() ) {
		return '';
	}

	$out = '';
	$olderVersions = array_reverse( $img->getHistory() );
	foreach ( $olderVersions as $ver ) {
		$out .= $this->writeUpload( $ver, $dumpContents );
	}
	$out .= $this->writeUpload( $img, $dumpContents );

	return $out;
}
680 | |
/**
 * Produces an "<upload>" section for one version of a file.
 *
 * Uploader and description are read with the File::FOR_PUBLIC audience;
 * when either is unavailable (or the description is empty) a placeholder
 * element with deleted="deleted" is written instead.
 *
 * @param File $file
 * @param bool $dumpContents Whether to embed the base64-encoded file
 *   contents in a "<contents>" element.
 * @return string
 */
private function writeUpload( $file, $dumpContents = false ) {
	if ( $file->isOld() ) {
		/** @var OldLocalFile $file */
		'@phan-var OldLocalFile $file';
		$archiveName = " " .
			Xml::element( 'archivename', null, $file->getArchiveName() ) . "\n";
	} else {
		$archiveName = '';
	}

	if ( $dumpContents ) {
		$be = $file->getRepo()->getBackend();
		$rawContents = $be->getFileContents( [ 'src' => $file->getPath() ] );
		if ( !is_string( $rawContents ) ) {
			// The backend could not deliver the file. Report it instead of
			// silently encoding a non-string; the emitted element stays the
			// same as an empty file's would be.
			MWDebug::warning( 'Failed to read contents of file ' . $file->getName() );
			$rawContents = '';
		}
		# Dump file as base64
		# Uses only XML-safe characters, so does not need escaping
		# @todo Too bad this loads the contents into memory (script might swap)
		$contents = ' <contents encoding="base64">' .
			chunk_split( base64_encode( $rawContents ) ) .
			" </contents>\n";
	} else {
		$contents = '';
	}

	$uploader = $file->getUploader( File::FOR_PUBLIC );
	if ( $uploader ) {
		$uploader = $this->writeContributor( $uploader->getId(), $uploader->getName() );
	} else {
		$uploader = Xml::element( 'contributor', [ 'deleted' => 'deleted' ] ) . "\n";
	}

	$comment = $file->getDescription( File::FOR_PUBLIC );
	if ( ( $comment ?? '' ) !== '' ) {
		$comment = Xml::elementClean( 'comment', null, $comment );
	} else {
		$comment = Xml::element( 'comment', [ 'deleted' => 'deleted' ] );
	}

	return " <upload>\n" .
		$this->writeTimestamp( $file->getTimestamp() ) .
		$uploader .
		" " . $comment . "\n" .
		" " . Xml::element( 'filename', null, $file->getName() ) . "\n" .
		$archiveName .
		" " . Xml::element( 'src', null, $file->getCanonicalUrl() ) . "\n" .
		" " . Xml::element( 'size', null, (string)( $file->getSize() ?: 0 ) ) . "\n" .
		" " . Xml::element( 'sha1base36', null, $file->getSha1() ) . "\n" .
		" " . Xml::element( 'rel', null, $file->getRel() ) . "\n" .
		$contents .
		" </upload>\n";
}
732 | |
/**
 * Returns the prefixed text form of a title, using the content language's
 * canonical namespace name as the prefix.
 *
 * This skips any special-casing such as gendered user namespaces -- which
 * while useful, are not yet listed in the XML "<siteinfo>" data so are
 * unsafe in export. External titles are returned via getPrefixedText()
 * unchanged.
 *
 * @param Title $title
 * @return string
 * @since 1.18
 */
public static function canonicalTitle( Title $title ) {
	if ( $title->isExternal() ) {
		return $title->getPrefixedText();
	}

	$nsText = MediaWikiServices::getInstance()->getContentLanguage()
		->getFormattedNsText( $title->getNamespace() );

	// @todo Emit some kind of warning to the user if $title->getNamespace() !==
	// NS_MAIN and $prefix === '' (viz. pages in an unregistered namespace)

	if ( $nsText === '' ) {
		// No namespace text: the bare title text is the canonical form.
		return $title->getText();
	}

	return $nsText . ':' . $title->getText();
}
760 | } |