Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
61.02% |
191 / 313 |
|
54.17% |
13 / 24 |
CRAP | |
0.00% |
0 / 1 |
| XmlDumpWriter | |
61.22% |
191 / 312 |
|
54.17% |
13 / 24 |
463.70 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
17 / 17 |
|
100.00% |
1 / 1 |
1 | |||
| openStream | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
1 | |||
| siteInfo | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
1 | |||
| sitename | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
| dbname | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| generator | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| homelink | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| caseSetting | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
| namespaces | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
3 | |||
| closeStream | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| openPage | |
39.13% |
9 / 23 |
|
0.00% |
0 / 1 |
7.61 | |||
| closePage | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
| getRevisionStore | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| getBlobStore | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| invokeLenient | |
16.67% |
1 / 6 |
|
0.00% |
0 / 1 |
8.21 | |||
| writeRevision | |
89.29% |
50 / 56 |
|
0.00% |
0 / 1 |
14.24 | |||
| writeSlot | |
61.11% |
44 / 72 |
|
0.00% |
0 / 1 |
37.06 | |||
| writeText | |
85.71% |
6 / 7 |
|
0.00% |
0 / 1 |
2.01 | |||
| writeLogItem | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
30 | |||
| writeTimestamp | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| writeContributor | |
85.71% |
6 / 7 |
|
0.00% |
0 / 1 |
3.03 | |||
| writeUploads | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
30 | |||
| writeUpload | |
0.00% |
0 / 31 |
|
0.00% |
0 / 1 |
42 | |||
| canonicalTitle | |
71.43% |
5 / 7 |
|
0.00% |
0 / 1 |
3.21 | |||
| 1 | <?php |
| 2 | /** |
| 3 | * XmlDumpWriter |
| 4 | * |
| 5 | * Copyright © 2003, 2005, 2006 Brooke Vibber <bvibber@wikimedia.org> |
| 6 | * https://www.mediawiki.org/ |
| 7 | * |
| 8 | * @license GPL-2.0-or-later |
| 9 | * @file |
| 10 | */ |
| 11 | |
| 12 | namespace MediaWiki\Export; |
| 13 | |
| 14 | use ErrorException; |
| 15 | use InvalidArgumentException; |
| 16 | use MediaWiki\CommentStore\CommentStore; |
| 17 | use MediaWiki\Content\Content; |
| 18 | use MediaWiki\Content\ContentHandler; |
| 19 | use MediaWiki\Content\TextContent; |
| 20 | use MediaWiki\Debug\MWDebug; |
| 21 | use MediaWiki\Exception\MWException; |
| 22 | use MediaWiki\Exception\MWUnknownContentModelException; |
| 23 | use MediaWiki\FileRepo\File\File; |
| 24 | use MediaWiki\FileRepo\File\OldLocalFile; |
| 25 | use MediaWiki\HookContainer\HookContainer; |
| 26 | use MediaWiki\HookContainer\HookRunner; |
| 27 | use MediaWiki\Logging\LogPage; |
| 28 | use MediaWiki\MainConfigNames; |
| 29 | use MediaWiki\MediaWikiServices; |
| 30 | use MediaWiki\Revision\RevisionAccessException; |
| 31 | use MediaWiki\Revision\RevisionRecord; |
| 32 | use MediaWiki\Revision\RevisionStore; |
| 33 | use MediaWiki\Revision\SlotRecord; |
| 34 | use MediaWiki\Revision\SuppressedDataException; |
| 35 | use MediaWiki\Storage\SqlBlobStore; |
| 36 | use MediaWiki\Title\Title; |
| 37 | use MediaWiki\Xml\Xml; |
| 38 | use RuntimeException; |
| 39 | use Wikimedia\Assert\Assert; |
| 40 | use Wikimedia\IPUtils; |
| 41 | use Wikimedia\Timestamp\TimestampFormat as TS; |
| 42 | |
| 43 | /** |
| 44 | * @ingroup Dump |
| 45 | */ |
| 46 | class XmlDumpWriter { |
| 47 | |
| 48 | /** Output serialized revision content. */ |
| 49 | public const WRITE_CONTENT = 0; |
| 50 | |
| 51 | /** Only output stubs for revision content. */
| 52 | public const WRITE_STUB = 1; |
| 53 | |
| 54 | /** |
| 55 | * Only output stubs for revision content, indicating that the content has been
| 56 | * deleted/suppressed. |
| 57 | */ |
| 58 | private const WRITE_STUB_DELETED = 2; |
| 59 | |
| 60 | /** |
| 61 | * @var string[] the schema versions supported for output |
| 62 | * @final |
| 63 | */ |
| 64 | public static $supportedSchemas = [ |
| 65 | XML_DUMP_SCHEMA_VERSION_10, |
| 66 | XML_DUMP_SCHEMA_VERSION_11 |
| 67 | ]; |
| 68 | |
| 69 | /** |
| 70 | * @var string which schema version the generated XML should comply to. |
| 71 | * One of the values from self::$supportedSchemas, using the XML_DUMP_SCHEMA_VERSION_XX
| 72 | * constants. |
| 73 | */ |
| 74 | private $schemaVersion; |
| 75 | |
| 76 | /** |
| 77 | * Title of the currently processed page |
| 78 | * |
| 79 | * @var Title|null |
| 80 | */ |
| 81 | private $currentTitle = null; |
| 82 | |
| 83 | /** |
| 84 | * @var int Whether to output revision content or just stubs. WRITE_CONTENT or WRITE_STUB. |
| 85 | */ |
| 86 | private $contentMode; |
| 87 | |
| 88 | /** @var HookRunner */ |
| 89 | private $hookRunner; |
| 90 | |
| 91 | /** @var CommentStore */ |
| 92 | private $commentStore; |
| 93 | |
/**
 * Validates the requested output mode and schema version, then wires up the
 * hook runner and comment store, falling back to the global service container
 * for whichever of the two was not injected.
 *
 * @param int $contentMode WRITE_CONTENT or WRITE_STUB.
 * @param string $schemaVersion which schema version the generated XML should comply to.
 *   One of the values from self::$supportedSchemas, using the XML_DUMP_SCHEMA_VERSION_XX
 *   constants.
 * @param HookContainer|null $hookContainer
 * @param CommentStore|null $commentStore
 */
public function __construct(
	$contentMode = self::WRITE_CONTENT,
	$schemaVersion = XML_DUMP_SCHEMA_VERSION_11,
	?HookContainer $hookContainer = null,
	?CommentStore $commentStore = null
) {
	// WRITE_STUB_DELETED is internal only and deliberately not accepted here.
	$publicModes = [ self::WRITE_CONTENT, self::WRITE_STUB ];
	$modeIsKnown = in_array( $contentMode, $publicModes, true );
	Assert::parameter(
		$modeIsKnown,
		'$contentMode',
		'must be one of the following constants: WRITE_CONTENT or WRITE_STUB.'
	);

	$schemaIsKnown = in_array( $schemaVersion, self::$supportedSchemas, true );
	$schemaList = implode( ',', self::$supportedSchemas );
	Assert::parameter(
		$schemaIsKnown,
		'$schemaVersion',
		'must be one of the following schema versions: ' . $schemaList
	);

	$this->contentMode = $contentMode;
	$this->schemaVersion = $schemaVersion;

	if ( $hookContainer === null ) {
		$hookContainer = MediaWikiServices::getInstance()->getHookContainer();
	}
	$this->hookRunner = new HookRunner( $hookContainer );

	if ( $commentStore === null ) {
		$commentStore = MediaWikiServices::getInstance()->getCommentStore();
	}
	$this->commentStore = $commentStore;
}
| 128 | |
/**
 * Produces the opening root "<mediawiki>" tag followed by the "<siteinfo>" block.
 *
 * No XML declaration is emitted, so the result can be embedded as a
 * sub-element of a larger XML stream. The tag carries the export namespace,
 * the XML Schema instance namespace, the schema location, the schema version
 * and the content language code.
 *
 * Output will be encoded in UTF-8.
 *
 * @return string
 */
public function openStream() {
	$version = $this->schemaVersion;
	$langCode = MediaWikiServices::getInstance()->getContentLanguage()->getHtmlCode();

	/*
	 * When a new version of the schema is created, it needs staging on mediawiki.org.
	 * This requires a change in the operations/mediawiki-config git repo.
	 *
	 * Create a changeset like https://gerrit.wikimedia.org/r/#/c/149643/ in which
	 * you copy in the new xsd file.
	 *
	 * After it is reviewed, merged and deployed (sync-docroot), the index.html needs purging.
	 * echo "https://www.mediawiki.org/xml/index.html" | mwscript purgeList.php --wiki=aawiki
	 */
	$schemaLocation = "http://www.mediawiki.org/xml/export-$version/ " .
		"http://www.mediawiki.org/xml/export-$version.xsd";

	$rootAttribs = [
		'xmlns' => "http://www.mediawiki.org/xml/export-$version/",
		'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance",
		'xsi:schemaLocation' => $schemaLocation,
		'version' => $version,
		'xml:lang' => $langCode,
	];

	// Passing null as the content is what the original call does for the root tag.
	$rootTag = Xml::element( 'mediawiki', $rootAttribs, null );

	return $rootTag . "\n" . $this->siteInfo();
}
| 162 | |
/**
 * Assembles the "<siteinfo>" block from the individual site metadata elements.
 *
 * @return string
 */
private function siteInfo() {
	// The order of these entries determines the element order in the output.
	$elements = [];
	$elements[] = $this->sitename();
	$elements[] = $this->dbname();
	$elements[] = $this->homelink();
	$elements[] = $this->generator();
	$elements[] = $this->caseSetting();
	$elements[] = $this->namespaces();

	$joined = implode( "\n ", $elements );

	return " <siteinfo>\n " . $joined . "\n </siteinfo>\n";
}
| 178 | |
/**
 * Builds the "<sitename>" element from the Sitename configuration setting.
 *
 * @return string
 */
private function sitename() {
	$config = MediaWikiServices::getInstance()->getMainConfig();

	return Xml::element( 'sitename', [], $config->get( MainConfigNames::Sitename ) );
}
| 187 | |
/**
 * Builds the "<dbname>" element from the DBname configuration setting.
 *
 * @return string
 */
private function dbname() {
	$config = MediaWikiServices::getInstance()->getMainConfig();

	return Xml::element( 'dbname', [], $config->get( MainConfigNames::DBname ) );
}
| 195 | |
/**
 * Builds the "<generator>" element, naming MediaWiki and its version (MW_VERSION).
 *
 * @return string
 */
private function generator() {
	$label = 'MediaWiki ' . MW_VERSION;

	return Xml::element( 'generator', [], $label );
}
| 202 | |
/**
 * Builds the "<base>" element holding the canonical URL of the main page.
 *
 * @return string
 */
private function homelink() {
	$mainPageUrl = Title::newMainPage()->getCanonicalURL();

	return Xml::element( 'base', [], $mainPageUrl );
}
| 209 | |
/**
 * Builds the "<case>" element describing the wiki-wide title case handling,
 * derived from the CapitalLinks configuration setting.
 *
 * @return string
 */
private function caseSetting() {
	$config = MediaWikiServices::getInstance()->getMainConfig();

	// "case-insensitive" option is reserved for future
	if ( $config->get( MainConfigNames::CapitalLinks ) ) {
		$sensitivity = 'first-letter';
	} else {
		$sensitivity = 'case-sensitive';
	}

	return Xml::element( 'case', [], $sensitivity );
}
| 220 | |
/**
 * Builds the "<namespaces>" block: one "<namespace>" element per formatted
 * namespace of the content language, each carrying its key and case handling.
 *
 * @return string
 */
private function namespaces() {
	$nsInfo = MediaWikiServices::getInstance()->getNamespaceInfo();
	$formatted = MediaWikiServices::getInstance()->getContentLanguage()->getFormattedNamespaces();

	$entries = '';
	foreach ( $formatted as $nsId => $nsName ) {
		$caseMode = $nsInfo->isCapitalized( $nsId ) ? 'first-letter' : 'case-sensitive';
		$attribs = [
			'key' => $nsId,
			'case' => $caseMode,
		];
		$entries .= ' ' . Xml::element( 'namespace', $attribs, $nsName ) . "\n";
	}

	return "<namespaces>\n" . $entries . " </namespaces>";
}
| 242 | |
/**
 * Returns the closing tag of the root "<mediawiki>" element.
 * Call this once everything else has been dumped.
 *
 * @return string
 */
public function closeStream() {
	$closingTag = "</mediawiki>\n";

	return $closingTag;
}
| 252 | |
/**
 * Opens a "<page>" section for the page described by the given database row.
 *
 * The title built from the row is remembered as the current title. The
 * section contains the canonical title, the namespace and the page id; for
 * rows flagged as redirects, a "<redirect>" element is added when a valid
 * redirect target can be determined. The XmlDumpWriterOpenPage hook is run on
 * the result before it is returned.
 *
 * @param \stdClass $row
 * @return string
 */
public function openPage( $row ) {
	$title = Title::newFromRow( $row );
	$this->currentTitle = $title;

	$out = " <page>\n"
		. ' ' . Xml::elementClean( 'title', [], self::canonicalTitle( $title ) ) . "\n"
		. ' ' . Xml::element( 'ns', [], strval( $row->page_namespace ) ) . "\n"
		. ' ' . Xml::element( 'id', [], strval( $row->page_id ) ) . "\n";

	if ( $row->page_is_redirect ) {
		$services = MediaWikiServices::getInstance();
		$wikiPage = $services->getWikiPageFactory()->newFromTitle( $title );
		$redirectStore = $services->getRedirectStore();

		// A failing lookup is downgraded to a warning and treated as "no target".
		$target = $this->invokeLenient(
			static fn () => $redirectStore->getRedirectTarget( $wikiPage ),
			'Failed to get redirect target of page ' . $wikiPage->getId()
		);
		$targetTitle = Title::castFromLinkTarget( $target );

		if ( $targetTitle instanceof Title && $targetTitle->isValidRedirectTarget() ) {
			$redirectAttribs = [ 'title' => self::canonicalTitle( $targetTitle ) ];
			$out .= ' ' . Xml::element( 'redirect', $redirectAttribs ) . "\n";
		}
	}

	$this->hookRunner->onXmlDumpWriterOpenPage( $this, $out, $row, $this->currentTitle );

	return $out;
}
| 288 | |
/**
 * Closes a "<page>" section on the output stream, dropping the link cache
 * entry of the current title first when one is set.
 *
 * @internal
 * @return string
 */
public function closePage() {
	$title = $this->currentTitle;

	if ( $title !== null ) {
		// In rare cases, link cache has the same key for some pages which
		// might be read as part of the same batch. T220424 and T220316
		MediaWikiServices::getInstance()->getLinkCache()->clearLink( $title );
	}

	return " </page>\n";
}
| 304 | |
/**
 * Fetches the revision store from the global service container.
 *
 * @return RevisionStore
 */
private function getRevisionStore() {
	$services = MediaWikiServices::getInstance();

	return $services->getRevisionStore();
}
| 311 | |
/**
 * Fetches the blob store from the global service container.
 *
 * @return SqlBlobStore
 */
private function getBlobStore() {
	$services = MediaWikiServices::getInstance();

	// @phan-suppress-next-line PhanTypeMismatchReturnSuperType
	return $services->getBlobStore();
}
| 319 | |
/**
 * Runs the given callback and tolerates a fixed set of failures.
 *
 * A SuppressedDataException is swallowed silently. MWException,
 * RuntimeException, InvalidArgumentException and ErrorException are reported
 * through MWDebug::warning(), prefixed with $warning. In all of those cases
 * null is returned. Any other exception type is not caught here.
 *
 * @param callable $callback
 * @param string $warning The warning to output in case of a storage related exception.
 *
 * @return mixed The callback's return value, or null in case of a tolerated exception.
 * @throws \Exception
 */
private function invokeLenient( $callback, $warning ) {
	$result = null;

	try {
		$result = $callback();
	} catch ( SuppressedDataException ) {
		// Suppressed data: nothing to report, fall through with null.
	} catch ( MWException | RuntimeException | InvalidArgumentException | ErrorException $failure ) {
		MWDebug::warning( $warning . ': ' . $failure->getMessage() );
	}

	return $result;
}
| 339 | |
/**
 * Renders a "<revision>" section from the given revision row.
 *
 * The section holds the revision id, the parent id (when set), the timestamp,
 * the contributor, the minor flag, the edit summary, one entry per slot (main
 * slot first) and the revision sha1. Parts flagged as deleted on the revision
 * are replaced by placeholder elements carrying deleted="deleted", or by an
 * empty "<sha1/>" for deleted text. The XmlDumpWriterWriteRevision hook is run
 * before the closing tag is appended.
 *
 * @param \stdClass $row
 * @param null|\stdClass[] $slotRows
 *
 * @return string
 * @throws RevisionAccessException
 */
public function writeRevision( $row, $slotRows = null ) {
	$rev = $this->getRevisionStore()->newRevisionFromRowAndSlots(
		$row,
		$slotRows,
		0,
		$this->currentTitle
	);

	$out = " <revision>\n"
		. " " . Xml::element( 'id', null, strval( $rev->getId() ) ) . "\n";

	$parentId = $rev->getParentId();
	if ( $parentId ) {
		$out .= " " . Xml::element( 'parentid', null, strval( $parentId ) ) . "\n";
	}

	$out .= $this->writeTimestamp( $rev->getTimestamp() );

	if ( !$rev->isDeleted( RevisionRecord::DELETED_USER ) ) {
		// empty values get written out as uid 0, see T224221
		$user = $rev->getUser();
		$userId = 0;
		$userName = '';
		if ( $user ) {
			$userId = $user->getId();
			$userName = $user->getName();
		}
		$out .= $this->writeContributor( $userId, $userName );
	} else {
		$out .= " " . Xml::element( 'contributor', [ 'deleted' => 'deleted' ] ) . "\n";
	}

	if ( $rev->isMinor() ) {
		$out .= " <minor/>\n";
	}

	if ( $rev->isDeleted( RevisionRecord::DELETED_COMMENT ) ) {
		$out .= " " . Xml::element( 'comment', [ 'deleted' => 'deleted' ] ) . "\n";
	} else {
		$summary = $rev->getComment()->text;
		if ( $summary != '' ) {
			$out .= " " . Xml::elementClean( 'comment', [], strval( $summary ) ) . "\n";
		}
	}

	// Deleted text forces the internal "deleted stub" mode for every slot.
	$textIsDeleted = $rev->isDeleted( RevisionRecord::DELETED_TEXT );
	$contentMode = $textIsDeleted ? self::WRITE_STUB_DELETED : $this->contentMode;

	$slots = $rev->getSlots()->getSlots();

	// use predictable order, put main slot first
	ksort( $slots );
	$out .= $this->writeSlot( $slots[SlotRecord::MAIN], $contentMode );

	foreach ( $slots as $role => $slot ) {
		if ( $role !== SlotRecord::MAIN ) {
			$out .= $this->writeSlot( $slot, $contentMode );
		}
	}

	if ( $textIsDeleted ) {
		$out .= " <sha1/>\n";
	} else {
		$sha1 = $this->invokeLenient(
			static fn () => $rev->getSha1(),
			'failed to determine sha1 for revision ' . $rev->getId()
		);
		$out .= " " . Xml::element( 'sha1', null, strval( $sha1 ) ) . "\n";
	}

	// The serialized main content is only handed to the hook; it is not appended here.
	$text = '';
	if ( $contentMode === self::WRITE_CONTENT ) {
		/** @var Content $content */
		$content = $this->invokeLenient(
			static fn () => $rev->getMainContentRaw(),
			'Failed to load main slot content of revision ' . $rev->getId()
		);

		if ( $content ) {
			$text = $content->serialize();
		}
	}
	$this->hookRunner->onXmlDumpWriterWriteRevision( $this, $out, $row, $text, $rev );

	$out .= " </revision>\n";

	return $out;
}
| 437 | |
/**
 * Renders the XML for a single slot of a revision.
 *
 * The main slot's elements are written directly; any other slot is wrapped in
 * a "<content>" element together with a "<role>" element. When the configured
 * schema version is below XML_DUMP_SCHEMA_VERSION_11, non-main slots are
 * skipped and an empty string is returned for them.
 *
 * Depending on $contentMode the "<text>" element either carries the content
 * (WRITE_CONTENT), is a placeholder flagged deleted="deleted"
 * (WRITE_STUB_DELETED), or is a stub pointing at the stored blob (otherwise).
 *
 * @param SlotRecord $slot
 * @param int $contentMode see the WRITE_XXX constants
 *
 * @return string
 */
private function writeSlot( SlotRecord $slot, $contentMode ) {
	$isMain = $slot->getRole() === SlotRecord::MAIN;
	$isV11 = $this->schemaVersion >= XML_DUMP_SCHEMA_VERSION_11;

	if ( !$isV11 && !$isMain ) {
		// ignore extra slots
		return '';
	}

	$out = '';
	$indent = ' ';

	if ( !$isMain ) {
		// non-main slots are wrapped into an additional element.
		$out .= ' ' . Xml::openElement( 'content' ) . "\n";
		$indent .= ' ';
		$out .= $indent . Xml::element( 'role', null, strval( $slot->getRole() ) ) . "\n";
	}

	if ( $isV11 ) {
		$out .= $indent . Xml::element( 'origin', null, strval( $slot->getOrigin() ) ) . "\n";
	}

	$contentModel = $slot->getModel();
	$contentHandlerFactory = MediaWikiServices::getInstance()->getContentHandlerFactory();
	$contentHandler = null;

	try {
		$contentHandler = $contentHandlerFactory->getContentHandler( $contentModel );

	} catch ( MWUnknownContentModelException ) {
		// A content model should not be removed, as this would cause old revisions
		// to fail to render. If this does happen, let dumps keep going but warn.
		// To stop these warnings, register a fallback content model like so:
		// $wgContentHandlers['Your.Removed.Handler'] = 'FallbackContentHandler'
		// Only one of the two fragments carries the separating space, so the
		// message reads "unknown content model" without a doubled blank.
		MWDebug::warning( 'Revision ' . $slot->getRevision() . ' is using an unknown'
			. ' content model, falling back to FallbackContentHandler.' );
		$contentModel = CONTENT_MODEL_UNKNOWN;
		$contentHandler = $contentHandlerFactory->getContentHandler( $contentModel );
	}
	$contentFormat = $contentHandler->getDefaultFormat();

	// XXX: The content format is only relevant when actually outputting serialized content.
	// It should probably be an attribute on the text tag.
	$out .= $indent . Xml::element( 'model', null, strval( $contentModel ) ) . "\n";
	$out .= $indent . Xml::element( 'format', null, strval( $contentFormat ) ) . "\n";

	// A size that cannot be determined (or is zero) is written as '0'.
	$textAttributes = [
		'bytes' => $this->invokeLenient(
			static function () use ( $slot ) {
				return $slot->getSize();
			},
			'failed to determine size for slot ' . $slot->getRole() . ' of revision '
				. $slot->getRevision()
		) ?: '0'
	];

	if ( $isV11 ) {
		// A sha1 that cannot be determined is written as an empty attribute.
		$textAttributes['sha1'] = $this->invokeLenient(
			static function () use ( $slot ) {
				return $slot->getSha1();
			},
			'failed to determine sha1 for slot ' . $slot->getRole() . ' of revision '
				. $slot->getRevision()
		) ?: '';
	}

	if ( $contentMode === self::WRITE_CONTENT ) {
		$content = $this->invokeLenient(
			static function () use ( $slot ) {
				return $slot->getContent();
			},
			'failed to load content for slot ' . $slot->getRole() . ' of revision '
				. $slot->getRevision()
		);

		if ( $content === null ) {
			// Content could not be loaded: emit an empty <text> element with the attributes only.
			$out .= $indent . Xml::element( 'text', $textAttributes ) . "\n";
		} else {
			$out .= $this->writeText( $content, $textAttributes, $indent, $contentHandler, $contentFormat );
		}
	} elseif ( $contentMode === self::WRITE_STUB_DELETED ) {
		// write <text> placeholder tag
		$textAttributes['deleted'] = 'deleted';
		$out .= $indent . Xml::element( 'text', $textAttributes ) . "\n";
	} else {
		// write <text> stub tag
		if ( $isV11 ) {
			$textAttributes['location'] = $slot->getAddress();
		}
		$schema = null;

		if ( $isMain ) {
			// Output the numerical text ID if possible, for backwards compatibility.
			// Note that this is currently the ONLY reason we have a BlobStore here at all.
			// When removing this line, check whether the BlobStore has become unused.
			try {
				// NOTE: this will only work for addresses of the form "tt:12345" or "es:DB://cluster1/1234".
				// If we want to support other kinds of addresses in the future,
				// we will have to silently ignore failures here.
				// For now, this fails for "tt:0", which is present in the WMF production
				// database as of July 2019, due to data corruption.
				[ $schema, $textId ] = $this->getBlobStore()->splitBlobAddress( $slot->getAddress() );
			} catch ( InvalidArgumentException $ex ) {
				MWDebug::warning( 'Bad content address for slot ' . $slot->getRole()
					. ' of revision ' . $slot->getRevision() . ': ' . $ex->getMessage() );
				$textId = 0;
			}

			// $schema stays null after a bad address, so no id attribute is written then.
			if ( $schema === 'tt' ) {
				$textAttributes['id'] = $textId;
			} elseif ( $schema === 'es' ) {
				$textAttributes['id'] = bin2hex( $textId );
			}
		}

		$out .= $indent . Xml::element( 'text', $textAttributes ) . "\n";
	}

	if ( !$isMain ) {
		$out .= ' ' . Xml::closeElement( 'content' ) . "\n";
	}

	return $out;
}
| 569 | |
/**
 * Renders a "<text>" element carrying the given content.
 *
 * The content is turned into a string, passed through the handler's export
 * transform, and written with a "bytes" attribute recomputed from the
 * transformed data plus xml:space="preserve".
 *
 * @param Content $content
 * @param string[] $textAttributes
 * @param string $indent
 * @param ContentHandler $contentHandler Content handler for the content to be written. The content model may be
 *   invalid due to a removed extension and cause an exception (T415128), so we need to gracefully fall back to
 *   CONTENT_MODEL_UNKNOWN for invalid content models. Since the caller already does that, we simply let it pass
 *   the content handler as an argument.
 * @param string $contentFormat Passed from the caller function just like the content handler.
 *
 * @return string
 */
private function writeText(
	Content $content,
	$textAttributes,
	string $indent,
	ContentHandler $contentHandler,
	string $contentFormat
) {
	// HACK: For text based models, bypass the serialization step. This allows extensions (like Flow)
	// that use incompatible combinations of serialization format and content model.
	$raw = $content instanceof TextContent
		? $content->getText()
		: $content->serialize( $contentFormat );

	$exported = $contentHandler->exportTransform( $raw, $contentFormat );

	// make sure to use the actual size
	$textAttributes['bytes'] = strlen( $exported );
	$textAttributes['xml:space'] = 'preserve';

	$element = Xml::elementClean( 'text', $textAttributes, strval( $exported ) );

	return $indent . $element . "\n";
}
| 603 | |
/**
 * Renders a "<logitem>" section from the given logging row.
 *
 * The section holds the log id, timestamp, contributor, comment, type and
 * action, followed by the log title and parameters. Parts whose bit is set in
 * log_deleted are replaced by placeholder elements carrying deleted="deleted".
 *
 * @param \stdClass $row
 * @return string
 */
public function writeLogItem( $row ) {
	$deletedElement = static fn ( string $tag ): string =>
		" " . Xml::element( $tag, [ 'deleted' => 'deleted' ] ) . "\n";

	$out = " <logitem>\n";
	$out .= " " . Xml::element( 'id', null, strval( $row->log_id ) ) . "\n";
	$out .= $this->writeTimestamp( $row->log_timestamp, " " );

	if ( $row->log_deleted & LogPage::DELETED_USER ) {
		$out .= $deletedElement( 'contributor' );
	} else {
		$out .= $this->writeContributor( $row->actor_user, $row->actor_name, " " );
	}

	if ( $row->log_deleted & LogPage::DELETED_COMMENT ) {
		$out .= $deletedElement( 'comment' );
	} else {
		$summary = $this->commentStore->getComment( 'log_comment', $row )->text;
		// Empty comments are left out entirely.
		if ( $summary != '' ) {
			$out .= " " . Xml::elementClean( 'comment', null, strval( $summary ) ) . "\n";
		}
	}

	$out .= " " . Xml::element( 'type', null, strval( $row->log_type ) ) . "\n";
	$out .= " " . Xml::element( 'action', null, strval( $row->log_action ) ) . "\n";

	if ( $row->log_deleted & LogPage::DELETED_ACTION ) {
		$out .= $deletedElement( 'text' );
	} else {
		$target = Title::makeTitle( $row->log_namespace, $row->log_title );
		$out .= " " . Xml::elementClean( 'logtitle', null, self::canonicalTitle( $target ) ) . "\n";

		$paramAttribs = [ 'xml:space' => 'preserve' ];
		$out .= " " . Xml::elementClean( 'params', $paramAttribs, strval( $row->log_params ) ) . "\n";
	}

	$out .= " </logitem>\n";

	return $out;
}
| 649 | |
/**
 * Renders a "<timestamp>" element with the given timestamp converted to
 * ISO 8601 via wfTimestamp().
 *
 * @param string $timestamp
 * @param string $indent Whitespace placed in front of the element
 * @return string
 */
public function writeTimestamp( $timestamp, $indent = " " ) {
	$iso = wfTimestamp( TS::ISO_8601, $timestamp );
	$element = Xml::element( 'timestamp', null, $iso );

	return $indent . $element . "\n";
}
| 659 | |
/**
 * Renders a "<contributor>" block.
 *
 * When $id is empty and $text is a valid IP address, only an "<ip>" element is
 * written; in every other case a "<username>" and an "<id>" element are written.
 *
 * @param int $id
 * @param string $text
 * @param string $indent Whitespace placed in front of the block's lines
 * @return string
 */
public function writeContributor( $id, $text, $indent = " " ) {
	$inner = $indent . " ";

	if ( !$id && IPUtils::isValid( $text ) ) {
		$body = $inner . Xml::elementClean( 'ip', null, strval( $text ) ) . "\n";
	} else {
		$body = $inner . Xml::elementClean( 'username', null, strval( $text ) ) . "\n"
			. $inner . Xml::element( 'id', null, strval( $id ) ) . "\n";
	}

	return $indent . "<contributor>\n" . $body . $indent . "</contributor>\n";
}
| 677 | |
/**
 * Renders "<upload>" sections for every version of the file behind the given
 * page row: archived versions first (history in reverse order), then the
 * current version. Rows outside NS_FILE, and files that do not exist in the
 * local repo, yield an empty string.
 *
 * Warning! This data is potentially inconsistent. :(
 *
 * @param \stdClass $row
 * @param bool $dumpContents
 * @return string
 */
public function writeUploads( $row, $dumpContents = false ) {
	if ( $row->page_namespace != NS_FILE ) {
		return '';
	}

	$localRepo = MediaWikiServices::getInstance()->getRepoGroup()->getLocalRepo();
	$current = $localRepo->newFile( $row->page_title );
	if ( !$current || !$current->exists() ) {
		return '';
	}

	$out = '';
	foreach ( array_reverse( $current->getHistory() ) as $oldVersion ) {
		$out .= $this->writeUpload( $oldVersion, $dumpContents );
	}

	return $out . $this->writeUpload( $current, $dumpContents );
}
| 699 | |
/**
 * Renders a single "<upload>" section for one file version.
 *
 * The section holds the timestamp, uploader, description, file name, archive
 * name (old versions only), source URL, size, sha1, relative path and, when
 * $dumpContents is set, the base64-encoded file contents. An uploader that is
 * not available, or a description that is null or empty, is written as a
 * placeholder element carrying deleted="deleted".
 *
 * @param File $file
 * @param bool $dumpContents
 * @return string
 */
private function writeUpload( $file, $dumpContents = false ) {
	$archiveName = '';
	if ( $file->isOld() ) {
		/** @var OldLocalFile $file */
		'@phan-var OldLocalFile $file';
		$archiveName = " " .
			Xml::element( 'archivename', null, $file->getArchiveName() ) . "\n";
	}

	$contents = '';
	if ( $dumpContents ) {
		$backend = $file->getRepo()->getBackend();
		# Dump file as base64
		# Uses only XML-safe characters, so does not need escaping
		# @todo Too bad this loads the contents into memory (script might swap)
		$rawBytes = $backend->getFileContents( [ 'src' => $file->getPath() ] );
		$contents = ' <contents encoding="base64">' .
			chunk_split( base64_encode( $rawBytes ) ) .
			" </contents>\n";
	}

	$uploaderIdentity = $file->getUploader( File::FOR_PUBLIC );
	$uploaderXml = $uploaderIdentity
		? $this->writeContributor( $uploaderIdentity->getId(), $uploaderIdentity->getName() )
		: Xml::element( 'contributor', [ 'deleted' => 'deleted' ] ) . "\n";

	$description = $file->getDescription( File::FOR_PUBLIC );
	$commentXml = ( $description ?? '' ) !== ''
		? Xml::elementClean( 'comment', null, $description )
		: Xml::element( 'comment', [ 'deleted' => 'deleted' ] );

	// Entries are evaluated and joined in the order the elements appear in the output.
	$pieces = [
		" <upload>\n",
		$this->writeTimestamp( $file->getTimestamp() ),
		$uploaderXml,
		" " . $commentXml . "\n",
		" " . Xml::element( 'filename', null, $file->getName() ) . "\n",
		$archiveName,
		" " . Xml::element( 'src', null, $file->getCanonicalUrl() ) . "\n",
		" " . Xml::element( 'size', null, (string)( $file->getSize() ?: 0 ) ) . "\n",
		" " . Xml::element( 'sha1base36', null, $file->getSha1() ) . "\n",
		" " . Xml::element( 'rel', null, $file->getRel() ) . "\n",
		$contents,
		" </upload>\n",
	];

	return implode( '', $pieces );
}
| 751 | |
/**
 * Returns the prefixed text form of a title, using the content language's
 * formatted namespace name as the prefix. External (interwiki) titles are
 * returned as their regular prefixed text.
 *
 * This skips any special-casing such as gendered user namespaces -- which
 * while useful, are not yet listed in the XML "<siteinfo>" data so are unsafe
 * in export.
 *
 * @param Title $title
 * @return string
 * @since 1.18
 */
public static function canonicalTitle( Title $title ) {
	if ( $title->isExternal() ) {
		return $title->getPrefixedText();
	}

	$contentLanguage = MediaWikiServices::getInstance()->getContentLanguage();
	$nsText = $contentLanguage->getFormattedNsText( $title->getNamespace() );

	// @todo Emit some kind of warning to the user if $title->getNamespace() !==
	// NS_MAIN and $nsText === '' (viz. pages in an unregistered namespace)

	// An empty namespace text means no prefix and no separating colon.
	$separator = $nsText !== '' ? ':' : '';

	return $nsText . $separator . $title->getText();
}
| 779 | } |
| 780 | |
/**
 * Registers the un-namespaced name 'XmlDumpWriter' as an alias of the
 * namespaced XmlDumpWriter class.
 *
 * @deprecated class alias since 1.46
 */
class_alias( XmlDumpWriter::class, 'XmlDumpWriter' );