Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
2.59% |
8 / 309 |
|
0.00% |
0 / 24 |
CRAP | |
0.00% |
0 / 1 |
XmlDumpWriter | |
2.59% |
8 / 309 |
|
0.00% |
0 / 24 |
5995.67 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
2 | |||
openStream | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
2 | |||
siteInfo | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
2 | |||
sitename | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
dbname | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
generator | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
homelink | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
caseSetting | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
namespaces | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
12 | |||
closeStream | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
openPage | |
0.00% |
0 / 23 |
|
0.00% |
0 / 1 |
20 | |||
closePage | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
getRevisionStore | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getBlobStore | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
invokeLenient | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
writeRevision | |
0.00% |
0 / 56 |
|
0.00% |
0 / 1 |
210 | |||
writeSlot | |
0.00% |
0 / 67 |
|
0.00% |
0 / 1 |
306 | |||
writeText | |
88.89% |
8 / 9 |
|
0.00% |
0 / 1 |
2.01 | |||
writeLogItem | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
30 | |||
writeTimestamp | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
writeContributor | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
12 | |||
writeUploads | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
30 | |||
writeUpload | |
0.00% |
0 / 31 |
|
0.00% |
0 / 1 |
42 | |||
canonicalTitle | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
12 |
1 | <?php |
2 | /** |
3 | * XmlDumpWriter |
4 | * |
5 | * Copyright © 2003, 2005, 2006 Brooke Vibber <bvibber@wikimedia.org> |
6 | * https://www.mediawiki.org/ |
7 | * |
8 | * This program is free software; you can redistribute it and/or modify |
9 | * it under the terms of the GNU General Public License as published by |
10 | * the Free Software Foundation; either version 2 of the License, or |
11 | * (at your option) any later version. |
12 | * |
13 | * This program is distributed in the hope that it will be useful, |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | * GNU General Public License for more details. |
17 | * |
18 | * You should have received a copy of the GNU General Public License along |
19 | * with this program; if not, write to the Free Software Foundation, Inc., |
20 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
21 | * http://www.gnu.org/copyleft/gpl.html |
22 | * |
23 | * @file |
24 | */ |
25 | |
26 | use MediaWiki\CommentStore\CommentStore; |
27 | use MediaWiki\Content\TextContent; |
28 | use MediaWiki\Debug\MWDebug; |
29 | use MediaWiki\HookContainer\HookContainer; |
30 | use MediaWiki\HookContainer\HookRunner; |
31 | use MediaWiki\MainConfigNames; |
32 | use MediaWiki\MediaWikiServices; |
33 | use MediaWiki\Revision\RevisionAccessException; |
34 | use MediaWiki\Revision\RevisionRecord; |
35 | use MediaWiki\Revision\RevisionStore; |
36 | use MediaWiki\Revision\SlotRecord; |
37 | use MediaWiki\Revision\SuppressedDataException; |
38 | use MediaWiki\Storage\SqlBlobStore; |
39 | use MediaWiki\Title\Title; |
40 | use MediaWiki\Xml\Xml; |
41 | use Wikimedia\Assert\Assert; |
42 | use Wikimedia\IPUtils; |
43 | |
44 | /** |
45 | * @ingroup Dump |
46 | */ |
47 | class XmlDumpWriter { |
48 | |
49 | /** Output serialized revision content. */ |
50 | public const WRITE_CONTENT = 0; |
51 | |
52 | /** Only output stubs for revision content. */ |
53 | public const WRITE_STUB = 1; |
54 | |
55 | /** |
56 | * Only output stubs for revision content, indicating that the content has been |
57 | * deleted/suppressed. |
58 | */ |
59 | private const WRITE_STUB_DELETED = 2; |
60 | |
61 | /** |
62 | * @var string[] the schema versions supported for output |
63 | * @final |
64 | */ |
65 | public static $supportedSchemas = [ |
66 | XML_DUMP_SCHEMA_VERSION_10, |
67 | XML_DUMP_SCHEMA_VERSION_11 |
68 | ]; |
69 | |
70 | /** |
71 | * @var string which schema version the generated XML should comply to. |
72 | * One of the values from self::$supportedSchemas, using the XML_DUMP_SCHEMA_VERSION_XX |
73 | * constants. |
74 | */ |
75 | private $schemaVersion; |
76 | |
77 | /** |
78 | * Title of the currently processed page |
79 | * |
80 | * @var Title|null |
81 | */ |
82 | private $currentTitle = null; |
83 | |
84 | /** |
85 | * @var int Whether to output revision content or just stubs. WRITE_CONTENT or WRITE_STUB. |
86 | */ |
87 | private $contentMode; |
88 | |
89 | /** @var HookRunner */ |
90 | private $hookRunner; |
91 | |
92 | /** @var CommentStore */ |
93 | private $commentStore; |
94 | |
/**
 * Validates the requested output mode and schema version, then wires up the
 * hook runner and comment store (falling back to the global service container
 * for any collaborator that was not injected).
 *
 * @param int $contentMode WRITE_CONTENT or WRITE_STUB.
 * @param string $schemaVersion which schema version the generated XML should comply to.
 *   One of the values from self::$supportedSchemas, using the XML_DUMP_SCHEMA_VERSION_XX
 *   constants.
 * @param HookContainer|null $hookContainer
 * @param CommentStore|null $commentStore
 */
public function __construct(
	$contentMode = self::WRITE_CONTENT,
	$schemaVersion = XML_DUMP_SCHEMA_VERSION_11,
	?HookContainer $hookContainer = null,
	?CommentStore $commentStore = null
) {
	// WRITE_STUB_DELETED is chosen internally per revision and is not accepted here.
	$allowedModes = [ self::WRITE_CONTENT, self::WRITE_STUB ];
	$modeIsValid = in_array( $contentMode, $allowedModes, true );
	Assert::parameter(
		$modeIsValid,
		'$contentMode',
		'must be one of the following constants: WRITE_CONTENT or WRITE_STUB.'
	);

	$schemaList = implode( ',', self::$supportedSchemas );
	$schemaIsValid = in_array( $schemaVersion, self::$supportedSchemas, true );
	Assert::parameter(
		$schemaIsValid,
		'$schemaVersion',
		'must be one of the following schema versions: ' . $schemaList
	);

	// Only touch the service container for collaborators the caller left out.
	if ( $hookContainer === null ) {
		$hookContainer = MediaWikiServices::getInstance()->getHookContainer();
	}
	if ( $commentStore === null ) {
		$commentStore = MediaWikiServices::getInstance()->getCommentStore();
	}

	$this->contentMode = $contentMode;
	$this->schemaVersion = $schemaVersion;
	$this->hookRunner = new HookRunner( $hookContainer );
	$this->commentStore = $commentStore;
}
129 | |
/**
 * Produces the opening root "<mediawiki>" tag followed by the "<siteinfo>"
 * section. No XML declaration is emitted, so the result can be embedded as a
 * subelement of a larger XML stream. Namespace and XML Schema references are
 * included.
 *
 * Output will be encoded in UTF-8.
 *
 * @return string
 */
public function openStream() {
	$schema = $this->schemaVersion;
	$exportNs = "http://www.mediawiki.org/xml/export-$schema/";
	$xsdUrl = "http://www.mediawiki.org/xml/export-$schema.xsd";
	$langCode = MediaWikiServices::getInstance()->getContentLanguage()->getHtmlCode();

	/*
	 * When a new version of the schema is created, it needs staging on mediawiki.org.
	 * This requires a change in the operations/mediawiki-config git repo.
	 *
	 * Create a changeset like https://gerrit.wikimedia.org/r/#/c/149643/ in which
	 * you copy in the new xsd file.
	 *
	 * After it is reviewed, merged and deployed (sync-docroot), the index.html needs purging.
	 * echo "https://www.mediawiki.org/xml/index.html" | mwscript purgeList.php --wiki=aawiki
	 */
	$rootAttribs = [
		'xmlns' => $exportNs,
		'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance",
		'xsi:schemaLocation' => $exportNs . ' ' . $xsdUrl,
		'version' => $schema,
		'xml:lang' => $langCode,
	];

	// Null contents: the root element is opened here and closed by closeStream().
	$rootTag = Xml::element( 'mediawiki', $rootAttribs, null );

	return $rootTag . "\n" . $this->siteInfo();
}
163 | |
/**
 * Assembles the "<siteinfo>" section from its individual child elements.
 *
 * @return string
 */
private function siteInfo() {
	// The order of the children is part of the emitted output; keep it stable.
	$parts = [];
	$parts[] = $this->sitename();
	$parts[] = $this->dbname();
	$parts[] = $this->homelink();
	$parts[] = $this->generator();
	$parts[] = $this->caseSetting();
	$parts[] = $this->namespaces();

	$body = implode( "\n ", $parts );

	return " <siteinfo>\n " . $body . "\n </siteinfo>\n";
}
179 | |
/**
 * Builds the "<sitename>" element from the Sitename configuration setting.
 *
 * @return string
 */
private function sitename() {
	$config = MediaWikiServices::getInstance()->getMainConfig();

	return Xml::element( 'sitename', [], $config->get( MainConfigNames::Sitename ) );
}
188 | |
/**
 * Builds the "<dbname>" element from the DBname configuration setting.
 *
 * @return string
 */
private function dbname() {
	$config = MediaWikiServices::getInstance()->getMainConfig();

	return Xml::element( 'dbname', [], $config->get( MainConfigNames::DBname ) );
}
196 | |
/**
 * Builds the "<generator>" element naming the software version (MW_VERSION).
 *
 * @return string
 */
private function generator() {
	$label = 'MediaWiki ' . MW_VERSION;

	return Xml::element( 'generator', [], $label );
}
203 | |
/**
 * Builds the "<base>" element holding the canonical URL of the main page.
 *
 * @return string
 */
private function homelink() {
	$mainPage = Title::newMainPage();

	return Xml::element( 'base', [], $mainPage->getCanonicalURL() );
}
210 | |
/**
 * Builds the site-wide "<case>" element from the CapitalLinks setting.
 *
 * @return string
 */
private function caseSetting() {
	$config = MediaWikiServices::getInstance()->getMainConfig();

	// "case-insensitive" option is reserved for future
	if ( $config->get( MainConfigNames::CapitalLinks ) ) {
		$mode = 'first-letter';
	} else {
		$mode = 'case-sensitive';
	}

	return Xml::element( 'case', [], $mode );
}
221 | |
/**
 * Builds the "<namespaces>" element: one "<namespace>" child per formatted
 * namespace of the content language, keyed by namespace index and annotated
 * with that namespace's capitalization rule.
 *
 * @return string
 */
private function namespaces() {
	$services = MediaWikiServices::getInstance();
	$nsInfo = $services->getNamespaceInfo();
	$formatted = $services->getContentLanguage()->getFormattedNamespaces();

	$xml = "<namespaces>\n";
	foreach ( $formatted as $index => $name ) {
		$caseMode = $nsInfo->isCapitalized( $index ) ? 'first-letter' : 'case-sensitive';
		$attribs = [
			'key' => $index,
			'case' => $caseMode,
		];
		$xml .= ' ' . Xml::element( 'namespace', $attribs, $name ) . "\n";
	}

	return $xml . " </namespaces>";
}
243 | |
/**
 * Returns the closing tag of the root element opened by openStream().
 * Call when finished dumping things.
 *
 * @return string
 */
public function closeStream() {
	$closingTag = "</mediawiki>\n";

	return $closingTag;
}
253 | |
/**
 * Opens a "<page>" section on the output stream, with data
 * from the given database row.
 *
 * Remembers the page's title as the current title, emits the title, namespace
 * and id elements, adds a "<redirect>" element when the row is flagged as a
 * redirect and a valid target can be resolved, and finally lets the
 * XmlDumpWriterOpenPage hook adjust the output.
 *
 * @param stdClass $row
 * @return string
 */
public function openPage( $row ) {
	$title = Title::newFromRow( $row );
	$this->currentTitle = $title;

	$out = " <page>\n"
		. ' ' . Xml::elementClean( 'title', [], self::canonicalTitle( $title ) ) . "\n"
		. ' ' . Xml::element( 'ns', [], strval( $row->page_namespace ) ) . "\n"
		. ' ' . Xml::element( 'id', [], strval( $row->page_id ) ) . "\n";

	if ( $row->page_is_redirect ) {
		$services = MediaWikiServices::getInstance();
		$wikiPage = $services->getWikiPageFactory()->newFromTitle( $title );
		$redirectStore = $services->getRedirectStore();

		// Lookup failures are downgraded to a warning; the page is then
		// dumped without a <redirect> element.
		$target = $this->invokeLenient(
			static function () use ( $wikiPage, $redirectStore ) {
				return $redirectStore->getRedirectTarget( $wikiPage );
			},
			'Failed to get redirect target of page ' . $wikiPage->getId()
		);
		$target = Title::castFromLinkTarget( $target );

		if ( $target instanceof Title && $target->isValidRedirectTarget() ) {
			$redirectAttribs = [ 'title' => self::canonicalTitle( $target ) ];
			$out .= ' ' . Xml::element( 'redirect', $redirectAttribs ) . "\n";
		}
	}

	$this->hookRunner->onXmlDumpWriterOpenPage( $this, $out, $row, $this->currentTitle );

	return $out;
}
289 | |
/**
 * Closes a "<page>" section on the output stream and drops the link cache
 * entry of the page that was just written, if any.
 *
 * @internal
 * @return string
 */
public function closePage() {
	$title = $this->currentTitle;

	if ( $title !== null ) {
		// In rare cases, link cache has the same key for some pages which
		// might be read as part of the same batch. T220424 and T220316
		MediaWikiServices::getInstance()->getLinkCache()->clearLink( $title );
	}

	return " </page>\n";
}
305 | |
/**
 * Fetches the revision store from the global service container.
 *
 * @return RevisionStore
 */
private function getRevisionStore() {
	$services = MediaWikiServices::getInstance();

	return $services->getRevisionStore();
}
312 | |
/**
 * Fetches the blob store from the global service container.
 *
 * @return SqlBlobStore
 */
private function getBlobStore() {
	$services = MediaWikiServices::getInstance();

	// @phan-suppress-next-line PhanTypeMismatchReturnSuperType
	return $services->getBlobStore();
}
320 | |
/**
 * Runs the given callback and shields the caller from the listed failure
 * types: suppressed data yields null silently, the other caught exception
 * types yield null after a debug warning. Anything else propagates.
 *
 * @param callable $callback
 * @param string $warning The warning to output in case of a storage related exception.
 *
 * @return mixed Returns the method's return value, or null in case of an exception.
 * @throws Exception
 */
private function invokeLenient( $callback, $warning ) {
	$result = null;

	try {
		$result = $callback();
	} catch ( SuppressedDataException $ex ) {
		// Suppressed data is not reported; the caller simply gets null.
	} catch ( MWException | RuntimeException | InvalidArgumentException | ErrorException $ex ) {
		MWDebug::warning( $warning . ': ' . $ex->getMessage() );
	}

	return $result;
}
340 | |
/**
 * Dumps a "<revision>" section on the output stream, with
 * data filled in from the given database row.
 *
 * The section contains, in order: id, optional parent id, timestamp,
 * contributor, optional minor flag, optional comment, the main slot, any
 * further slots, and the sha1. The XmlDumpWriterWriteRevision hook runs
 * before the closing tag is appended.
 *
 * @param stdClass $row
 * @param null|stdClass[] $slotRows
 *
 * @return string
 * @throws RevisionAccessException
 */
public function writeRevision( $row, $slotRows = null ) {
	$store = $this->getRevisionStore();
	$rev = $store->newRevisionFromRowAndSlots( $row, $slotRows, 0, $this->currentTitle );

	$out = " <revision>\n";
	$out .= " " . Xml::element( 'id', null, strval( $rev->getId() ) ) . "\n";

	$parentId = $rev->getParentId();
	if ( $parentId ) {
		$out .= " " . Xml::element( 'parentid', null, strval( $parentId ) ) . "\n";
	}

	$out .= $this->writeTimestamp( $rev->getTimestamp() );

	if ( !$rev->isDeleted( RevisionRecord::DELETED_USER ) ) {
		// empty values get written out as uid 0, see T224221
		$user = $rev->getUser();
		if ( $user ) {
			$out .= $this->writeContributor( $user->getId(), $user->getName() );
		} else {
			$out .= $this->writeContributor( 0, '' );
		}
	} else {
		$out .= " " . Xml::element( 'contributor', [ 'deleted' => 'deleted' ] ) . "\n";
	}

	if ( $rev->isMinor() ) {
		$out .= " <minor/>\n";
	}

	if ( $rev->isDeleted( RevisionRecord::DELETED_COMMENT ) ) {
		$out .= " " . Xml::element( 'comment', [ 'deleted' => 'deleted' ] ) . "\n";
	} else {
		// An empty comment produces no <comment> element at all.
		$commentText = $rev->getComment()->text;
		if ( $commentText != '' ) {
			$out .= " " . Xml::elementClean( 'comment', [], strval( $commentText ) ) . "\n";
		}
	}

	// Deleted text forces the "deleted stub" mode regardless of the configured mode.
	$textDeleted = $rev->isDeleted( RevisionRecord::DELETED_TEXT );
	$contentMode = $textDeleted ? self::WRITE_STUB_DELETED : $this->contentMode;

	$slots = $rev->getSlots()->getSlots();

	// use predictable order, put main slot first
	ksort( $slots );
	$out .= $this->writeSlot( $slots[SlotRecord::MAIN], $contentMode );

	foreach ( $slots as $role => $slot ) {
		if ( $role !== SlotRecord::MAIN ) {
			$out .= $this->writeSlot( $slot, $contentMode );
		}
	}

	if ( $textDeleted ) {
		$out .= " <sha1/>\n";
	} else {
		$sha1 = $this->invokeLenient(
			static function () use ( $rev ) {
				return $rev->getSha1();
			},
			'failed to determine sha1 for revision ' . $rev->getId()
		);
		$out .= " " . Xml::element( 'sha1', null, strval( $sha1 ) ) . "\n";
	}

	// The serialized main slot text is only computed for the hook below.
	$text = '';
	if ( $contentMode === self::WRITE_CONTENT ) {
		/** @var Content $content */
		$content = $this->invokeLenient(
			static function () use ( $rev ) {
				return $rev->getContent( SlotRecord::MAIN, RevisionRecord::RAW );
			},
			'Failed to load main slot content of revision ' . $rev->getId()
		);

		if ( $content ) {
			$text = $content->serialize();
		}
	}
	$this->hookRunner->onXmlDumpWriterWriteRevision( $this, $out, $row, $text, $rev );

	$out .= " </revision>\n";

	return $out;
}
438 | |
/**
 * Renders the XML for one slot of a revision.
 *
 * The main slot is written inline; any other slot is wrapped in a
 * "<content>" element with a "<role>" child, and is skipped entirely when the
 * schema version is older than XML_DUMP_SCHEMA_VERSION_11. Depending on
 * $contentMode the "<text>" element carries the serialized content, a
 * "deleted" placeholder, or a stub that only references the stored blob.
 *
 * @param SlotRecord $slot
 * @param int $contentMode see the WRITE_XXX constants
 *
 * @return string
 */
private function writeSlot( SlotRecord $slot, $contentMode ) {
	$isMain = $slot->getRole() === SlotRecord::MAIN;
	// Schema versions are strings. Compare them as versions, not with ">=":
	// PHP compares numeric strings as numbers, which does not order dotted
	// version strings correctly (a hypothetical "0.9" would rank above "0.11").
	$isV11 = version_compare( $this->schemaVersion, XML_DUMP_SCHEMA_VERSION_11, '>=' );

	if ( !$isV11 && !$isMain ) {
		// ignore extra slots
		return '';
	}

	$out = '';
	$indent = ' ';

	if ( !$isMain ) {
		// non-main slots are wrapped into an additional element.
		$out .= ' ' . Xml::openElement( 'content' ) . "\n";
		$indent .= ' ';
		$out .= $indent . Xml::element( 'role', null, strval( $slot->getRole() ) ) . "\n";
	}

	if ( $isV11 ) {
		$out .= $indent . Xml::element( 'origin', null, strval( $slot->getOrigin() ) ) . "\n";
	}

	$contentModel = $slot->getModel();
	$contentHandler = MediaWikiServices::getInstance()
		->getContentHandlerFactory()
		->getContentHandler( $contentModel );
	$contentFormat = $contentHandler->getDefaultFormat();

	// XXX: The content format is only relevant when actually outputting serialized content.
	// It should probably be an attribute on the text tag.
	$out .= $indent . Xml::element( 'model', null, strval( $contentModel ) ) . "\n";
	$out .= $indent . Xml::element( 'format', null, strval( $contentFormat ) ) . "\n";

	// A size that cannot be determined (or is zero) is reported as '0'.
	$textAttributes = [
		'bytes' => $this->invokeLenient(
			static function () use ( $slot ) {
				return $slot->getSize();
			},
			'failed to determine size for slot ' . $slot->getRole() . ' of revision '
			. $slot->getRevision()
		) ?: '0'
	];

	if ( $isV11 ) {
		// A hash that cannot be determined is reported as an empty string.
		$textAttributes['sha1'] = $this->invokeLenient(
			static function () use ( $slot ) {
				return $slot->getSha1();
			},
			'failed to determine sha1 for slot ' . $slot->getRole() . ' of revision '
			. $slot->getRevision()
		) ?: '';
	}

	if ( $contentMode === self::WRITE_CONTENT ) {
		$content = $this->invokeLenient(
			static function () use ( $slot ) {
				return $slot->getContent();
			},
			'failed to load content for slot ' . $slot->getRole() . ' of revision '
			. $slot->getRevision()
		);

		if ( $content === null ) {
			// Content could not be loaded: emit an empty <text> element.
			$out .= $indent . Xml::element( 'text', $textAttributes ) . "\n";
		} else {
			$out .= $this->writeText( $content, $textAttributes, $indent );
		}
	} elseif ( $contentMode === self::WRITE_STUB_DELETED ) {
		// write <text> placeholder tag
		$textAttributes['deleted'] = 'deleted';
		$out .= $indent . Xml::element( 'text', $textAttributes ) . "\n";
	} else {
		// write <text> stub tag
		if ( $isV11 ) {
			$textAttributes['location'] = $slot->getAddress();
		}
		$schema = null;

		if ( $isMain ) {
			// Output the numerical text ID if possible, for backwards compatibility.
			// Note that this is currently the ONLY reason we have a BlobStore here at all.
			// When removing this line, check whether the BlobStore has become unused.
			try {
				// NOTE: this will only work for addresses of the form "tt:12345" or "es:DB://cluster1/1234".
				// If we want to support other kinds of addresses in the future,
				// we will have to silently ignore failures here.
				// For now, this fails for "tt:0", which is present in the WMF production
				// database as of July 2019, due to data corruption.
				[ $schema, $textId ] = $this->getBlobStore()->splitBlobAddress( $slot->getAddress() );
			} catch ( InvalidArgumentException $ex ) {
				MWDebug::warning( 'Bad content address for slot ' . $slot->getRole()
					. ' of revision ' . $slot->getRevision() . ': ' . $ex->getMessage() );
				$textId = 0;
			}

			// Only the two known address schemas yield an "id" attribute;
			// after a failed split $schema is still null and none is written.
			if ( $schema === 'tt' ) {
				$textAttributes['id'] = $textId;
			} elseif ( $schema === 'es' ) {
				$textAttributes['id'] = bin2hex( $textId );
			}
		}

		$out .= $indent . Xml::element( 'text', $textAttributes ) . "\n";
	}

	if ( !$isMain ) {
		$out .= ' ' . Xml::closeElement( 'content' ) . "\n";
	}

	return $out;
}
557 | |
/**
 * Renders a "<text>" element carrying the exported form of the given content.
 * The "bytes" attribute is overwritten with the length of the data actually
 * written, and xml:space="preserve" is added.
 *
 * @param Content $content
 * @param string[] $textAttributes
 * @param string $indent
 *
 * @return string
 */
private function writeText( Content $content, $textAttributes, $indent ) {
	$handler = $content->getContentHandler();
	$format = $handler->getDefaultFormat();

	// HACK: For text based models, bypass the serialization step. This allows extensions (like Flow)
	// that use incompatible combinations of serialization format and content model.
	$raw = ( $content instanceof TextContent )
		? $content->getText()
		: $content->serialize( $format );

	$exported = $handler->exportTransform( $raw, $format );

	$attribs = $textAttributes;
	// make sure to use the actual size
	$attribs['bytes'] = strlen( $exported );
	$attribs['xml:space'] = 'preserve';

	return $indent . Xml::elementClean( 'text', $attribs, strval( $exported ) ) . "\n";
}
583 | |
/**
 * Dumps a "<logitem>" section on the output stream, with
 * data filled in from the given database row.
 *
 * Contributor, comment and action details are each replaced by a
 * deleted="deleted" placeholder when the matching bit is set in log_deleted.
 *
 * @param stdClass $row
 * @return string
 */
public function writeLogItem( $row ) {
	$deletedBits = $row->log_deleted;

	$out = " <logitem>\n";
	$out .= " " . Xml::element( 'id', null, strval( $row->log_id ) ) . "\n";
	$out .= $this->writeTimestamp( $row->log_timestamp, " " );

	$out .= ( $deletedBits & LogPage::DELETED_USER )
		? " " . Xml::element( 'contributor', [ 'deleted' => 'deleted' ] ) . "\n"
		: $this->writeContributor( $row->actor_user, $row->actor_name, " " );

	if ( !( $deletedBits & LogPage::DELETED_COMMENT ) ) {
		// An empty comment produces no <comment> element at all.
		$commentText = $this->commentStore->getComment( 'log_comment', $row )->text;
		if ( $commentText != '' ) {
			$out .= " " . Xml::elementClean( 'comment', null, strval( $commentText ) ) . "\n";
		}
	} else {
		$out .= " " . Xml::element( 'comment', [ 'deleted' => 'deleted' ] ) . "\n";
	}

	$out .= " " . Xml::element( 'type', null, strval( $row->log_type ) ) . "\n";
	$out .= " " . Xml::element( 'action', null, strval( $row->log_action ) ) . "\n";

	if ( !( $deletedBits & LogPage::DELETED_ACTION ) ) {
		$target = Title::makeTitle( $row->log_namespace, $row->log_title );
		$out .= " " . Xml::elementClean( 'logtitle', null, self::canonicalTitle( $target ) ) . "\n";
		$out .= " " . Xml::elementClean(
			'params',
			[ 'xml:space' => 'preserve' ],
			strval( $row->log_params )
		) . "\n";
	} else {
		$out .= " " . Xml::element( 'text', [ 'deleted' => 'deleted' ] ) . "\n";
	}

	$out .= " </logitem>\n";

	return $out;
}
629 | |
/**
 * Renders a "<timestamp>" element with the given time in ISO 8601 form,
 * followed by a newline.
 *
 * @param string $timestamp
 * @param string $indent Prefix placed before the element.
 *   NOTE(review): previously documented as "six spaces", but the default
 *   literal in this copy is a single space -- confirm which is intended.
 * @return string
 */
public function writeTimestamp( $timestamp, $indent = " " ) {
	$element = Xml::element( 'timestamp', null, wfTimestamp( TS_ISO_8601, $timestamp ) );

	return $indent . $element . "\n";
}
639 | |
/**
 * Renders a "<contributor>" element. A contributor without an id whose name
 * is a valid IP address is written as "<ip>"; every other contributor is
 * written as "<username>" plus "<id>".
 *
 * @param int $id
 * @param string $text
 * @param string $indent Prefix placed before each emitted line.
 *   NOTE(review): previously documented as "six spaces", but the default
 *   literal in this copy is a single space -- confirm which is intended.
 * @return string
 */
public function writeContributor( $id, $text, $indent = " " ) {
	$inner = $indent . " ";

	if ( !$id && IPUtils::isValid( $text ) ) {
		$body = $inner . Xml::elementClean( 'ip', null, strval( $text ) ) . "\n";
	} else {
		$body = $inner . Xml::elementClean( 'username', null, strval( $text ) ) . "\n"
			. $inner . Xml::element( 'id', null, strval( $id ) ) . "\n";
	}

	return $indent . "<contributor>\n" . $body . $indent . "</contributor>\n";
}
657 | |
/**
 * Renders "<upload>" sections for every version of the file behind the given
 * page row: historical versions first (in reverse of the order returned by
 * getHistory()), then the current version. Returns an empty string for rows
 * outside the file namespace or when the file does not exist locally.
 *
 * Warning! This data is potentially inconsistent. :(
 *
 * @param stdClass $row
 * @param bool $dumpContents
 * @return string
 */
public function writeUploads( $row, $dumpContents = false ) {
	// Loose comparison on purpose, as in the rest of this check: the row
	// value is compared against the NS_FILE constant without a type check.
	if ( $row->page_namespace != NS_FILE ) {
		return '';
	}

	$localRepo = MediaWikiServices::getInstance()->getRepoGroup()->getLocalRepo();
	$current = $localRepo->newFile( $row->page_title );
	if ( !$current || !$current->exists() ) {
		return '';
	}

	$out = '';
	foreach ( array_reverse( $current->getHistory() ) as $oldVersion ) {
		$out .= $this->writeUpload( $oldVersion, $dumpContents );
	}

	return $out . $this->writeUpload( $current, $dumpContents );
}
679 | |
/**
 * Renders one "<upload>" section for a single file version.
 *
 * The section contains timestamp, contributor, comment, file name, optional
 * archive name (old versions only), source URL, size, sha1, relative path
 * and, when requested, the base64-encoded file contents.
 *
 * @param File $file
 * @param bool $dumpContents Whether to embed the file contents.
 * @return string
 */
private function writeUpload( $file, $dumpContents = false ) {
	if ( $file->isOld() ) {
		/** @var OldLocalFile $file */
		'@phan-var OldLocalFile $file';
		$archiveName = " " .
			Xml::element( 'archivename', null, $file->getArchiveName() ) . "\n";
	} else {
		$archiveName = '';
	}

	if ( $dumpContents ) {
		$be = $file->getRepo()->getBackend();
		$rawContents = $be->getFileContents( [ 'src' => $file->getPath() ] );
		if ( $rawContents === false ) {
			// The backend reports a failed read as false. Previously that was
			// silently coerced to an empty string by base64_encode(); keep the
			// same output, but make the failure visible.
			MWDebug::warning( 'Failed to read contents of file ' . $file->getName() );
			$rawContents = '';
		}
		# Dump file as base64
		# Uses only XML-safe characters, so does not need escaping
		# @todo Too bad this loads the contents into memory (script might swap)
		$contents = ' <contents encoding="base64">' .
			chunk_split( base64_encode( $rawContents ) ) .
			" </contents>\n";
	} else {
		$contents = '';
	}

	// A missing uploader is rendered as a deleted contributor.
	$uploader = $file->getUploader( File::FOR_PUBLIC );
	if ( $uploader ) {
		$uploader = $this->writeContributor( $uploader->getId(), $uploader->getName() );
	} else {
		$uploader = Xml::element( 'contributor', [ 'deleted' => 'deleted' ] ) . "\n";
	}

	// A null or empty description is rendered as a deleted comment.
	$comment = $file->getDescription( File::FOR_PUBLIC );
	if ( ( $comment ?? '' ) !== '' ) {
		$comment = Xml::elementClean( 'comment', null, $comment );
	} else {
		$comment = Xml::element( 'comment', [ 'deleted' => 'deleted' ] );
	}

	return " <upload>\n" .
		$this->writeTimestamp( $file->getTimestamp() ) .
		$uploader .
		" " . $comment . "\n" .
		" " . Xml::element( 'filename', null, $file->getName() ) . "\n" .
		$archiveName .
		" " . Xml::element( 'src', null, $file->getCanonicalUrl() ) . "\n" .
		" " . Xml::element( 'size', null, (string)( $file->getSize() ?: 0 ) ) . "\n" .
		" " . Xml::element( 'sha1base36', null, $file->getSha1() ) . "\n" .
		" " . Xml::element( 'rel', null, $file->getRel() ) . "\n" .
		$contents .
		" </upload>\n";
}
731 | |
/**
 * Return prefixed text form of title, but using the content language's
 * canonical namespace. This skips any special-casing such as gendered
 * user namespaces -- which while useful, are not yet listed in the
 * XML "<siteinfo>" data so are unsafe in export.
 *
 * External (interwiki) titles are returned with their regular prefixed text.
 *
 * @param Title $title
 * @return string
 * @since 1.18
 */
public static function canonicalTitle( Title $title ) {
	if ( $title->isExternal() ) {
		return $title->getPrefixedText();
	}

	$contLang = MediaWikiServices::getInstance()->getContentLanguage();
	$nsText = $contLang->getFormattedNsText( $title->getNamespace() );

	// @todo Emit some kind of warning to the user if $title->getNamespace() !==
	// NS_MAIN and $nsText === '' (viz. pages in an unregistered namespace)

	$pageText = $title->getText();

	// Only namespaces with a non-empty name get the "Name:" prefix.
	return $nsText !== '' ? $nsText . ':' . $pageText : $pageText;
}
759 | } |