MediaWiki REL1_33
XmlDumpWriter.php
Go to the documentation of this file.
1<?php
28
37 public static $supportedSchemas = [
39 ];
40
46 private $currentTitle = null;
47
/**
 * Opens the XML output stream's root "<mediawiki>" element and appends the
 * site information section produced by siteInfo().
 *
 * The root element carries the export schema namespace, the schema location,
 * the schema version and the content language's HTML code.
 *
 * @return string Root element markup, a newline, then the <siteinfo> markup
 */
function openStream() {
	$ver = WikiExporter::schemaVersion();
	$exportNs = "http://www.mediawiki.org/xml/export-$ver/";

	/*
	 * When a new version of the schema is created, it needs staging on mediawiki.org.
	 * This requires a change in the operations/mediawiki-config git repo.
	 *
	 * Create a changeset like https://gerrit.wikimedia.org/r/#/c/149643/ in which
	 * you copy in the new xsd file.
	 *
	 * After it is reviewed, merged and deployed (sync-docroot), the index.html needs purging.
	 * echo "https://www.mediawiki.org/xml/index.html" | mwscript purgeList.php --wiki=aawiki
	 */
	$schemaLocation = "$exportNs http://www.mediawiki.org/xml/export-$ver.xsd";

	$contentLanguage = MediaWikiServices::getInstance()->getContentLanguage();
	$rootAttribs = [
		'xmlns' => $exportNs,
		'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance",
		'xsi:schemaLocation' => $schemaLocation,
		'version' => $ver,
		'xml:lang' => $contentLanguage->getHtmlCode(),
	];

	// Contents are passed as null here; the matching close tag is emitted by closeStream().
	$root = Xml::element( 'mediawiki', $rootAttribs, null );

	return $root . "\n" . $this->siteInfo();
}
81
/**
 * Builds the <siteinfo> section from the individual site-level elements:
 * site name, database name, base URL, generator, case setting and namespaces.
 *
 * @return string
 */
function siteInfo() {
	$parts = [];
	$parts[] = $this->sitename();
	$parts[] = $this->dbname();
	$parts[] = $this->homelink();
	$parts[] = $this->generator();
	$parts[] = $this->caseSetting();
	$parts[] = $this->namespaces();

	// One element per line, each preceded by the same separator.
	$body = implode( "\n ", $parts );

	return " <siteinfo>\n " . $body . "\n </siteinfo>\n";
}
97
/**
 * Builds the <sitename> element from the $wgSitename global (name of the site).
 *
 * @return string
 */
function sitename() {
	global $wgSitename;

	$element = Xml::element( 'sitename', [], $wgSitename );
	return $element;
}
105
/**
 * Builds the <dbname> element from the $wgDBname global.
 *
 * @return string
 */
function dbname() {
	global $wgDBname;

	$element = Xml::element( 'dbname', [], $wgDBname );
	return $element;
}
113
/**
 * Builds the <generator> element, identifying the software as
 * "MediaWiki" followed by the $wgVersion version number.
 *
 * @return string
 */
function generator() {
	global $wgVersion;

	$label = "MediaWiki $wgVersion";
	return Xml::element( 'generator', [], $label );
}
121
/**
 * Builds the <base> element holding the canonical URL of the main page.
 *
 * @return string
 */
function homelink() {
	$mainPage = Title::newMainPage();
	$canonicalUrl = $mainPage->getCanonicalURL();

	return Xml::element( 'base', [], $canonicalUrl );
}
128
/**
 * Builds the <case> element describing the wiki-wide title case handling.
 *
 * The value is 'first-letter' when $wgCapitalLinks is truthy and
 * 'case-sensitive' otherwise.
 *
 * @return string
 */
function caseSetting() {
	global $wgCapitalLinks;

	// "case-insensitive" option is reserved for future
	if ( $wgCapitalLinks ) {
		$sensitivity = 'first-letter';
	} else {
		$sensitivity = 'case-sensitive';
	}

	return Xml::element( 'case', [], $sensitivity );
}
138
/**
 * Builds the <namespaces> section: one <namespace> element per formatted
 * namespace of the content language, keyed by namespace id and annotated
 * with that namespace's case handling.
 *
 * @return string
 */
function namespaces() {
	$contentLanguage = MediaWikiServices::getInstance()->getContentLanguage();

	$markup = "<namespaces>\n";
	foreach ( $contentLanguage->getFormattedNamespaces() as $nsId => $nsName ) {
		if ( MWNamespace::isCapitalized( $nsId ) ) {
			$case = 'first-letter';
		} else {
			$case = 'case-sensitive';
		}
		$attribs = [
			'key' => $nsId,
			'case' => $case,
		];
		$markup .= ' ' . Xml::element( 'namespace', $attribs, $nsName ) . "\n";
	}

	return $markup . " </namespaces>";
}
158
/**
 * Closes the output stream with the closing root element.
 *
 * @return string The closing "</mediawiki>" tag followed by a newline
 */
function closeStream() {
	$closingRoot = "</mediawiki>\n";
	return $closingRoot;
}
168
/**
 * Opens a "<page>" section on the output stream, with data from the given
 * database row.
 *
 * Also records the page's Title in $this->currentTitle, which closePage()
 * and writeRevision() read later.
 *
 * @param object $row Row providing page_namespace, page_title, page_id,
 *   page_is_redirect and page_restrictions
 * @return string
 */
public function openPage( $row ) {
	$this->currentTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
	$pageTitle = $this->currentTitle;

	$out = " <page>\n";
	$out .= ' ' . Xml::elementClean( 'title', [], self::canonicalTitle( $pageTitle ) ) . "\n";
	$out .= ' ' . Xml::element( 'ns', [], strval( $row->page_namespace ) ) . "\n";
	$out .= ' ' . Xml::element( 'id', [], strval( $row->page_id ) ) . "\n";

	if ( $row->page_is_redirect ) {
		$target = WikiPage::factory( $pageTitle )->getRedirectTarget();
		// Only emit <redirect> when the target resolves to a valid redirect target.
		if ( $target instanceof Title && $target->isValidRedirectTarget() ) {
			$redirectAttribs = [ 'title' => self::canonicalTitle( $target ) ];
			$out .= ' ' . Xml::element( 'redirect', $redirectAttribs ) . "\n";
		}
	}

	if ( $row->page_restrictions != '' ) {
		$restrictions = strval( $row->page_restrictions );
		$out .= ' ' . Xml::element( 'restrictions', [], $restrictions ) . "\n";
	}

	// $out is handed over by reference so hook handlers can alter the markup.
	Hooks::run( 'XmlDumpWriterOpenPage', [ $this, &$out, $row, $this->currentTitle ] );

	return $out;
}
202
/**
 * Closes a "<page>" section on the output stream.
 *
 * When a current title is set, its entry is cleared from the link cache first.
 *
 * @return string
 */
function closePage() {
	$closingTag = " </page>\n";

	if ( $this->currentTitle === null ) {
		return $closingTag;
	}

	// In rare cases, link cache has the same key for some pages which
	// might be read as part of the same batch. T220424 and T220316
	MediaWikiServices::getInstance()->getLinkCache()->clearLink( $this->currentTitle );

	return $closingTag;
}
218
/**
 * Fetches the revision store (service for looking up page revisions)
 * from the global service locator.
 *
 * @return mixed Whatever MediaWikiServices::getRevisionStore() returns
 */
private function getRevisionStore() {
	$services = MediaWikiServices::getInstance();
	return $services->getRevisionStore();
}
225
/**
 * Fetches the blob store from the global service locator.
 *
 * @return mixed Whatever MediaWikiServices::getBlobStore() returns
 */
private function getBlobStore() {
	$services = MediaWikiServices::getInstance();
	return $services->getBlobStore();
}
232
/**
 * Dumps a "<revision>" section on the output stream, with data filled in
 * from the given database row.
 *
 * The <text> element is produced from the first matching source:
 *  - text visibility-deleted: an empty <text deleted="deleted"> element;
 *  - $row->old_text set: text obtained via Revision::getRevisionText();
 *  - $row->_load_content set: content of the 'main' slot, loaded via the revision store;
 *  - $row->rev_text_id set: a stub element carrying the text id (pre-MCR schema);
 *  - otherwise: a stub element whose id is resolved through the blob store.
 *
 * Requires $this->currentTitle to be set (openPage() does that) whenever the
 * row lacks rev_content_model or the revision store has to be consulted.
 *
 * @param object $row Revision row. Reads rev_id, rev_timestamp, rev_user,
 *   rev_user_text and, when present, rev_parent_id, rev_deleted,
 *   rev_minor_edit, rev_content_model, rev_content_format, old_text,
 *   _load_content, rev_text_id, rev_len and rev_sha1.
 * @return string
 * @throws Exception Anything other than MWException or RuntimeException that
 *   is raised while transforming or loading the text is rethrown.
 */
function writeRevision( $row ) {
	// rev_deleted is optional on the row. Read the visibility bitfield once
	// and default to "nothing hidden", so that no check below touches an
	// undefined property.
	$deleted = isset( $row->rev_deleted ) ? intval( $row->rev_deleted ) : 0;

	$out = " <revision>\n";
	$out .= " " . Xml::element( 'id', null, strval( $row->rev_id ) ) . "\n";
	if ( isset( $row->rev_parent_id ) && $row->rev_parent_id ) {
		$out .= " " . Xml::element( 'parentid', null, strval( $row->rev_parent_id ) ) . "\n";
	}

	$out .= $this->writeTimestamp( $row->rev_timestamp );

	if ( $deleted & Revision::DELETED_USER ) {
		$out .= " " . Xml::element( 'contributor', [ 'deleted' => 'deleted' ] ) . "\n";
	} else {
		$out .= $this->writeContributor( $row->rev_user, $row->rev_user_text );
	}

	if ( isset( $row->rev_minor_edit ) && $row->rev_minor_edit ) {
		$out .= " <minor/>\n";
	}
	if ( $deleted & Revision::DELETED_COMMENT ) {
		$out .= " " . Xml::element( 'comment', [ 'deleted' => 'deleted' ] ) . "\n";
	} else {
		$comment = CommentStore::getStore()->getComment( 'rev_comment', $row )->text;
		// Empty comments are omitted entirely rather than written as an empty element.
		if ( $comment != '' ) {
			$out .= " " . Xml::elementClean( 'comment', [], strval( $comment ) ) . "\n";
		}
	}

	// TODO: rev_content_model no longer exists with MCR, see T174031
	if ( isset( $row->rev_content_model ) && !is_null( $row->rev_content_model ) ) {
		$content_model = strval( $row->rev_content_model );
	} else {
		// probably using $wgContentHandlerUseDB = false;
		$content_model = ContentHandler::getDefaultModelFor( $this->currentTitle );
	}

	$content_handler = ContentHandler::getForModelID( $content_model );

	// TODO: rev_content_format no longer exists with MCR, see T174031
	if ( isset( $row->rev_content_format ) && !is_null( $row->rev_content_format ) ) {
		$content_format = strval( $row->rev_content_format );
	} else {
		// probably using $wgContentHandlerUseDB = false;
		$content_format = $content_handler->getDefaultFormat();
	}

	$out .= " " . Xml::element( 'model', null, strval( $content_model ) ) . "\n";
	$out .= " " . Xml::element( 'format', null, strval( $content_format ) ) . "\n";

	// $text stays '' for the deleted and stub branches; it is passed to the hook below.
	$text = '';
	if ( $deleted & Revision::DELETED_TEXT ) {
		$out .= " " . Xml::element( 'text', [ 'deleted' => 'deleted' ] ) . "\n";
	} elseif ( isset( $row->old_text ) ) {
		// Raw text from the database may have invalid chars
		$text = strval( Revision::getRevisionText( $row ) );
		try {
			$text = $content_handler->exportTransform( $text, $content_format );
		} catch ( Exception $ex ) {
			if ( $ex instanceof MWException || $ex instanceof RuntimeException ) {
				// leave text as is; that's the way it goes
				wfLogWarning( 'exportTransform failed on text for revid ' . $row->rev_id . "\n" );
			} else {
				throw $ex;
			}
		}
		$out .= " " . Xml::elementClean( 'text',
			[ 'xml:space' => 'preserve', 'bytes' => intval( $row->rev_len ) ],
			strval( $text ) ) . "\n";
	} elseif ( isset( $row->_load_content ) ) {
		// TODO: make this fully MCR aware, see T174031
		$rev = $this->getRevisionStore()->newRevisionFromRow( $row, 0, $this->currentTitle );
		$slot = $rev->getSlot( 'main' );
		try {
			$content = $slot->getContent();

			if ( $content instanceof TextContent ) {
				// HACK: For text based models, bypass the serialization step.
				// This allows extensions (like Flow) that use incompatible combinations
				// of serialization format and content model.
				$text = $content->getNativeData();
			} else {
				$text = $content->serialize( $content_format );
			}
			$text = $content_handler->exportTransform( $text, $content_format );
			$out .= " " . Xml::elementClean( 'text',
				[ 'xml:space' => 'preserve', 'bytes' => intval( $slot->getSize() ) ],
				strval( $text ) ) . "\n";
		} catch ( Exception $ex ) {
			if ( $ex instanceof MWException || $ex instanceof RuntimeException ) {
				// there's no provision in the schema for an attribute that will let
				// the user know this element was unavailable due to error; an empty
				// tag is the best we can do
				$out .= " " . Xml::element( 'text' ) . "\n";
				wfLogWarning( 'failed to load content for revid ' . $row->rev_id . "\n" );
			} else {
				throw $ex;
			}
		}
	} elseif ( isset( $row->rev_text_id ) ) {
		// Stub output for pre-MCR schema
		// TODO: MCR: rev_text_id only exists in the pre-MCR schema. Remove this when
		// we drop support for the old schema.
		$out .= " " . Xml::element( 'text',
			[ 'id' => $row->rev_text_id, 'bytes' => intval( $row->rev_len ) ],
			"" ) . "\n";
	} else {
		// Backwards-compatible stub output for MCR aware schema
		// TODO: MCR: emit content addresses instead of text ids, see T174031, T199121
		$rev = $this->getRevisionStore()->newRevisionFromRow( $row, 0, $this->currentTitle );
		$slot = $rev->getSlot( 'main' );

		// Note that this is currently the ONLY reason we have a BlobStore here at all.
		// When removing this line, check whether the BlobStore has become unused.
		$textId = $this->getBlobStore()->getTextIdFromAddress( $slot->getAddress() );
		$out .= " " . Xml::element( 'text',
			[ 'id' => $textId, 'bytes' => intval( $slot->getSize() ) ],
			"" ) . "\n";
	}

	// The hash is only written when it is present and the text is not hidden;
	// otherwise an empty <sha1/> element is emitted.
	if ( isset( $row->rev_sha1 )
		&& $row->rev_sha1
		&& !( $deleted & Revision::DELETED_TEXT )
	) {
		$out .= " " . Xml::element( 'sha1', null, strval( $row->rev_sha1 ) ) . "\n";
	} else {
		$out .= " <sha1/>\n";
	}

	// Avoid PHP 7.1 warning from passing $this by reference
	$writer = $this;
	Hooks::run( 'XmlDumpWriterWriteRevision', [ &$writer, &$out, $row, $text ] );

	$out .= " </revision>\n";

	return $out;
}
378
/**
 * Dumps a "<logitem>" section on the output stream, with data filled in
 * from the given database row.
 *
 * User, comment and action details are each replaced by a
 * deleted="deleted" placeholder element when the corresponding
 * LogPage::DELETED_* bit is set in $row->log_deleted.
 *
 * @param object $row Log row. Reads log_id, log_timestamp, log_deleted,
 *   log_user, user_name, log_type, log_action, log_namespace, log_title
 *   and log_params; the comment is looked up through CommentStore.
 * @return string
 */
function writeLogItem( $row ) {
	$hidden = [ 'deleted' => 'deleted' ];

	$xml = " <logitem>\n";
	$xml .= " " . Xml::element( 'id', null, strval( $row->log_id ) ) . "\n";
	$xml .= $this->writeTimestamp( $row->log_timestamp, " " );

	$xml .= ( $row->log_deleted & LogPage::DELETED_USER )
		? " " . Xml::element( 'contributor', $hidden ) . "\n"
		: $this->writeContributor( $row->log_user, $row->user_name, " " );

	if ( $row->log_deleted & LogPage::DELETED_COMMENT ) {
		$xml .= " " . Xml::element( 'comment', $hidden ) . "\n";
	} else {
		$comment = CommentStore::getStore()->getComment( 'log_comment', $row )->text;
		// An empty comment produces no element at all.
		if ( $comment != '' ) {
			$xml .= " " . Xml::elementClean( 'comment', null, strval( $comment ) ) . "\n";
		}
	}

	$xml .= " " . Xml::element( 'type', null, strval( $row->log_type ) ) . "\n";
	$xml .= " " . Xml::element( 'action', null, strval( $row->log_action ) ) . "\n";

	if ( $row->log_deleted & LogPage::DELETED_ACTION ) {
		$xml .= " " . Xml::element( 'text', $hidden ) . "\n";
	} else {
		$target = Title::makeTitle( $row->log_namespace, $row->log_title );
		$targetText = self::canonicalTitle( $target );
		$xml .= " " . Xml::elementClean( 'logtitle', null, $targetText ) . "\n";

		$paramAttribs = [ 'xml:space' => 'preserve' ];
		$xml .= " " . Xml::elementClean( 'params', $paramAttribs, strval( $row->log_params ) ) . "\n";
	}

	return $xml . " </logitem>\n";
}
425
/**
 * Builds a <timestamp> element line holding the given timestamp converted
 * to TS_ISO_8601 format.
 *
 * @param mixed $timestamp Timestamp in any form wfTimestamp() accepts
 * @param string $indent Text placed in front of the element
 * @return string
 */
function writeTimestamp( $timestamp, $indent = " " ) {
	$element = Xml::element( 'timestamp', null, wfTimestamp( TS_ISO_8601, $timestamp ) );

	return $indent . $element . "\n";
}
435
/**
 * Builds a <contributor> section.
 *
 * A contributor with a falsy id whose name is a valid IP address is written
 * as a single <ip> element; every other contributor is written as
 * <username> followed by <id>.
 *
 * @param int|string $id User id
 * @param string $text User name or IP address
 * @param string $indent Text placed in front of each line of the section
 * @return string
 */
function writeContributor( $id, $text, $indent = " " ) {
	$childIndent = $indent . " ";

	// IP::isValid() is only consulted when there is no user id.
	$isBareIp = !$id && IP::isValid( $text );

	if ( $isBareIp ) {
		$children = $childIndent . Xml::elementClean( 'ip', null, strval( $text ) ) . "\n";
	} else {
		$children = $childIndent . Xml::elementClean( 'username', null, strval( $text ) ) . "\n";
		$children .= $childIndent . Xml::element( 'id', null, strval( $id ) ) . "\n";
	}

	return $indent . "<contributor>\n" . $children . $indent . "</contributor>\n";
}
453
/**
 * Builds the <upload> sections for a page in the file namespace: one per
 * entry of the file's history (in reversed getHistory() order), followed by
 * one for the current file.
 *
 * Warning! This data is potentially inconsistent.
 *
 * @param object $row Row providing page_namespace and page_title
 * @param bool $dumpContents Passed through to writeUpload()
 * @return string Empty string when the page is not in NS_FILE or the local
 *   file is missing
 */
function writeUploads( $row, $dumpContents = false ) {
	if ( $row->page_namespace != NS_FILE ) {
		return '';
	}

	$img = wfLocalFile( $row->page_title );
	if ( !$img || !$img->exists() ) {
		return '';
	}

	$uploads = '';
	foreach ( array_reverse( $img->getHistory() ) as $oldVersion ) {
		$uploads .= $this->writeUpload( $oldVersion, $dumpContents );
	}

	return $uploads . $this->writeUpload( $img, $dumpContents );
}
474
/**
 * Builds a single <upload> section for the given file version.
 *
 * @param object $file File object; the methods called on it are isOld(),
 *   getArchiveName(), getRepo(), getPath(), isDeleted(), getDescription(),
 *   getTimestamp(), getUser(), getName(), getCanonicalUrl(), getSize(),
 *   getSha1() and getRel()
 * @param bool $dumpContents Whether to embed the file contents, base64-encoded
 * @return string
 */
function writeUpload( $file, $dumpContents = false ) {
	// <archivename> is only present for old versions of the file.
	$archiveName = '';
	if ( $file->isOld() ) {
		$archiveName = " " .
			Xml::element( 'archivename', null, $file->getArchiveName() ) . "\n";
	}

	$contents = '';
	if ( $dumpContents ) {
		$backend = $file->getRepo()->getBackend();
		# Dump file as base64
		# Uses only XML-safe characters, so does not need escaping
		# @todo Too bad this loads the contents into memory (script might swap)
		$raw = $backend->getFileContents( [ 'src' => $file->getPath() ] );
		$contents = ' <contents encoding="base64">' .
			chunk_split( base64_encode( $raw ) ) .
			" </contents>\n";
	}

	$comment = $file->isDeleted( File::DELETED_COMMENT )
		? Xml::element( 'comment', [ 'deleted' => 'deleted' ] )
		: Xml::elementClean( 'comment', null, strval( $file->getDescription() ) );

	// Pieces are listed in output order and joined without a separator.
	$pieces = [
		" <upload>\n",
		$this->writeTimestamp( $file->getTimestamp() ),
		$this->writeContributor( $file->getUser( 'id' ), $file->getUser( 'text' ) ),
		" " . $comment . "\n",
		" " . Xml::element( 'filename', null, $file->getName() ) . "\n",
		$archiveName,
		" " . Xml::element( 'src', null, $file->getCanonicalUrl() ) . "\n",
		" " . Xml::element( 'size', null, $file->getSize() ) . "\n",
		" " . Xml::element( 'sha1base36', null, $file->getSha1() ) . "\n",
		" " . Xml::element( 'rel', null, $file->getRel() ) . "\n",
		$contents,
		" </upload>\n",
	];

	return implode( '', $pieces );
}
517
/**
 * Return prefixed text form of title, but using the content language's
 * canonical namespace.
 *
 * External titles are returned via getPrefixedText() unchanged.
 *
 * @param Title $title
 * @return string
 */
public static function canonicalTitle( Title $title ) {
	if ( $title->isExternal() ) {
		return $title->getPrefixedText();
	}

	$contentLanguage = MediaWikiServices::getInstance()->getContentLanguage();
	$nsText = $contentLanguage->getFormattedNsText( $title->getNamespace() );

	// @todo Emit some kind of warning to the user if $title->getNamespace() !==
	// NS_MAIN and $nsText === '' (viz. pages in an unregistered namespace)

	// Only a non-empty namespace name gets the ':' separator.
	$prefix = ( $nsText !== '' ) ? $nsText . ':' : $nsText;

	return $prefix . $title->getText();
}
545}
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
to move a page</td >< td > &*You are moving the page across namespaces
$wgCapitalLinks
Set this to false to avoid forcing the first letter of links to capitals.
$wgSitename
Name of the site.
$wgVersion
MediaWiki version number.
wfLocalFile( $title)
Get an object referring to a locally registered file.
wfLogWarning( $msg, $callerOffset=1, $level=E_USER_WARNING)
Send a warning as a PHP error and the debug log.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
const DELETED_COMMENT
Definition File.php:55
const DELETED_USER
Definition LogPage.php:36
const DELETED_COMMENT
Definition LogPage.php:35
const DELETED_ACTION
Definition LogPage.php:34
MediaWiki exception.
MediaWikiServices is the service locator for the application scope of MediaWiki.
Service for looking up page revisions.
Service for storing and loading Content objects.
static getRevisionText( $row, $prefix='old_', $wiki=false)
Get revision text associated with an old or archive row.
const DELETED_USER
Definition Revision.php:48
const DELETED_TEXT
Definition Revision.php:46
const DELETED_COMMENT
Definition Revision.php:47
Content object implementation for representing flat text.
Represents a title within MediaWiki.
Definition Title.php:40
isValidRedirectTarget()
Check if this Title is a valid redirect target.
Definition Title.php:4376
closeStream()
Closes the output stream with the closing root element.
static string[] $supportedSchemas
the schema versions supported for output @final
static canonicalTitle(Title $title)
Return prefixed text form of title, but using the content language's canonical namespace.
writeUpload( $file, $dumpContents=false)
writeLogItem( $row)
Dumps a "<logitem>" section on the output stream, with data filled in from the given database row.
writeTimestamp( $timestamp, $indent=" ")
writeUploads( $row, $dumpContents=false)
Warning! This data is potentially inconsistent.
closePage()
Closes a "<page>" section on the output stream.
openStream()
Opens the XML output stream's root "<mediawiki>" element.
openPage( $row)
Opens a "<page>" section on the output stream, with data from the given database row.
Title null $currentTitle
Title of the currently processed page.
writeRevision( $row)
Dumps a "<revision>" section on the output stream, with data filled in from the given database row.
writeContributor( $id, $text, $indent=" ")
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
const NS_FILE
Definition Defines.php:79
const XML_DUMP_SCHEMA_VERSION_10
Definition Defines.php:337
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not it can be in the form of< username >< more info > e g for bot passwords intended to be added to log contexts Fields it might only if the login was with a bot password it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output $out
Definition hooks.txt:855
namespace and then decline to actually register it file or subcat img or subcat $title
Definition hooks.txt:955
presenting them properly to the user as errors is done by the caller return true use this to change the list i e etc $rev
Definition hooks.txt:1779
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition injection.txt:37
controlled by the following MediaWiki still creates a BagOStuff but calls it to it are no ops If the cache daemon can t be it should also disable itself fairly $wgDBname
$content
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Definition router.php:42