MediaWiki  1.32.0
XmlDumpWriter.php
Go to the documentation of this file.
1 <?php
27 
/**
 * Opens the XML output stream's root "<mediawiki>" element.
 *
 * The returned string is the root element markup, a newline, and the
 * "<siteinfo>" block produced by siteInfo(). The matching closing tag is
 * emitted separately by closeStream().
 *
 * @return string
 */
function openStream() {
	// The schema version feeds the namespace URI, the schema location and the
	// "version" attribute below; it must be resolved before building them.
	$ver = WikiExporter::schemaVersion();
	return Xml::element( 'mediawiki', [
		'xmlns' => "http://www.mediawiki.org/xml/export-$ver/",
		'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance",
		/*
		 * When a new version of the schema is created, it needs staging on mediawiki.org.
		 * This requires a change in the operations/mediawiki-config git repo.
		 *
		 * Create a changeset like https://gerrit.wikimedia.org/r/#/c/149643/ in which
		 * you copy in the new xsd file.
		 *
		 * After it is reviewed, merged and deployed (sync-docroot), the index.html needs purging.
		 * echo "https://www.mediawiki.org/xml/index.html" | mwscript purgeList.php --wiki=aawiki
		 */
		'xsi:schemaLocation' => "http://www.mediawiki.org/xml/export-$ver/ " .
			"http://www.mediawiki.org/xml/export-$ver.xsd",
		'version' => $ver,
		'xml:lang' => MediaWikiServices::getInstance()->getContentLanguage()->getHtmlCode() ],
		// NOTE(review): null contents presumably makes Xml::element() emit only
		// the opening tag -- confirm against Xml::element().
		null ) .
		"\n" .
		$this->siteInfo();
}
65 
/**
 * Builds the "<siteinfo>" block of the dump header.
 *
 * The child elements are generated in a fixed order (sitename, dbname,
 * base, generator, case, namespaces) and joined one per line.
 *
 * @return string
 */
function siteInfo() {
	$parts = [];
	$parts[] = $this->sitename();
	$parts[] = $this->dbname();
	$parts[] = $this->homelink();
	$parts[] = $this->generator();
	$parts[] = $this->caseSetting();
	$parts[] = $this->namespaces();

	$body = implode( "\n ", $parts );

	return " <siteinfo>\n " . $body . "\n </siteinfo>\n";
}
81 
/**
 * Builds the "<sitename>" element from the $wgSitename global.
 *
 * @return string
 */
function sitename() {
	global $wgSitename;

	$attribs = [];
	$element = Xml::element( 'sitename', $attribs, $wgSitename );

	return $element;
}
89 
/**
 * Builds the "<dbname>" element from the $wgDBname global.
 *
 * @return string
 */
function dbname() {
	global $wgDBname;

	$attribs = [];
	$element = Xml::element( 'dbname', $attribs, $wgDBname );

	return $element;
}
97 
/**
 * Builds the "<generator>" element, naming the software and its version
 * (taken from the $wgVersion global).
 *
 * @return string
 */
function generator() {
	global $wgVersion;

	$label = "MediaWiki {$wgVersion}";

	return Xml::element( 'generator', [], $label );
}
105 
/**
 * Builds the "<base>" element holding the canonical URL of the main page.
 *
 * @return string
 */
function homelink() {
	$mainPage = Title::newMainPage();
	$url = $mainPage->getCanonicalURL();

	return Xml::element( 'base', [], $url );
}
112 
/**
 * Builds the "<case>" element describing the site-wide title case
 * handling, derived from the $wgCapitalLinks global.
 *
 * @return string
 */
function caseSetting() {
	global $wgCapitalLinks;

	// A "case-insensitive" value is reserved for future use.
	if ( $wgCapitalLinks ) {
		$mode = 'first-letter';
	} else {
		$mode = 'case-sensitive';
	}

	return Xml::element( 'case', [], $mode );
}
122 
/**
 * Builds the "<namespaces>" block: one "<namespace>" element per formatted
 * namespace of the content language, carrying the namespace key and its
 * case handling as attributes and the namespace name as text.
 *
 * @return string
 */
function namespaces() {
	$contLang = MediaWikiServices::getInstance()->getContentLanguage();

	// Collect one line per element; the lines are joined with "\n" at the end.
	$lines = [ "<namespaces>" ];
	foreach ( $contLang->getFormattedNamespaces() as $key => $name ) {
		$caseMode = MWNamespace::isCapitalized( $key ) ? 'first-letter' : 'case-sensitive';
		$attribs = [
			'key' => $key,
			'case' => $caseMode,
		];
		$lines[] = ' ' . Xml::element( 'namespace', $attribs, $name );
	}
	$lines[] = " </namespaces>";

	return implode( "\n", $lines );
}
142 
/**
 * Closes the output stream with the closing root element.
 *
 * @return string
 */
function closeStream() {
	$closingTag = "</mediawiki>\n";

	return $closingTag;
}
152 
/**
 * Opens a "<page>" section on the output stream, with data from the given
 * database row.
 *
 * Emits the title, namespace and page id; a "<redirect>" element when the
 * row is flagged as a redirect and its target is a valid redirect target;
 * and a "<restrictions>" element when the row carries restrictions. The
 * 'XmlDumpWriterOpenPage' hook is run last and receives the output by
 * reference.
 *
 * @param object $row Row with page_namespace, page_title, page_id,
 *   page_is_redirect and page_restrictions fields
 * @return string
 */
public function openPage( $row ) {
	$lead = ' ';
	$title = Title::makeTitle( $row->page_namespace, $row->page_title );

	$out = " <page>\n";
	$out .= $lead . Xml::elementClean( 'title', [], self::canonicalTitle( $title ) ) . "\n";
	$out .= $lead . Xml::element( 'ns', [], strval( $row->page_namespace ) ) . "\n";
	$out .= $lead . Xml::element( 'id', [], strval( $row->page_id ) ) . "\n";

	if ( $row->page_is_redirect ) {
		$target = WikiPage::factory( $title )->getRedirectTarget();
		if ( $target instanceof Title && $target->isValidRedirectTarget() ) {
			$redirectAttribs = [ 'title' => self::canonicalTitle( $target ) ];
			$out .= $lead . Xml::element( 'redirect', $redirectAttribs ) . "\n";
		}
	}

	if ( $row->page_restrictions != '' ) {
		$restrictions = strval( $row->page_restrictions );
		$out .= $lead . Xml::element( 'restrictions', [], $restrictions ) . "\n";
	}

	// $out is handed over by reference, so hook handlers are able to alter it.
	Hooks::run( 'XmlDumpWriterOpenPage', [ $this, &$out, $row, $title ] );

	return $out;
}
185 
/**
 * Closes a "<page>" section on the output stream.
 *
 * @return string
 */
function closePage() {
	$closingTag = " </page>\n";

	return $closingTag;
}
195 
/**
 * Dumps a "<revision>" section on the output stream, with data filled in
 * from the given database row.
 *
 * Contributor, comment and text are replaced by a deleted="deleted" marker
 * element when the corresponding bit of rev_deleted is set. When the row
 * has no old_text field, a stub "<text>" element carrying the text id is
 * written instead of the content. The 'XmlDumpWriterWriteRevision' hook is
 * run before the section is closed and receives the output by reference.
 *
 * @param object $row Revision row, joined with the page fields
 *   (page_namespace, page_title) used for the content model fallback
 * @return string
 */
function writeRevision( $row ) {
	// Read the visibility bitfield once, guarded, so that every check below
	// (including the sha1 one) behaves the same when the row lacks the field.
	$deleted = isset( $row->rev_deleted ) ? intval( $row->rev_deleted ) : 0;

	$out = " <revision>\n";
	$out .= " " . Xml::element( 'id', null, strval( $row->rev_id ) ) . "\n";
	if ( isset( $row->rev_parent_id ) && $row->rev_parent_id ) {
		$out .= " " . Xml::element( 'parentid', null, strval( $row->rev_parent_id ) ) . "\n";
	}

	$out .= $this->writeTimestamp( $row->rev_timestamp );

	if ( $deleted & Revision::DELETED_USER ) {
		$out .= " " . Xml::element( 'contributor', [ 'deleted' => 'deleted' ] ) . "\n";
	} else {
		$out .= $this->writeContributor( $row->rev_user, $row->rev_user_text );
	}

	if ( isset( $row->rev_minor_edit ) && $row->rev_minor_edit ) {
		$out .= " <minor/>\n";
	}
	if ( $deleted & Revision::DELETED_COMMENT ) {
		$out .= " " . Xml::element( 'comment', [ 'deleted' => 'deleted' ] ) . "\n";
	} else {
		$comment = CommentStore::getStore()->getComment( 'rev_comment', $row )->text;
		if ( $comment != '' ) {
			$out .= " " . Xml::elementClean( 'comment', [], strval( $comment ) ) . "\n";
		}
	}

	if ( isset( $row->rev_content_model ) && !is_null( $row->rev_content_model ) ) {
		$content_model = strval( $row->rev_content_model );
	} else {
		// probably using $wgContentHandlerUseDB = false;
		$title = Title::makeTitle( $row->page_namespace, $row->page_title );
		$content_model = ContentHandler::getDefaultModelFor( $title );
	}

	$content_handler = ContentHandler::getForModelID( $content_model );

	if ( isset( $row->rev_content_format ) && !is_null( $row->rev_content_format ) ) {
		$content_format = strval( $row->rev_content_format );
	} else {
		// probably using $wgContentHandlerUseDB = false;
		$content_format = $content_handler->getDefaultFormat();
	}

	$out .= " " . Xml::element( 'model', null, strval( $content_model ) ) . "\n";
	$out .= " " . Xml::element( 'format', null, strval( $content_format ) ) . "\n";

	// $text stays empty for deleted and stub revisions; it is passed to the hook below.
	$text = '';
	if ( $deleted & Revision::DELETED_TEXT ) {
		$out .= " " . Xml::element( 'text', [ 'deleted' => 'deleted' ] ) . "\n";
	} elseif ( isset( $row->old_text ) ) {
		// Raw text from the database may have invalid chars
		$text = strval( Revision::getRevisionText( $row ) );
		$text = $content_handler->exportTransform( $text, $content_format );
		$out .= " " . Xml::elementClean( 'text',
			[ 'xml:space' => 'preserve', 'bytes' => intval( $row->rev_len ) ],
			strval( $text ) ) . "\n";
	} else {
		// Stub output
		$out .= " " . Xml::element( 'text',
			[ 'id' => $row->rev_text_id, 'bytes' => intval( $row->rev_len ) ],
			"" ) . "\n";
	}

	// The hash is only exposed when the text itself is not suppressed.
	if ( isset( $row->rev_sha1 )
		&& $row->rev_sha1
		&& !( $deleted & Revision::DELETED_TEXT )
	) {
		$out .= " " . Xml::element( 'sha1', null, strval( $row->rev_sha1 ) ) . "\n";
	} else {
		$out .= " <sha1/>\n";
	}

	// Avoid PHP 7.1 warning from passing $this by reference
	$writer = $this;
	Hooks::run( 'XmlDumpWriterWriteRevision', [ &$writer, &$out, $row, $text ] );

	$out .= " </revision>\n";

	return $out;
}
285 
/**
 * Dumps a "<logitem>" section on the output stream, with data filled in
 * from the given database row.
 *
 * Contributor, comment and action details are replaced by a
 * deleted="deleted" marker element when the corresponding bit of
 * log_deleted is set.
 *
 * @param object $row Logging table row
 * @return string
 */
function writeLogItem( $row ) {
	$indent = " ";
	$deletedMarker = [ 'deleted' => 'deleted' ];

	$out = " <logitem>\n";
	$out .= $indent . Xml::element( 'id', null, strval( $row->log_id ) ) . "\n";
	$out .= $this->writeTimestamp( $row->log_timestamp, $indent );

	// Contributor
	if ( $row->log_deleted & LogPage::DELETED_USER ) {
		$out .= $indent . Xml::element( 'contributor', $deletedMarker ) . "\n";
	} else {
		$out .= $this->writeContributor( $row->log_user, $row->user_name, $indent );
	}

	// Comment; omitted entirely when visible but empty
	if ( $row->log_deleted & LogPage::DELETED_COMMENT ) {
		$out .= $indent . Xml::element( 'comment', $deletedMarker ) . "\n";
	} else {
		$commentText = CommentStore::getStore()->getComment( 'log_comment', $row )->text;
		if ( $commentText != '' ) {
			$out .= $indent . Xml::elementClean( 'comment', null, strval( $commentText ) ) . "\n";
		}
	}

	$out .= $indent . Xml::element( 'type', null, strval( $row->log_type ) ) . "\n";
	$out .= $indent . Xml::element( 'action', null, strval( $row->log_action ) ) . "\n";

	// Target title and parameters
	if ( $row->log_deleted & LogPage::DELETED_ACTION ) {
		$out .= $indent . Xml::element( 'text', $deletedMarker ) . "\n";
	} else {
		$target = Title::makeTitle( $row->log_namespace, $row->log_title );
		$out .= $indent . Xml::elementClean( 'logtitle', null, self::canonicalTitle( $target ) ) . "\n";
		$paramAttribs = [ 'xml:space' => 'preserve' ];
		$out .= $indent . Xml::elementClean( 'params', $paramAttribs, strval( $row->log_params ) ) . "\n";
	}

	$out .= " </logitem>\n";

	return $out;
}
332 
/**
 * Builds a "<timestamp>" line: the indent, the element holding the
 * timestamp converted to TS_ISO_8601 format, and a trailing newline.
 *
 * @param string $timestamp Timestamp in any format accepted by wfTimestamp()
 * @param string $indent Text placed before the element
 * @return string
 */
function writeTimestamp( $timestamp, $indent = " " ) {
	$iso = wfTimestamp( TS_ISO_8601, $timestamp );
	$element = Xml::element( 'timestamp', null, $iso );

	return "{$indent}{$element}\n";
}
342 
/**
 * Builds a "<contributor>" block.
 *
 * A contributor without a user id whose name is a valid IP address is
 * written as a single "<ip>" element; anyone else is written as
 * "<username>" plus "<id>".
 *
 * @param int $id User id (falsy when there is none)
 * @param string $text User name or IP address
 * @param string $indent Text placed before each line of the block
 * @return string
 */
function writeContributor( $id, $text, $indent = " " ) {
	$inner = $indent . " ";
	$isBareIp = !$id && IP::isValid( $text );

	$lines = $indent . "<contributor>\n";
	if ( $isBareIp ) {
		$lines .= $inner . Xml::elementClean( 'ip', null, strval( $text ) ) . "\n";
	} else {
		$lines .= $inner . Xml::elementClean( 'username', null, strval( $text ) ) . "\n";
		$lines .= $inner . Xml::element( 'id', null, strval( $id ) ) . "\n";
	}
	$lines .= $indent . "</contributor>\n";

	return $lines;
}
360 
/**
 * Builds the "<upload>" sections for a page in the file namespace: one per
 * entry of the file's history (in reversed getHistory() order), followed
 * by one for the current version. Returns an empty string for pages
 * outside NS_FILE or when the local file does not exist.
 *
 * @param object $row Page row with page_namespace and page_title
 * @param bool $dumpContents Passed through to writeUpload()
 * @return string
 */
function writeUploads( $row, $dumpContents = false ) {
	if ( $row->page_namespace != NS_FILE ) {
		return '';
	}

	$current = wfLocalFile( $row->page_title );
	if ( !$current || !$current->exists() ) {
		return '';
	}

	$sections = '';
	$history = array_reverse( $current->getHistory() );
	foreach ( $history as $oldVersion ) {
		$sections .= $this->writeUpload( $oldVersion, $dumpContents );
	}
	$sections .= $this->writeUpload( $current, $dumpContents );

	return $sections;
}
381 
/**
 * Builds a single "<upload>" section for one version of a file.
 *
 * An "<archivename>" element is included for old versions, and the
 * base64-encoded file contents are included when $dumpContents is set.
 * The comment is replaced by a deleted="deleted" marker element when the
 * file reports File::DELETED_COMMENT.
 *
 * @param File $file
 * @param bool $dumpContents Whether to embed the file contents
 * @return string
 */
function writeUpload( $file, $dumpContents = false ) {
	$archiveLine = '';
	if ( $file->isOld() ) {
		$archiveLine = " " . Xml::element( 'archivename', null, $file->getArchiveName() ) . "\n";
	}

	$contentsBlock = '';
	if ( $dumpContents ) {
		$backend = $file->getRepo()->getBackend();
		// Dump file as base64
		// Uses only XML-safe characters, so does not need escaping
		// @todo Too bad this loads the contents into memory (script might swap)
		$raw = $backend->getFileContents( [ 'src' => $file->getPath() ] );
		$contentsBlock = ' <contents encoding="base64">' .
			chunk_split( base64_encode( $raw ) ) .
			" </contents>\n";
	}

	$commentElement = $file->isDeleted( File::DELETED_COMMENT )
		? Xml::element( 'comment', [ 'deleted' => 'deleted' ] )
		: Xml::elementClean( 'comment', null, strval( $file->getDescription() ) );

	// Pieces are evaluated and concatenated in output order.
	$pieces = [
		" <upload>\n",
		$this->writeTimestamp( $file->getTimestamp() ),
		$this->writeContributor( $file->getUser( 'id' ), $file->getUser( 'text' ) ),
		" " . $commentElement . "\n",
		" " . Xml::element( 'filename', null, $file->getName() ) . "\n",
		$archiveLine,
		" " . Xml::element( 'src', null, $file->getCanonicalUrl() ) . "\n",
		" " . Xml::element( 'size', null, $file->getSize() ) . "\n",
		" " . Xml::element( 'sha1base36', null, $file->getSha1() ) . "\n",
		" " . Xml::element( 'rel', null, $file->getRel() ) . "\n",
		$contentsBlock,
		" </upload>\n",
	];

	return implode( '', $pieces );
}
424 
/**
 * Return prefixed text form of title, but using the content language's
 * canonical namespace.
 *
 * External titles are returned via getPrefixedText() unchanged; for
 * all others the content language's formatted namespace text is used as
 * the prefix, followed by ":" when that text is not empty.
 *
 * @param Title $title
 * @return string
 */
public static function canonicalTitle( Title $title ) {
	if ( $title->isExternal() ) {
		return $title->getPrefixedText();
	}

	$contLang = MediaWikiServices::getInstance()->getContentLanguage();
	$nsText = $contLang->getFormattedNsText( $title->getNamespace() );

	// @todo Emit some kind of warning to the user if $title->getNamespace() !==
	// NS_MAIN and $nsText === '' (viz. pages in an unregistered namespace)

	$prefix = ( $nsText !== '' ) ? $nsText . ':' : $nsText;

	return $prefix . $title->getText();
}
452 }
XmlDumpWriter\openStream
openStream()
Opens the XML output stream's root "<mediawiki>" element.
Definition: XmlDumpWriter.php:42
Revision\DELETED_USER
const DELETED_USER
Definition: Revision.php:49
WikiExporter\schemaVersion
static schemaVersion()
Returns the export schema version.
Definition: WikiExporter.php:70
ContentHandler\getForModelID
static getForModelID( $modelId)
Returns the ContentHandler singleton for the given model ID.
Definition: ContentHandler.php:297
Revision\DELETED_COMMENT
const DELETED_COMMENT
Definition: Revision.php:48
$wgDBname
controlled by the following MediaWiki still creates a BagOStuff but calls it to it are no ops If the cache daemon can t be it should also disable itself fairly $wgDBname
Definition: memcached.txt:93
XmlDumpWriter\openPage
openPage( $row)
Opens a "<page>" section on the output stream, with data from the given database row.
Definition: XmlDumpWriter.php:160
XmlDumpWriter\dbname
dbname()
Definition: XmlDumpWriter.php:93
XmlDumpWriter\siteInfo
siteInfo()
Definition: XmlDumpWriter.php:69
Title\newMainPage
static newMainPage()
Create a new Title for the Main Page.
Definition: Title.php:597
wfTimestamp
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Definition: GlobalFunctions.php:1954
$wgVersion
$wgVersion
MediaWiki version number.
Definition: DefaultSettings.php:74
XmlDumpWriter\homelink
homelink()
Definition: XmlDumpWriter.php:109
NS_FILE
const NS_FILE
Definition: Defines.php:70
Revision\getRevisionText
static getRevisionText( $row, $prefix='old_', $wiki=false)
Get revision text associated with an old or archive row.
Definition: Revision.php:1050
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
XmlDumpWriter\writeLogItem
writeLogItem( $row)
Dumps a "<logitem>" section on the output stream, with data filled in from the given database row.
Definition: XmlDumpWriter.php:294
Xml\elementClean
static elementClean( $element, $attribs=[], $contents='')
Format an XML element as with self::element(), but run text through the content language's normalize(...
Definition: Xml.php:92
ContentHandler\getDefaultModelFor
static getDefaultModelFor(Title $title)
Returns the name of the default content model to be used for the page with the given title.
Definition: ContentHandler.php:182
$title
namespace and then decline to actually register it file or subcat img or subcat $title
Definition: hooks.txt:964
File\DELETED_COMMENT
const DELETED_COMMENT
Definition: File.php:54
WikiPage\factory
static factory(Title $title)
Create a WikiPage object of the appropriate class for the given title.
Definition: WikiPage.php:127
LogPage\DELETED_COMMENT
const DELETED_COMMENT
Definition: LogPage.php:35
XmlDumpWriter\canonicalTitle
static canonicalTitle(Title $title)
Return prefixed text form of title, but using the content language's canonical namespace.
Definition: XmlDumpWriter.php:435
LogPage\DELETED_USER
const DELETED_USER
Definition: LogPage.php:36
XmlDumpWriter\writeContributor
writeContributor( $id, $text, $indent=" ")
Definition: XmlDumpWriter.php:349
Title\isValidRedirectTarget
isValidRedirectTarget()
Check if this Title is a valid redirect target.
Definition: Title.php:5014
Xml\element
static element( $element, $attribs=null, $contents='', $allowShortTag=true)
Format an XML element with given attributes and, optionally, text content.
Definition: Xml.php:41
use
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to use
Definition: MIT-LICENSE.txt:10
Title\makeTitle
static makeTitle( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:545
XmlDumpWriter\namespaces
namespaces()
Definition: XmlDumpWriter.php:126
XmlDumpWriter\closeStream
closeStream()
Closes the output stream with the closing root element.
Definition: XmlDumpWriter.php:149
LogPage\DELETED_ACTION
const DELETED_ACTION
Definition: LogPage.php:34
XmlDumpWriter\caseSetting
caseSetting()
Definition: XmlDumpWriter.php:116
$wgSitename
$wgSitename
Name of the site.
Definition: DefaultSettings.php:79
XmlDumpWriter\writeUpload
writeUpload( $file, $dumpContents=false)
Definition: XmlDumpWriter.php:387
XmlDumpWriter\writeTimestamp
writeTimestamp( $timestamp, $indent=" ")
Definition: XmlDumpWriter.php:338
IP\isValid
static isValid( $ip)
Validate an IP address.
Definition: IP.php:111
XmlDumpWriter\sitename
sitename()
Definition: XmlDumpWriter.php:85
XmlDumpWriter\writeRevision
writeRevision( $row)
Dumps a "<revision>" section on the output stream, with data filled in from the given database row.
Definition: XmlDumpWriter.php:204
Title
Represents a title within MediaWiki.
Definition: Title.php:39
$wgCapitalLinks
$wgCapitalLinks
Set this to false to avoid forcing the first letter of links to capitals.
Definition: DefaultSettings.php:4030
XmlDumpWriter
Definition: XmlDumpWriter.php:31
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
XmlDumpWriter\generator
generator()
Definition: XmlDumpWriter.php:101
MWNamespace\isCapitalized
static isCapitalized( $index)
Is the namespace first-letter capitalized?
Definition: MWNamespace.php:417
MediaWikiServices
MediaWikiServices acts as the top-level factory for services in MediaWiki. It can be used to gain access to default instances of various services, and it also allows new services to be defined and default services to be redefined (see injection.txt, the overview of how MediaWiki makes use of dependency injection).
Definition: injection.txt:23
CommentStore\getStore
static getStore()
Definition: CommentStore.php:125
wfLocalFile
wfLocalFile( $title)
Get an object referring to a locally registered file.
Definition: GlobalFunctions.php:2745
Hooks\run
static run( $event, array $args=[], $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:200
Revision\DELETED_TEXT
const DELETED_TEXT
Definition: Revision.php:47
XmlDumpWriter\closePage
closePage()
Closes a "<page>" section on the output stream.
Definition: XmlDumpWriter.php:192
XmlDumpWriter\writeUploads
writeUploads( $row, $dumpContents=false)
Warning! This data is potentially inconsistent.
Definition: XmlDumpWriter.php:367
$out
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output $out
Definition: hooks.txt:813