MediaWiki  master
XmlDumpWriter.php
Go to the documentation of this file.
1 <?php
/**
 * Opens the XML output stream's root "<mediawiki>" element and appends
 * the site information block produced by siteInfo().
 *
 * The export schema version is interpolated into the namespace URI, the
 * schema location and the "version" attribute.
 *
 * @return string
 */
function openStream() {
	// Both names were used below without ever being brought into scope,
	// which made them undefined locals.
	global $wgContLang;
	// NOTE(review): schemaVersion() is assumed to live on WikiExporter — confirm.
	$ver = WikiExporter::schemaVersion();
	return Xml::element( 'mediawiki', [
		'xmlns' => "http://www.mediawiki.org/xml/export-$ver/",
		'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance",
		/*
		 * When a new version of the schema is created, it needs staging on mediawiki.org.
		 * This requires a change in the operations/mediawiki-config git repo.
		 *
		 * Create a changeset like https://gerrit.wikimedia.org/r/#/c/149643/ in which
		 * you copy in the new xsd file.
		 *
		 * After it is reviewed, merged and deployed (sync-docroot), the index.html needs purging.
		 * echo "https://www.mediawiki.org/xml/index.html" | mwscript purgeList.php --wiki=aawiki
		 */
		'xsi:schemaLocation' => "http://www.mediawiki.org/xml/export-$ver/ " .
			"http://www.mediawiki.org/xml/export-$ver.xsd",
		'version' => $ver,
		'xml:lang' => $wgContLang->getHtmlCode() ],
		// null contents: presumably only the opening tag is emitted, since
		// closeStream() supplies the matching "</mediawiki>" — confirm.
		null ) .
		"\n" .
		$this->siteInfo();
}
64 
/**
 * Builds the "<siteinfo>" block: site name, database name, base URL,
 * generator, case setting and namespace list, one per line.
 *
 * @return string
 */
function siteInfo() {
	$lines = [];
	$lines[] = $this->sitename();
	$lines[] = $this->dbname();
	$lines[] = $this->homelink();
	$lines[] = $this->generator();
	$lines[] = $this->caseSetting();
	$lines[] = $this->namespaces();

	$body = implode( "\n ", $lines );

	return " <siteinfo>\n " . $body . "\n </siteinfo>\n";
}
80 
/**
 * Wraps the configured site name in a "<sitename>" element.
 *
 * @return string
 */
function sitename() {
	// Without this declaration $wgSitename is an undefined local.
	global $wgSitename;
	return Xml::element( 'sitename', [], $wgSitename );
}
88 
/**
 * Wraps the configured database name in a "<dbname>" element.
 *
 * @return string
 */
function dbname() {
	// Without this declaration $wgDBname is an undefined local.
	global $wgDBname;
	return Xml::element( 'dbname', [], $wgDBname );
}
96 
/**
 * Emits a "<generator>" element of the form "MediaWiki <version>".
 *
 * @return string
 */
function generator() {
	// Without this declaration $wgVersion is an undefined local.
	global $wgVersion;
	return Xml::element( 'generator', [], "MediaWiki $wgVersion" );
}
104 
/**
 * Emits a "<base>" element holding the canonical URL of the main page.
 *
 * @return string
 */
function homelink() {
	$mainPage = Title::newMainPage();
	$url = $mainPage->getCanonicalURL();

	return Xml::element( 'base', [], $url );
}
111 
/**
 * Emits a "<case>" element describing the wiki-wide title case rule:
 * 'first-letter' when $wgCapitalLinks is truthy, 'case-sensitive' otherwise.
 *
 * @return string
 */
function caseSetting() {
	// Without this declaration $wgCapitalLinks is an undefined local and
	// the result would always be 'case-sensitive'.
	global $wgCapitalLinks;
	// "case-insensitive" option is reserved for future
	$sensitivity = $wgCapitalLinks ? 'first-letter' : 'case-sensitive';
	return Xml::element( 'case', [], $sensitivity );
}
121 
/**
 * Builds the "<namespaces>" block with one "<namespace>" element per
 * entry returned by the content language's getFormattedNamespaces().
 *
 * Each element carries the namespace index as "key" and a per-namespace
 * "case" attribute derived from MWNamespace::isCapitalized().
 *
 * @return string
 */
function namespaces() {
	// Without this declaration $wgContLang is an undefined local.
	global $wgContLang;
	$spaces = "<namespaces>\n";
	foreach ( $wgContLang->getFormattedNamespaces() as $ns => $title ) {
		$spaces .= ' ' .
			Xml::element( 'namespace',
				[
					'key' => $ns,
					'case' => MWNamespace::isCapitalized( $ns ) ? 'first-letter' : 'case-sensitive',
				], $title ) . "\n";
	}
	$spaces .= " </namespaces>";
	return $spaces;
}
139 
/**
 * Returns the closing tag of the root "<mediawiki>" element,
 * followed by a newline.
 *
 * @return string
 */
function closeStream() {
	$closingTag = "</mediawiki>\n";

	return $closingTag;
}
149 
/**
 * Opens a "<page>" section, with data taken from the given database row.
 *
 * Emits title, namespace and page id; a "<redirect>" element when the row
 * is flagged as a redirect and the target is a valid redirect target; and
 * a "<restrictions>" element when the row has a non-empty restrictions
 * value. The 'XmlDumpWriterOpenPage' hook may alter the output before it
 * is returned.
 *
 * @param object $row Row exposing page_namespace, page_title, page_id,
 *   page_is_redirect and page_restrictions
 * @return string
 */
public function openPage( $row ) {
	$title = Title::makeTitle( $row->page_namespace, $row->page_title );

	$xml = " <page>\n";
	$xml .= ' ' . Xml::elementClean( 'title', [], self::canonicalTitle( $title ) ) . "\n";
	$xml .= ' ' . Xml::element( 'ns', [], strval( $row->page_namespace ) ) . "\n";
	$xml .= ' ' . Xml::element( 'id', [], strval( $row->page_id ) ) . "\n";

	if ( $row->page_is_redirect ) {
		$target = WikiPage::factory( $title )->getRedirectTarget();
		if ( $target instanceof Title && $target->isValidRedirectTarget() ) {
			$redirectAttribs = [ 'title' => self::canonicalTitle( $target ) ];
			$xml .= ' ' . Xml::element( 'redirect', $redirectAttribs ) . "\n";
		}
	}

	if ( $row->page_restrictions != '' ) {
		$restrictions = strval( $row->page_restrictions );
		$xml .= ' ' . Xml::element( 'restrictions', [], $restrictions ) . "\n";
	}

	// The output buffer is handed over by reference so handlers can modify it.
	Hooks::run( 'XmlDumpWriterOpenPage', [ $this, &$xml, $row, $title ] );

	return $xml;
}
182 
/**
 * Returns the closing tag of a "<page>" section, followed by a newline.
 *
 * @return string
 */
function closePage() {
	$closingTag = " </page>\n";

	return $closingTag;
}
192 
/**
 * Dumps a "<revision>" section, with data filled in from the given
 * database row.
 *
 * Output, in order: id, optional parentid, timestamp, contributor,
 * optional minor flag, comment, content model, content format, text
 * (full text, a stub carrying the text id, or a "deleted" marker) and
 * sha1. The 'XmlDumpWriterWriteRevision' hook may alter the output before
 * the closing tag is appended.
 *
 * @param object $row Revision row; rev_deleted, rev_parent_id,
 *   rev_minor_edit, rev_content_model, rev_content_format, rev_sha1 and
 *   old_text are optional
 * @return string
 */
function writeRevision( $row ) {
	// Read the deletion bitfield once. The sha1 check used to read
	// $row->rev_deleted without an isset() guard, unlike every other check
	// in this method; a row without the field now counts as "nothing hidden".
	$deleted = isset( $row->rev_deleted ) ? $row->rev_deleted : 0;

	$out = " <revision>\n";
	$out .= " " . Xml::element( 'id', null, strval( $row->rev_id ) ) . "\n";
	if ( !empty( $row->rev_parent_id ) ) {
		$out .= " " . Xml::element( 'parentid', null, strval( $row->rev_parent_id ) ) . "\n";
	}

	$out .= $this->writeTimestamp( $row->rev_timestamp );

	if ( $deleted & Revision::DELETED_USER ) {
		$out .= " " . Xml::element( 'contributor', [ 'deleted' => 'deleted' ] ) . "\n";
	} else {
		$out .= $this->writeContributor( $row->rev_user, $row->rev_user_text );
	}

	if ( !empty( $row->rev_minor_edit ) ) {
		$out .= " <minor/>\n";
	}
	if ( $deleted & Revision::DELETED_COMMENT ) {
		$out .= " " . Xml::element( 'comment', [ 'deleted' => 'deleted' ] ) . "\n";
	} else {
		$comment = CommentStore::newKey( 'rev_comment' )->getComment( $row )->text;
		if ( $comment != '' ) {
			$out .= " " . Xml::elementClean( 'comment', [], strval( $comment ) ) . "\n";
		}
	}

	// isset() is already false for null, so no separate is_null() test is needed.
	if ( isset( $row->rev_content_model ) ) {
		$content_model = strval( $row->rev_content_model );
	} else {
		// probably using $wgContentHandlerUseDB = false;
		$title = Title::makeTitle( $row->page_namespace, $row->page_title );
		$content_model = ContentHandler::getDefaultModelFor( $title );
	}

	$content_handler = ContentHandler::getForModelID( $content_model );

	if ( isset( $row->rev_content_format ) ) {
		$content_format = strval( $row->rev_content_format );
	} else {
		// probably using $wgContentHandlerUseDB = false;
		$content_format = $content_handler->getDefaultFormat();
	}

	$out .= " " . Xml::element( 'model', null, strval( $content_model ) ) . "\n";
	$out .= " " . Xml::element( 'format', null, strval( $content_format ) ) . "\n";

	// Stays empty for hidden and stub revisions; passed to the hook below.
	$text = '';
	if ( $deleted & Revision::DELETED_TEXT ) {
		$out .= " " . Xml::element( 'text', [ 'deleted' => 'deleted' ] ) . "\n";
	} elseif ( isset( $row->old_text ) ) {
		// Raw text from the database may have invalid chars
		$text = strval( Revision::getRevisionText( $row ) );
		$text = $content_handler->exportTransform( $text, $content_format );
		$out .= " " . Xml::elementClean( 'text',
			[ 'xml:space' => 'preserve', 'bytes' => intval( $row->rev_len ) ],
			strval( $text ) ) . "\n";
	} else {
		// Stub output
		$out .= " " . Xml::element( 'text',
			[ 'id' => $row->rev_text_id, 'bytes' => intval( $row->rev_len ) ],
			"" ) . "\n";
	}

	if ( !empty( $row->rev_sha1 ) && !( $deleted & Revision::DELETED_TEXT ) ) {
		$out .= " " . Xml::element( 'sha1', null, strval( $row->rev_sha1 ) ) . "\n";
	} else {
		$out .= " <sha1/>\n";
	}

	// Avoid PHP 7.1 warning from passing $this by reference
	$writer = $this;
	Hooks::run( 'XmlDumpWriterWriteRevision', [ &$writer, &$out, $row, $text ] );

	$out .= " </revision>\n";

	return $out;
}
282 
/**
 * Dumps a "<logitem>" section, with data filled in from the given
 * database row.
 *
 * Contributor, comment and action details are each replaced by an
 * element flagged deleted="deleted" when the matching bit is set in
 * log_deleted.
 *
 * @param object $row Logging row exposing log_id, log_timestamp,
 *   log_deleted, log_user, user_name, log_type, log_action,
 *   log_namespace, log_title and log_params
 * @return string
 */
function writeLogItem( $row ) {
	$hiddenMarker = [ 'deleted' => 'deleted' ];

	$xml = " <logitem>\n";
	$xml .= " " . Xml::element( 'id', null, strval( $row->log_id ) ) . "\n";

	$xml .= $this->writeTimestamp( $row->log_timestamp, " " );

	if ( !( $row->log_deleted & LogPage::DELETED_USER ) ) {
		$xml .= $this->writeContributor( $row->log_user, $row->user_name, " " );
	} else {
		$xml .= " " . Xml::element( 'contributor', $hiddenMarker ) . "\n";
	}

	if ( !( $row->log_deleted & LogPage::DELETED_COMMENT ) ) {
		$summary = CommentStore::newKey( 'log_comment' )->getComment( $row )->text;
		if ( $summary != '' ) {
			$xml .= " " . Xml::elementClean( 'comment', null, strval( $summary ) ) . "\n";
		}
	} else {
		$xml .= " " . Xml::element( 'comment', $hiddenMarker ) . "\n";
	}

	$xml .= " " . Xml::element( 'type', null, strval( $row->log_type ) ) . "\n";
	$xml .= " " . Xml::element( 'action', null, strval( $row->log_action ) ) . "\n";

	if ( !( $row->log_deleted & LogPage::DELETED_ACTION ) ) {
		$target = Title::makeTitle( $row->log_namespace, $row->log_title );
		$xml .= " " . Xml::elementClean( 'logtitle', null, self::canonicalTitle( $target ) ) . "\n";
		$paramAttribs = [ 'xml:space' => 'preserve' ];
		$xml .= " " . Xml::elementClean( 'params', $paramAttribs, strval( $row->log_params ) ) . "\n";
	} else {
		$xml .= " " . Xml::element( 'text', $hiddenMarker ) . "\n";
	}

	$xml .= " </logitem>\n";

	return $xml;
}
329 
/**
 * Formats a timestamp as an indented "<timestamp>" line in ISO 8601 form.
 *
 * @param string $timestamp Timestamp in any format wfTimestamp() accepts
 * @param string $indent Prefix placed before the element
 * @return string
 */
function writeTimestamp( $timestamp, $indent = " " ) {
	$iso = wfTimestamp( TS_ISO_8601, $timestamp );
	$element = Xml::element( 'timestamp', null, $iso );

	return $indent . $element . "\n";
}
339 
/**
 * Builds a "<contributor>" block.
 *
 * A contributor with a falsy id whose name is a valid IP address is
 * written as "<ip>"; everyone else is written as "<username>" plus "<id>".
 *
 * @param int $id User id
 * @param string $text User name or IP address
 * @param string $indent Prefix placed before each line
 * @return string
 */
function writeContributor( $id, $text, $indent = " " ) {
	$inner = $indent . " ";
	$name = strval( $text );

	$xml = $indent . "<contributor>\n";
	if ( !$id && IP::isValid( $text ) ) {
		$xml .= $inner . Xml::elementClean( 'ip', null, $name ) . "\n";
	} else {
		$xml .= $inner . Xml::elementClean( 'username', null, $name ) . "\n";
		$xml .= $inner . Xml::element( 'id', null, strval( $id ) ) . "\n";
	}
	$xml .= $indent . "</contributor>\n";

	return $xml;
}
357 
/**
 * Dumps every stored version of the file behind a page row.
 *
 * Returns an empty string when the row is not in the file namespace or
 * the local file does not exist. Otherwise the history entries are
 * written in reversed getHistory() order, followed by the current version.
 *
 * @param object $row Row exposing page_namespace and page_title
 * @param bool $dumpContents Whether to embed the file contents
 * @return string
 */
function writeUploads( $row, $dumpContents = false ) {
	if ( $row->page_namespace != NS_FILE ) {
		return '';
	}

	$current = wfLocalFile( $row->page_title );
	if ( !$current || !$current->exists() ) {
		return '';
	}

	$xml = '';
	$olderVersions = array_reverse( $current->getHistory() );
	foreach ( $olderVersions as $oldFile ) {
		$xml .= $this->writeUpload( $oldFile, $dumpContents );
	}

	return $xml . $this->writeUpload( $current, $dumpContents );
}
378 
/**
 * Dumps a single "<upload>" section for one file version.
 *
 * @param File $file File version to describe
 * @param bool $dumpContents Whether to embed the base64-encoded contents
 * @return string
 */
function writeUpload( $file, $dumpContents = false ) {
	// Only archived versions carry an <archivename> line.
	$archiveLine = '';
	if ( $file->isOld() ) {
		$archiveLine = " " .
			Xml::element( 'archivename', null, $file->getArchiveName() ) . "\n";
	}

	$payload = '';
	if ( $dumpContents ) {
		$backend = $file->getRepo()->getBackend();
		# Dump file as base64
		# Uses only XML-safe characters, so does not need escaping
		# @todo Too bad this loads the contents into memory (script might swap)
		$raw = $backend->getFileContents( [ 'src' => $file->getPath() ] );
		$payload = ' <contents encoding="base64">' .
			chunk_split( base64_encode( $raw ) ) .
			" </contents>\n";
	}

	$commentLine = $file->isDeleted( File::DELETED_COMMENT )
		? Xml::element( 'comment', [ 'deleted' => 'deleted' ] )
		: Xml::elementClean( 'comment', null, strval( $file->getDescription() ) );

	$xml = " <upload>\n";
	$xml .= $this->writeTimestamp( $file->getTimestamp() );
	$xml .= $this->writeContributor( $file->getUser( 'id' ), $file->getUser( 'text' ) );
	$xml .= " " . $commentLine . "\n";
	$xml .= " " . Xml::element( 'filename', null, $file->getName() ) . "\n";
	$xml .= $archiveLine;
	$xml .= " " . Xml::element( 'src', null, $file->getCanonicalUrl() ) . "\n";
	$xml .= " " . Xml::element( 'size', null, $file->getSize() ) . "\n";
	$xml .= " " . Xml::element( 'sha1base36', null, $file->getSha1() ) . "\n";
	$xml .= " " . Xml::element( 'rel', null, $file->getRel() ) . "\n";
	$xml .= $payload;
	$xml .= " </upload>\n";

	return $xml;
}
421 
/**
 * Returns the prefixed text form of a title, using the namespace text
 * supplied by the content language's getFormattedNsText().
 *
 * Interwiki titles are returned unchanged as their prefixed text.
 *
 * @param Title $title
 * @return string
 */
public static function canonicalTitle( Title $title ) {
	if ( $title->isExternal() ) {
		return $title->getPrefixedText();
	}

	// Without this declaration $wgContLang is an undefined local.
	global $wgContLang;
	$prefix = $wgContLang->getFormattedNsText( $title->getNamespace() );

	// @todo Emit some kind of warning to the user if $title->getNamespace() !==
	// NS_MAIN and $prefix === '' (viz. pages in an unregistered namespace)

	if ( $prefix !== '' ) {
		$prefix .= ':';
	}

	return $prefix . $title->getText();
}
449 }
static factory(Title $title)
Create a WikiPage object of the appropriate class for the given title.
Definition: WikiPage.php:121
writeUploads($row, $dumpContents=false)
Warning! This data is potentially inconsistent.
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output $out
Definition: hooks.txt:782
const DELETED_COMMENT
Definition: File.php:54
static getRevisionText($row, $prefix= 'old_', $wiki=false)
Get revision text associated with an old or archive row.
Definition: Revision.php:1282
$wgVersion
MediaWiki version number.
$wgSitename
Name of the site.
static element($element, $attribs=null, $contents= '', $allowShortTag=true)
Format an XML element with given attributes and, optionally, text content.
Definition: Xml.php:39
getText()
Get the text form (spaces not underscores) of the main part.
Definition: Title.php:938
static newMainPage()
Create a new Title for the Main Page.
Definition: Title.php:581
static getForModelID($modelId)
Returns the ContentHandler singleton for the given model ID.
static getDefaultModelFor(Title $title)
Returns the name of the default content model to be used for the page with the given title...
closePage()
Closes a "<page>" section on the output stream.
getPrefixedText()
Get the prefixed title with spaces.
Definition: Title.php:1551
closeStream()
Closes the output stream with the closing root element.
when a variable name is used in a it is silently declared as a new local masking the global
Definition: design.txt:93
wfLocalFile($title)
Get an object referring to a locally registered file.
writeRevision($row)
Dumps a "<revision>" section on the output stream, with data filled in from the given database row...
static isCapitalized($index)
Is the namespace first-letter capitalized?
wfTimestamp($outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
const DELETED_COMMENT
Definition: LogPage.php:33
isExternal()
Is this Title interwiki?
Definition: Title.php:855
$wgCapitalLinks
Set this to false to avoid forcing the first letter of links to capitals.
writeLogItem($row)
Dumps a "<logitem>" section on the output stream, with data filled in from the given database row...
writeContributor($id, $text, $indent=" ")
static isValid($ip)
Validate an IP address.
Definition: IP.php:111
static elementClean($element, $attribs=[], $contents= '')
Format an XML element as with self::element(), but run text through the $wgContLang->normalize() vali...
Definition: Xml.php:91
namespace and then decline to actually register it file or subcat img or subcat $title
Definition: hooks.txt:933
openStream()
Opens the XML output stream's root "<mediawiki>" element.
static run($event, array $args=[], $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:203
getNamespace()
Get the namespace index, i.e.
Definition: Title.php:979
const NS_FILE
Definition: Defines.php:71
writeUpload($file, $dumpContents=false)
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
const DELETED_USER
Definition: LogPage.php:34
const DELETED_TEXT
Definition: Revision.php:90
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
const DELETED_USER
Definition: Revision.php:92
static schemaVersion()
Returns the export schema version.
this class mediates it Skin Encapsulates a look and feel for the wiki All of the functions that render HTML and make choices about how to render it are here and are called from various other places when and is meant to be subclassed with other skins that may override some of its functions The User object contains a reference to a and so rather than having a global skin object we just rely on the global User and get the skin with $wgUser and also has some character encoding functions and other locale stuff The current user interface language is instantiated as and the local content language as $wgContLang
Definition: design.txt:56
static newKey($key)
Static constructor for easier chaining.
writeTimestamp($timestamp, $indent=" ")
const DELETED_COMMENT
Definition: Revision.php:91
const DELETED_ACTION
Definition: LogPage.php:32
controlled by $wgMainCacheType controlled by $wgParserCacheType controlled by $wgMessageCacheType If you set CACHE_NONE to one of the three control default value for MediaWiki still create a but requests to it are no ops and we always fall through to the database If the cache daemon can t be it should also disable itself fairly smoothly By $wgMemc is used but when it is $parserMemc or $messageMemc this is mentioned $wgDBname
Definition: memcached.txt:96
static makeTitle($ns, $title, $fragment= '', $interwiki= '')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:529
static canonicalTitle(Title $title)
Return prefixed text form of title, but using the content language's canonical namespace.
openPage($row)
Opens a "<page>" section on the output stream, with data from the given database row.