MediaWiki  1.28.0
XmlDumpWriter.php
Go to the documentation of this file.
1 <?php
/**
 * Opens the XML output stream's root "<mediawiki>" element.
 *
 * The returned string is the root element's start tag, a newline, and the
 * "<siteinfo>" block produced by siteInfo(). The matching end tag is
 * emitted separately by closeStream().
 *
 * @return string
 */
function openStream() {
	// Both values are interpolated into the attributes below; without these
	// two statements they would be undefined locals.
	global $wgLanguageCode;
	$ver = WikiExporter::schemaVersion();
	return Xml::element( 'mediawiki', [
		'xmlns' => "http://www.mediawiki.org/xml/export-$ver/",
		'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance",
		/*
		 * When a new version of the schema is created, it needs staging on mediawiki.org.
		 * This requires a change in the operations/mediawiki-config git repo.
		 *
		 * Create a changeset like https://gerrit.wikimedia.org/r/#/c/149643/ in which
		 * you copy in the new xsd file.
		 *
		 * After it is reviewed, merged and deployed (sync-docroot), the index.html needs purging.
		 * echo "http://www.mediawiki.org/xml/index.html" | mwscript purgeList.php --wiki=aawiki
		 */
		'xsi:schemaLocation' => "http://www.mediawiki.org/xml/export-$ver/ " .
			"http://www.mediawiki.org/xml/export-$ver.xsd",
		'version' => $ver,
		'xml:lang' => $wgLanguageCode ],
		// NOTE(review): null contents presumably yields a start tag only - confirm in Xml::element
		null ) .
		"\n" .
		$this->siteInfo();
}
64 
/**
 * Builds the "<siteinfo>" block: site name, database name, base URL,
 * generator string, case setting and namespace list, one per line.
 *
 * @return string
 */
function siteInfo() {
	$lines = [];
	$lines[] = $this->sitename();
	$lines[] = $this->dbname();
	$lines[] = $this->homelink();
	$lines[] = $this->generator();
	$lines[] = $this->caseSetting();
	$lines[] = $this->namespaces();

	$body = implode( "\n ", $lines );

	return " <siteinfo>\n " . $body . "\n </siteinfo>\n";
}
80 
/**
 * Builds the "<sitename>" element from the $wgSitename setting.
 *
 * @return string
 */
function sitename() {
	// Without this declaration $wgSitename is an undefined local, not the setting.
	global $wgSitename;
	return Xml::element( 'sitename', [], $wgSitename );
}
88 
/**
 * Builds the "<dbname>" element from the $wgDBname setting.
 *
 * @return string
 */
function dbname() {
	// Without this declaration $wgDBname is an undefined local, not the setting.
	global $wgDBname;
	return Xml::element( 'dbname', [], $wgDBname );
}
96 
/**
 * Builds the "<generator>" element, whose text is "MediaWiki " followed by
 * the $wgVersion setting.
 *
 * @return string
 */
function generator() {
	// Without this declaration $wgVersion is an undefined local, not the setting.
	global $wgVersion;
	return Xml::element( 'generator', [], "MediaWiki $wgVersion" );
}
104 
/**
 * Builds the "<base>" element holding the canonical URL of the main page.
 *
 * @return string
 */
function homelink() {
	$mainPage = Title::newMainPage();
	$canonicalUrl = $mainPage->getCanonicalURL();

	return Xml::element( 'base', [], $canonicalUrl );
}
111 
/**
 * Builds the "<case>" element describing the site-wide $wgCapitalLinks
 * setting: 'first-letter' when it is truthy, 'case-sensitive' otherwise.
 *
 * @return string
 */
function caseSetting() {
	// Without this declaration $wgCapitalLinks is an undefined local and the
	// result would always be 'case-sensitive'.
	global $wgCapitalLinks;
	// "case-insensitive" option is reserved for future
	$sensitivity = $wgCapitalLinks ? 'first-letter' : 'case-sensitive';
	return Xml::element( 'case', [], $sensitivity );
}
121 
/**
 * Builds the "<namespaces>" block with one "<namespace>" element per entry
 * returned by the content language's getFormattedNamespaces().
 *
 * Each element carries the namespace index as its 'key' attribute, a 'case'
 * attribute derived from MWNamespace::isCapitalized(), and the formatted
 * namespace name as its text.
 *
 * @return string
 */
function namespaces() {
	// Without this declaration $wgContLang is an undefined local and the
	// method call below fails.
	global $wgContLang;
	$spaces = "<namespaces>\n";
	foreach ( $wgContLang->getFormattedNamespaces() as $ns => $title ) {
		$spaces .= ' ' .
			Xml::element( 'namespace',
				[
					'key' => $ns,
					'case' => MWNamespace::isCapitalized( $ns ) ? 'first-letter' : 'case-sensitive',
				], $title ) . "\n";
	}
	$spaces .= " </namespaces>";
	return $spaces;
}
139 
/**
 * Closes the output stream with the end tag of the root "<mediawiki>" element.
 *
 * @return string
 */
function closeStream() {
	$rootEndTag = '</mediawiki>';

	return $rootEndTag . "\n";
}
149 
/**
 * Opens a "<page>" section on the output stream, with data from the given
 * database row. The section is left open; closePage() emits the end tag.
 *
 * Reads page_namespace, page_title, page_id, page_is_redirect and
 * page_restrictions from the row.
 *
 * @param object $row Database row for the page
 * @return string
 */
public function openPage( $row ) {
	$out = " <page>\n";
	$title = Title::makeTitle( $row->page_namespace, $row->page_title );
	$out .= ' ' . Xml::elementClean( 'title', [], self::canonicalTitle( $title ) ) . "\n";
	$out .= ' ' . Xml::element( 'ns', [], strval( $row->page_namespace ) ) . "\n";
	$out .= ' ' . Xml::element( 'id', [], strval( $row->page_id ) ) . "\n";
	if ( $row->page_is_redirect ) {
		// The redirect target is looked up through the page object; without
		// this assignment $page is undefined and the call below is fatal.
		$page = WikiPage::factory( $title );
		$redirect = $page->getRedirectTarget();
		// Only emit the element for a usable Title target.
		if ( $redirect instanceof Title && $redirect->isValidRedirectTarget() ) {
			$out .= ' ';
			$out .= Xml::element( 'redirect', [ 'title' => self::canonicalTitle( $redirect ) ] );
			$out .= "\n";
		}
	}

	if ( $row->page_restrictions != '' ) {
		$out .= ' ' . Xml::element( 'restrictions', [],
			strval( $row->page_restrictions ) ) . "\n";
	}

	// $out is passed by reference so hook handlers can append to or alter it.
	Hooks::run( 'XmlDumpWriterOpenPage', [ $this, &$out, $row, $title ] );

	return $out;
}
182 
/**
 * Closes a "<page>" section on the output stream.
 *
 * @return string
 */
function closePage() {
	$pageEndTag = ' </page>';

	return $pageEndTag . "\n";
}
192 
/**
 * Dumps a "<revision>" section on the output stream, with data filled in
 * from the given database row.
 *
 * Optional row fields (rev_parent_id, rev_deleted, rev_minor_edit,
 * rev_content_model, rev_content_format, old_text, rev_sha1) are tested with
 * isset() before use. When old_text is absent a stub "<text>" element that
 * only references rev_text_id is written instead of the content.
 *
 * @param object $row Database row for the revision
 * @return string
 */
function writeRevision( $row ) {
	$out = " <revision>\n";
	$out .= " " . Xml::element( 'id', null, strval( $row->rev_id ) ) . "\n";
	if ( isset( $row->rev_parent_id ) && $row->rev_parent_id ) {
		$out .= " " . Xml::element( 'parentid', null, strval( $row->rev_parent_id ) ) . "\n";
	}

	$out .= $this->writeTimestamp( $row->rev_timestamp );

	if ( isset( $row->rev_deleted ) && ( $row->rev_deleted & Revision::DELETED_USER ) ) {
		$out .= " " . Xml::element( 'contributor', [ 'deleted' => 'deleted' ] ) . "\n";
	} else {
		$out .= $this->writeContributor( $row->rev_user, $row->rev_user_text );
	}

	if ( isset( $row->rev_minor_edit ) && $row->rev_minor_edit ) {
		$out .= " <minor/>\n";
	}
	if ( isset( $row->rev_deleted ) && ( $row->rev_deleted & Revision::DELETED_COMMENT ) ) {
		$out .= " " . Xml::element( 'comment', [ 'deleted' => 'deleted' ] ) . "\n";
	} elseif ( $row->rev_comment != '' ) {
		$out .= " " . Xml::elementClean( 'comment', [], strval( $row->rev_comment ) ) . "\n";
	}

	// isset() is already false for null, so no separate is_null() test is needed.
	if ( isset( $row->rev_content_model ) ) {
		$content_model = strval( $row->rev_content_model );
	} else {
		// probably using $wgContentHandlerUseDB = false;
		$title = Title::makeTitle( $row->page_namespace, $row->page_title );
		$content_model = ContentHandler::getDefaultModelFor( $title );
	}

	$content_handler = ContentHandler::getForModelID( $content_model );

	if ( isset( $row->rev_content_format ) ) {
		$content_format = strval( $row->rev_content_format );
	} else {
		// probably using $wgContentHandlerUseDB = false;
		$content_format = $content_handler->getDefaultFormat();
	}

	$out .= " " . Xml::element( 'model', null, strval( $content_model ) ) . "\n";
	$out .= " " . Xml::element( 'format', null, strval( $content_format ) ) . "\n";

	// Evaluated once and reused for both the <text> and <sha1> decisions, so
	// the sha1 check no longer reads rev_deleted when the field is absent.
	$textDeleted = isset( $row->rev_deleted )
		&& ( $row->rev_deleted & Revision::DELETED_TEXT );

	$text = '';
	if ( $textDeleted ) {
		$out .= " " . Xml::element( 'text', [ 'deleted' => 'deleted' ] ) . "\n";
	} elseif ( isset( $row->old_text ) ) {
		// Raw text from the database may have invalid chars
		$text = strval( Revision::getRevisionText( $row ) );
		$text = $content_handler->exportTransform( $text, $content_format );
		$out .= " " . Xml::elementClean( 'text',
			[ 'xml:space' => 'preserve', 'bytes' => intval( $row->rev_len ) ],
			strval( $text ) ) . "\n";
	} else {
		// Stub output
		$out .= " " . Xml::element( 'text',
			[ 'id' => $row->rev_text_id, 'bytes' => intval( $row->rev_len ) ],
			"" ) . "\n";
	}

	if ( isset( $row->rev_sha1 ) && $row->rev_sha1 && !$textDeleted ) {
		$out .= " " . Xml::element( 'sha1', null, strval( $row->rev_sha1 ) ) . "\n";
	} else {
		$out .= " <sha1/>\n";
	}

	// PHP 7.1+ no longer allows taking a reference to $this, so hand the hook
	// a reference to a local alias of the writer instead.
	$writer = $this;
	Hooks::run( 'XmlDumpWriterWriteRevision', [ &$writer, &$out, $row, $text ] );

	$out .= " </revision>\n";

	return $out;
}
278 
/**
 * Dumps a "<logitem>" section on the output stream, with data filled in
 * from the given database row.
 *
 * Contributor, comment and action details are replaced by a
 * deleted="deleted" placeholder element when the corresponding
 * LogPage::DELETED_* bit is set in log_deleted.
 *
 * @param object $row Database row for the log entry
 * @return string
 */
function writeLogItem( $row ) {
	$hiddenMarker = [ 'deleted' => 'deleted' ];

	$xml = " <logitem>\n";
	$xml .= " " . Xml::element( 'id', null, strval( $row->log_id ) ) . "\n";
	$xml .= $this->writeTimestamp( $row->log_timestamp, " " );

	// Contributor: placeholder when hidden, full element otherwise.
	$xml .= ( $row->log_deleted & LogPage::DELETED_USER )
		? " " . Xml::element( 'contributor', $hiddenMarker ) . "\n"
		: $this->writeContributor( $row->log_user, $row->user_name, " " );

	// Comment: placeholder when hidden, omitted entirely when empty.
	if ( $row->log_deleted & LogPage::DELETED_COMMENT ) {
		$xml .= " " . Xml::element( 'comment', $hiddenMarker ) . "\n";
	} elseif ( $row->log_comment != '' ) {
		$cleanComment = Xml::elementClean( 'comment', null, strval( $row->log_comment ) );
		$xml .= " " . $cleanComment . "\n";
	}

	$typeElement = Xml::element( 'type', null, strval( $row->log_type ) );
	$actionElement = Xml::element( 'action', null, strval( $row->log_action ) );
	$xml .= " " . $typeElement . "\n";
	$xml .= " " . $actionElement . "\n";

	// Target title and parameters: a single placeholder when the action is hidden.
	if ( $row->log_deleted & LogPage::DELETED_ACTION ) {
		$xml .= " " . Xml::element( 'text', $hiddenMarker ) . "\n";
	} else {
		$target = Title::makeTitle( $row->log_namespace, $row->log_title );
		$titleElement = Xml::elementClean( 'logtitle', null, self::canonicalTitle( $target ) );
		$paramsElement = Xml::elementClean( 'params',
			[ 'xml:space' => 'preserve' ],
			strval( $row->log_params ) );
		$xml .= " " . $titleElement . "\n";
		$xml .= " " . $paramsElement . "\n";
	}

	return $xml . " </logitem>\n";
}
323 
/**
 * Builds an indented "<timestamp>" element line.
 *
 * @param string $timestamp Timestamp in any format accepted by wfTimestamp()
 * @param string $indent Prefix placed before the element
 * @return string
 */
function writeTimestamp( $timestamp, $indent = " " ) {
	// Convert to the TS_ISO_8601 form; without this assignment $ts is
	// undefined and the element would be emitted with no usable content.
	$ts = wfTimestamp( TS_ISO_8601, $timestamp );
	return $indent . Xml::element( 'timestamp', null, $ts ) . "\n";
}
333 
/**
 * Builds a "<contributor>" block.
 *
 * When $id is falsy and $text is a valid IP address, the block contains a
 * single "<ip>" element; in every other case it contains "<username>" and
 * "<id>" elements.
 *
 * @param int $id User ID
 * @param string $text User name or IP address
 * @param string $indent Prefix placed before each emitted line
 * @return string
 */
function writeContributor( $id, $text, $indent = " " ) {
	$childIndent = $indent . " ";

	// IP::isValid() is only consulted when there is no user ID.
	$isBareIp = !$id && IP::isValid( $text );

	if ( $isBareIp ) {
		$children = $childIndent . Xml::elementClean( 'ip', null, strval( $text ) ) . "\n";
	} else {
		$children = $childIndent . Xml::elementClean( 'username', null, strval( $text ) ) . "\n";
		$children .= $childIndent . Xml::element( 'id', null, strval( $id ) ) . "\n";
	}

	return $indent . "<contributor>\n" . $children . $indent . "</contributor>\n";
}
351 
/**
 * Builds the "<upload>" sections for a file page: one per entry of the
 * file's history (iterated in reverse of the order getHistory() returns),
 * followed by one for the current file.
 *
 * Returns an empty string when the row is not in NS_FILE or the local file
 * does not exist.
 *
 * @param object $row Database row for the page
 * @param bool $dumpContents Passed through to writeUpload()
 * @return string
 */
function writeUploads( $row, $dumpContents = false ) {
	if ( $row->page_namespace != NS_FILE ) {
		return '';
	}

	$current = wfLocalFile( $row->page_title );
	if ( !$current || !$current->exists() ) {
		return '';
	}

	$xml = '';
	$reversedHistory = array_reverse( $current->getHistory() );
	foreach ( $reversedHistory as $oldFile ) {
		$xml .= $this->writeUpload( $oldFile, $dumpContents );
	}

	return $xml . $this->writeUpload( $current, $dumpContents );
}
372 
/**
 * Builds a single "<upload>" section for one file version.
 *
 * @param File $file File object to describe
 * @param bool $dumpContents When true, embed the file contents as base64
 * @return string
 */
function writeUpload( $file, $dumpContents = false ) {
	// Only old versions carry an <archivename> line.
	$archiveName = '';
	if ( $file->isOld() ) {
		$archiveElement = Xml::element( 'archivename', null, $file->getArchiveName() );
		$archiveName = " " . $archiveElement . "\n";
	}

	$contents = '';
	if ( $dumpContents ) {
		$backend = $file->getRepo()->getBackend();
		# Dump file as base64
		# Uses only XML-safe characters, so does not need escaping
		# @todo Too bad this loads the contents into memory (script might swap)
		$raw = $backend->getFileContents( [ 'src' => $file->getPath() ] );
		$contents = ' <contents encoding="base64">' .
			chunk_split( base64_encode( $raw ) ) .
			" </contents>\n";
	}

	// A hidden description is replaced by a placeholder element.
	$comment = $file->isDeleted( File::DELETED_COMMENT )
		? Xml::element( 'comment', [ 'deleted' => 'deleted' ] )
		: Xml::elementClean( 'comment', null, $file->getDescription() );

	// Array elements are evaluated in order, matching the output order.
	$pieces = [
		" <upload>\n",
		$this->writeTimestamp( $file->getTimestamp() ),
		$this->writeContributor( $file->getUser( 'id' ), $file->getUser( 'text' ) ),
		" " . $comment . "\n",
		" " . Xml::element( 'filename', null, $file->getName() ) . "\n",
		$archiveName,
		" " . Xml::element( 'src', null, $file->getCanonicalUrl() ) . "\n",
		" " . Xml::element( 'size', null, $file->getSize() ) . "\n",
		" " . Xml::element( 'sha1base36', null, $file->getSha1() ) . "\n",
		" " . Xml::element( 'rel', null, $file->getRel() ) . "\n",
		$contents,
		" </upload>\n",
	];

	return implode( '', $pieces );
}
415 
/**
 * Return prefixed text form of title, but using the content language's
 * canonical namespace.
 *
 * Interwiki titles are returned via getPrefixedText() unchanged. For all
 * other titles the namespace text comes from the content language, and no
 * colon is added when that text is empty.
 *
 * @param Title $title
 * @return string
 */
public static function canonicalTitle( Title $title ) {
	if ( $title->isExternal() ) {
		return $title->getPrefixedText();
	}

	// Without this declaration $wgContLang is an undefined local and the
	// method call below fails.
	global $wgContLang;
	$prefix = $wgContLang->getFormattedNsText( $title->getNamespace() );

	if ( $prefix !== '' ) {
		$prefix .= ':';
	}

	return $prefix . $title->getText();
}
440 }
static factory(Title $title)
Create a WikiPage object of the appropriate class for the given title.
Definition: WikiPage.php:115
writeUploads($row, $dumpContents=false)
Warning! This data is potentially inconsistent.
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output $out
Definition: hooks.txt:802
const DELETED_COMMENT
Definition: File.php:53
static getRevisionText($row, $prefix= 'old_', $wiki=false)
Get revision text associated with an old or archive row $row is usually an object from wfFetchRow()...
Definition: Revision.php:1273
$wgVersion
MediaWiki version number.
$wgSitename
Name of the site.
static element($element, $attribs=null, $contents= '', $allowShortTag=true)
Format an XML element with given attributes and, optionally, text content.
Definition: Xml.php:39
getText()
Get the text form (spaces not underscores) of the main part.
Definition: Title.php:880
static newMainPage()
Create a new Title for the Main Page.
Definition: Title.php:556
static getForModelID($modelId)
Returns the ContentHandler singleton for the given model ID.
$comment
static getDefaultModelFor(Title $title)
Returns the name of the default content model to be used for the page with the given title...
closePage()
Closes a "<page>" section on the output stream.
getPrefixedText()
Get the prefixed title with spaces.
Definition: Title.php:1455
closeStream()
Closes the output stream with the closing root element.
when a variable name is used in a it is silently declared as a new local masking the global
Definition: design.txt:93
const TS_ISO_8601
ISO 8601 format with no timezone: 1986-02-09T20:00:00Z.
Definition: defines.php:28
wfLocalFile($title)
Get an object referring to a locally registered file.
writeRevision($row)
Dumps a "<revision>" section on the output stream, with data filled in from the given database row...
static isCapitalized($index)
Is the namespace first-letter capitalized?
wfTimestamp($outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
$wgLanguageCode
Site language code.
const DELETED_COMMENT
Definition: LogPage.php:34
isExternal()
Is this Title interwiki?
Definition: Title.php:797
$wgCapitalLinks
Set this to false to avoid forcing the first letter of links to capitals.
writeLogItem($row)
Dumps a "<logitem>" section on the output stream, with data filled in from the given database row...
writeContributor($id, $text, $indent=" ")
if($limit) $timestamp
static isValid($ip)
Validate an IP address.
Definition: IP.php:113
static elementClean($element, $attribs=[], $contents= '')
Format an XML element as with self::element(), but run text through the $wgContLang->normalize() vali...
Definition: Xml.php:91
namespace and then decline to actually register it file or subcat img or subcat $title
Definition: hooks.txt:953
openStream()
Opens the XML output stream's root "<mediawiki>" element.
static run($event, array $args=[], $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:131
getNamespace()
Get the namespace index, i.e.
Definition: Title.php:921
const NS_FILE
Definition: Defines.php:62
writeUpload($file, $dumpContents=false)
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
const DELETED_USER
Definition: LogPage.php:35
const DELETED_TEXT
Definition: Revision.php:85
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
const DELETED_USER
Definition: Revision.php:87
static schemaVersion()
Returns the export schema version.
this class mediates it Skin Encapsulates a look and feel for the wiki All of the functions that render HTML and make choices about how to render it are here and are called from various other places when and is meant to be subclassed with other skins that may override some of its functions The User object contains a reference to a and so rather than having a global skin object we just rely on the global User and get the skin with $wgUser and also has some character encoding functions and other locale stuff The current user interface language is instantiated as and the local content language as $wgContLang
Definition: design.txt:56
writeTimestamp($timestamp, $indent=" ")
const DELETED_COMMENT
Definition: Revision.php:86
const DELETED_ACTION
Definition: LogPage.php:33
controlled by $wgMainCacheType controlled by $wgParserCacheType controlled by $wgMessageCacheType If you set CACHE_NONE to one of the three control default value for MediaWiki still create a but requests to it are no ops and we always fall through to the database If the cache daemon can t be it should also disable itself fairly smoothly By $wgMemc is used but when it is $parserMemc or $messageMemc this is mentioned $wgDBname
Definition: memcached.txt:96
static makeTitle($ns, $title, $fragment= '', $interwiki= '')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:511
static canonicalTitle(Title $title)
Return prefixed text form of title, but using the content language's canonical namespace.
do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values before the output is cached $page
Definition: hooks.txt:2491
openPage($row)
Opens a "<page>" section on the output stream, with data from the given database row.