MediaWiki REL1_32
XmlDumpWriter.php
Go to the documentation of this file.
1<?php
27
/**
 * Opens the XML output stream's root "<mediawiki>" element and appends the
 * "<siteinfo>" section produced by siteInfo().
 *
 * @return string
 */
function openStream() {
	$ver = WikiExporter::schemaVersion();

	/*
	 * When a new version of the schema is created, it needs staging on mediawiki.org.
	 * This requires a change in the operations/mediawiki-config git repo.
	 *
	 * Create a changeset like https://gerrit.wikimedia.org/r/#/c/149643/ in which
	 * you copy in the new xsd file.
	 *
	 * After it is reviewed, merged and deployed (sync-docroot), the index.html needs purging.
	 * echo "https://www.mediawiki.org/xml/index.html" | mwscript purgeList.php --wiki=aawiki
	 */
	$schemaLocation = "http://www.mediawiki.org/xml/export-$ver/ " .
		"http://www.mediawiki.org/xml/export-$ver.xsd";

	$contentLanguage = MediaWikiServices::getInstance()->getContentLanguage();

	$rootAttribs = [
		'xmlns' => "http://www.mediawiki.org/xml/export-$ver/",
		'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance",
		'xsi:schemaLocation' => $schemaLocation,
		'version' => $ver,
		'xml:lang' => $contentLanguage->getHtmlCode(),
	];

	// Contents are passed as null; presumably this makes Xml::element() emit only
	// the opening tag (closeStream() returns the matching "</mediawiki>") -- confirm
	// against the Xml class.
	$rootTag = Xml::element( 'mediawiki', $rootAttribs, null );

	return $rootTag . "\n" . $this->siteInfo();
}
65
/**
 * Builds the "<siteinfo>" section: site name, database name, base URL,
 * generator, case setting and namespace list, one element per line.
 *
 * @return string
 */
function siteInfo() {
	$parts = [];
	$parts[] = $this->sitename();
	$parts[] = $this->dbname();
	$parts[] = $this->homelink();
	$parts[] = $this->generator();
	$parts[] = $this->caseSetting();
	$parts[] = $this->namespaces();

	$body = implode( "\n ", $parts );

	return " <siteinfo>\n " . $body . "\n </siteinfo>\n";
}
81
/**
 * Wraps the configured site name ($wgSitename) in a "<sitename>" element.
 *
 * @return string
 */
function sitename() {
	global $wgSitename;

	$element = Xml::element( 'sitename', [], $wgSitename );

	return $element;
}
89
/**
 * Wraps the configured database name ($wgDBname) in a "<dbname>" element.
 *
 * @return string
 */
function dbname() {
	global $wgDBname;

	$element = Xml::element( 'dbname', [], $wgDBname );

	return $element;
}
97
/**
 * Emits a "<generator>" element naming the MediaWiki version ($wgVersion)
 * that produced the dump.
 *
 * @return string
 */
function generator() {
	global $wgVersion;

	$label = "MediaWiki $wgVersion";

	return Xml::element( 'generator', [], $label );
}
105
/**
 * Emits a "<base>" element holding the canonical URL of the main page.
 *
 * @return string
 */
function homelink() {
	$mainPage = Title::newMainPage();
	$canonicalUrl = $mainPage->getCanonicalURL();

	return Xml::element( 'base', [], $canonicalUrl );
}
112
/**
 * Emits a "<case>" element describing the wiki-wide title case handling:
 * 'first-letter' when $wgCapitalLinks is truthy, 'case-sensitive' otherwise.
 *
 * @return string
 */
function caseSetting() {
	global $wgCapitalLinks;

	// "case-insensitive" option is reserved for future
	if ( $wgCapitalLinks ) {
		$sensitivity = 'first-letter';
	} else {
		$sensitivity = 'case-sensitive';
	}

	return Xml::element( 'case', [], $sensitivity );
}
122
/**
 * Builds the "<namespaces>" block: one "<namespace>" element per formatted
 * namespace of the content language, carrying the namespace key and its
 * case handling ('first-letter' or 'case-sensitive').
 *
 * @return string
 */
function namespaces() {
	$contentLanguage = MediaWikiServices::getInstance()->getContentLanguage();

	$chunks = [ "<namespaces>\n" ];
	foreach ( $contentLanguage->getFormattedNamespaces() as $ns => $title ) {
		if ( MWNamespace::isCapitalized( $ns ) ) {
			$case = 'first-letter';
		} else {
			$case = 'case-sensitive';
		}
		$attribs = [
			'key' => $ns,
			'case' => $case,
		];
		$chunks[] = ' ' . Xml::element( 'namespace', $attribs, $title ) . "\n";
	}
	$chunks[] = " </namespaces>";

	return implode( '', $chunks );
}
142
/**
 * Closes the output stream with the closing root element.
 *
 * @return string
 */
function closeStream() {
	$closingTag = "</mediawiki>\n";

	return $closingTag;
}
152
/**
 * Opens a "<page>" section on the output stream, with data from the given
 * database row.
 *
 * Reads page_namespace, page_title, page_id, page_is_redirect and
 * page_restrictions from the row. Runs the 'XmlDumpWriterOpenPage' hook,
 * which receives the output by reference, before returning.
 *
 * @param object $row Database row for the page
 * @return string
 */
public function openPage( $row ) {
	$title = Title::makeTitle( $row->page_namespace, $row->page_title );

	$titleElement = Xml::elementClean( 'title', [], self::canonicalTitle( $title ) );
	$nsElement = Xml::element( 'ns', [], strval( $row->page_namespace ) );
	$idElement = Xml::element( 'id', [], strval( $row->page_id ) );

	$out = " <page>\n";
	$out .= ' ' . $titleElement . "\n";
	$out .= ' ' . $nsElement . "\n";
	$out .= ' ' . $idElement . "\n";

	if ( $row->page_is_redirect ) {
		$target = WikiPage::factory( $title )->getRedirectTarget();
		// Only a resolvable, valid redirect target gets a <redirect> element
		if ( $target instanceof Title && $target->isValidRedirectTarget() ) {
			$redirectAttribs = [ 'title' => self::canonicalTitle( $target ) ];
			$out .= ' ' . Xml::element( 'redirect', $redirectAttribs ) . "\n";
		}
	}

	if ( $row->page_restrictions != '' ) {
		$restrictions = strval( $row->page_restrictions );
		$out .= ' ' . Xml::element( 'restrictions', [], $restrictions ) . "\n";
	}

	// Hook handlers get $out by reference and may modify it
	Hooks::run( 'XmlDumpWriterOpenPage', [ $this, &$out, $row, $title ] );

	return $out;
}
185
/**
 * Closes a "<page>" section on the output stream.
 *
 * @return string
 */
function closePage() {
	$closingTag = " </page>\n";

	return $closingTag;
}
195
/**
 * Dumps a "<revision>" section on the output stream, with data filled in
 * from the given database row.
 *
 * Always reads rev_id, rev_timestamp and rev_len from the row; rev_user and
 * rev_user_text are read unless the user is marked deleted. The fields
 * rev_parent_id, rev_deleted, rev_minor_edit, rev_content_model,
 * rev_content_format, old_text and rev_sha1 are optional. page_namespace and
 * page_title are only read when no content model is present on the row, and
 * rev_text_id only when a text stub is emitted.
 *
 * Runs the 'XmlDumpWriterWriteRevision' hook, which receives the output by
 * reference, just before the closing tag is appended.
 *
 * @param object $row Database row for the revision
 * @return string
 */
function writeRevision( $row ) {
	// The visibility bitfield is optional on the row. Read it once behind an
	// isset() guard so that every check below, including the sha1 one, treats a
	// missing field as "nothing deleted" instead of reading an undefined property.
	$deleted = isset( $row->rev_deleted ) ? (int)$row->rev_deleted : 0;

	$out = " <revision>\n";
	$out .= " " . Xml::element( 'id', null, strval( $row->rev_id ) ) . "\n";
	if ( isset( $row->rev_parent_id ) && $row->rev_parent_id ) {
		$out .= " " . Xml::element( 'parentid', null, strval( $row->rev_parent_id ) ) . "\n";
	}

	$out .= $this->writeTimestamp( $row->rev_timestamp );

	if ( $deleted & Revision::DELETED_USER ) {
		$out .= " " . Xml::element( 'contributor', [ 'deleted' => 'deleted' ] ) . "\n";
	} else {
		$out .= $this->writeContributor( $row->rev_user, $row->rev_user_text );
	}

	if ( isset( $row->rev_minor_edit ) && $row->rev_minor_edit ) {
		$out .= " <minor/>\n";
	}

	if ( $deleted & Revision::DELETED_COMMENT ) {
		$out .= " " . Xml::element( 'comment', [ 'deleted' => 'deleted' ] ) . "\n";
	} else {
		$comment = CommentStore::getStore()->getComment( 'rev_comment', $row )->text;
		if ( $comment != '' ) {
			$out .= " " . Xml::elementClean( 'comment', [], strval( $comment ) ) . "\n";
		}
	}

	// isset() is already false for null, so no separate is_null() check is needed
	if ( isset( $row->rev_content_model ) ) {
		$content_model = strval( $row->rev_content_model );
	} else {
		// probably using $wgContentHandlerUseDB = false;
		$title = Title::makeTitle( $row->page_namespace, $row->page_title );
		$content_model = ContentHandler::getDefaultModelFor( $title );
	}

	$content_handler = ContentHandler::getForModelID( $content_model );

	if ( isset( $row->rev_content_format ) ) {
		$content_format = strval( $row->rev_content_format );
	} else {
		// probably using $wgContentHandlerUseDB = false;
		$content_format = $content_handler->getDefaultFormat();
	}

	$out .= " " . Xml::element( 'model', null, strval( $content_model ) ) . "\n";
	$out .= " " . Xml::element( 'format', null, strval( $content_format ) ) . "\n";

	// Stays empty when the text is deleted or only a stub is written; the hook
	// below receives whatever ends up here.
	$text = '';
	if ( $deleted & Revision::DELETED_TEXT ) {
		$out .= " " . Xml::element( 'text', [ 'deleted' => 'deleted' ] ) . "\n";
	} elseif ( isset( $row->old_text ) ) {
		// Raw text from the database may have invalid chars
		$text = strval( Revision::getRevisionText( $row ) );
		$text = $content_handler->exportTransform( $text, $content_format );
		$out .= " " . Xml::elementClean( 'text',
			[ 'xml:space' => 'preserve', 'bytes' => intval( $row->rev_len ) ],
			strval( $text ) ) . "\n";
	} else {
		// Stub output
		$out .= " " . Xml::element( 'text',
			[ 'id' => $row->rev_text_id, 'bytes' => intval( $row->rev_len ) ],
			"" ) . "\n";
	}

	// The hash is withheld whenever the text itself is marked deleted
	if ( isset( $row->rev_sha1 )
		&& $row->rev_sha1
		&& !( $deleted & Revision::DELETED_TEXT )
	) {
		$out .= " " . Xml::element( 'sha1', null, strval( $row->rev_sha1 ) ) . "\n";
	} else {
		$out .= " <sha1/>\n";
	}

	// Avoid PHP 7.1 warning from passing $this by reference
	$writer = $this;
	Hooks::run( 'XmlDumpWriterWriteRevision', [ &$writer, &$out, $row, $text ] );

	$out .= " </revision>\n";

	return $out;
}
285
/**
 * Dumps a "<logitem>" section on the output stream, with data filled in
 * from the given database row.
 *
 * Reads log_id, log_timestamp, log_deleted, log_type and log_action from
 * the row. log_user and user_name are read unless the user is marked
 * deleted; log_namespace, log_title and log_params are read unless the
 * action is marked deleted.
 *
 * @param object $row Database row for the log entry
 * @return string
 */
function writeLogItem( $row ) {
	$parts = [];

	$parts[] = " <logitem>\n";
	$parts[] = " " . Xml::element( 'id', null, strval( $row->log_id ) ) . "\n";
	$parts[] = $this->writeTimestamp( $row->log_timestamp, " " );

	// Contributor: placeholder when the user is suppressed, full element otherwise
	$parts[] = ( $row->log_deleted & LogPage::DELETED_USER )
		? " " . Xml::element( 'contributor', [ 'deleted' => 'deleted' ] ) . "\n"
		: $this->writeContributor( $row->log_user, $row->user_name, " " );

	// Comment: placeholder when suppressed; omitted entirely when empty
	if ( $row->log_deleted & LogPage::DELETED_COMMENT ) {
		$parts[] = " " . Xml::element( 'comment', [ 'deleted' => 'deleted' ] ) . "\n";
	} else {
		$commentText = CommentStore::getStore()->getComment( 'log_comment', $row )->text;
		if ( $commentText != '' ) {
			$parts[] = " " . Xml::elementClean( 'comment', null, strval( $commentText ) ) . "\n";
		}
	}

	$parts[] = " " . Xml::element( 'type', null, strval( $row->log_type ) ) . "\n";
	$parts[] = " " . Xml::element( 'action', null, strval( $row->log_action ) ) . "\n";

	// Target title and parameters are replaced by a placeholder when the action is suppressed
	if ( $row->log_deleted & LogPage::DELETED_ACTION ) {
		$parts[] = " " . Xml::element( 'text', [ 'deleted' => 'deleted' ] ) . "\n";
	} else {
		$target = Title::makeTitle( $row->log_namespace, $row->log_title );
		$parts[] = " " . Xml::elementClean( 'logtitle', null, self::canonicalTitle( $target ) ) . "\n";
		$parts[] = " " . Xml::elementClean( 'params',
			[ 'xml:space' => 'preserve' ],
			strval( $row->log_params ) ) . "\n";
	}

	$parts[] = " </logitem>\n";

	return implode( '', $parts );
}
332
/**
 * Emits a "<timestamp>" element holding the given timestamp converted to
 * ISO 8601 via wfTimestamp(), prefixed by $indent and followed by a newline.
 *
 * @param string $timestamp Timestamp in a format wfTimestamp() accepts
 * @param string $indent Prefix to prepend to the element
 * @return string
 */
function writeTimestamp( $timestamp, $indent = " " ) {
	$element = Xml::element(
		'timestamp',
		null,
		wfTimestamp( TS_ISO_8601, $timestamp )
	);

	return "{$indent}{$element}\n";
}
342
/**
 * Emits a "<contributor>" element.
 *
 * When $id is falsy and $text is a valid IP address, the contributor is
 * written as "<ip>". In every other case it is written as "<username>"
 * followed by "<id>".
 *
 * @param int $id User ID; falsy for contributors without an account
 * @param string $text User name or IP address
 * @param string $indent Prefix to prepend to each line
 * @return string
 */
function writeContributor( $id, $text, $indent = " " ) {
	$inner = $indent . " ";

	// Same short-circuit as "has an ID or is not an IP": IP::isValid() is only
	// consulted when there is no user ID.
	$isBareIp = !$id && IP::isValid( $text );

	$result = $indent . "<contributor>\n";
	if ( $isBareIp ) {
		$result .= $inner . Xml::elementClean( 'ip', null, strval( $text ) ) . "\n";
	} else {
		$result .= $inner . Xml::elementClean( 'username', null, strval( $text ) ) . "\n";
		$result .= $inner . Xml::element( 'id', null, strval( $id ) ) . "\n";
	}
	$result .= $indent . "</contributor>\n";

	return $result;
}
360
/**
 * Writes "<upload>" sections for every version of the file behind the given
 * page row: archived versions first (history order reversed), then the
 * current version. Returns an empty string when the page is not in NS_FILE
 * or the local file does not exist.
 *
 * @param object $row Page row; reads page_namespace and page_title
 * @param bool $dumpContents Passed through to writeUpload()
 * @return string
 */
function writeUploads( $row, $dumpContents = false ) {
	if ( $row->page_namespace != NS_FILE ) {
		return '';
	}

	$img = wfLocalFile( $row->page_title );
	if ( !$img || !$img->exists() ) {
		return '';
	}

	// History is reversed before writing; the current file always goes last
	$versions = array_reverse( $img->getHistory() );
	$versions[] = $img;

	$out = '';
	foreach ( $versions as $version ) {
		$out .= $this->writeUpload( $version, $dumpContents );
	}

	return $out;
}
381
/**
 * Writes a single "<upload>" section for one file version: timestamp,
 * contributor, comment, filename, archive name (old versions only), source
 * URL, size, sha1 (base 36), relative path and, optionally, the
 * base64-encoded file contents.
 *
 * @param File $file File version to describe
 * @param bool $dumpContents Whether to embed the file contents
 * @return string
 */
function writeUpload( $file, $dumpContents = false ) {
	// Only old (archived) versions carry an <archivename>
	$archiveName = '';
	if ( $file->isOld() ) {
		$archiveName = " " .
			Xml::element( 'archivename', null, $file->getArchiveName() ) . "\n";
	}

	$contents = '';
	if ( $dumpContents ) {
		$backend = $file->getRepo()->getBackend();
		# Dump file as base64
		# Uses only XML-safe characters, so does not need escaping
		# @todo Too bad this loads the contents into memory (script might swap)
		$raw = $backend->getFileContents( [ 'src' => $file->getPath() ] );
		$contents = ' <contents encoding="base64">' .
			chunk_split( base64_encode( $raw ) ) .
			" </contents>\n";
	}

	$comment = $file->isDeleted( File::DELETED_COMMENT )
		? Xml::element( 'comment', [ 'deleted' => 'deleted' ] )
		: Xml::elementClean( 'comment', null, strval( $file->getDescription() ) );

	$out = " <upload>\n";
	$out .= $this->writeTimestamp( $file->getTimestamp() );
	$out .= $this->writeContributor( $file->getUser( 'id' ), $file->getUser( 'text' ) );
	$out .= " " . $comment . "\n";
	$out .= " " . Xml::element( 'filename', null, $file->getName() ) . "\n";
	$out .= $archiveName;
	$out .= " " . Xml::element( 'src', null, $file->getCanonicalUrl() ) . "\n";
	$out .= " " . Xml::element( 'size', null, $file->getSize() ) . "\n";
	$out .= " " . Xml::element( 'sha1base36', null, $file->getSha1() ) . "\n";
	$out .= " " . Xml::element( 'rel', null, $file->getRel() ) . "\n";
	$out .= $contents;
	$out .= " </upload>\n";

	return $out;
}
424
/**
 * Return prefixed text form of title, but using the content language's
 * canonical namespace.
 *
 * External (interwiki) titles are returned via getPrefixedText() unchanged.
 *
 * @param Title $title
 * @return string
 */
public static function canonicalTitle( Title $title ) {
	if ( $title->isExternal() ) {
		return $title->getPrefixedText();
	}

	$contentLanguage = MediaWikiServices::getInstance()->getContentLanguage();
	$nsText = $contentLanguage->getFormattedNsText( $title->getNamespace() );

	// @todo Emit some kind of warning to the user if $title->getNamespace() !==
	// NS_MAIN and $nsText === '' (viz. pages in an unregistered namespace)

	// An empty namespace text gets no colon separator
	$prefix = ( $nsText === '' ) ? $nsText : $nsText . ':';

	return $prefix . $title->getText();
}
452}
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
$wgCapitalLinks
Set this to false to avoid forcing the first letter of links to capitals.
$wgSitename
Name of the site.
$wgVersion
MediaWiki version number.
wfLocalFile( $title)
Get an object referring to a locally registered file.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
const DELETED_COMMENT
Definition File.php:54
const DELETED_USER
Definition LogPage.php:36
const DELETED_COMMENT
Definition LogPage.php:35
const DELETED_ACTION
Definition LogPage.php:34
MediaWikiServices is the service locator for the application scope of MediaWiki.
static getRevisionText( $row, $prefix='old_', $wiki=false)
Get revision text associated with an old or archive row.
const DELETED_USER
Definition Revision.php:49
const DELETED_TEXT
Definition Revision.php:47
const DELETED_COMMENT
Definition Revision.php:48
Represents a title within MediaWiki.
Definition Title.php:39
isValidRedirectTarget()
Check if this Title is a valid redirect target.
Definition Title.php:5018
closeStream()
Closes the output stream with the closing root element.
static canonicalTitle(Title $title)
Return prefixed text form of title, but using the content language's canonical namespace.
writeUpload( $file, $dumpContents=false)
writeLogItem( $row)
Dumps a "<logitem>" section on the output stream, with data filled in from the given database row.
writeTimestamp( $timestamp, $indent=" ")
writeUploads( $row, $dumpContents=false)
Warning! This data is potentially inconsistent.
closePage()
Closes a "<page>" section on the output stream.
openStream()
Opens the XML output stream's root "<mediawiki>" element.
openPage( $row)
Opens a "<page>" section on the output stream, with data from the given database row.
writeRevision( $row)
Dumps a "<revision>" section on the output stream, with data filled in from the given database row.
writeContributor( $id, $text, $indent=" ")
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It's targeted particularly at maintainers for Linux since it's been observed that distribution packages of MediaWiki often break We've consistently had to recommend that users seeking support use official tarballs instead of their distribution's and this often solves whatever problem the user is having It would be nice if this could such as
const NS_FILE
Definition Defines.php:70
namespace and then decline to actually register it file or subcat img or subcat $title
Definition hooks.txt:994
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output $out
Definition hooks.txt:894
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition injection.txt:37
controlled by the following MediaWiki still creates a BagOStuff but calls it to it are no ops If the cache daemon can t be it should also disable itself fairly $wgDBname