MediaWiki REL1_28
XmlDumpWriter.php
Go to the documentation of this file.
1<?php
/**
 * Opens the XML output stream's root "<mediawiki>" element.
 * The returned string also carries the "<siteinfo>" section built by siteInfo().
 *
 * @return string
 */
function openStream() {
	// Without this declaration $wgLanguageCode would be a fresh, empty local.
	global $wgLanguageCode;
	// The schema version is interpolated into the namespace and schema URLs below.
	// NOTE(review): schemaVersion() is assumed to live on WikiExporter - confirm.
	$ver = WikiExporter::schemaVersion();
	return Xml::element( 'mediawiki', [
		'xmlns' => "http://www.mediawiki.org/xml/export-$ver/",
		'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance",
		/*
		 * When a new version of the schema is created, it needs staging on mediawiki.org.
		 * This requires a change in the operations/mediawiki-config git repo.
		 *
		 * Create a changeset like https://gerrit.wikimedia.org/r/#/c/149643/ in which
		 * you copy in the new xsd file.
		 *
		 * After it is reviewed, merged and deployed (sync-docroot), the index.html needs purging.
		 * echo "http://www.mediawiki.org/xml/index.html" | mwscript purgeList.php --wiki=aawiki
		 */
		'xsi:schemaLocation' => "http://www.mediawiki.org/xml/export-$ver/ " .
			"http://www.mediawiki.org/xml/export-$ver.xsd",
		'version' => $ver,
		'xml:lang' => $wgLanguageCode ],
		// null contents: only the opening tag is wanted here; closeStream() emits the end tag.
		null ) .
		"\n" .
		$this->siteInfo();
}
64
/**
 * Builds the "<siteinfo>" section by joining the individual site-level
 * elements, one per line.
 *
 * @return string
 */
function siteInfo() {
	$lines = [];
	$lines[] = $this->sitename();
	$lines[] = $this->dbname();
	$lines[] = $this->homelink();
	$lines[] = $this->generator();
	$lines[] = $this->caseSetting();
	$lines[] = $this->namespaces();

	$body = implode( "\n ", $lines );

	return " <siteinfo>\n " . $body . "\n </siteinfo>\n";
}
80
/**
 * Returns the "<sitename>" element holding the name of the site.
 *
 * @return string
 */
function sitename() {
	// Explicit global declaration; otherwise $wgSitename is an undefined local.
	global $wgSitename;
	return Xml::element( 'sitename', [], $wgSitename );
}
88
/**
 * Returns the "<dbname>" element holding the value of $wgDBname.
 *
 * @return string
 */
function dbname() {
	// Explicit global declaration; otherwise $wgDBname is an undefined local.
	global $wgDBname;
	return Xml::element( 'dbname', [], $wgDBname );
}
96
/**
 * Returns the "<generator>" element, "MediaWiki" followed by the version number.
 *
 * @return string
 */
function generator() {
	// Explicit global declaration; otherwise $wgVersion is an undefined local.
	global $wgVersion;
	return Xml::element( 'generator', [], "MediaWiki $wgVersion" );
}
104
/**
 * Returns the "<base>" element containing the canonical URL of the main page.
 *
 * @return string
 */
function homelink() {
	$mainPage = Title::newMainPage();
	$canonicalUrl = $mainPage->getCanonicalURL();

	return Xml::element( 'base', [], $canonicalUrl );
}
111
/**
 * Returns the "<case>" element describing the site-wide title case handling:
 * 'first-letter' when $wgCapitalLinks is truthy, 'case-sensitive' otherwise.
 *
 * @return string
 */
function caseSetting() {
	// Explicit global declaration; without it the local would be undefined and
	// the result would always be 'case-sensitive'.
	global $wgCapitalLinks;
	// "case-insensitive" option is reserved for future
	$sensitivity = $wgCapitalLinks ? 'first-letter' : 'case-sensitive';
	return Xml::element( 'case', [], $sensitivity );
}
121
/**
 * Returns the "<namespaces>" section: one "<namespace>" element per formatted
 * namespace of the content language, with its key and case handling.
 *
 * @return string
 */
function namespaces() {
	// Explicit global declaration; calling a method on an undefined local would be fatal.
	global $wgContLang;
	$spaces = "<namespaces>\n";
	foreach ( $wgContLang->getFormattedNamespaces() as $ns => $title ) {
		$spaces .= ' ' .
			Xml::element( 'namespace',
				[
					'key' => $ns,
					// Case handling is reported per namespace, not site-wide.
					'case' => MWNamespace::isCapitalized( $ns ) ? 'first-letter' : 'case-sensitive',
				], $title ) . "\n";
	}
	$spaces .= " </namespaces>";
	return $spaces;
}
139
/**
 * Closes the output stream with the closing root element.
 *
 * @return string
 */
function closeStream() {
	$closingTag = '</mediawiki>';

	return $closingTag . "\n";
}
149
/**
 * Opens a "<page>" section on the output stream, with data
 * from the given database row.
 *
 * @param object $row Row providing page_namespace, page_title, page_id,
 *   page_is_redirect and page_restrictions
 * @return string
 */
public function openPage( $row ) {
	$out = " <page>\n";
	$title = Title::makeTitle( $row->page_namespace, $row->page_title );
	$out .= ' ' . Xml::elementClean( 'title', [], self::canonicalTitle( $title ) ) . "\n";
	$out .= ' ' . Xml::element( 'ns', [], strval( $row->page_namespace ) ) . "\n";
	$out .= ' ' . Xml::element( 'id', [], strval( $row->page_id ) ) . "\n";
	if ( $row->page_is_redirect ) {
		// The page object must be created before its redirect target can be
		// queried; without this assignment $page is undefined.
		$page = WikiPage::factory( $title );
		$redirect = $page->getRedirectTarget();
		// Only emit the element for a usable redirect target.
		if ( $redirect instanceof Title && $redirect->isValidRedirectTarget() ) {
			$out .= ' ';
			$out .= Xml::element( 'redirect', [ 'title' => self::canonicalTitle( $redirect ) ] );
			$out .= "\n";
		}
	}

	if ( $row->page_restrictions != '' ) {
		$out .= ' ' . Xml::element( 'restrictions', [],
			strval( $row->page_restrictions ) ) . "\n";
	}

	// $out is passed by reference so hook handlers can modify the output.
	Hooks::run( 'XmlDumpWriterOpenPage', [ $this, &$out, $row, $title ] );

	return $out;
}
182
/**
 * Closes a "<page>" section on the output stream.
 *
 * @return string
 */
function closePage() {
	$closingTag = ' </page>';

	return $closingTag . "\n";
}
192
/**
 * Dumps a "<revision>" section on the output stream, with
 * data filled in from the given database row.
 *
 * @param object $row Revision row; rev_deleted, rev_parent_id, rev_minor_edit,
 *   rev_comment, rev_content_model, rev_content_format, rev_sha1 and old_text
 *   are treated as optional
 * @return string
 */
function writeRevision( $row ) {
	// Read the visibility bitfield once. Every check below treats a missing
	// rev_deleted as "nothing hidden", including the sha1 check, which
	// previously read the property unguarded.
	$deleted = isset( $row->rev_deleted ) ? $row->rev_deleted : 0;

	$out = " <revision>\n";
	$out .= " " . Xml::element( 'id', null, strval( $row->rev_id ) ) . "\n";
	if ( isset( $row->rev_parent_id ) && $row->rev_parent_id ) {
		$out .= " " . Xml::element( 'parentid', null, strval( $row->rev_parent_id ) ) . "\n";
	}

	$out .= $this->writeTimestamp( $row->rev_timestamp );

	if ( $deleted & Revision::DELETED_USER ) {
		$out .= " " . Xml::element( 'contributor', [ 'deleted' => 'deleted' ] ) . "\n";
	} else {
		$out .= $this->writeContributor( $row->rev_user, $row->rev_user_text );
	}

	if ( isset( $row->rev_minor_edit ) && $row->rev_minor_edit ) {
		$out .= " <minor/>\n";
	}
	if ( $deleted & Revision::DELETED_COMMENT ) {
		$out .= " " . Xml::element( 'comment', [ 'deleted' => 'deleted' ] ) . "\n";
	} elseif ( isset( $row->rev_comment ) && $row->rev_comment != '' ) {
		$out .= " " . Xml::elementClean( 'comment', [], strval( $row->rev_comment ) ) . "\n";
	}

	if ( isset( $row->rev_content_model ) && !is_null( $row->rev_content_model ) ) {
		$content_model = strval( $row->rev_content_model );
	} else {
		// probably using $wgContentHandlerUseDB = false;
		$title = Title::makeTitle( $row->page_namespace, $row->page_title );
		$content_model = ContentHandler::getDefaultModelFor( $title );
	}

	$content_handler = ContentHandler::getForModelID( $content_model );

	if ( isset( $row->rev_content_format ) && !is_null( $row->rev_content_format ) ) {
		$content_format = strval( $row->rev_content_format );
	} else {
		// probably using $wgContentHandlerUseDB = false;
		$content_format = $content_handler->getDefaultFormat();
	}

	$out .= " " . Xml::element( 'model', null, strval( $content_model ) ) . "\n";
	$out .= " " . Xml::element( 'format', null, strval( $content_format ) ) . "\n";

	// $text stays empty for hidden and stub revisions; it is handed to the hook below.
	$text = '';
	if ( $deleted & Revision::DELETED_TEXT ) {
		$out .= " " . Xml::element( 'text', [ 'deleted' => 'deleted' ] ) . "\n";
	} elseif ( isset( $row->old_text ) ) {
		// Raw text from the database may have invalid chars
		$text = strval( Revision::getRevisionText( $row ) );
		$text = $content_handler->exportTransform( $text, $content_format );
		$out .= " " . Xml::elementClean( 'text',
			[ 'xml:space' => 'preserve', 'bytes' => intval( $row->rev_len ) ],
			strval( $text ) ) . "\n";
	} else {
		// Stub output
		$out .= " " . Xml::element( 'text',
			[ 'id' => $row->rev_text_id, 'bytes' => intval( $row->rev_len ) ],
			"" ) . "\n";
	}

	// The hash is only published when the text itself is not hidden.
	if ( isset( $row->rev_sha1 )
		&& $row->rev_sha1
		&& !( $deleted & Revision::DELETED_TEXT )
	) {
		$out .= " " . Xml::element( 'sha1', null, strval( $row->rev_sha1 ) ) . "\n";
	} else {
		$out .= " <sha1/>\n";
	}

	// Avoid PHP 7.1 warning from passing $this by reference
	$writer = $this;
	Hooks::run( 'XmlDumpWriterWriteRevision', [ &$writer, &$out, $row, $text ] );

	$out .= " </revision>\n";

	return $out;
}
280
/**
 * Dumps a "<logitem>" section on the output stream, with
 * data filled in from the given database row.
 *
 * @param object $row Log row providing log_id, log_timestamp, log_deleted,
 *   log_user, user_name, log_comment, log_type, log_action, log_namespace,
 *   log_title and log_params
 * @return string
 */
function writeLogItem( $row ) {
	$hiddenAttr = [ 'deleted' => 'deleted' ];
	$parts = [];

	$parts[] = " <logitem>\n";
	$parts[] = " " . Xml::element( 'id', null, strval( $row->log_id ) ) . "\n";
	$parts[] = $this->writeTimestamp( $row->log_timestamp, " " );

	// Contributor: replaced by a placeholder when the user is hidden.
	$parts[] = ( $row->log_deleted & LogPage::DELETED_USER )
		? " " . Xml::element( 'contributor', $hiddenAttr ) . "\n"
		: $this->writeContributor( $row->log_user, $row->user_name, " " );

	// Comment: placeholder when hidden, omitted entirely when empty.
	if ( $row->log_deleted & LogPage::DELETED_COMMENT ) {
		$parts[] = " " . Xml::element( 'comment', $hiddenAttr ) . "\n";
	} elseif ( $row->log_comment != '' ) {
		$parts[] = " " . Xml::elementClean( 'comment', null, strval( $row->log_comment ) ) . "\n";
	}

	$parts[] = " " . Xml::element( 'type', null, strval( $row->log_type ) ) . "\n";
	$parts[] = " " . Xml::element( 'action', null, strval( $row->log_action ) ) . "\n";

	// Target title and parameters are withheld when the action is hidden.
	if ( $row->log_deleted & LogPage::DELETED_ACTION ) {
		$parts[] = " " . Xml::element( 'text', $hiddenAttr ) . "\n";
	} else {
		$target = Title::makeTitle( $row->log_namespace, $row->log_title );
		$parts[] = " " . Xml::elementClean( 'logtitle', null, self::canonicalTitle( $target ) ) . "\n";
		$parts[] = " " . Xml::elementClean(
			'params',
			[ 'xml:space' => 'preserve' ],
			strval( $row->log_params )
		) . "\n";
	}

	$parts[] = " </logitem>\n";

	return implode( '', $parts );
}
325
/**
 * Returns a "<timestamp>" element for the given timestamp, prefixed by
 * $indent and followed by a newline.
 *
 * @param string $timestamp Timestamp in a format accepted by wfTimestamp()
 * @param string $indent Prefix placed before the element
 * @return string
 */
function writeTimestamp( $timestamp, $indent = " " ) {
	// Convert to ISO 8601 before output; without this assignment $ts is undefined.
	$ts = wfTimestamp( TS_ISO_8601, $timestamp );
	return $indent . Xml::element( 'timestamp', null, $ts ) . "\n";
}
335
/**
 * Returns a "<contributor>" section. A contributor with no id whose name is a
 * valid IP address is written as "<ip>"; everyone else as "<username>" plus "<id>".
 *
 * @param int $id User id
 * @param string $text User name or IP address
 * @param string $indent Prefix placed before each line
 * @return string
 */
function writeContributor( $id, $text, $indent = " " ) {
	$inner = $indent . " ";

	// IP::isValid() is only consulted when there is no user id.
	$isAnonymousIp = !$id && IP::isValid( $text );

	if ( $isAnonymousIp ) {
		$body = $inner . Xml::elementClean( 'ip', null, strval( $text ) ) . "\n";
	} else {
		$body = $inner . Xml::elementClean( 'username', null, strval( $text ) ) . "\n";
		$body .= $inner . Xml::element( 'id', null, strval( $id ) ) . "\n";
	}

	return $indent . "<contributor>\n" . $body . $indent . "</contributor>\n";
}
353
/**
 * Returns the "<upload>" sections for a file page: every historical version
 * in reversed getHistory() order, followed by the current version.
 * Warning! This data is potentially inconsistent.
 *
 * @param object $row Page row providing page_namespace and page_title
 * @param bool $dumpContents Passed through to writeUpload()
 * @return string Empty for non-file pages and for files that do not exist
 */
function writeUploads( $row, $dumpContents = false ) {
	if ( $row->page_namespace != NS_FILE ) {
		return '';
	}

	$current = wfLocalFile( $row->page_title );
	if ( !$current || !$current->exists() ) {
		return '';
	}

	$xml = '';
	foreach ( array_reverse( $current->getHistory() ) as $oldVersion ) {
		$xml .= $this->writeUpload( $oldVersion, $dumpContents );
	}

	return $xml . $this->writeUpload( $current, $dumpContents );
}
374
/**
 * Returns one "<upload>" section describing a single file version.
 *
 * @param File $file File version to describe
 * @param bool $dumpContents Whether to embed the file contents, base64 encoded
 * @return string
 */
function writeUpload( $file, $dumpContents = false ) {
	// Only archived (old) versions carry an archive name.
	$archiveName = '';
	if ( $file->isOld() ) {
		$archiveName = " " . Xml::element( 'archivename', null, $file->getArchiveName() ) . "\n";
	}

	$contents = '';
	if ( $dumpContents ) {
		# Dump file as base64
		# Uses only XML-safe characters, so does not need escaping
		# @todo Too bad this loads the contents into memory (script might swap)
		$raw = $file->getRepo()->getBackend()->getFileContents( [ 'src' => $file->getPath() ] );
		$contents = ' <contents encoding="base64">' .
			chunk_split( base64_encode( $raw ) ) .
			" </contents>\n";
	}

	// A hidden comment is replaced by a placeholder element.
	$comment = $file->isDeleted( File::DELETED_COMMENT )
		? Xml::element( 'comment', [ 'deleted' => 'deleted' ] )
		: Xml::elementClean( 'comment', null, $file->getDescription() );

	$pieces = [
		" <upload>\n",
		$this->writeTimestamp( $file->getTimestamp() ),
		$this->writeContributor( $file->getUser( 'id' ), $file->getUser( 'text' ) ),
		" " . $comment . "\n",
		" " . Xml::element( 'filename', null, $file->getName() ) . "\n",
		$archiveName,
		" " . Xml::element( 'src', null, $file->getCanonicalUrl() ) . "\n",
		" " . Xml::element( 'size', null, $file->getSize() ) . "\n",
		" " . Xml::element( 'sha1base36', null, $file->getSha1() ) . "\n",
		" " . Xml::element( 'rel', null, $file->getRel() ) . "\n",
		$contents,
		" </upload>\n",
	];

	return implode( '', $pieces );
}
417
/**
 * Return prefixed text form of title, but using the content language's
 * canonical namespace. External (interwiki) titles are returned through
 * getPrefixedText() unchanged.
 *
 * @param Title $title
 * @return string
 */
public static function canonicalTitle( Title $title ) {
	if ( $title->isExternal() ) {
		return $title->getPrefixedText();
	}

	// Explicit global declaration; calling a method on an undefined local would be fatal.
	global $wgContLang;
	$prefix = $wgContLang->getFormattedNsText( $title->getNamespace() );

	// An empty namespace text gets no separator.
	if ( $prefix !== '' ) {
		$prefix .= ':';
	}

	return $prefix . $title->getText();
}
442}
$wgLanguageCode
Site language code.
$wgCapitalLinks
Set this to false to avoid forcing the first letter of links to capitals.
$wgSitename
Name of the site.
$wgVersion
MediaWiki version number.
wfLocalFile( $title)
Get an object referring to a locally registered file.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
static getForModelID( $modelId)
Returns the ContentHandler singleton for the given model ID.
static getDefaultModelFor(Title $title)
Returns the name of the default content model to be used for the page with the given title.
const DELETED_COMMENT
Definition File.php:53
static isValid( $ip)
Validate an IP address.
Definition IP.php:113
const DELETED_USER
Definition LogPage.php:35
const DELETED_COMMENT
Definition LogPage.php:34
const DELETED_ACTION
Definition LogPage.php:33
static getRevisionText( $row, $prefix='old_', $wiki=false)
Get revision text associated with an old or archive row $row is usually an object from wfFetchRow(),...
const DELETED_USER
Definition Revision.php:87
const DELETED_TEXT
Definition Revision.php:85
const DELETED_COMMENT
Definition Revision.php:86
Represents a title within MediaWiki.
Definition Title.php:36
isValidRedirectTarget()
Check if this Title is a valid redirect target.
Definition Title.php:4600
static schemaVersion()
Returns the export schema version.
static factory(Title $title)
Create a WikiPage object of the appropriate class for the given title.
Definition WikiPage.php:115
closeStream()
Closes the output stream with the closing root element.
static canonicalTitle(Title $title)
Return prefixed text form of title, but using the content language's canonical namespace.
writeUpload( $file, $dumpContents=false)
writeLogItem( $row)
Dumps a "<logitem>" section on the output stream, with data filled in from the given database row.
writeTimestamp( $timestamp, $indent=" ")
writeUploads( $row, $dumpContents=false)
Warning! This data is potentially inconsistent.
closePage()
Closes a "<page>" section on the output stream.
openStream()
Opens the XML output stream's root "<mediawiki>" element.
openPage( $row)
Opens a "<page>" section on the output stream, with data from the given database row.
writeRevision( $row)
Dumps a "<revision>" section on the output stream, with data filled in from the given database row.
writeContributor( $id, $text, $indent=" ")
static element( $element, $attribs=null, $contents='', $allowShortTag=true)
Format an XML element with given attributes and, optionally, text content.
Definition Xml.php:39
static elementClean( $element, $attribs=[], $contents='')
Format an XML element as with self::element(), but run text through the $wgContLang->normalize() vali...
Definition Xml.php:91
this class mediates it Skin Encapsulates a look and feel for the wiki All of the functions that render HTML and make choices about how to render it are here and are called from various other places when and is meant to be subclassed with other skins that may override some of its functions The User object contains a reference to a and so rather than having a global skin object we just rely on the global User and get the skin with $wgUser and also has some character encoding functions and other locale stuff The current user interface language is instantiated as and the local content language as $wgContLang
Definition design.txt:57
When a variable name is used in a function, it is silently declared as a new local, masking the global.
Definition design.txt:95
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
const NS_FILE
Definition Defines.php:62
namespace and then decline to actually register it file or subcat img or subcat $title
Definition hooks.txt:986
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output $out
Definition hooks.txt:886
namespace are movable Hooks may change this value to override the return value of MWNamespace::isMovable(). 'NewDifferenceEngine' do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values before the output is cached $page
Definition hooks.txt:2534
$comment
if( $limit) $timestamp
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition injection.txt:37
controlled by $wgMainCacheType controlled by $wgParserCacheType controlled by $wgMessageCacheType If you set CACHE_NONE to one of the three control default value for MediaWiki still create a but requests to it are no ops and we always fall through to the database If the cache daemon can t be it should also disable itself fairly smoothly By $wgMemc is used but when it is $parserMemc or $messageMemc this is mentioned $wgDBname
const TS_ISO_8601
ISO 8601 format with no timezone: 1986-02-09T20:00:00Z.
Definition defines.php:28