MediaWiki REL1_31
XmlDumpWriter.php
Go to the documentation of this file.
1<?php
/**
 * Build the opening of the XML stream: the root "mediawiki" element
 * followed by a newline and the site information block.
 *
 * @return string
 */
function openStream() {
	global $wgContLang;

	$schemaVersion = WikiExporter::schemaVersion();
	// Namespace URI of the export schema; also the first half of xsi:schemaLocation.
	$exportNs = "http://www.mediawiki.org/xml/export-$schemaVersion/";

	/*
	 * When a new version of the schema is created, it needs staging on mediawiki.org.
	 * This requires a change in the operations/mediawiki-config git repo.
	 *
	 * Create a changeset like https://gerrit.wikimedia.org/r/#/c/149643/ in which
	 * you copy in the new xsd file.
	 *
	 * After it is reviewed, merged and deployed (sync-docroot), the index.html needs purging.
	 * echo "https://www.mediawiki.org/xml/index.html" | mwscript purgeList.php --wiki=aawiki
	 */
	$rootAttribs = [
		'xmlns' => $exportNs,
		'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance",
		'xsi:schemaLocation' => $exportNs . ' ' .
			"http://www.mediawiki.org/xml/export-$schemaVersion.xsd",
		'version' => $schemaVersion,
		'xml:lang' => $wgContLang->getHtmlCode(),
	];

	// NOTE(review): null contents presumably makes Xml::element() emit only the
	// start tag; the matching end tag is produced by closeStream() - confirm.
	$rootTag = Xml::element( 'mediawiki', $rootAttribs, null );

	return $rootTag . "\n" . $this->siteInfo();
}
64
/**
 * Assemble the "siteinfo" block from the individual site-level elements.
 *
 * @return string
 */
function siteInfo() {
	// One entry per child element, in output order.
	$children = [
		$this->sitename(),
		$this->dbname(),
		$this->homelink(),
		$this->generator(),
		$this->caseSetting(),
		$this->namespaces(),
	];
	$body = implode( "\n ", $children );

	return " <siteinfo>\n " . $body . "\n </siteinfo>\n";
}
80
/**
 * Build the "sitename" element from the $wgSitename setting.
 *
 * @return string
 */
function sitename() {
	global $wgSitename;

	$element = Xml::element( 'sitename', [], $wgSitename );
	return $element;
}
88
/**
 * Build the "dbname" element from the $wgDBname setting.
 *
 * @return string
 */
function dbname() {
	global $wgDBname;

	$element = Xml::element( 'dbname', [], $wgDBname );
	return $element;
}
96
/**
 * Build the "generator" element, naming the software and its version
 * as taken from $wgVersion.
 *
 * @return string
 */
function generator() {
	global $wgVersion;

	$label = 'MediaWiki ' . $wgVersion;
	return Xml::element( 'generator', [], $label );
}
104
/**
 * Build the "base" element holding the canonical URL of the main page.
 *
 * @return string
 */
function homelink() {
	$mainPage = Title::newMainPage();
	$url = $mainPage->getCanonicalURL();

	return Xml::element( 'base', [], $url );
}
111
/**
 * Build the "case" element describing the site-wide title case handling,
 * derived from $wgCapitalLinks.
 *
 * @return string
 */
function caseSetting() {
	global $wgCapitalLinks;

	// "case-insensitive" option is reserved for future
	if ( $wgCapitalLinks ) {
		return Xml::element( 'case', [], 'first-letter' );
	}

	return Xml::element( 'case', [], 'case-sensitive' );
}
121
/**
 * Build the "namespaces" block: one "namespace" element per entry returned
 * by the content language's getFormattedNamespaces(), carrying the namespace
 * key and its case handling as attributes and the name as contents.
 *
 * @return string
 */
function namespaces() {
	global $wgContLang;

	$xml = "<namespaces>\n";
	foreach ( $wgContLang->getFormattedNamespaces() as $nsId => $nsName ) {
		if ( MWNamespace::isCapitalized( $nsId ) ) {
			$case = 'first-letter';
		} else {
			$case = 'case-sensitive';
		}

		$attribs = [
			'key' => $nsId,
			'case' => $case,
		];
		$xml .= ' ' . Xml::element( 'namespace', $attribs, $nsName ) . "\n";
	}

	return $xml . " </namespaces>";
}
139
/**
 * Produce the closing tag of the root "mediawiki" element, ending the stream.
 *
 * @return string
 */
function closeStream() {
	$closingTag = "</mediawiki>\n";
	return $closingTag;
}
149
/**
 * Open a "page" section: the start tag plus the title, ns and id elements,
 * and, where applicable, the redirect and restrictions elements.
 *
 * The XmlDumpWriterOpenPage hook is run before returning and receives the
 * output string by reference.
 *
 * @param object $row Database row; reads page_namespace, page_title,
 *   page_id, page_is_redirect and page_restrictions
 * @return string
 */
public function openPage( $row ) {
	$title = Title::makeTitle( $row->page_namespace, $row->page_title );

	$xml = " <page>\n";
	$xml .= ' ' . Xml::elementClean( 'title', [], self::canonicalTitle( $title ) ) . "\n";
	$xml .= ' ' . Xml::element( 'ns', [], strval( $row->page_namespace ) ) . "\n";
	$xml .= ' ' . Xml::element( 'id', [], strval( $row->page_id ) ) . "\n";

	if ( $row->page_is_redirect ) {
		$target = WikiPage::factory( $title )->getRedirectTarget();
		// Only emit the element when the target resolved to a usable Title.
		if ( $target instanceof Title && $target->isValidRedirectTarget() ) {
			$redirectAttribs = [ 'title' => self::canonicalTitle( $target ) ];
			$xml .= ' ' . Xml::element( 'redirect', $redirectAttribs ) . "\n";
		}
	}

	if ( $row->page_restrictions != '' ) {
		$restrictions = strval( $row->page_restrictions );
		$xml .= ' ' . Xml::element( 'restrictions', [], $restrictions ) . "\n";
	}

	Hooks::run( 'XmlDumpWriterOpenPage', [ $this, &$xml, $row, $title ] );

	return $xml;
}
182
/**
 * Produce the closing tag of a "page" section.
 *
 * @return string
 */
function closePage() {
	$closingTag = " </page>\n";
	return $closingTag;
}
192
/**
 * Dump a "revision" section built from the given database row.
 *
 * Emits, in order: id, optional parentid, timestamp, contributor, optional
 * minor marker, comment, model, format, text and sha1. Contributor, comment
 * and text are replaced by an empty element flagged deleted="deleted" when
 * the corresponding bit is set in rev_deleted. The XmlDumpWriterWriteRevision
 * hook is run just before the section is closed and receives the output
 * string by reference.
 *
 * @param object $row Database row; reads rev_id, rev_timestamp, rev_user,
 *   rev_user_text, rev_len and, when present, rev_parent_id, rev_deleted,
 *   rev_minor_edit, rev_content_model, rev_content_format, old_text,
 *   rev_text_id and rev_sha1. page_namespace and page_title are read when
 *   rev_content_model is absent.
 * @return string
 */
function writeRevision( $row ) {
	// Read the deletion bitfield once. The column may be missing from the row,
	// in which case no bits are treated as set. The sha1 check below used to
	// read the property unguarded, which raised an undefined-property notice.
	$deleted = isset( $row->rev_deleted ) ? (int)$row->rev_deleted : 0;

	$out = " <revision>\n";
	$out .= " " . Xml::element( 'id', null, strval( $row->rev_id ) ) . "\n";
	if ( !empty( $row->rev_parent_id ) ) {
		$out .= " " . Xml::element( 'parentid', null, strval( $row->rev_parent_id ) ) . "\n";
	}

	$out .= $this->writeTimestamp( $row->rev_timestamp );

	if ( $deleted & Revision::DELETED_USER ) {
		$out .= " " . Xml::element( 'contributor', [ 'deleted' => 'deleted' ] ) . "\n";
	} else {
		$out .= $this->writeContributor( $row->rev_user, $row->rev_user_text );
	}

	if ( !empty( $row->rev_minor_edit ) ) {
		$out .= " <minor/>\n";
	}

	if ( $deleted & Revision::DELETED_COMMENT ) {
		$out .= " " . Xml::element( 'comment', [ 'deleted' => 'deleted' ] ) . "\n";
	} else {
		$comment = CommentStore::getStore()->getComment( 'rev_comment', $row )->text;
		// An empty comment produces no element at all.
		if ( $comment != '' ) {
			$out .= " " . Xml::elementClean( 'comment', [], strval( $comment ) ) . "\n";
		}
	}

	// isset() is already false for null, so no separate null check is needed.
	if ( isset( $row->rev_content_model ) ) {
		$content_model = strval( $row->rev_content_model );
	} else {
		// probably using $wgContentHandlerUseDB = false;
		$title = Title::makeTitle( $row->page_namespace, $row->page_title );
		$content_model = ContentHandler::getDefaultModelFor( $title );
	}

	$content_handler = ContentHandler::getForModelID( $content_model );

	if ( isset( $row->rev_content_format ) ) {
		$content_format = strval( $row->rev_content_format );
	} else {
		// probably using $wgContentHandlerUseDB = false;
		$content_format = $content_handler->getDefaultFormat();
	}

	$out .= " " . Xml::element( 'model', null, strval( $content_model ) ) . "\n";
	$out .= " " . Xml::element( 'format', null, strval( $content_format ) ) . "\n";

	// Stays empty unless the full text is dumped; passed on to the hook below.
	$text = '';
	if ( $deleted & Revision::DELETED_TEXT ) {
		$out .= " " . Xml::element( 'text', [ 'deleted' => 'deleted' ] ) . "\n";
	} elseif ( isset( $row->old_text ) ) {
		// Raw text from the database may have invalid chars
		$text = strval( Revision::getRevisionText( $row ) );
		$text = $content_handler->exportTransform( $text, $content_format );
		$out .= " " . Xml::elementClean( 'text',
			[ 'xml:space' => 'preserve', 'bytes' => intval( $row->rev_len ) ],
			strval( $text ) ) . "\n";
	} else {
		// Stub output: no text in the row, so only reference it by rev_text_id.
		$out .= " " . Xml::element( 'text',
			[ 'id' => $row->rev_text_id, 'bytes' => intval( $row->rev_len ) ],
			"" ) . "\n";
	}

	// The hash is withheld when the text itself is hidden.
	if ( !empty( $row->rev_sha1 ) && !( $deleted & Revision::DELETED_TEXT ) ) {
		$out .= " " . Xml::element( 'sha1', null, strval( $row->rev_sha1 ) ) . "\n";
	} else {
		$out .= " <sha1/>\n";
	}

	// Avoid PHP 7.1 warning from passing $this by reference
	$writer = $this;
	Hooks::run( 'XmlDumpWriterWriteRevision', [ &$writer, &$out, $row, $text ] );

	$out .= " </revision>\n";

	return $out;
}
282
/**
 * Dump a "logitem" section built from the given database row.
 *
 * Emits, in order: id, timestamp, contributor, comment, type, action and
 * then either the log title and params, or an empty "text" element flagged
 * deleted="deleted" when the action is hidden. Contributor and comment are
 * likewise replaced by a deleted placeholder when hidden.
 *
 * @param object $row Database row; reads log_id, log_timestamp, log_deleted,
 *   log_user, user_name, log_type, log_action, log_namespace, log_title
 *   and log_params
 * @return string
 */
function writeLogItem( $row ) {
	// Visibility bitfield, tested against the LogPage::DELETED_* constants.
	$hidden = $row->log_deleted;

	$xml = " <logitem>\n";
	$xml .= " " . Xml::element( 'id', null, strval( $row->log_id ) ) . "\n";
	$xml .= $this->writeTimestamp( $row->log_timestamp, " " );

	if ( !( $hidden & LogPage::DELETED_USER ) ) {
		$xml .= $this->writeContributor( $row->log_user, $row->user_name, " " );
	} else {
		$xml .= " " . Xml::element( 'contributor', [ 'deleted' => 'deleted' ] ) . "\n";
	}

	if ( !( $hidden & LogPage::DELETED_COMMENT ) ) {
		$comment = CommentStore::getStore()->getComment( 'log_comment', $row )->text;
		// An empty comment produces no element at all.
		if ( $comment != '' ) {
			$xml .= " " . Xml::elementClean( 'comment', null, strval( $comment ) ) . "\n";
		}
	} else {
		$xml .= " " . Xml::element( 'comment', [ 'deleted' => 'deleted' ] ) . "\n";
	}

	$xml .= " " . Xml::element( 'type', null, strval( $row->log_type ) ) . "\n";
	$xml .= " " . Xml::element( 'action', null, strval( $row->log_action ) ) . "\n";

	if ( !( $hidden & LogPage::DELETED_ACTION ) ) {
		$target = Title::makeTitle( $row->log_namespace, $row->log_title );
		$xml .= " " . Xml::elementClean( 'logtitle', null, self::canonicalTitle( $target ) ) . "\n";

		$paramAttribs = [ 'xml:space' => 'preserve' ];
		$xml .= " " . Xml::elementClean( 'params', $paramAttribs, strval( $row->log_params ) ) . "\n";
	} else {
		$xml .= " " . Xml::element( 'text', [ 'deleted' => 'deleted' ] ) . "\n";
	}

	return $xml . " </logitem>\n";
}
329
/**
 * Build an indented "timestamp" element with the given timestamp
 * converted to TS_ISO_8601 format.
 *
 * @param string $timestamp Timestamp in a format accepted by wfTimestamp()
 * @param string $indent Text placed before the element
 * @return string
 */
function writeTimestamp( $timestamp, $indent = " " ) {
	$element = Xml::element( 'timestamp', null, wfTimestamp( TS_ISO_8601, $timestamp ) );

	return $indent . $element . "\n";
}
339
/**
 * Build a "contributor" block.
 *
 * When $id is truthy, or $text is not a valid IP address, the block holds
 * "username" and "id" elements; otherwise it holds a single "ip" element.
 *
 * @param int $id User ID
 * @param string $text User name or IP address
 * @param string $indent Text placed before the opening and closing tags
 * @return string
 */
function writeContributor( $id, $text, $indent = " " ) {
	$childIndent = $indent . " ";
	$name = strval( $text );

	if ( $id || !IP::isValid( $text ) ) {
		$children = $childIndent . Xml::elementClean( 'username', null, $name ) . "\n"
			. $childIndent . Xml::element( 'id', null, strval( $id ) ) . "\n";
	} else {
		$children = $childIndent . Xml::elementClean( 'ip', null, $name ) . "\n";
	}

	return $indent . "<contributor>\n" . $children . $indent . "</contributor>\n";
}
357
/**
 * Dump "upload" sections for every version of the file behind a page row.
 *
 * Returns an empty string unless the row is in NS_FILE and the local file
 * exists. Otherwise the file's history is written in reversed order,
 * followed by the file object itself.
 *
 * @param object $row Database row; reads page_namespace and page_title
 * @param bool $dumpContents Passed through to writeUpload()
 * @return string
 */
function writeUploads( $row, $dumpContents = false ) {
	if ( $row->page_namespace != NS_FILE ) {
		return '';
	}

	$current = wfLocalFile( $row->page_title );
	if ( !$current || !$current->exists() ) {
		return '';
	}

	$xml = '';
	// getHistory() is reversed here so that the current version comes last.
	$versions = array_reverse( $current->getHistory() );
	foreach ( $versions as $oldVersion ) {
		$xml .= $this->writeUpload( $oldVersion, $dumpContents );
	}

	return $xml . $this->writeUpload( $current, $dumpContents );
}
378
/**
 * Dump a single "upload" section for one file version.
 *
 * @param object $file File object; must provide isOld(), getArchiveName(),
 *   getRepo(), getPath(), isDeleted(), getDescription(), getTimestamp(),
 *   getUser(), getName(), getCanonicalUrl(), getSize(), getSha1() and getRel()
 * @param bool $dumpContents Whether to embed the file contents as base64
 * @return string
 */
function writeUpload( $file, $dumpContents = false ) {
	// Only old versions carry an archive name.
	$archiveName = '';
	if ( $file->isOld() ) {
		$archiveName = " " .
			Xml::element( 'archivename', null, $file->getArchiveName() ) . "\n";
	}

	$contents = '';
	if ( $dumpContents ) {
		$backend = $file->getRepo()->getBackend();
		# Dump file as base64
		# Uses only XML-safe characters, so does not need escaping
		# @todo Too bad this loads the contents into memory (script might swap)
		$raw = $backend->getFileContents( [ 'src' => $file->getPath() ] );
		$encoded = chunk_split( base64_encode( $raw ) );
		$contents = ' <contents encoding="base64">' . $encoded . " </contents>\n";
	}

	if ( $file->isDeleted( File::DELETED_COMMENT ) ) {
		$comment = Xml::element( 'comment', [ 'deleted' => 'deleted' ] );
	} else {
		$comment = Xml::elementClean( 'comment', null, strval( $file->getDescription() ) );
	}

	// Pieces are listed in output order and joined without a separator.
	$pieces = [
		" <upload>\n",
		$this->writeTimestamp( $file->getTimestamp() ),
		$this->writeContributor( $file->getUser( 'id' ), $file->getUser( 'text' ) ),
		" " . $comment . "\n",
		" " . Xml::element( 'filename', null, $file->getName() ) . "\n",
		$archiveName,
		" " . Xml::element( 'src', null, $file->getCanonicalUrl() ) . "\n",
		" " . Xml::element( 'size', null, $file->getSize() ) . "\n",
		" " . Xml::element( 'sha1base36', null, $file->getSha1() ) . "\n",
		" " . Xml::element( 'rel', null, $file->getRel() ) . "\n",
		$contents,
		" </upload>\n",
	];

	return implode( '', $pieces );
}
421
/**
 * Return the prefixed text form of a title, with the namespace prefix taken
 * from the content language's getFormattedNsText().
 *
 * External titles are returned via getPrefixedText() unchanged.
 *
 * @param Title $title
 * @return string
 */
public static function canonicalTitle( Title $title ) {
	if ( $title->isExternal() ) {
		return $title->getPrefixedText();
	}

	global $wgContLang;
	$nsText = $wgContLang->getFormattedNsText( $title->getNamespace() );

	// @todo Emit some kind of warning to the user if $title->getNamespace() !==
	// NS_MAIN and $prefix === '' (viz. pages in an unregistered namespace)

	// An empty namespace text gets no colon separator.
	$prefix = ( $nsText !== '' ) ? $nsText . ':' : $nsText;

	return $prefix . $title->getText();
}
449}
$wgCapitalLinks
Set this to false to avoid forcing the first letter of links to capitals.
$wgSitename
Name of the site.
$wgVersion
MediaWiki version number.
wfLocalFile( $title)
Get an object referring to a locally registered file.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
const DELETED_COMMENT
Definition File.php:54
const DELETED_USER
Definition LogPage.php:34
const DELETED_COMMENT
Definition LogPage.php:33
const DELETED_ACTION
Definition LogPage.php:32
Represents a title within MediaWiki.
Definition Title.php:39
isValidRedirectTarget()
Check if this Title is a valid redirect target.
Definition Title.php:4863
closeStream()
Closes the output stream with the closing root element.
static canonicalTitle(Title $title)
Return prefixed text form of title, but using the content language's canonical namespace.
writeUpload( $file, $dumpContents=false)
writeLogItem( $row)
Dumps a "<logitem>" section on the output stream, with data filled in from the given database row.
writeTimestamp( $timestamp, $indent=" ")
writeUploads( $row, $dumpContents=false)
Warning! This data is potentially inconsistent.
closePage()
Closes a "<page>" section on the output stream.
openStream()
Opens the XML output stream's root "<mediawiki>" element.
openPage( $row)
Opens a "<page>" section on the output stream, with data from the given database row.
writeRevision( $row)
Dumps a "<revision>" section on the output stream, with data filled in from the given database row.
writeContributor( $id, $text, $indent=" ")
this class mediates it Skin Encapsulates a look and feel for the wiki All of the functions that render HTML and make choices about how to render it are here and are called from various other places when and is meant to be subclassed with other skins that may override some of its functions The User object contains a reference to a and so rather than having a global skin object we just rely on the global User and get the skin with $wgUser and also has some character encoding functions and other locale stuff The current user interface language is instantiated as and the local content language as $wgContLang
Definition design.txt:57
namespace and then decline to actually register it file or subcat img or subcat $title
Definition hooks.txt:964
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output $out
Definition hooks.txt:864
const NS_FILE
Definition Defines.php:80
controlled by $wgMainCacheType controlled by $wgParserCacheType controlled by $wgMessageCacheType If you set CACHE_NONE to one of the three control default value for MediaWiki still create a but requests to it are no ops and we always fall through to the database If the cache daemon can t be it should also disable itself fairly smoothly By $wgMemc is used but when it is $parserMemc or $messageMemc this is mentioned $wgDBname