MediaWiki master
BaseDump.php
Go to the documentation of this file.
1<?php
14
16use XMLReader;
17
/**
 * Readahead helper for making large MediaWiki data dumps; reads in a
 * previous XML dump to sequentially prefetch revision text.
 *
 * The underlying XMLReader only moves forwards, so prefetch() has to be
 * called with ascending page IDs and, within a page, ascending revision IDs.
 * A request for something the reader has already passed yields null.
 *
 * Several input files may be given, separated by ';'. They are consumed one
 * after another.
 */
class BaseDump {
	/** @var XMLReader|null Reader for the input file currently being consumed */
	protected $reader = null;
	/** @var bool True while nothing can be read: a file failed to open or the reader was closed */
	protected $atEnd = false;
	/** @var bool True once no further revision could be found on the current page */
	protected $atPageEnd = false;
	/** @var int ID of the page most recently read from the stream */
	protected $lastPage = 0;
	/** @var int ID of the revision most recently read; reset to 0 when a new page starts */
	protected $lastRev = 0;
	/** @var string[]|null Names of the input files that have not been opened yet */
	protected $infiles = null;

	/**
	 * @param string $infile Name of the XML dump to read, or several names
	 *   separated by ';'
	 */
	public function __construct( $infile ) {
		$this->infiles = explode( ';', $infile );
		$this->reader = new XMLReader();
		$this->openNextFile( __METHOD__ );
	}

	/**
	 * Attempts to fetch the text of a particular page revision
	 * from the dump stream. May return null if the page is
	 * unavailable.
	 *
	 * @param int $page ID number of page to read
	 * @param int $rev ID number of revision to read
	 * @param string $slot Role name of the slot to read
	 * @return string|null
	 */
	public function prefetch( $page, $rev, $slot = SlotRecord::MAIN ) {
		$page = intval( $page );
		$rev = intval( $rev );
		while ( $this->lastPage < $page && !$this->atEnd ) {
			$this->debug( "BaseDump::prefetch at page $this->lastPage, looking for $page" );
			$this->nextPage();
		}
		if ( $this->lastPage > $page || $this->atEnd ) {
			$this->debug( "BaseDump::prefetch already past page $page or failed to open/read input file, "
				. "looking for rev $rev [$this->lastPage, $this->lastRev]" );

			return null;
		}
		while ( $this->lastRev < $rev && !$this->atEnd && !$this->atPageEnd ) {
			$this->debug( "BaseDump::prefetch at page $this->lastPage, rev $this->lastRev, "
				. "looking for $page, $rev" );
			$this->nextRev();
		}
		// Once atPageEnd is set the reader has already consumed </page>, so it
		// is no longer inside the revision named by lastRev. Treating that as
		// a hit would make nextText() scan into the following page and return
		// its text, so it has to count as a miss.
		if ( $this->lastRev === $rev && !$this->atEnd && !$this->atPageEnd ) {
			$this->debug( "BaseDump::prefetch hit on $page, $rev [$this->lastPage, $this->lastRev]" );

			if ( !$this->seekToSlot( $slot ) ) {
				return null;
			}

			return $this->nextText();
		}

		$this->debug( "BaseDump::prefetch already past rev $rev on page $page "
			. "[$this->lastPage, $this->lastRev]" );

		return null;
	}

	/**
	 * Forwards a message to the debug log.
	 *
	 * @param string $str
	 */
	protected function debug( $str ) {
		wfDebug( $str );
	}

	/**
	 * Takes the next name off the list of pending input files and opens it.
	 *
	 * On success atEnd is cleared; on failure a debug line is written and
	 * atEnd is set. The remaining files are left untouched either way.
	 *
	 * @param string $caller Method name used as the prefix of the debug line
	 * @return bool Whether a file was opened
	 */
	private function openNextFile( $caller ) {
		$infile = array_shift( $this->infiles );
		// XMLReader::open() throws a ValueError for an empty URI on PHP 8
		// instead of returning false, so an absent or empty name is handled
		// here as just another file that cannot be opened.
		if ( $infile === null || $infile === ''
			|| !$this->reader->open( $infile, null, LIBXML_PARSEHUGE )
		) {
			$this->debug( $caller . ' was unable to open xml' );
			$this->atEnd = true;

			return false;
		}
		$this->atEnd = false;

		return true;
	}

	/**
	 * Moves the reader into the content element of the requested slot of the
	 * current revision. Nothing is read when the main slot is requested.
	 *
	 * @param string $slot Role name of the slot
	 * @return bool False if the revision ended (or the input ran out) before
	 *   a content element with a matching role was found
	 */
	private function seekToSlot( $slot ) {
		$lastSlot = SlotRecord::MAIN;
		while ( $lastSlot !== $slot ) {
			if ( !$this->skipTo( 'content', 'revision' ) ||
				!$this->skipTo( 'role', 'revision' )
			) {
				return false;
			}
			$lastSlot = $this->nodeContents();
		}

		return true;
	}

	/**
	 * Advances to the next page element and records its ID. When the current
	 * file has no further pages, it is closed and the next pending input
	 * file, if any, is opened in its place.
	 */
	private function nextPage() {
		if ( $this->skipTo( 'page', 'mediawiki' ) ) {
			if ( $this->skipTo( 'id' ) ) {
				$this->lastPage = intval( $this->nodeContents() );
				$this->lastRev = 0;
				$this->atPageEnd = false;
			}

			return;
		}

		$this->close();
		if ( count( $this->infiles ) ) {
			$this->openNextFile( __METHOD__ );
		}
	}

	/**
	 * Advances to the next revision element of the current page and records
	 * its ID, or sets atPageEnd if no further revision is found.
	 */
	private function nextRev() {
		if ( $this->skipTo( 'revision' ) ) {
			if ( $this->skipTo( 'id' ) ) {
				$this->lastRev = intval( $this->nodeContents() );
			}
		} else {
			$this->atPageEnd = true;
		}
	}

	/**
	 * Reads the next text element that precedes the end of the revision.
	 *
	 * @return string|null Null if no text element was found
	 */
	private function nextText() {
		if ( !$this->skipTo( 'text', 'revision' ) ) {
			return null;
		}

		return strval( $this->nodeContents() );
	}

	/**
	 * Reads forward until an opening tag called $name is found.
	 *
	 * @param string $name Element to stop at
	 * @param string $parent Element whose closing tag ends the search
	 * @return bool True if the reader now sits on <$name>; false if
	 *   </$parent> or the end of the input was reached first. Reaching the
	 *   end of the input also closes the reader.
	 */
	private function skipTo( $name, $parent = 'page' ) {
		if ( $this->atEnd ) {
			return false;
		}
		while ( $this->reader->read() ) {
			if ( $this->reader->nodeType == XMLReader::ELEMENT
				&& $this->reader->name == $name
			) {
				return true;
			}
			if ( $this->reader->nodeType == XMLReader::END_ELEMENT
				&& $this->reader->name == $parent
			) {
				$this->debug( "BaseDump::skipTo found </$parent> searching for <$name>" );

				return false;
			}
		}

		$this->close();

		return false;
	}

	/**
	 * Fetches text contents of the current element, assuming
	 * no sub-elements or such scary things.
	 *
	 * @return string|null Null if the input ended before the element was
	 *   closed, or if the reader was already at the end
	 */
	private function nodeContents() {
		if ( $this->atEnd ) {
			return null;
		}
		if ( $this->reader->isEmptyElement ) {
			return "";
		}
		$buffer = "";
		while ( $this->reader->read() ) {
			switch ( $this->reader->nodeType ) {
				// Nodes of type XMLReader::WHITESPACE are not collected.
				case XMLReader::TEXT:
				case XMLReader::SIGNIFICANT_WHITESPACE:
					$buffer .= $this->reader->value;
					break;
				case XMLReader::END_ELEMENT:
					return $buffer;
			}
		}

		$this->close();

		return null;
	}

	/**
	 * Closes the reader and marks the stream as ended.
	 *
	 * @return null
	 */
	public function close() {
		$this->reader->close();
		$this->atEnd = true;

		return null;
	}
}
230
// Register 'BaseDump' as an additional name for the class.
// NOTE(review): no namespace declaration is visible in this excerpt; presumably
// the class is namespaced and this keeps the old global name working — confirm.
232class_alias( BaseDump::class, 'BaseDump' );
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Readahead helper for making large MediaWiki data dumps; reads in a previous XML dump to sequentially prefetch text records already normalized and decompressed.
Definition BaseDump.php:33
prefetch( $page, $rev, $slot=SlotRecord::MAIN)
Attempts to fetch the text of a particular page revision from the dump stream.
Definition BaseDump.php:70
XMLReader null $reader
Definition BaseDump.php:35
string[] null $infiles
Definition BaseDump.php:45
Value object representing a content slot associated with a page revision.