62 $this->infiles = explode(
';', $infile );
63 $this->reader =
new XMLReader();
64 $infile = array_shift( $this->infiles );
65 $this->reader->open( $infile,
null, LIBXML_PARSEHUGE );
78 public function prefetch( $page, $rev, $slot = SlotRecord::MAIN ) {
79 $page = intval( $page );
80 $rev = intval( $rev );
81 while ( $this->lastPage < $page && !$this->atEnd ) {
82 $this->
debug(
"BaseDump::prefetch at page $this->lastPage, looking for $page" );
85 if ( $this->lastPage > $page || $this->atEnd ) {
86 $this->
debug(
"BaseDump::prefetch already past page $page "
87 .
"looking for rev $rev [$this->lastPage, $this->lastRev]" );
91 while ( $this->lastRev < $rev && !$this->atEnd && !$this->atPageEnd ) {
92 $this->
debug(
"BaseDump::prefetch at page $this->lastPage, rev $this->lastRev, "
93 .
"looking for $page, $rev" );
96 if ( $this->lastRev == $rev && !$this->atEnd ) {
97 $this->
debug(
"BaseDump::prefetch hit on $page, $rev [$this->lastPage, $this->lastRev]" );
99 if ( $slot !== SlotRecord::MAIN ) {
100 $lastSlot = SlotRecord::MAIN;
101 while ( $lastSlot !== $slot ) {
102 if ( !$this->
skipTo(
'content',
'revision' ) ) {
105 if ( !$this->
skipTo(
'role',
'revision' ) ) {
114 $this->
debug(
"BaseDump::prefetch already past rev $rev on page $page "
115 .
"[$this->lastPage, $this->lastRev]" );
131 if ( $this->
skipTo(
'page',
'mediawiki' ) ) {
132 if ( $this->
skipTo(
'id' ) ) {
135 $this->atPageEnd =
false;
139 if ( count( $this->infiles ) ) {
140 $infile = array_shift( $this->infiles );
141 $this->reader->open( $infile,
null, LIBXML_PARSEHUGE );
142 $this->atEnd =
false;
148 if ( $this->
skipTo(
'revision' ) ) {
149 if ( $this->
skipTo(
'id' ) ) {
153 $this->atPageEnd =
true;
161 if ( !$this->
skipTo(
'text',
'revision' ) ) {
173 private function skipTo( $name, $parent =
'page' ) {
174 if ( $this->atEnd ) {
177 while ( $this->reader->read() ) {
178 if ( $this->reader->nodeType == XMLReader::ELEMENT
179 && $this->reader->name == $name
183 if ( $this->reader->nodeType == XMLReader::END_ELEMENT
184 && $this->reader->name == $parent
186 $this->
debug(
"BaseDump::skipTo found </$parent> searching for <$name>" );
192 return $this->
close();
203 if ( $this->atEnd ) {
206 if ( $this->reader->isEmptyElement ) {
210 while ( $this->reader->read() ) {
211 switch ( $this->reader->nodeType ) {
212 case XMLReader::TEXT:
214 case XMLReader::SIGNIFICANT_WHITESPACE:
215 $buffer .= $this->reader->value;
217 case XMLReader::END_ELEMENT:
222 return $this->
close();
229 $this->reader->close();
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Readahead helper for making large MediaWiki data dumps; reads in a previous XML dump to sequentially ...
nodeContents()
Shouldn't something like this be built-in to XMLReader? Fetches text contents of the current element,...
skipTo( $name, $parent='page')
prefetch( $page, $rev, $slot=SlotRecord::MAIN)
Attempts to fetch the text of a particular page revision from the dump stream.