MediaWiki  master
SiteImporter.php
Go to the documentation of this file.
1 <?php
21 use Wikimedia\RequestTimeout\TimeoutException;
22 
/**
 * Utility for importing site entries from XML.
 *
 * The importer parses an XML document, turns every <site> element found
 * directly below the document element into a Site object and hands the
 * resulting list to the SiteStore given to the constructor.
 */
class SiteImporter {

	/**
	 * @var SiteStore Store that receives the imported sites.
	 */
	private $store;

	/**
	 * @var callable|null Invoked with the Exception raised while building a
	 *  single site; when unset, the exception message is logged as a warning.
	 */
	private $exceptionCallback;

	/**
	 * @param SiteStore $store Store the imported sites are saved to.
	 */
	public function __construct( SiteStore $store ) {
		$this->store = $store;
	}

	/**
	 * @return callable|null The callback set via setExceptionCallback(), if any.
	 */
	public function getExceptionCallback() {
		return $this->exceptionCallback;
	}

	/**
	 * @param callable $exceptionCallback Called with the Exception as its only
	 *  argument whenever a <site> element cannot be turned into a Site.
	 */
	public function setExceptionCallback( $exceptionCallback ) {
		$this->exceptionCallback = $exceptionCallback;
	}

	/**
	 * Reads the given file and imports the XML it contains.
	 *
	 * @param string $file Path of the XML file to import.
	 *
	 * @throws RuntimeException If the file cannot be read.
	 * @throws InvalidArgumentException If the file content is not well-formed XML.
	 */
	public function importFromFile( $file ) {
		$xml = file_get_contents( $file );

		if ( $xml === false ) {
			throw new RuntimeException( 'Failed to read ' . $file . '!' );
		}

		$this->importFromXML( $xml );
	}

	/**
	 * Parses the given XML string and imports the sites it describes.
	 *
	 * @param string $xml XML document text.
	 *
	 * @throws InvalidArgumentException If $xml is empty or not well-formed XML.
	 */
	public function importFromXML( $xml ) {
		// DOMDocument::loadXML() rejects an empty string with a ValueError on
		// PHP 8 (and a warning before that); report it as malformed input instead.
		if ( $xml === '' ) {
			throw new InvalidArgumentException( 'Malformed XML!' );
		}

		$document = new DOMDocument();

		$oldLibXmlErrors = libxml_use_internal_errors( true );
		// phpcs:ignore Generic.PHP.NoSilencedErrors -- suppress deprecation per T268847
		$oldDisable = @libxml_disable_entity_loader( true );

		// The libxml error buffer is global and cumulative: if the caller already
		// had internal errors enabled, it may hold errors from earlier parses.
		// Remember how many there are so that only our own errors get reported.
		$staleErrorCount = count( libxml_get_errors() );

		try {
			$ok = $document->loadXML( $xml, LIBXML_NONET );
			// Must be fetched before the old setting is restored, since turning
			// internal errors off discards the buffer.
			$errors = $ok ? [] : array_slice( libxml_get_errors(), $staleErrorCount );
		} finally {
			// Restore the global libxml state even if loadXML() throws.
			libxml_use_internal_errors( $oldLibXmlErrors );
			// phpcs:ignore Generic.PHP.NoSilencedErrors
			@libxml_disable_entity_loader( $oldDisable );
		}

		if ( !$ok ) {
			if ( $errors ) {
				// Only the first error of this parse is reported.
				$error = reset( $errors );
				throw new InvalidArgumentException(
					'Malformed XML: ' . $error->message . ' in line ' . $error->line
				);
			}

			throw new InvalidArgumentException( 'Malformed XML!' );
		}

		$this->importFromDOM( $document->documentElement );
	}

	/**
	 * Builds the site list from the document element and saves it to the store.
	 *
	 * @param DOMElement $root Document element whose children are <site> elements.
	 */
	private function importFromDOM( DOMElement $root ) {
		$sites = $this->makeSiteList( $root );
		$this->store->saveSites( $sites );
	}

	/**
	 * Creates a Site for every <site> element that is a direct child of $root.
	 *
	 * Sites whose global ID is already known to the store inherit the internal
	 * ID of the stored site. A <site> element that cannot be converted is passed
	 * to handleException() and skipped; timeouts are always re-thrown.
	 *
	 * @param DOMElement $root
	 *
	 * @return Site[] Sites indexed by global ID; a later element with the same
	 *  global ID replaces an earlier one.
	 */
	private function makeSiteList( DOMElement $root ) {
		$sites = [];

		// Old sites, to get the row IDs that correspond to the global site IDs.
		// TODO: Get rid of internal row IDs, they just get in the way. Get rid of ORMRow, too.
		$oldSites = $this->store->getSites();

		for ( $current = $root->firstChild; $current; $current = $current->nextSibling ) {
			// Skip text nodes, comments and foreign elements.
			if ( !( $current instanceof DOMElement ) || $current->tagName !== 'site' ) {
				continue;
			}

			try {
				$site = $this->makeSite( $current );
				$key = $site->getGlobalId();

				if ( $oldSites->hasSite( $key ) ) {
					$oldSite = $oldSites->getSite( $key );
					$site->setInternalId( $oldSite->getInternalId() );
				}

				$sites[$key] = $site;
			} catch ( TimeoutException $e ) {
				// Never swallow request timeouts.
				throw $e;
			} catch ( Exception $ex ) {
				$this->handleException( $ex );
			}
		}

		return $sites;
	}

	/**
	 * Creates a Site from a <site> element.
	 *
	 * Reads the "type" attribute, the <forward>, <globalid>, <group> and
	 * <source> tags, and every <path> and <localid> tag found anywhere below
	 * the element.
	 *
	 * @param DOMElement $siteElement
	 *
	 * @return Site
	 * @throws InvalidArgumentException If $siteElement is not a <site> element.
	 * @throws MWException If a required tag or attribute is missing.
	 */
	public function makeSite( DOMElement $siteElement ) {
		if ( $siteElement->tagName !== 'site' ) {
			throw new InvalidArgumentException( 'Expected <site> tag, found ' . $siteElement->tagName );
		}

		$type = $this->getAttributeValue( $siteElement, 'type', Site::TYPE_UNKNOWN );
		$site = Site::newForType( $type );

		$site->setForward( $this->hasChild( $siteElement, 'forward' ) );
		$site->setGlobalId( $this->getChildText( $siteElement, 'globalid' ) );
		$site->setGroup( $this->getChildText( $siteElement, 'group', Site::GROUP_NONE ) );
		$site->setSource( $this->getChildText( $siteElement, 'source', Site::SOURCE_LOCAL ) );

		foreach ( $siteElement->getElementsByTagName( 'path' ) as $pathElement ) {
			'@phan-var DOMElement $pathElement';
			// The "type" attribute is mandatory on <path>.
			$pathType = $this->getAttributeValue( $pathElement, 'type' );
			$site->setPath( $pathType, $pathElement->textContent );
		}

		foreach ( $siteElement->getElementsByTagName( 'localid' ) as $idElement ) {
			'@phan-var DOMElement $idElement';
			// The "type" attribute is mandatory on <localid>.
			$idType = $this->getAttributeValue( $idElement, 'type' );
			$site->addLocalId( $idType, $idElement->textContent );
		}

		// @todo: import <data>
		// @todo: import <config>

		return $site;
	}

	/**
	 * Returns the value of an attribute of the given element.
	 *
	 * @param DOMElement $element
	 * @param string $name Attribute name.
	 * @param string|false $default Value to return when the attribute is
	 *  missing; false (the default) marks the attribute as required.
	 *
	 * @return string|null|false The attribute value, or $default if absent.
	 * @throws MWException If the attribute is missing and $default is false.
	 */
	private function getAttributeValue( DOMElement $element, $name, $default = false ) {
		$node = $element->getAttributeNode( $name );

		if ( $node ) {
			return $node->textContent;
		}

		if ( $default === false ) {
			throw new MWException(
				'Required ' . $name . ' attribute not found in <' . $element->tagName . '> tag'
			);
		}

		return $default;
	}

	/**
	 * Returns the text content of the first descendant element with the given
	 * tag name.
	 *
	 * @param DOMElement $element
	 * @param string $name Tag name to look for.
	 * @param string|false $default Value to return when no such tag exists;
	 *  false (the default) marks the tag as required.
	 *
	 * @return string|null|false The text content, or $default if absent.
	 * @throws MWException If the tag is missing and $default is false.
	 */
	private function getChildText( DOMElement $element, $name, $default = false ) {
		$elements = $element->getElementsByTagName( $name );

		if ( $elements->length >= 1 ) {
			return $elements->item( 0 )->textContent;
		}

		if ( $default === false ) {
			throw new MWException(
				'Required <' . $name . '> tag not found inside <' . $element->tagName . '> tag'
			);
		}

		return $default;
	}

	/**
	 * Tells whether the element has at least one descendant element with the
	 * given tag name.
	 *
	 * @param DOMElement $element
	 * @param string $name Tag name to look for.
	 *
	 * @return bool
	 */
	private function hasChild( DOMElement $element, $name ) {
		return $element->getElementsByTagName( $name )->length > 0;
	}

	/**
	 * Reports an exception raised while importing a single site: passes it to
	 * the exception callback if one is set, otherwise logs its message as a
	 * warning.
	 *
	 * @param Exception $ex
	 */
	private function handleException( Exception $ex ) {
		if ( $this->exceptionCallback ) {
			call_user_func( $this->exceptionCallback, $ex );
		} else {
			wfLogWarning( $ex->getMessage() );
		}
	}

}
wfLogWarning( $msg, $callerOffset=1, $level=E_USER_WARNING)
Send a warning as a PHP error and the debug log.
MediaWiki exception.
Definition: MWException.php:33
Utility for importing site entries from XML.
__construct(SiteStore $store)
importFromFile( $file)
importFromXML( $xml)
setExceptionCallback( $exceptionCallback)
makeSite(DOMElement $siteElement)
const GROUP_NONE
Definition: Site.php:36
static newForType( $siteType)
Definition: Site.php:623
const TYPE_UNKNOWN
Definition: Site.php:33
const SOURCE_LOCAL
Definition: Site.php:41
Interface for storing and retrieving Site objects.
Definition: SiteStore.php:30
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition: router.php:42