29 require_once __DIR__ .
'/Maintenance.php';
// Whether redirect pages are left out of the sitemap; assigned from the
// 'skip-redirects' command-line option during set-up.
84 public $skipRedirects;
// Priority values keyed by namespace index, plus the GS_MAIN / GS_TALK
// fallback keys that are used when a namespace has no explicit entry.
98 public $priorities =
array();
// Constructor body (the method header is not part of this excerpt):
// sets the script description and registers the command-line options.
146 parent::__construct();
147 $this->mDescription =
"Creates a sitemap for the site";
// Options that carry a value are registered with two extra arguments
// (false, true) -- presumably "not required" and "takes an argument";
// TODO confirm against the parent class's addOption() signature.
148 $this->
addOption(
'fspath',
'The file system path to save to, e.g. /tmp/sitemap; defaults to current directory',
false,
true );
149 $this->
addOption(
'urlpath',
'The URL path corresponding to --fspath, prepended to filenames in the index; defaults to an empty string',
false,
true );
150 $this->
addOption(
'compress',
'Compress the sitemap files, can take value yes|no, default yes',
false,
true );
// 'skip-redirects' is registered with name and description only.
151 $this->
addOption(
'skip-redirects',
'Do not include redirecting articles in the sitemap' );
152 $this->
addOption(
'identifier',
'What site identifier to use for the wiki, defaults to $wgDBname',
false,
true );
// Set-up body (the method header is not part of this excerpt): resolves the
// options into object state and opens the sitemap index file for writing.
159 $this->setNamespacePriorities();
// Per-file caps used by main() to decide when to start a new sitemap file:
// number of entries, and accumulated string length (10 * 2^20 = 10485760).
160 $this->url_limit = 50000;
161 $this->size_limit = pow( 2, 20 ) * 10;
// Output directory; falls back to the current working directory.
162 $this->fspath = self::init_path( $this->
getOption(
'fspath', getcwd() ) );
163 $this->urlpath = $this->
getOption(
'urlpath',
"" );
// A non-empty URL prefix is normalised to end in exactly one appended '/'.
164 if ( $this->urlpath !==
"" && substr( $this->urlpath, -1 ) !==
'/' ) {
165 $this->urlpath .=
'/';
// Compression stays on unless the option value is exactly the string 'no'.
168 $this->compress = $this->
getOption(
'compress',
'yes' ) !==
'no';
// Any value other than the default false enables redirect skipping.
169 $this->skipRedirects = $this->
getOption(
'skip-redirects',
false ) !==
false;
171 $this->generateNamespaces();
// NOTE(review): the fopen() result is not checked in the visible lines;
// main() passes $this->findex straight to fwrite().
173 $this->findex = fopen(
"{$this->fspath}sitemap-index-{$this->identifier}.xml",
'wb' );
/**
 * Fill $this->priorities with built-in defaults, then apply any overrides
 * found in $wgSitemapNamespacesPriorities.
 */
177 private function setNamespacePriorities() {
178 global $wgSitemapNamespacesPriorities;
// Fallback values used by guessPriority() for namespaces without an entry.
181 $this->priorities[self::GS_MAIN] =
'0.5';
183 $this->priorities[self::GS_TALK] =
'0.1';
// Explicit defaults for the namespaces visible in this excerpt.
185 $this->priorities[
NS_MAIN] =
'1.0';
186 $this->priorities[
NS_TALK] =
'0.1';
187 $this->priorities[
NS_USER] =
'0.5';
191 $this->priorities[
NS_FILE] =
'0.5';
197 $this->priorities[
NS_HELP] =
'0.5';
// Overrides are applied only when the global is not exactly false.
203 if ( $wgSitemapNamespacesPriorities !==
false ) {
207 foreach ( $wgSitemapNamespacesPriorities
as $namespace => $priority ) {
// The numeric value is tested against the 0.0 - 1.0 range; the bodies of
// both branches are not visible here -- presumably they clamp $priority
// before it is stored (TODO confirm).
208 $float = floatval( $priority );
209 if ( $float > 1.0 ) {
211 } elseif ( $float < 0.0 ) {
214 $this->priorities[$namespace] = $priority;
/**
 * Prepare the output directory and return its resolved path with a trailing
 * directory separator.
 *
 * @param string $fspath Directory to write the sitemap files to
 * @return string Result of realpath() followed by DIRECTORY_SEPARATOR
 */
224 private static function init_path( $fspath ) {
// Body of this guard is not visible in the excerpt.
225 if ( !isset( $fspath ) ) {
228 # Create directory if needed
229 if ( $fspath && !is_dir( $fspath ) ) {
// Terminates the script with a message if the directory cannot be created.
230 wfMkdirParents( $fspath,
null, __METHOD__ ) or die(
"Can not create directory $fspath.\n" );
// NOTE(review): realpath() returns false on failure, in which case this
// expression evaluates to just the separator -- confirm whether that is
// acceptable for callers.
233 return realpath( $fspath ) . DIRECTORY_SEPARATOR;
/**
 * Determine which namespaces get a sitemap, either from the
 * $wgSitemapNamespaces setting or from the namespaces present in the
 * page table.
 */
239 function generateNamespaces() {
241 global $wgSitemapNamespaces;
// When the setting is an array it is handled in this branch; the branch
// body is not visible in the excerpt.
242 if ( is_array( $wgSitemapNamespaces ) ) {
// Otherwise query the page table for its page_namespace values, grouped
// and ordered by namespace.
247 $res = $this->dbr->select(
'page',
248 array(
'page_namespace' ),
252 'GROUP BY' =>
'page_namespace',
253 'ORDER BY' =>
'page_namespace',
257 foreach (
$res as $row ) {
/**
 * Get the sitemap priority for a namespace.
 *
 * An explicit entry in $this->priorities wins; when there is none the value
 * is derived by guessPriority().
 *
 * @param int $namespace Namespace index
 * @return string Priority value for the namespace
 */
function priority( $namespace ) {
	if ( isset( $this->priorities[$namespace] ) ) {
		return $this->priorities[$namespace];
	}

	return $this->guessPriority( $namespace );
}
/**
 * Fallback priority for a namespace that has no explicit entry.
 *
 * Subject namespaces share the GS_MAIN value; all others get the GS_TALK
 * value.
 *
 * @param int $namespace Namespace index
 * @return string Priority value taken from $this->priorities
 */
function guessPriority( $namespace ) {
	$fallbackKey = MWNamespace::isSubject( $namespace ) ? self::GS_MAIN : self::GS_TALK;

	return $this->priorities[$fallbackKey];
}
/**
 * Select rows from the page table that belong to one namespace.
 * The field list and any query options are not visible in this excerpt.
 *
 * @param int $namespace Namespace index used as the page_namespace condition
 * @return mixed Whatever $this->dbr->select() returns; main() iterates it
 */
290 function getPageRes( $namespace ) {
291 return $this->dbr->select(
'page',
298 array(
'page_namespace' => $namespace ),
/**
 * Write the sitemap index and the per-namespace sitemap files.
 *
 * Only part of the method is visible in this excerpt; the comments below
 * describe the visible statements.
 */
306 public function main() {
// Start the index document.
309 fwrite( $this->findex, $this->openIndex() );
// Per namespace: fetch its pages and compute the size bookkeeping values.
312 $res = $this->getPageRes( $namespace );
314 $this->generateLimit( $namespace );
// Running length of the current file starts at limit[0], which
// generateLimit() sets to the length of the file opener.
315 $length = $this->limit[0];
318 $fns =
$wgContLang->getFormattedNsText( $namespace );
319 $this->
output(
"$namespace ($fns)\n" );
320 $skippedRedirects = 0;
321 foreach (
$res as $row ) {
// Redirect rows are handled separately when skipping is enabled; the branch
// body is not visible here.
322 if ( $this->skipRedirects && $row->page_is_redirect ) {
// Start a new sitemap file on the first row, when the counter reaches
// url_limit + 1, or when the running length plus limit[1] and limit[2]
// would exceed size_limit.
327 if ( $i++ === 0 || $i === $this->url_limit + 1 || $length + $this->limit[1] + $this->limit[2] > $this->size_limit ) {
// Finish and close the previous file, if one is open.
328 if ( $this->
file !==
false ) {
329 $this->write( $this->
file, $this->closeFile() );
330 $this->close( $this->
file );
// Open the next numbered file, write its opener, and list it in the index.
332 $filename = $this->sitemapFilename( $namespace, $smcount++ );
333 $this->
file = $this->
open( $this->fspath . $filename,
'wb' );
334 $this->write( $this->
file, $this->openFile() );
335 fwrite( $this->findex, $this->indexEntry( $filename ) );
336 $this->
output(
"\t$this->fspath$filename\n" );
// Reset the running length for the new file.
337 $length = $this->limit[0];
// Entry for the page's canonical URL.
342 $entry = $this->fileEntry(
$title->getCanonicalURL(), $date, $this->priority( $namespace ) );
343 $length += strlen( $entry );
344 $this->write( $this->
file, $entry );
// One additional entry per variant code, using the variant-specific URL.
348 foreach ( $variants
as $vCode ) {
352 $entry = $this->fileEntry(
$title->getCanonicalURL(
'', $vCode ), $date, $this->priority( $namespace ) );
353 $length += strlen( $entry );
354 $this->write( $this->
file, $entry );
// Report how many redirects were left out for this namespace.
359 if ( $this->skipRedirects && $skippedRedirects > 0 ) {
360 $this->
output(
" skipped $skippedRedirects redirect(s)\n" );
// Finish and close the last sitemap file.
364 $this->write( $this->
file, $this->closeFile() );
365 $this->close( $this->
file );
// Finish and close the index.
368 fwrite( $this->findex, $this->closeIndex() );
369 fclose( $this->findex );
// Fragment of the file-opening helper (its header is not part of this
// excerpt): a false handle is turned into an exception naming the file
// and the flags that were used.
379 if ( $resource ===
false ) {
380 throw new MWException( __METHOD__ .
" error opening file $file with flags $flags. Check permissions?" );
/**
 * Write a string to a sitemap file handle, using the gz or the plain
 * writer depending on $this->compress.
 *
 * @param resource &$handle Open file handle (taken by reference)
 * @param string $str Data to write
 * @throws MWException When $handle is a boolean instead of a handle
 */
388 function write( &$handle, $str ) {
// Guards against a failed or missing open being passed in as the handle.
389 if ( $handle ===
true || $handle ===
false ) {
390 throw new MWException( __METHOD__ .
" was passed a boolean as a file handle.\n" );
// The line separating the two writes is not visible in this excerpt;
// presumably fwrite() is the non-compressed branch (TODO confirm).
// NOTE(review): neither write's return value is checked here.
392 if ( $this->compress ) {
393 gzwrite( $handle, $str );
395 fwrite( $handle, $str );
/**
 * Close a sitemap file handle; the behaviour branches on $this->compress.
 * The branch bodies are not visible in this excerpt.
 *
 * @param resource &$handle Open file handle (taken by reference)
 */
402 function close( &$handle ) {
403 if ( $this->compress ) {
/**
 * Build the file name of one sitemap file.
 *
 * @param int $namespace Namespace index embedded in the name
 * @param int $count Sequence number of the file within the namespace
 * @return string Name of the form sitemap-<identifier>-NS_<namespace>-<count>.xml,
 *  with '.gz' appended when compression is enabled
 */
function sitemapFilename( $namespace, $count ) {
	$name = "sitemap-{$this->identifier}-NS_$namespace-$count.xml";
	if ( $this->compress ) {
		$name .= '.gz';
	}

	return $name;
}
// Return statement of the XML-header helper (its header is not part of this
// excerpt): the XML 1.0 / UTF-8 declaration followed by a newline.
428 return '<?xml version="1.0" encoding="UTF-8"?>' .
"\n";
/**
 * Namespace URI placed in the xmlns attribute of the generated documents.
 *
 * @return string
 */
function xmlSchema() {
	return 'http://www.sitemaps.org/schemas/sitemap/0.9';
}
/**
 * Opening of the sitemap index document: XML header plus the
 * <sitemapindex> start tag carrying the schema namespace.
 *
 * @return string
 */
function openIndex() {
	return $this->xmlHead()
		. '<sitemapindex xmlns="' . $this->xmlSchema() . '">'
		. "\n";
}
/**
 * Build the index entry for one sitemap file. Only the <loc> and <lastmod>
 * lines are visible in this excerpt.
 *
 * NOTE(review): $this->urlpath and $filename are interpolated into the XML
 * as-is, whereas fileEntry() passes its URL through htmlspecialchars() --
 * confirm whether escaping is needed here as well.
 *
 * @param string $filename Name of the sitemap file, appended to the URL path
 * @return string
 */
455 function indexEntry( $filename ) {
458 "\t\t<loc>{$this->urlpath}$filename</loc>\n" .
459 "\t\t<lastmod>{$this->timestamp}</lastmod>\n" .
/**
 * Closing tag of the sitemap index document.
 *
 * @return string
 */
function closeIndex() {
	return "</sitemapindex>\n";
}
/**
 * Opening of a sitemap file: XML header plus the <urlset> start tag
 * carrying the schema namespace.
 *
 * @return string
 */
function openFile() {
	return $this->xmlHead()
		. '<urlset xmlns="' . $this->xmlSchema() . '">'
		. "\n";
}
/**
 * Build the entry for one page URL. Only the <loc>, <lastmod> and
 * <priority> lines are visible in this excerpt.
 *
 * @param string $url Page URL; escaped with htmlspecialchars() before output
 * @param string $date Value written into <lastmod> unchanged
 * @param string $priority Value written into <priority> unchanged
 * @return string
 */
489 function fileEntry( $url, $date, $priority ) {
493 "\t\t<loc>" . htmlspecialchars( $url ) .
"</loc>\n" .
494 "\t\t<lastmod>$date</lastmod>\n" .
495 "\t\t<priority>$priority</priority>\n" .
/**
 * Closing tag of a sitemap file.
 *
 * @return string
 */
function closeFile() {
	return "</urlset>\n";
}
/**
 * Populate $this->limit with string lengths that main() uses for its
 * file-size bookkeeping. The first element is the length of the file
 * opener and the last visible element the length of the file closer; the
 * element between them is not visible in this excerpt.
 *
 * @param int $namespace Namespace index (its use is not visible here)
 */
511 function generateLimit( $namespace ) {
515 $this->limit =
array(
516 strlen( $this->openFile() ),
518 strlen( $this->closeFile() )