20use Wikimedia\Timestamp\TimestampFormat as TS;
23require_once __DIR__ .
'/Maintenance.php';
113 parent::__construct();
117 'The file system path to save to, e.g. /tmp/sitemap; defaults to current directory',
123 'The URL path corresponding to --fspath, prepended to filenames in the index; '
124 .
'defaults to an empty string',
130 'Compress the sitemap files, can take value yes|no, default yes',
134 $this->
addOption(
'skip-redirects',
'Do not include redirecting articles in the sitemap' );
137 'What site identifier to use for the wiki, defaults to $wgDBname',
143 'Only include pages in these namespaces in the sitemap, ' .
144 'defaults to the value of wgSitemapNamespaces if not defined.',
145 false,
true,
false,
true
149 'Maximum number of URLs per sitemap file. Default 50,000.',
159 $this->url_limit = $this->
getOption(
'limit', 50_000 );
161 # Create directory if needed
164 $this->
fatalError(
"Can not create directory $fspath." );
167 $dbDomain = WikiMap::getCurrentWikiDbDomain()->getId();
168 $this->fspath = realpath(
$fspath ) . DIRECTORY_SEPARATOR;
169 $this->urlpath = $this->
getOption(
'urlpath',
"" );
170 if ( $this->urlpath !==
"" && substr( $this->urlpath, -1 ) !==
'/' ) {
171 $this->urlpath .=
'/';
173 $this->identifier = $this->
getOption(
'identifier', $dbDomain );
174 $this->compress = $this->
getOption(
'compress',
'yes' ) !==
'no';
175 $this->skipRedirects = $this->
hasOption(
'skip-redirects' );
178 $encIdentifier = rawurlencode( $this->identifier );
179 $indexPath =
"{$this->fspath}sitemap-index-{$encIdentifier}.xml";
180 $this->indexFile = fopen(
"{$this->fspath}sitemap-index-{$encIdentifier}.xml",
'wb' );
182 $this->
output(
"Wrote index: $indexPath\n" );
189 private function getNamespaces() {
192 ?? $this->
getConfig()->get( MainConfigNames::SitemapNamespaces )
201 $contLang = $services->getContentLanguage();
202 $serverUrl = $services->getUrlUtils()->getServer(
PROTO_CANONICAL ) ??
'';
204 fwrite( $this->indexFile, $this->openIndex() );
208 $services->getLanguageConverterFactory(),
209 $services->getGenderCache()
211 $generator->skipRedirects( $this->skipRedirects )
212 ->namespaces( $this->getNamespaces() )
213 ->limit( $this->url_limit );
217 $filename = $this->sitemapFilename( $sitemapId++ );
218 $filePath = $this->fspath . $filename;
219 $file = $this->open( $filePath,
'wb' );
220 $xml = $generator->getXml( $this->dbr );
221 $this->write(
$file, $xml );
222 $this->close(
$file );
223 fwrite( $this->indexFile, $this->indexEntry( $filename, $serverUrl ) );
224 $this->
output(
"Wrote sitemap: $filePath\n" );
225 }
while ( $generator->nextBatch() );
227 fwrite( $this->indexFile, $this->closeIndex() );
228 fclose( $this->indexFile );
238 private function open(
$file, $flags ) {
239 $resource = $this->compress ? gzopen(
$file, $flags ) : fopen(
$file, $flags );
240 if ( $resource ===
false ) {
241 throw new RuntimeException( __METHOD__
242 .
" error opening file $file with flags $flags. Check permissions?" );
254 private function write( &$handle, $str ) {
255 if ( $handle ===
true || $handle ===
false ) {
256 throw new InvalidArgumentException( __METHOD__ .
" was passed a boolean as a file handle.\n" );
258 if ( $this->compress ) {
259 gzwrite( $handle, $str );
261 fwrite( $handle, $str );
270 private function close( &$handle ) {
271 if ( $this->compress ) {
284 private function sitemapFilename( $count ) {
285 $ext = $this->compress ?
'.gz' :
'';
287 return "sitemap-{$this->identifier}-$count.xml$ext";
295 private function xmlHead() {
296 return '<?xml version="1.0" encoding="UTF-8"?>' .
"\n";
304 private function xmlSchema() {
305 return 'http://www.sitemaps.org/schemas/sitemap/0.9';
313 private function openIndex() {
314 return $this->xmlHead() .
'<sitemapindex xmlns="' . $this->xmlSchema() .
'">' .
"\n";
324 private function indexEntry( $filename, $serverUrl ) {
325 return "\t<sitemap>\n" .
326 "\t\t<loc>" . $serverUrl .
327 ( substr( $this->urlpath, 0, 1 ) ===
"/" ?
"" :
"/" ) .
328 "{$this->urlpath}$filename</loc>\n" .
329 "\t\t<lastmod>{$this->timestamp}</lastmod>\n" .
338 private function closeIndex() {
339 return "</sitemapindex>\n";
345require_once RUN_MAINTENANCE_IF_MAIN;
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
wfTimestamp( $outputtype=TS::UNIX, $ts=0)
Get a timestamp string in one of various formats.
wfMkdirParents( $dir, $mode=null, $caller=null)
Make directory, and make all parent directories if they don't exist.
Maintenance script that generates a sitemap for the site.
string $timestamp
When this sitemap batch was generated.
string $fspath
The path to prepend to the filename.
IReadableDatabase $dbr
A database replica DB object.
bool $skipRedirects
Whether to skip redirect pages, i.e. leave them out of the sitemap.
__construct()
Default constructor.
resource $indexFile
A resource pointing to the sitemap index file.
bool $compress
Whether or not to use compression.
array $namespaces
A one-dimensional array of namespaces in the wiki.
string $urlpath
The URL path to prepend to filenames in the index; should resolve to the same directory as $fspath.
int $url_limit
The maximum number of URLs in a sitemap file.
resource|false $file
A resource pointing to a sitemap file.
A class containing constants representing the names of configuration variables.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
output( $out, $channel=null)
Throw some output to the user.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
hasOption( $name)
Checks to see if a particular option was set.
getOption( $name, $default=null)
Get an option, or return the default.
getReplicaDB(string|false $virtualDomain=false)
getServiceContainer()
Returns the main service container.
addDescription( $text)
Set the description text.
Utility for generating a sitemap.