8use MediaWiki\Languages\LanguageConverterFactory;
20 private ?array $selectedNamespaces =
null;
21 private ?array $allowedNamespaces =
null;
22 private ?array $excludedNamespaces =
null;
23 private ?
int $startId =
null;
24 private ?
int $endId =
null;
25 private array $variants = [];
26 private bool $skipRedirects =
false;
27 private ?
int $limit =
null;
28 private ?
int $nextBatchStart =
null;
39 LanguageConverterFactory $languageConverterFactory,
41 $converter = $languageConverterFactory->getLanguageConverter( $contLang );
43 foreach ( $converter->getVariants() as $vCode ) {
45 if ( $vCode !== $contLang->
getCode() ) {
54 LanguageConverterFactory $languageConverterFactory,
68 $this->selectedNamespaces = $namespaces;
81 if ( $namespaces && $this->selectedNamespaces !==
null ) {
82 $this->selectedNamespaces = array_unique( array_merge(
83 $this->selectedNamespaces, $namespaces
96 $this->allowedNamespaces =
null;
97 $this->excludedNamespaces =
null;
100 if ( $sitemapNamespaces ) {
101 $this->selectedNamespaces = $sitemapNamespaces;
106 if ( self::isNoIndex( $defaultPolicy ) ) {
108 foreach ( $namespacePolicies as $ns => $policy ) {
109 if ( !self::isNoIndex( $policy ) ) {
113 $this->allowedNamespaces = $namespaces;
116 foreach ( $namespacePolicies as $ns => $policy ) {
117 if ( self::isNoIndex( $policy ) ) {
122 $this->excludedNamespaces = $excluded;
134 private static function isNoIndex( $policy ) {
136 return ( $policyArray[
'index'] ??
'' ) ===
'noindex';
147 public function idRange( ?
int $startId, ?
int $endId ) {
148 $this->startId = $startId;
149 $this->endId = $endId;
170 public function limit(
int $limit ) {
171 $this->
limit = $limit;
185 if ( $this->nextBatchStart !==
null ) {
186 $this->startId = $this->nextBatchStart;
202 ->select( [
'page_id',
'page_namespace',
'page_title',
'page_touched' ] )
204 ->leftJoin(
'page_props',
null, [
'page_id = pp_page',
'pp_propname' =>
'noindex' ] )
205 ->where( [
'pp_propname' =>
null ] )
206 ->orderBy(
'page_id' )
207 ->caller( __METHOD__ );
209 if ( $this->startId !==
null ) {
210 $sqb->where( $dbr->
expr(
'page_id',
'>=', $this->startId ) );
212 if ( $this->endId !==
null ) {
213 $sqb->where( $dbr->
expr(
'page_id',
'<', $this->endId ) );
215 $namespaces = $this->getSelectedAndAllowedNamespaces();
216 if ( $namespaces !==
null ) {
217 if ( $namespaces === [] ) {
220 $sqb->where( [
'page_namespace' => $namespaces ] );
223 if ( $this->excludedNamespaces !==
null ) {
224 $sqb->where( [ $dbr->
expr(
'page_namespace',
'!=', $this->excludedNamespaces ) ] );
227 $sqb->where( [
'page_is_redirect' => 0 ] );
229 $variants = [
null, ...$this->variants ];
230 if ( $this->
limit ) {
231 $pageLimit = (int)( $this->
limit / count( $variants ) );
232 $sqb->limit( $pageLimit + 1 );
237 $res = $empty ? [] : $sqb->fetchResultSet();
238 $this->genderCache?->doPageRows( $res );
240 $xml =
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" .
241 "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n";
244 foreach ( $res as $row ) {
245 if ( $pageLimit !==
null && ++$count > $pageLimit ) {
246 $nextBatchStart = (int)$row->page_id;
250 foreach ( $variants as $variant ) {
251 $query = $variant ===
null ?
'' :
'variant=' . urlencode( $variant );
253 Xml::element(
'loc',
null, $title->getCanonicalURL( $query ) ) .
258 $xml .=
"</urlset>\n";
260 if ( $nextBatchStart ) {
261 $this->nextBatchStart = $nextBatchStart;
263 $this->nextBatchStart =
null;
275 private function getSelectedAndAllowedNamespaces() {
276 if ( $this->selectedNamespaces !==
null ) {
277 if ( $this->allowedNamespaces !==
null ) {
278 return array_intersect( $this->selectedNamespaces, $this->allowedNamespaces );
280 return $this->selectedNamespaces;
283 return $this->allowedNamespaces;
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
A class containing constants representing the names of configuration variables.
const NamespaceRobotPolicies
Name constant for the NamespaceRobotPolicies setting, for use with Config::get()
const SitemapNamespaces
Name constant for the SitemapNamespaces setting, for use with Config::get()
const DefaultRobotPolicy
Name constant for the DefaultRobotPolicy setting, for use with Config::get()
static formatRobotPolicy( $policy)
Converts a string robot policy into an associative array, to allow merging of several policies using ...
Utility for generating a sitemap.
nextBatch()
If a previous call to getXml() reached the limit set by limit() and there were still more rows,...
__construct(Language $contLang, LanguageConverterFactory $languageConverterFactory, GenderCache $genderCache,)
namespacesFromConfig(Config $config)
Set the included/excluded namespace list based on configuration.
limit(int $limit)
Limit the number of returned results.
idRange(?int $startId, ?int $endId)
Limit the page_id range to the given half-open interval.
namespaces(?array $namespaces)
Set the selected namespaces.
static getVariants(Language $contLang, LanguageConverterFactory $languageConverterFactory,)
skipRedirects(bool $skip=true)
Skip redirects.
additionalNamespaces(?array $namespaces)
Add namespaces to the selected namespace list.
getXml(IReadableDatabase $dbr)
Use the previously set options to generate an XML sitemap.