8use MediaWiki\Languages\LanguageConverterFactory;
13use Wikimedia\Timestamp\TimestampFormat as TS;
21 private ?array $selectedNamespaces =
null;
22 private ?array $allowedNamespaces =
null;
23 private ?array $excludedNamespaces =
null;
24 private ?
int $startId =
null;
25 private ?
int $endId =
null;
26 private array $variants = [];
27 private bool $skipRedirects =
false;
28 private ?
int $limit =
null;
29 private ?
int $nextBatchStart =
null;
40 LanguageConverterFactory $languageConverterFactory,
42 $converter = $languageConverterFactory->getLanguageConverter( $contLang );
44 foreach ( $converter->getVariants() as $vCode ) {
46 if ( $vCode !== $contLang->
getCode() ) {
55 LanguageConverterFactory $languageConverterFactory,
69 $this->selectedNamespaces = $namespaces;
82 if ( $namespaces && $this->selectedNamespaces !==
null ) {
83 $this->selectedNamespaces = array_unique( array_merge(
84 $this->selectedNamespaces, $namespaces
97 $this->allowedNamespaces =
null;
98 $this->excludedNamespaces =
null;
101 if ( $sitemapNamespaces ) {
102 $this->selectedNamespaces = $sitemapNamespaces;
107 if ( self::isNoIndex( $defaultPolicy ) ) {
109 foreach ( $namespacePolicies as $ns => $policy ) {
110 if ( !self::isNoIndex( $policy ) ) {
114 $this->allowedNamespaces = $namespaces;
117 foreach ( $namespacePolicies as $ns => $policy ) {
118 if ( self::isNoIndex( $policy ) ) {
123 $this->excludedNamespaces = $excluded;
135 private static function isNoIndex( $policy ) {
137 return ( $policyArray[
'index'] ??
'' ) ===
'noindex';
148 public function idRange( ?
int $startId, ?
int $endId ) {
149 $this->startId = $startId;
150 $this->endId = $endId;
171 public function limit(
int $limit ) {
172 $this->
limit = $limit;
186 if ( $this->nextBatchStart !==
null ) {
187 $this->startId = $this->nextBatchStart;
203 ->select( [
'page_id',
'page_namespace',
'page_title',
'page_touched' ] )
205 ->leftJoin(
'page_props',
null, [
'page_id = pp_page',
'pp_propname' =>
'noindex' ] )
206 ->where( [
'pp_propname' =>
null ] )
207 ->orderBy(
'page_id' )
208 ->caller( __METHOD__ );
210 if ( $this->startId !==
null ) {
211 $sqb->where( $dbr->
expr(
'page_id',
'>=', $this->startId ) );
213 if ( $this->endId !==
null ) {
214 $sqb->where( $dbr->
expr(
'page_id',
'<', $this->endId ) );
216 $namespaces = $this->getSelectedAndAllowedNamespaces();
217 if ( $namespaces !==
null ) {
218 if ( $namespaces === [] ) {
221 $sqb->where( [
'page_namespace' => $namespaces ] );
224 if ( $this->excludedNamespaces !==
null ) {
225 $sqb->where( [ $dbr->
expr(
'page_namespace',
'!=', $this->excludedNamespaces ) ] );
228 $sqb->where( [
'page_is_redirect' => 0 ] );
230 $variants = [
null, ...$this->variants ];
231 if ( $this->
limit ) {
232 $pageLimit = (int)( $this->
limit / count( $variants ) );
233 $sqb->limit( $pageLimit + 1 );
238 $res = $empty ? [] : $sqb->fetchResultSet();
239 $this->genderCache?->doPageRows( $res );
241 $xml =
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" .
242 "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n";
245 foreach ( $res as $row ) {
246 if ( $pageLimit !==
null && ++$count > $pageLimit ) {
247 $nextBatchStart = (int)$row->page_id;
251 foreach ( $variants as $variant ) {
252 $query = $variant ===
null ?
'' :
'variant=' . urlencode( $variant );
254 Xml::element(
'loc',
null, $title->getCanonicalURL( $query ) ) .
259 $xml .=
"</urlset>\n";
261 if ( $nextBatchStart ) {
262 $this->nextBatchStart = $nextBatchStart;
264 $this->nextBatchStart =
null;
276 private function getSelectedAndAllowedNamespaces() {
277 if ( $this->selectedNamespaces !==
null ) {
278 if ( $this->allowedNamespaces !==
null ) {
279 return array_intersect( $this->selectedNamespaces, $this->allowedNamespaces );
281 return $this->selectedNamespaces;
284 return $this->allowedNamespaces;
wfTimestamp( $outputtype=TS::UNIX, $ts=0)
Get a timestamp string in one of various formats.
A class containing constants representing the names of configuration variables.
const NamespaceRobotPolicies
Name constant for the NamespaceRobotPolicies setting, for use with Config::get()
const SitemapNamespaces
Name constant for the SitemapNamespaces setting, for use with Config::get()
const DefaultRobotPolicy
Name constant for the DefaultRobotPolicy setting, for use with Config::get()
static formatRobotPolicy( $policy)
Converts a string robot policy into an associative array, to allow merging of several policies using ...
Utility for generating a sitemap.
nextBatch()
If a previous call to getXml() reached the limit set by limit() and there were still more rows, move the start of the page ID range to the first row of the next batch.
__construct(Language $contLang, LanguageConverterFactory $languageConverterFactory, GenderCache $genderCache)
namespacesFromConfig(Config $config)
Set the included/excluded namespace list based on configuration.
limit(int $limit)
Limit the number of returned results.
idRange(?int $startId, ?int $endId)
Limit the page_id range to the given half-open interval.
namespaces(?array $namespaces)
Set the selected namespaces.
static getVariants(Language $contLang, LanguageConverterFactory $languageConverterFactory)
skipRedirects(bool $skip=true)
Skip redirects.
additionalNamespaces(?array $namespaces)
Add namespaces to the selected namespace list.
getXml(IReadableDatabase $dbr)
Use the previously set options to generate an XML sitemap.