"""Objects representing site info data contents."""
#
# (C) Pywikibot team, 2008-2024
#
# Distributed under the terms of the MIT license.
#
from __future__ import annotations
import copy
import datetime
import re
from collections.abc import Container
from contextlib import suppress
from typing import Any
import pywikibot
from pywikibot.exceptions import APIError
from pywikibot.tools.collections import EMPTY_DEFAULT
[docs]
class Siteinfo(Container):
"""A 'dictionary' like container for siteinfo.
This class queries the server to get the requested siteinfo property.
Optionally it can cache this directly in the instance so that later
requests don't need to query the server.
All values of the siteinfo property 'general' are directly available.
"""
WARNING_REGEX = re.compile(r'Unrecognized values? for parameter '
r'["\']siprop["\']: (.+?)\.?')
# Until we get formatversion=2, we have to convert empty-string properties
# into booleans so they are easier to use.
BOOLEAN_PROPS = {
'general': [
'imagewhitelistenabled',
'langconversion',
'titleconversion',
'rtl',
'readonly',
'writeapi',
'variantarticlepath',
'misermode',
'uploadsenabled',
],
'namespaces': [ # for each namespace
'subpages',
'content',
'nonincludable',
],
'magicwords': [ # for each magicword
'case-sensitive',
],
}
def __init__(self, site) -> None:
"""Initialise it with an empty cache."""
self._site = site
self._cache: dict[str, Any] = {}
[docs]
def clear(self) -> None:
"""Remove all items from Siteinfo.
.. versionadded:: 7.1
"""
self._cache.clear()
@staticmethod
def _post_process(prop, data) -> None:
"""Do some default handling of data. Directly modifies data."""
# Be careful with version tests inside this here as it might need to
# query this method to actually get the version number
# Convert boolean props from empty strings to actual boolean values
if prop in Siteinfo.BOOLEAN_PROPS:
# siprop=namespaces and
# magicwords has properties per item in result
if prop in ('namespaces', 'magicwords'):
for index, value in enumerate(data):
# namespaces uses a dict, while magicwords uses a list
key = index if isinstance(data, list) else value
for p in Siteinfo.BOOLEAN_PROPS[prop]:
data[key][p] = p in data[key]
else:
for p in Siteinfo.BOOLEAN_PROPS[prop]:
data[p] = p in data
def _get_siteinfo(self, prop, expiry) -> dict:
"""Retrieve a siteinfo property.
All properties which the site doesn't
support contain the default value. Because pre-1.12 no data was
returned when a property doesn't exists, it queries each property
independently if a property is invalid.
.. seealso:: :api:Siteinfo
:param prop: The property names of the siteinfo.
:type prop: str or iterable
:param expiry: The expiry date of the cached request.
:type expiry: int (days), :py:obj:`datetime.timedelta`, False (config)
:return: A dictionary with the properties of the site. Each entry in
the dictionary is a tuple of the value and a boolean to save if it
is the default value.
"""
def warn_handler(mod, message) -> bool:
"""Return True if the warning is handled."""
matched = Siteinfo.WARNING_REGEX.fullmatch(message)
if mod == 'siteinfo' and matched:
invalid_properties.extend(
prop.strip() for prop in matched[1].split(','))
return True
return False
props = [prop] if isinstance(prop, str) else prop
if not props:
raise ValueError('At least one property name must be provided.')
invalid_properties: list[str] = []
request = self._site._request(
expiry=pywikibot.config.API_config_expiry
if expiry is False else expiry,
parameters={
'action': 'query', 'meta': 'siteinfo', 'siprop': props,
}
)
# warnings are handled later
request._warning_handler = warn_handler
try:
data = request.submit()
except APIError as e:
if e.code == 'siunknown_siprop':
if len(props) == 1:
pywikibot.log(f"Unable to get siprop '{props[0]}'")
return {props[0]: (EMPTY_DEFAULT, False)}
pywikibot.log('Unable to get siteinfo, because at least '
"one property is unknown: '{}'"
.format("', '".join(props)))
results = {}
for p in props:
results.update(self._get_siteinfo(p, expiry))
return results
raise
result = {}
if invalid_properties:
for invalid_prop in invalid_properties:
result[invalid_prop] = (EMPTY_DEFAULT, False)
pywikibot.log("Unable to get siprop(s) '{}'"
.format("', '".join(invalid_properties)))
if 'query' in data:
# If the request is a CachedRequest, use the _cachetime attr.
cache_time = getattr(
request, '_cachetime', None) or pywikibot.Timestamp.nowutc()
for prop in props:
if prop in data['query']:
self._post_process(prop, data['query'][prop])
result[prop] = (data['query'][prop], cache_time)
return result
@staticmethod
def _is_expired(cache_date, expire):
"""Return true if the cache date is expired."""
if isinstance(expire, bool):
return expire
if not cache_date: # default values are always expired
return True
# cached date + expiry are in the past if it's expired
return cache_date + expire < pywikibot.Timestamp.nowutc()
def _get_general(self, key: str, expiry):
"""Return a siteinfo property which is loaded by default.
The property 'general' will be queried if it wasn't yet or it's forced.
Additionally all uncached default properties are queried. This way
multiple default properties are queried with one request. It'll cache
always all results.
:param key: The key to search for.
:param expiry: If the cache is older than the expiry it ignores the
cache and queries the server to get the newest value.
:type expiry: int (days), :py:obj:`datetime.timedelta`, False (never)
:return: If that property was retrieved via this method. Returns None
if the key was not in the retrieved values.
:rtype: various (the value), bool (if the default value is used)
"""
if 'general' not in self._cache:
pywikibot.debug('general siteinfo not loaded yet.')
force = True
props = ['namespaces', 'namespacealiases']
else:
force = Siteinfo._is_expired(self._cache['general'][1], expiry)
props = []
if force:
props = [prop for prop in props if prop not in self._cache]
if props:
pywikibot.debug(
"Load siteinfo properties '{}' along with 'general'"
.format("', '".join(props)))
props.append('general')
default_info = self._get_siteinfo(props, expiry)
for prop in props:
self._cache[prop] = default_info[prop]
if key in default_info:
return default_info[key]
if key in self._cache['general'][0]:
return self._cache['general'][0][key], self._cache['general']
return None
def __getitem__(self, key: str):
"""Return a siteinfo property, caching and not forcing it."""
return self.get(key, False) # caches and doesn't force it
[docs]
def get(
self,
key: str,
get_default: bool = True,
cache: bool = True,
expiry: datetime.datetime | float | bool = False
) -> Any:
"""Return a siteinfo property.
It will never throw an APIError if it only stated, that the siteinfo
property doesn't exist. Instead it will use the default value.
.. seealso:: :py:obj:`_get_siteinfo`
:param key: The name of the siteinfo property.
:param get_default: Whether to throw an KeyError if the key is invalid.
:param cache: Caches the result internally so that future accesses via
this method won't query the server.
:param expiry: If the cache is older than the expiry it ignores the
cache and queries the server to get the newest value.
:return: The gathered property
:raises KeyError: If the key is not a valid siteinfo property and the
get_default option is set to False.
"""
# If expiry is True, convert it to 0 to be coherent with
# _get_siteinfo() and _get_general() docstring.
if expiry is True:
expiry = 0
# If expiry is a float or int convert to timedelta
# Note: bool is an instance of int
if isinstance(expiry, float) or type(expiry) is int:
expiry = datetime.timedelta(expiry)
# expire = 0 (or timedelta(0)) are always expired and their bool is
# False, so skip them EXCEPT if it's literally False, then they expire
# never.
if expiry and expiry is not True or expiry is False:
try:
cached = self._get_cached(key)
except KeyError:
pass
else: # cached value available
# is a default value, but isn't accepted
if not cached[1] and not get_default:
raise KeyError(key)
if not Siteinfo._is_expired(cached[1], expiry):
return copy.deepcopy(cached[0])
preloaded = self._get_general(key, expiry)
if not preloaded:
preloaded = self._get_siteinfo(key, expiry)[key]
else:
cache = False
if not preloaded[1] and not get_default:
raise KeyError(key)
if cache:
self._cache[key] = preloaded
return copy.deepcopy(preloaded[0])
def _get_cached(self, key: str):
"""Return the cached value or a KeyError exception if not cached."""
if 'general' in self._cache:
if key in self._cache['general'][0]:
return (self._cache['general'][0][key],
self._cache['general'][1])
return self._cache[key]
raise KeyError(key)
[docs]
def is_cached(self, key: str) -> bool:
"""Return whether the value is cached.
.. versionadded:: 7.1
"""
try:
self._get_cached(key)
except KeyError:
return False
return True
def __contains__(self, key: str) -> bool:
"""Return whether the value is in Siteinfo container.
.. versionchanged:: 7.1
Previous implementation only checked for cached keys.
"""
try:
self[key]
except KeyError:
return False
return True
[docs]
def is_recognised(self, key: str) -> bool | None:
"""Return if 'key' is a valid property name. 'None' if not cached."""
time = self.get_requested_time(key)
return None if time is None else bool(time)
[docs]
def get_requested_time(self, key: str):
"""Return when 'key' was successfully requested from the server.
If the property is actually in the siprop 'general' it returns the
last request from the 'general' siprop.
:param key: The siprop value or a property of 'general'.
:return: The last time the siprop of 'key' was requested.
:rtype: None (never), False (default),
:py:obj:`datetime.datetime` (cached)
"""
with suppress(KeyError):
return self._get_cached(key)[1]
return None