Source code for pywikibot.data.api._generators

"""Objects representing API/Query generators.

.. versionchanged:: 7.6
   All Objects were changed from Iterable object to a Generator object.
   They are subclassed from
   :class:`tools.collections.GeneratorWrapper`
"""
#
# (C) Pywikibot team, 2008-2024
#
# Distributed under the terms of the MIT license.
#
from __future__ import annotations

from abc import ABC, abstractmethod
from contextlib import suppress
from typing import Any
from warnings import warn

import pywikibot
from pywikibot import config
from pywikibot.backports import Callable, Iterable
from pywikibot.exceptions import (
    Error,
    InvalidTitleError,
    UnknownSiteError,
    UnsupportedPageError,
)
from pywikibot.site import Namespace
from pywikibot.tools import deprecated
from pywikibot.tools.collections import GeneratorWrapper


__all__ = (
    'APIGenerator',
    'ListGenerator',
    'LogEntryListGenerator',
    'PageGenerator',
    'PropertyGenerator',
    'QueryGenerator',
    'update_page',
)


[docs] class APIGeneratorBase(ABC): """A wrapper class to handle the usage of the ``parameters`` parameter. .. versionchanged:: 7.6 renamed from _RequestWrapper """ def _clean_kwargs(self, kwargs, **mw_api_args): """Clean kwargs, define site and request class.""" if 'site' not in kwargs: warn(f'{self.__class__.__name__} invoked without a site', RuntimeWarning, 3) kwargs['site'] = pywikibot.Site() assert not hasattr(self, 'site') or self.site == kwargs['site'] self.site = kwargs['site'] self.request_class = kwargs['site']._request_class(kwargs) kwargs = self.request_class.clean_kwargs(kwargs) kwargs['parameters'].update(mw_api_args) return kwargs
[docs] @abstractmethod def set_maximum_items(self, value: int | str | None) -> None: """Set the maximum number of items to be retrieved from the wiki. .. versionadded:: 7.1 .. versionchanged:: 7.6 become an abstract method """ raise NotImplementedError
[docs] class APIGenerator(APIGeneratorBase, GeneratorWrapper): """Generator that handle API responses containing lists. The generator will iterate each item in the query response and use the continue request parameter to retrieve the next portion of items automatically. If the limit attribute is set, the iterator will stop after iterating that many values. .. versionchanged:: 7.6 subclassed from :class:`tools.collections.GeneratorWrapper` """ def __init__( self, action: str, continue_name: str = 'continue', limit_name: str = 'limit', data_name: str = 'data', **kwargs ) -> None: """Initialize an APIGenerator object. kwargs are used to create a Request object; see that object's documentation for values. :param action: API action name. :param continue_name: Name of the continue API parameter. :param limit_name: Name of the limit API parameter. :param data_name: Name of the data in API response. """ kwargs = self._clean_kwargs(kwargs, action=action) self.continue_name = continue_name self.limit_name = limit_name self.data_name = data_name self.query_increment: int | None if config.step > 0: self.query_increment = config.step else: self.query_increment = None self.limit: int | None = None self.starting_offset = kwargs['parameters'].pop(self.continue_name, 0) self.request = self.request_class(**kwargs) self.request[self.limit_name] = self.query_increment
[docs] def set_query_increment(self, value: int) -> None: """Set the maximum number of items to be retrieved per API query. If not called, the default is config.step. :param value: The value of maximum number of items to be retrieved per API request to set. """ self.query_increment = int(value) self.request[self.limit_name] = self.query_increment pywikibot.debug(f'{type(self).__name__}: Set query_increment to ' f'{self.query_increment}.')
[docs] def set_maximum_items(self, value: int | str | None) -> None: """Set the maximum number of items to be retrieved from the wiki. If not called, most queries will continue as long as there is more data to be retrieved from the API. :param value: The value of maximum number of items to be retrieved in total to set. Ignores None value. """ if value is not None and int(value) > 0: self.limit = int(value) if self.query_increment and self.limit < self.query_increment: self.request[self.limit_name] = self.limit pywikibot.debug(f'{type(self).__name__}: Set request item ' f'limit to {self.limit}') pywikibot.debug(f'{type(self).__name__}: Set limit ' f'(maximum_items) to {self.limit}.')
@property def generator(self): """Submit request and iterate the response. Continues response as needed until limit (if defined) is reached. .. versionchanged:: 7.6 changed from iterator method to generator property """ offset = self.starting_offset n = 0 while True: self.request[self.continue_name] = offset pywikibot.debug(f'{type(self).__name__}: Request: {self.request}') data = self.request.submit() n_items = len(data[self.data_name]) pywikibot.debug( f'{type(self).__name__}: Retrieved {n_items} items') if n_items > 0: for item in data[self.data_name]: yield item n += 1 if self.limit is not None and n >= self.limit: pywikibot.debug( f'{type(self).__name__}: Stopped iterating due to' ' exceeding item limit.' ) return offset += n_items else: pywikibot.debug(f'{type(self).__name__}: Stopped iterating' ' due to empty list in response.') break
[docs] class QueryGenerator(APIGeneratorBase, GeneratorWrapper): """Base class for generators that handle responses to API action=query. By default, the generator will iterate each item in the query response, and use the (query-)continue element, if present, to continue iterating as long as the wiki returns additional values. However, if the generators's limit attribute is set to a positive int, the generators will stop after iterating that many values. If limit is negative, the limit parameter will not be passed to the API at all. Most common query types are more efficiently handled by subclasses, but this class can be used directly for custom queries and miscellaneous types (such as "meta=...") that don't return the usual list of pages or links. See the API documentation for specific query options. .. versionchanged:: 7.6 subclassed from :class:`tools.collections.GeneratorWrapper` """ # Should results be filtered during iteration according to set_namespace? # Used if the API module does not support multiple namespaces. # Override in subclasses by defining a function that returns True if # the result's namespace is in self._namespaces. _check_result_namespace: Callable[[Any], bool] = NotImplemented # Set of allowed namespaces will be assigned to _namespaces during # set_namespace call. Only to be used by _check_result_namespace. _namespaces: set[int] | bool | None = None def __init__(self, **kwargs) -> None: """Initialize a QueryGenerator object. kwargs are used to create a Request object; see that object's documentation for values. 'action'='query' is assumed. """ if not hasattr(self, 'site'): kwargs = self._clean_kwargs(kwargs) # hasn't been called yet parameters = kwargs['parameters'] if 'action' in parameters and parameters['action'] != 'query': raise Error("{}: 'action' must be 'query', not {}" .format(self.__class__.__name__, kwargs['action'])) parameters['action'] = 'query' # make sure request type is valid, and get limit key if any for modtype in ('generator', 'list', 'prop', 'meta'): if modtype in parameters: self.modules = parameters[modtype].split('|') break else: raise Error(f'{type(self).__name__}: No query module name found' ' in arguments.') parameters['indexpageids'] = True # always ask for list of pageids self.continue_name = 'continue' # Explicitly enable the simplified continuation parameters['continue'] = True self.request = self.request_class(**kwargs) self.site._paraminfo.fetch('query+' + mod for mod in self.modules) limited_modules = {mod for mod in self.modules if self.site._paraminfo.parameter('query+' + mod, 'limit')} if not limited_modules: self.limited_module = None elif len(limited_modules) == 1: self.limited_module = limited_modules.pop() else: # Select the first limited module in the request. # Query will continue as needed until limit (if any) for this # module is reached. for module in self.modules: if module in limited_modules: self.limited_module = module limited_modules.remove(module) break pywikibot.log( f'{type(self).__name__}: multiple requested query modules' ' support limits; using the first such module ' f"{self.limited_module}' of {self.modules!r}" ) # Set limits for all remaining limited modules to max value. # Default values will only cause more requests and make the query # slower. for module in limited_modules: param = self.site._paraminfo.parameter('query+' + module, 'limit') prefix = self.site._paraminfo['query+' + module]['prefix'] if self.site.logged_in() \ and self.site.has_right('apihighlimits'): self.request[prefix + 'limit'] = int(param['highmax']) else: self.request[prefix + 'limit'] = int(param['max']) self.api_limit: int | None if config.step > 0: self.api_limit = config.step else: self.api_limit = None if self.limited_module: self.prefix = self.site._paraminfo['query+' + self.limited_module]['prefix'] self._update_limit() if self.api_limit is not None and 'generator' in parameters: self.prefix = 'g' + self.prefix self.limit: int | None = None self.query_limit = self.api_limit if 'generator' in parameters: # name of the "query" subelement key to look for when iterating self.resultkey = 'pages' else: self.resultkey = self.modules[0] self._add_slots() @property @deprecated(since='8.4.0') def continuekey(self) -> list[str]: """Return deprecated continuekey which is self.modules.""" return self.modules def _add_slots(self) -> None: """Add slots to params if the site supports multi-content revisions. On MW 1.32+ the following query parameters require slots to be given when content or contentmodel is requested. * prop=revisions * prop=deletedrevisions or * list=allrevisions * list=alldeletedrevisions More info: https://lists.wikimedia.org/hyperkitty/list/mediawiki-api-announce@lists.wikimedia.org/message/AXO4G4OOMTG7CEUU5TGAWXBI2LD4G3BC/ """ if self.site.mw_version < '1.32': return request = self.request # If using any deprecated_params, do not add slots. Usage of # these parameters together with slots is forbidden and the user will # get an API warning anyway. props = request.get('prop') if props: if 'revisions' in props: deprecated_params = { 'rvexpandtemplates', 'rvparse', 'rvdiffto', 'rvdifftotext', 'rvdifftotextpst', 'rvcontentformat', 'parsetree'} if not set(request) & deprecated_params: request['rvslots'] = '*' if 'deletedrevisions' in props: deprecated_params = { 'drvexpandtemplates', 'drvparse', 'drvdiffto', 'drvdifftotext', 'drvdifftotextpst', 'drvcontentformat', 'parsetree'} if not set(request) & deprecated_params: request['drvslots'] = '*' lists = request.get('list') if lists: if 'allrevisions' in lists: deprecated_params = { 'arvexpandtemplates', 'arvparse', 'arvdiffto', 'arvdifftotext', 'arvdifftotextpst', 'arvcontentformat', 'parsetree'} if not set(request) & deprecated_params: request['arvslots'] = '*' if 'alldeletedrevisions' in lists: deprecated_params = { 'adrexpandtemplates', 'adrparse', 'adrdiffto', 'adrdifftotext', 'adrdifftotextpst', 'adrcontentformat', 'parsetree'} if not set(request) & deprecated_params: request['adrslots'] = '*'
[docs] def set_query_increment(self, value) -> None: """Set the maximum number of items to be retrieved per API query. If not called, the default is to ask for "max" items and let the API decide how many to send. """ limit = int(value) # don't update if limit is greater than maximum allowed by API if self.api_limit is None: self.query_limit = limit else: self.query_limit = min(self.api_limit, limit) pywikibot.debug( f'{type(self).__name__}: Set query_limit to {self.query_limit}.' )
[docs] def set_maximum_items(self, value: int | str | None) -> None: """Set the maximum number of items to be retrieved from the wiki. If not called, most queries will continue as long as there is more data to be retrieved from the API. If set to -1 (or any negative value), the "limit" parameter will be omitted from the request. For some request types (such as prop=revisions), this is necessary to signal that only current revision is to be returned. :param value: The value of maximum number of items to be retrieved in total to set. Ignores None value. """ if value is not None: self.limit = int(value)
def _update_limit(self) -> None: """Set query limit for self.module based on api response.""" assert self.limited_module is not None param = self.site._paraminfo.parameter('query+' + self.limited_module, 'limit') if self.site.logged_in() and self.site.has_right('apihighlimits'): limit = int(param['highmax']) else: limit = int(param['max']) if self.api_limit is None or limit < self.api_limit: self.api_limit = limit pywikibot.debug( f'{type(self).__name__}: Set query_limit to {self.api_limit}.' )
[docs] def support_namespace(self) -> bool: """Check if namespace is a supported parameter on this query. .. note:: this function will be removed when :meth:`set_namespace` will throw TypeError() instead of just giving a warning. See :phab:`T196619`. :return: True if yes, False otherwise """ assert self.limited_module # some modules do not have a prefix return bool( self.site._paraminfo.parameter('query+' + self.limited_module, 'namespace'))
[docs] def set_namespace(self, namespaces): """Set a namespace filter on this query. :param namespaces: namespace identifiers to limit query results :type namespaces: iterable of str or Namespace key, or a single instance of those types. May be a '|' separated list of namespace identifiers. An empty iterator clears any namespace restriction. :raises KeyError: a namespace identifier was not resolved """ # TODO: T196619 # :raises TypeError: module does not support a namespace parameter # or a namespace identifier has an inappropriate # type such as NoneType or bool, or more than one namespace # if the API module does not support multiple namespaces assert self.limited_module # some modules do not have a prefix param = self.site._paraminfo.parameter('query+' + self.limited_module, 'namespace') if not param: pywikibot.warning(f'{self.limited_module} module does not support' ' a namespace parameter') warn('set_namespace() will be modified to raise TypeError ' 'when namespace parameter is not supported. ' 'It will be a Breaking Change, please update your code ' 'ASAP, due date July, 31st 2019.', FutureWarning, 2) # TODO: T196619 # raise TypeError('{} module does not support a namespace ' # 'parameter'.format(self.limited_module)) return False if isinstance(namespaces, str): namespaces = namespaces.split('|') # Use Namespace id (int) here; Request will cast int to str namespaces = [ns.id for ns in self.site.namespaces.resolve(namespaces)] if 'multi' not in param and len(namespaces) != 1: if self._check_result_namespace is NotImplemented: raise TypeError(f'{self.limited_module} module does not' ' support multiple namespaces') self._namespaces = set(namespaces) namespaces = None if namespaces: self.request[self.prefix + 'namespace'] = namespaces elif self.prefix + 'namespace' in self.request: del self.request[self.prefix + 'namespace'] return None
[docs] def continue_update(self) -> None: """Update query with continue parameters. .. versionadded:: 3.0 .. versionchanged:: 4.0 explicit return a bool value to be used in :meth:`generator` .. versionchanged:: 6.0 always return *False* .. versionchanged:: 8.4 return *None* instead of *False*. """ for key, value in self.data['continue'].items(): # query-continue can return ints (continue too?) if isinstance(value, int): value = str(value) self.request[key] = value
def _handle_query_limit(self, prev_limit, new_limit, had_data): """Handle query limit.""" if self.query_limit is None or self.limited_module is None: return prev_limit, new_limit prev_limit = new_limit if self.limit is None: new_limit = self.query_limit elif self.limit > 0: if had_data: # self.resultkey in data in last request.submit() new_limit = min(self.query_limit, self.limit - self._count) else: # only "(query-)continue" returned. See Bug T74209. # increase new_limit to advance faster until new # useful data are found again. new_limit = min(new_limit * 2, self.query_limit) else: new_limit = None if new_limit and 'rvprop' in self.request \ and 'content' in self.request['rvprop']: # queries that retrieve page content have lower limits # Note: although API allows up to 500 pages for content # queries, these sometimes result in server-side errors # so use 250 as a safer limit new_limit = min(new_limit, self.api_limit // 10, 250) if new_limit is not None: self.request[self.prefix + 'limit'] = str(new_limit) if prev_limit != new_limit: pywikibot.debug( '{name}: query_limit: {query}, api_limit: {api}, ' 'limit: {limit}, new_limit: {new}, count: {count}\n' '{name}: {prefix}limit: {value}' .format(name=self.__class__.__name__, query=self.query_limit, api=self.api_limit, limit=self.limit, new=new_limit, count=self._count, prefix=self.prefix, value=self.request[self.prefix + 'limit'])) return prev_limit, new_limit def _get_resultdata(self): """Get resultdata and verify result.""" resultdata = keys = self.data['query'][self.resultkey] if isinstance(resultdata, dict): keys = list(resultdata) if 'results' in resultdata: resultdata = resultdata['results'] elif 'pageids' in self.data['query']: # this ensures that page data will be iterated # in the same order as received from server resultdata = [resultdata[k] for k in self.data['query']['pageids']] else: resultdata = [resultdata[k] for k in sorted(resultdata)] pywikibot.debug( f'{type(self).__name__} received {keys}; limit={self.limit}') return resultdata def _extract_results(self, resultdata): """Extract results from resultdata.""" for item in resultdata: result = self.result(item) if self._namespaces and not self._check_result_namespace(result): continue yield result modules_item_intersection = set(self.modules) & set(item) if isinstance(item, dict) and modules_item_intersection: # if we need to count elements contained in items in # self.data["query"]["pages"], we want to count # item[self.modules] (e.g. 'revisions') and not # self.resultkey (i.e. 'pages') for key in modules_item_intersection: self._count += len(item[key]) # otherwise we proceed as usual else: self._count += 1 # note: self.limit could be -1 if self.limit and 0 < self.limit <= self._count: raise RuntimeError( 'QueryGenerator._extract_results reached the limit') @property def generator(self): """Submit request and iterate the response based on self.resultkey. Continues response as needed until limit (if any) is reached. .. versionchanged:: 7.6 changed from iterator method to generator property """ previous_result_had_data = True prev_limit = new_limit = None self._count = 0 while True: prev_limit, new_limit = self._handle_query_limit( prev_limit, new_limit, previous_result_had_data) if not hasattr(self, 'data'): self.data = self.request.submit() if not self.data or not isinstance(self.data, dict): pywikibot.debug(f'{type(self).__name__}: stopped iteration' ' because no dict retrieved from api.') break if 'query' in self.data and self.resultkey in self.data['query']: resultdata = self._get_resultdata() if 'normalized' in self.data['query']: self.normalized = { item['to']: item['from'] for item in self.data['query']['normalized']} else: self.normalized = {} try: yield from self._extract_results(resultdata) except RuntimeError: break # self.resultkey in data in last request.submit() previous_result_had_data = True else: if 'query' not in self.data: pywikibot.log("%s: 'query' not found in api response." % self.__class__.__name__) pywikibot.log(str(self.data)) # if (query-)continue is present, self.resultkey might not have # been fetched yet if self.continue_name not in self.data: break # No results. # self.resultkey not in data in last request.submit() # only "(query-)continue" was retrieved. previous_result_had_data = False if self.modules[0] == 'random': # "random" module does not return "(query-)continue" # now we loop for a new random query del self.data # a new request is needed continue if self.continue_name not in self.data: break self.continue_update() del self.data # a new request with continue is needed
[docs] def result(self, data): """Process result data as needed for particular subclass.""" return data
[docs] class PageGenerator(QueryGenerator): """Generator for response to a request of type action=query&generator=foo. This class can be used for any of the query types that are listed in the API documentation as being able to be used as a generator. Instances of this class iterate Page objects. """ def __init__( self, generator: str, g_content: bool = False, **kwargs ) -> None: """Initializer. Required and optional parameters are as for ``Request``, except that ``action=query`` is assumed and generator is required. .. versionchanged:: 9.1 retrieve the same imageinfo properties as in :meth:`APISite.loadimageinfo() <pywikibot.site._apisite.APISite.loadimageinfo>` with default parameters. :param generator: the "generator=" type from api.php :param g_content: if True, retrieve the contents of the current version of each Page (default False) """ # If possible, use self.request after __init__ instead of append_params def append_params(params, key, value) -> None: if key in params: params[key] += '|' + value else: params[key] = value kwargs = self._clean_kwargs(kwargs) parameters = kwargs['parameters'] # get some basic information about every page generated append_params(parameters, 'prop', 'info|imageinfo|categoryinfo') if g_content: # retrieve the current revision append_params(parameters, 'prop', 'revisions') append_params(parameters, 'rvprop', 'ids|timestamp|flags|comment|user|content') if not ('inprop' in parameters and 'protection' in parameters['inprop']): append_params(parameters, 'inprop', 'protection') append_params(parameters, 'iiprop', pywikibot.site._IIPROP) append_params(parameters, 'iilimit', 'max') # T194233 parameters['generator'] = generator super().__init__(**kwargs) self.resultkey = 'pages' # element to look for in result self.props = self.request['prop']
[docs] def result(self, pagedata: dict[str, Any]) -> pywikibot.Page: """Convert page dict entry from api to Page object. This can be overridden in subclasses to return a different type of object. .. versionchanged:: 9.5 No longer raise :exc:`exceptions.UnsupportedPageError` but return a generic :class:`pywikibot.Page` object. The exception is raised when getting the content for example. .. versionchanged:: 9.6 Upcast to :class:`page.FilePage` if *pagedata* has ``imageinfo`` contents even if the file extension is invalid. """ p = pywikibot.Page(self.site, pagedata['title'], pagedata['ns']) ns = pagedata['ns'] # Upcast to proper Page subclass. if ns == Namespace.USER: p = pywikibot.User(p) elif ns == Namespace.FILE: with suppress(ValueError): p = pywikibot.FilePage( p, ignore_extension='imageinfo' in pagedata) elif ns == Namespace.CATEGORY: p = pywikibot.Category(p) with suppress(UnsupportedPageError): update_page(p, pagedata, self.props) return p
[docs] class PropertyGenerator(QueryGenerator): """Generator for queries of type action=query&prop=foo. See the API documentation for types of page properties that can be queried. This generator yields one or more dict object(s) corresponding to each "page" item(s) from the API response; the calling module has to decide what to do with the contents of the dict. There will be one dict for each page queried via a titles= or ids= parameter (which must be supplied when instantiating this class). """ def __init__(self, prop: str, **kwargs) -> None: """Initializer. Required and optional parameters are as for ``Request``, except that action=query is assumed and prop is required. :param prop: the "prop=" type from api.php """ kwargs = self._clean_kwargs(kwargs, prop=prop) super().__init__(**kwargs) self._props = frozenset(prop.split('|')) self.resultkey = 'pages' @property def props(self): """The requested property names.""" return self._props @property def generator(self): """Yield results. .. versionchanged:: 7.6 changed from iterator method to generator property """ self._previous_dicts = {} yield from super().generator yield from self._previous_dicts.values() def _extract_results(self, resultdata): """Yield completed page_data of consecutive API requests.""" yield from self._fully_retrieved_data_dicts(resultdata) for data_dict in super()._extract_results(resultdata): if 'title' in data_dict: d = self._previous_dicts.setdefault(data_dict['title'], data_dict) if d is not data_dict: self._update_old_result_dict(d, data_dict) else: pywikibot.warn('Skipping result without title: ' + str(data_dict)) def _fully_retrieved_data_dicts(self, resultdata): """Yield items of self._previous_dicts that are not in resultdata.""" resultdata_titles = {d['title'] for d in resultdata if 'title' in d} for prev_title, prev_dict in self._previous_dicts.copy().items(): if prev_title not in resultdata_titles: yield prev_dict del self._previous_dicts[prev_title] @staticmethod def _update_old_result_dict(old_dict, new_dict) -> None: """Update old result dict with new_dict.""" for k, v in new_dict.items(): if isinstance(v, (str, int)): old_dict.setdefault(k, v) elif isinstance(v, list): old_dict.setdefault(k, []).extend(v) else: raise ValueError(f'continued API result had an unexpected ' f'type: {type(v).__name__}')
[docs] class ListGenerator(QueryGenerator): """Generator for queries of type action=query&list=foo. See the API documentation for types of lists that can be queried. Lists include both site-wide information (such as 'allpages') and page-specific information (such as 'backlinks'). This generator yields a dict object for each member of the list returned by the API, with the format of the dict depending on the particular list command used. For those lists that contain page information, it may be easier to use the PageGenerator class instead, as that will convert the returned information into a Page object. """ def __init__(self, listaction: str, **kwargs) -> None: """Initializer. Required and optional parameters are as for ``Request``, except that action=query is assumed and listaction is required. :param listaction: the "list=" type from api.php """ kwargs = self._clean_kwargs(kwargs, list=listaction) super().__init__(**kwargs)
[docs] class LogEntryListGenerator(ListGenerator): """Generator for queries of list 'logevents'. Yields LogEntry objects instead of dicts. """ def __init__(self, logtype=None, **kwargs) -> None: """Initializer.""" super().__init__('logevents', **kwargs) from pywikibot import logentries self.entryFactory = logentries.LogEntryFactory(self.site, logtype)
[docs] def result(self, pagedata): """Instantiate LogEntry from data from api.""" return self.entryFactory.create(pagedata)
def _check_result_namespace(self, result): """Return True if result.ns() is in self._namespaces.""" return result.ns() in self._namespaces
def _update_pageid(page, pagedict: dict): """Update pageid.""" if 'pageid' in pagedict: page._pageid = int(pagedict['pageid']) elif 'missing' in pagedict: page._pageid = 0 # Non-existent page else: # Something is wrong. if page.site.sametitle(page.title(), pagedict['title']) \ and 'invalid' in pagedict: raise InvalidTitleError(f"{page}: {pagedict['invalidreason']}") if int(pagedict['ns']) < 0: raise UnsupportedPageError(page) raise RuntimeError(f"Page {pagedict['title']} has neither 'pageid'" " nor 'missing' attribute") def _update_contentmodel(page, pagedict: dict) -> None: """Update page content model.""" page._contentmodel = pagedict.get('contentmodel') # can be None if (page._contentmodel and page._contentmodel == 'proofread-page' and 'proofread' in pagedict): page._quality = pagedict['proofread']['quality'] page._quality_text = pagedict['proofread']['quality_text'] def _update_protection(page, pagedict: dict) -> None: """Update page protection.""" if 'restrictiontypes' in pagedict: page._applicable_protections = set(pagedict['restrictiontypes']) else: page._applicable_protections = None page._protection = {item['type']: (item['level'], item['expiry']) for item in pagedict['protection']} def _update_revisions(page, revisions) -> None: """Update page revisions.""" for rev in revisions: revid = rev['revid'] revision = pywikibot.page.Revision(**rev) # do not overwrite an existing Revision if there is no content if revid in page._revisions and revision.text is None: # type: ignore[attr-defined] # noqa: E501 pass else: page._revisions[revid] = revision def _update_templates(page, templates) -> None: """Update page templates.""" templ_pages = {pywikibot.Page(page.site, tl['title']) for tl in templates} if hasattr(page, '_templates'): page._templates |= templ_pages else: page._templates = templ_pages def _update_categories(page, categories): """Update page categories.""" cat_pages = {pywikibot.Page(page.site, ct['title']) for ct in categories} if hasattr(page, '_categories'): page._categories |= cat_pages else: page._categories = cat_pages def _update_langlinks(page, langlinks) -> None: """Update page langlinks. .. versionadded:: 9.3 only add a language link if it is found in the family file. :meta public: """ links = set() for langlink in langlinks: with suppress(UnknownSiteError): link = pywikibot.Link.langlinkUnsafe(langlink['lang'], langlink['*'], source=page.site) links.add(link) if hasattr(page, '_langlinks'): page._langlinks |= links else: page._langlinks = links def _update_coordinates(page, coordinates) -> None: """Update page coordinates.""" coords = [] for co in coordinates: coord = pywikibot.Coordinate(lat=co['lat'], lon=co['lon'], typ=co.get('type', ''), name=co.get('name', ''), dim=int(co.get('dim', 0)) or None, globe=co['globe'], # See [[gerrit:67886]] primary='primary' in co ) coords.append(coord) page._coords = coords
[docs] def update_page(page: pywikibot.Page, pagedict: dict[str, Any], props: Iterable[str] | None = None) -> None: """Update attributes of *page*, based on query data in *pagedict*. :param page: object to be updated :param pagedict: the contents of a *page* element of a query response :param props: the property names which resulted in *pagedict*. If a missing value in *pagedict* can indicate both 'false' and 'not present' the property which would make the value present must be in the *props* parameter. :raises InvalidTitleError: Page title is invalid :raises UnsupportedPageError: Page with namespace < 0 is not supported yet """ _update_pageid(page, pagedict) _update_contentmodel(page, pagedict) props = props or [] # test for pagedict content only and call updater function for element in ('coordinates', 'revisions'): if element in pagedict: updater = globals()['_update_' + element] updater(page, pagedict[element]) # test for pagedict and props contents, call updater or set attribute for element in ('categories', 'langlinks', 'templates'): if element in pagedict: updater = globals()['_update_' + element] updater(page, pagedict[element]) elif element in props: setattr(page, '_' + element, set()) if 'info' in props: page._isredir = 'redirect' in pagedict if 'touched' in pagedict: page._timestamp = pagedict['touched'] if 'protection' in pagedict: _update_protection(page, pagedict) if 'lastrevid' in pagedict: page.latest_revision_id = pagedict['lastrevid'] if 'imageinfo' in pagedict: if not isinstance(page, pywikibot.FilePage): raise RuntimeError( f'"imageinfo" found but {page} is not a FilePage object') page._load_file_revisions(pagedict['imageinfo']) if 'categoryinfo' in pagedict: page._catinfo = pagedict['categoryinfo'] if 'pageimage' in pagedict: page._pageimage = pywikibot.FilePage(page.site, pagedict['pageimage']) if 'pageprops' in pagedict: page._pageprops = pagedict['pageprops'] elif 'pageprops' in props: page._pageprops = {} # preload is deprecated in MW 1.41, try preloadcontent first if 'preloadcontent' in pagedict: page._preloadedtext = pagedict['preloadcontent']['*'] elif 'preload' in pagedict: page._preloadedtext = pagedict['preload'] if 'flowinfo' in pagedict: page._flowinfo = pagedict['flowinfo']['flow'] if 'lintId' in pagedict: page._lintinfo = pagedict page._lintinfo.pop('pageid') page._lintinfo.pop('title') page._lintinfo.pop('ns') if 'imageforpage' in props and 'imagesforpage' in pagedict: # proofreadpage will work always on dicts # it serves also as workaround for T352482 page._imageforpage = pagedict['imagesforpage'] or {}