# Source code for pywikibot.data.api._paraminfo

"""Object representing API parameter information."""
#
# (C) Pywikibot team, 2014-2024
#
# Distributed under the terms of the MIT license.
#
from __future__ import annotations

from collections.abc import Container, Sized
from typing import Any

import pywikibot
from pywikibot import config
from pywikibot.backports import Iterable, batched
from pywikibot.tools import classproperty, deprecated, remove_last_args


__all__ = ['ParamInfo']


class ParamInfo(Sized, Container):

    """API parameter information data object.

    Provides cache aware fetching of parameter information.

    .. seealso:: :api:`Parameter information`
    """

    root_modules = frozenset(['main'])
    init_modules = frozenset(['main', 'paraminfo'])
    param_modules = ('list', 'meta', 'prop')

    @remove_last_args(['modules_only_mode'])
    def __init__(self,
                 site,
                 preloaded_modules: set[str] | None = None) -> None:
        """Initializer.

        .. deprecated:: 8.4
           the *modules_only_mode* parameter

        :param site: site object; its ``_request`` method is used to
            query the API for parameter information
        :param preloaded_modules: API modules to preload
        """
        self.site = site

        # Keys are module names, values are the raw responses from the server.
        self._paraminfo = {}

        # Cached data.
        self._prefixes = {}
        self._prefix_map = {}
        self._with_limits = None

        self._action_modules = frozenset()  # top level modules
        self._modules = {}  # filled in _init() (and enlarged in fetch)
        self._limit = None

        # frozenset | set creates a new frozenset, so the class attribute
        # init_modules is never modified here.
        self._preloaded_modules = self.init_modules
        if preloaded_modules:
            self._preloaded_modules |= set(preloaded_modules)

    def _add_submodules(self, name: str,
                        modules: set[str] | dict[str, str]) -> None:
        """Add the modules to the internal cache.

        :param name: name of the parent module; must not be a path
            containing ``+``
        :param modules: submodules of *name*
        """
        assert '+' not in name
        if name == 'main':
            # The main module behaves differently as it has no prefix
            if self._action_modules:
                assert modules == self._action_modules
            else:
                self._action_modules = modules
        elif name in self._modules:
            # update() is used because the cached container may be either
            # a dict or a set
            self._modules[name].update(modules)
        else:
            self._modules[name] = modules

    def _init(self) -> None:
        """Load the preloaded modules and the query submodules once.

        Does nothing if 'query' is already known. Otherwise it fetches
        the preloaded modules together with 'query', verifies the result
        against the 'action' parameter of 'main' and derives the batch
        limit and the query submodules from the ``param_modules``
        parameters of 'query'.
        """
        assert ('query' in self._modules) is ('main' in self._paraminfo)

        # Skip if ParamInfo is already initialized
        if 'query' in self._modules:
            return

        # Assume that it will be desirable to prefetch 'query'
        self._preloaded_modules |= {'query'}

        self._fetch(self._preloaded_modules)

        main_modules_param = self.parameter('main', 'action')
        assert main_modules_param
        assert 'type' in main_modules_param
        assert isinstance(main_modules_param['type'], list)
        assert self._action_modules == set(main_modules_param['type'])

        assert 'query' in self._modules
        assert 'query' in self._paraminfo

        # Retrieve all query submodules
        self._limit = 50
        for param in self.param_modules:
            query_modules_param = self.parameter('query', param)
            self._limit = min(query_modules_param['limit'], self._limit)
            self._add_submodules('query', query_modules_param['submodules'])

    @staticmethod
    def _modules_to_set(modules: Iterable | str) -> set[str]:
        """Return modules as a set.

        :param modules: either a ``|``-separated string of module names
            or an iterable of module names
        """
        if isinstance(modules, str):
            return set(modules.split('|'))
        return set(modules)

    def fetch(self, modules: Iterable | str) -> None:
        """Fetch paraminfo for multiple modules.

        No exception is raised when paraminfo for a module does not
        exist. Use ``paraminfo[module]`` to cause an exception if a
        module does not exist.

        :param modules: API modules to load
        """
        if 'main' not in self._paraminfo:
            # The first request should be 'paraminfo', so that
            # query modules can be prefixed with 'query+'
            self._init()

        modules = self._modules_to_set(modules)

        if self._action_modules:
            # The query module may be added before the action modules have
            # been
            if 'query' in self._modules:
                # It does fetch() while initializing, and this method can't
                # be called before it's initialized.
                modules = self._normalize_modules(modules)
            else:
                # We do know the valid action modules and require a subset
                assert not modules - self._action_modules - self.root_modules

        self._fetch(modules)

    def _fetch(self, modules: set | frozenset) -> None:
        """Get paraminfo for multiple modules without initializing beforehand.

        Modules are requested in batches. A module missing from the
        response of a multi-module batch is requested again on its own.

        :param modules: API modules to load and which haven't been loaded
            yet. The given container is not modified.
        """
        # Work on a new set so that a mutable set passed by the caller
        # is left untouched.
        modules = set(modules) - set(self._paraminfo)
        if not modules:
            return

        assert 'query' in self._modules or 'paraminfo' not in self._paraminfo

        # If something went wrong in a batch, the affected modules are added
        # to this list and the generator yields each of them separately.
        failed_modules = []

        def drain_failed():
            """Yield each failed module as a batch of its own."""
            pending = list(failed_modules)
            failed_modules.clear()
            for failed_module in pending:
                yield [failed_module]

        def module_generator():
            """A generator yielding batches of modules."""
            # T340617: self._limit is not set for the first modules
            # which is frozenset({'paraminfo', 'query', 'main'})
            for batch in batched(sorted(modules), self._limit or 50):
                yield from drain_failed()
                yield list(batch)
            # Failures recorded while processing the final batch must be
            # retried too; otherwise they would be dropped silently.
            yield from drain_failed()

        # This can be further optimised, by grouping them in more stable
        # subsets, which are unlikely to change. i.e. first request core
        # modules which have been a stable part of the API for a long time.
        # Also detecting extension based modules may help.
        for module_batch in module_generator():
            params = {
                'action': 'paraminfo',
                'modules': module_batch,
            }

            # Request need ParamInfo to determine use_get
            request = self.site._request(expiry=config.API_config_expiry,
                                         use_get=True,
                                         parameters=params)
            result = request.submit()

            normalized_result = self.normalize_paraminfo(result)
            # Ambiguous paths are marked with False; drop them
            for path in list(normalized_result):
                if normalized_result[path] is False:
                    del normalized_result[path]

            # Sometimes the name/path of the module is not actually the name
            # which was requested, so we need to manually determine which
            # (wrongly named) module uses which actual name. See also T105478
            missing_modules = [m for m in module_batch
                               if m not in normalized_result]
            if len(missing_modules) == 1 and len(normalized_result) == 1:
                # Okay it's possible to recover
                requested = missing_modules[0]
                mod_info = next(iter(normalized_result.values()))
                pywikibot.warning(
                    f'The module "{mod_info["name"]}" '
                    f'("{mod_info["path"]}") was returned as path '
                    f'even though "{requested}" was requested'
                )
                mod_info['path'] = requested
                # The module name is the last component of its path,
                # e.g. 'info' for 'query+info'.
                mod_info['name'] = requested.rsplit('+', 1)[-1]
                normalized_result = {requested: mod_info}
            elif len(module_batch) > 1 and missing_modules:
                # Rerequest the missing ones separately
                pywikibot.log(f'Inconsistency in batch "{missing_modules}";'
                              ' rerequest separately')
                failed_modules.extend(missing_modules)

            # Remove all modules which weren't requested, we can't be sure
            # that they are valid
            for path in list(normalized_result):
                if path not in module_batch:
                    del normalized_result[path]

            self._paraminfo.update(normalized_result)
            for mod in normalized_result.values():
                self._generate_submodules(mod['path'])

    def _generate_submodules(self, module) -> None:
        """Check and generate submodules for the given module.

        :param module: path of a module already stored in the paraminfo
            cache
        :raises RuntimeError: the 'query' module has no 'generator'
            parameter
        """
        parameters = self._paraminfo[module].get('parameters', [])
        submodules = set()
        # This is supplying submodules even if they aren't submodules
        # of the given module so skip those
        for param in parameters:
            if ((module == 'main' and param['name'] == 'format')
                    or 'submodules' not in param):
                continue

            for child, submodule in param['submodules'].items():
                if '+' in submodule:
                    parent = submodule.rsplit('+', 1)[0]
                else:
                    parent = 'main'
                if parent == module:
                    submodules.add(child)

        if submodules:
            self._add_submodules(module, submodules)

        if module == 'query':
            # Verify that submodules from generator are just a subset of the
            # prop/list/meta modules.
            generator_param = next(
                (param for param in parameters
                 if param['name'] == 'generator'),
                None)
            if generator_param is None:
                raise RuntimeError(
                    "'query' module has no 'generator' parameter")
            assert submodules >= set(generator_param['type'])

    def _normalize_modules(self, modules) -> set:
        """Add query+ to any query module name not also in action modules.

        :param modules: module names, either as an iterable or as a
            ``|``-separated string
        """
        # Users will supply the wrong type, and expect it to work.
        modules = self._modules_to_set(modules)

        assert self._action_modules

        return {'query+' + mod
                if '+' not in mod and mod in self.query_modules
                and mod not in self._action_modules
                else mod
                for mod in modules}

    def normalize_modules(self, modules) -> set:
        """Convert the modules into module paths.

        Add query+ to any query module name not also in action modules.

        :param modules: module names, either as an iterable or as a
            ``|``-separated string
        :return: The modules converted into module paths
        """
        self._init()
        return self._normalize_modules(modules)

    @staticmethod
    def normalize_paraminfo(data: dict[str, Any]) -> dict[str, Any]:
        """Convert API JSON into a new data structure with path as key.

        Modules flagged as 'missing' are skipped. For duplicate paths,
        the value will be False.

        .. versionchanged:: 8.4
           ``normalize_paraminfo`` became a staticmethod.

        :param data: API response containing a 'paraminfo' entry
        :return: module data keyed by the module path
        """
        result_data = {}
        modules_data = data['paraminfo'].get('modules', [])
        for mod_data in modules_data:
            if 'missing' in mod_data:
                continue

            path = mod_data['path']
            if path not in result_data:
                result_data[path] = mod_data
            elif result_data[path] is not False:
                # Only warn first time
                result_data[path] = False
                pywikibot.warning(f'Path "{path}" is ambiguous.')
            else:
                pywikibot.log(f'Found another path "{path}"')

        return result_data

    def __getitem__(self, key):
        """Return a paraminfo module for the module path, caching it.

        Use the module path, such as 'query+x', to obtain the paraminfo
        for submodule 'x' in the query module.

        If the key does not include a '+' and is not present in the top
        level of the API, it will fallback to looking for the key
        'query+x'.

        :raises KeyError: no paraminfo is available for *key*
        """
        self.fetch({key})
        if key in self._paraminfo:
            return self._paraminfo[key]

        if '+' not in key:
            return self._paraminfo['query+' + key]

        raise KeyError(key)

    def __contains__(self, key) -> bool:
        """Return whether the key is valid."""
        try:
            self[key]
        except KeyError:
            return False
        return True

    def __len__(self) -> int:
        """Return number of cached modules."""
        return len(self._paraminfo)

    def parameter(
        self,
        module: str,
        param_name: str
    ) -> dict[str, Any] | None:
        """Get details about one modules parameter.

        Returns None if the parameter does not exist.

        :param module: API module name
        :param param_name: parameter name in the module
        :return: metadata that describes how the parameter may be used
        :raises ValueError: paraminfo for *module* is not available
        :raises RuntimeError: more than one parameter is named
            *param_name*
        """
        # Keep the module name in its own variable so that messages below
        # report the name and not the whole paraminfo dict.
        try:
            module_info = self[module]
        except KeyError:
            raise ValueError(f"paraminfo for '{module}' not loaded")

        try:
            params = module_info['parameters']
        except KeyError:
            pywikibot.warning(f"module '{module}' has no parameters")
            return None

        param_data = [param for param in params
                      if param['name'] == param_name]

        if not param_data:
            return None

        if len(param_data) != 1:
            raise RuntimeError(f'parameter data length is either empty or '
                               f'not unique.\n{param_data}')
        return param_data[0]

    @property
    def module_paths(self):
        """Set of all modules using their paths."""
        # Load the submodules of all action modules available
        self.fetch(self.action_modules)
        modules = set(self.action_modules)
        for parent_module in self._modules:
            submodules = self.submodules(parent_module, path=True)
            assert not submodules & modules
            modules |= submodules
        return modules

    @property
    def action_modules(self):
        """Set of all action modules."""
        self._init()
        return self._action_modules

    @property
    def query_modules(self):
        """Set of all query module names without query+ path prefix."""
        return self.submodules('query')

    def submodules(self, name: str, path: bool = False) -> set[str]:
        """Set of all submodules.

        :param name: The name of the parent module.
        :param path: Whether the path and not the name is returned.
        :return: The names or paths of the submodules.
        """
        if name not in self._modules:
            self.fetch([name])
        submodules = self._modules[name]
        if path:
            # prefix submodules
            submodules = {f'{name}+{mod}' for mod in submodules}
        return submodules

    @property
    def prefix_map(self) -> dict[str, str]:
        """Mapping of module to its prefix for all modules with a prefix.

        This loads paraminfo for all modules.
        """
        if not self._prefix_map:
            self._prefix_map = {
                module: prefix
                for module, prefix in self.attributes('prefix').items()
                if prefix
            }
        # Return a copy so callers cannot alter the cached mapping
        return self._prefix_map.copy()

    def attributes(self, attribute: str,
                   modules: set | None = None) -> dict[str, Any]:
        """Mapping of modules with an attribute to the attribute value.

        It will include all modules which have that attribute set, also
        if that attribute is empty or set to False.

        :param attribute: attribute name
        :param modules: modules to include. If None (default), it'll load
            all modules including all submodules using the paths.
        :return: dict using modules as keys
        """
        if modules is None:
            modules = self.module_paths
        self.fetch(modules)

        result = {}
        for mod in modules:
            # Look the module up only once per iteration
            mod_info = self[mod]
            if attribute in mod_info:
                result[mod] = mod_info[attribute]
        return result

    @classproperty
    @deprecated(since='8.4.0')
    def paraminfo_keys(cls) -> frozenset[str]:
        """Return module types.

        .. deprecated:: 8.4
        """
        return frozenset(['modules'])

    @property
    @deprecated(since='8.4.0')
    def preloaded_modules(self) -> frozenset[str] | set[str]:
        """Return set of preloaded modules.

        .. deprecated:: 8.4
        """
        return self._preloaded_modules