Source code for pywikibot.data.api._paraminfo

"""Object representing API parameter information."""
#
# (C) Pywikibot team, 2014-2023
#
# Distributed under the terms of the MIT license.
#
from collections.abc import Container, Sized
from typing import Any, Optional, Union

import pywikibot
from pywikibot import config
from pywikibot.backports import Dict, FrozenSet, Iterable, Set, batched
from pywikibot.tools import classproperty, deprecated, remove_last_args


__all__ = ['ParamInfo']


class ParamInfo(Sized, Container):

    """API parameter information data object.

    Provides cache aware fetching of parameter information.

    .. seealso:: :api:`Parameter information`
    """

    root_modules = frozenset(['main'])
    init_modules = frozenset(['main', 'paraminfo'])
    param_modules = ('list', 'meta', 'prop')

    @remove_last_args(['modules_only_mode'])
    def __init__(self,
                 site,
                 preloaded_modules: Optional[Set[str]] = None) -> None:
        """Initializer.

        .. deprecated:: 8.4
           the *modules_only_mode* parameter

        :param site: site object whose ``_request`` method is used to
            query the API
        :param preloaded_modules: API modules to preload
        """
        self.site = site

        # Keys are module paths, values are the raw responses from the server.
        self._paraminfo = {}

        # Cached data.
        self._prefixes = {}
        self._prefix_map = {}
        self._with_limits = None

        self._action_modules = frozenset()  # top level modules
        self._modules = {}  # filled in _init() (and enlarged in fetch)
        self._limit = None

        # ``|=`` on a frozenset rebinds the attribute, so the class level
        # ``init_modules`` is never modified.
        self._preloaded_modules = self.init_modules
        if preloaded_modules:
            self._preloaded_modules |= set(preloaded_modules)

    def _add_submodules(self, name: str,
                        modules: Union[Set[str], Dict[str, str]]) -> None:
        """Add the modules to the internal cache.

        :param name: name of the parent module; must not contain ``+``
        :param modules: submodules of *name*; when a dict is given only
            its keys end up in an already existing entry
        """
        assert '+' not in name
        if name == 'main':
            # The main module behaves differently as it has no prefix
            if self._action_modules:
                assert modules == self._action_modules
            else:
                self._action_modules = modules
        elif name in self._modules:
            # update() is used because it accepts both a dict and a set
            self._modules[name].update(modules)
        else:
            self._modules[name] = modules

    def _init(self) -> None:
        """Load the preloaded modules and the query submodules once.

        Does nothing if ``'query'`` is already a known parent module.
        """
        assert ('query' in self._modules) is ('main' in self._paraminfo)

        # Skip if ParamInfo is already initialized
        if 'query' in self._modules:
            return

        # Assume that it will be desirable to prefetch 'query'
        self._preloaded_modules |= {'query'}

        self._fetch(self._preloaded_modules)

        main_modules_param = self.parameter('main', 'action')
        assert main_modules_param
        assert 'type' in main_modules_param
        assert isinstance(main_modules_param['type'], list)
        assert self._action_modules == set(main_modules_param['type'])

        assert 'query' in self._modules
        assert 'query' in self._paraminfo

        # Retrieve all query submodules; the batch size used by _fetch() is
        # the smallest 'limit' of the list/meta/prop parameters, capped at 50
        self._limit = 50
        for param in self.param_modules:
            query_modules_param = self.parameter('query', param)
            self._limit = min(query_modules_param['limit'], self._limit)
            self._add_submodules('query', query_modules_param['submodules'])

    @staticmethod
    def _modules_to_set(modules: Union[Iterable, str]) -> Set[str]:
        """Return modules as a set.

        :param modules: either an iterable of module names or a single
            string with the names separated by ``|``
        """
        if isinstance(modules, str):
            return set(modules.split('|'))
        return set(modules)

    def fetch(self, modules: Union[Iterable, str]) -> None:
        """Fetch paraminfo for multiple modules.

        No exception is raised when paraminfo for a module does not exist.
        Use ``paraminfo[module]`` to cause an exception if a module does
        not exist.

        :param modules: API modules to load
        """
        if 'main' not in self._paraminfo:
            # The first request should be 'paraminfo', so that
            # query modules can be prefixed with 'query+'
            self._init()

        modules = self._modules_to_set(modules)

        if self._action_modules:
            # The query module may be added before the action modules have been
            if 'query' in self._modules:
                # It does fetch() while initializing, and this method can't be
                # called before it's initialized.
                modules = self._normalize_modules(modules)
            else:
                # We do know the valid action modules and require a subset
                assert not modules - self._action_modules - self.root_modules

        self._fetch(modules)

    def _fetch(self, modules: Union[set, frozenset]) -> None:
        """
        Fetch paraminfo for multiple modules without initializing beforehand.

        The given collection is not modified.

        :param modules: API modules to load and which haven't been loaded yet.
        """
        # Work on a copy so that a mutable set passed in by the caller is
        # left untouched.
        modules = set(modules) - set(self._paraminfo)
        if not modules:
            return

        assert 'query' in self._modules or 'paraminfo' not in self._paraminfo

        # If something went wrong in a batch, the affected modules are added
        # to this list and the generator yields each of them separately.
        failed_modules = []

        def module_generator():
            """A generator yielding batches of modules."""
            # T340617: self._limit is not set for the first modules
            # which is frozenset({'paraminfo', 'query', 'main'})
            for batch in batched(sorted(modules), self._limit or 50):
                for failed_module in failed_modules:
                    yield [failed_module]
                failed_modules.clear()
                yield list(batch)

            # Failures of the last batch have no following batch, so they
            # are retried here.  Single module batches never add new
            # failures, hence the list does not grow while iterating.
            for failed_module in failed_modules:
                yield [failed_module]
            failed_modules.clear()

        # This can be further optimised, by grouping them in more stable
        # subsets, which are unlikely to change. i.e. first request core
        # modules which have been a stable part of the API for a long time.
        # Also detecting extension based modules may help.
        for module_batch in module_generator():
            params = {
                'action': 'paraminfo',
                'modules': module_batch,
            }

            # Request need ParamInfo to determine use_get
            request = self.site._request(expiry=config.API_config_expiry,
                                         use_get=True,
                                         parameters=params)
            result = request.submit()

            normalized_result = self.normalize_paraminfo(result)
            # Ambiguous paths are marked with False; drop them
            for path in list(normalized_result):
                if normalized_result[path] is False:
                    del normalized_result[path]

            # Sometimes the name/path of the module is not actually the name
            # which was requested, so we need to manually determine which
            # (wrongly named) module uses which actual name. See also T105478
            missing_modules = [m for m in module_batch
                               if m not in normalized_result]
            returned_paths = list(normalized_result)
            if (len(missing_modules) == 1 and len(returned_paths) == 1
                    and returned_paths[0] not in module_batch):
                # Okay it's possible to recover: the only returned module
                # was not requested, so it must be the missing one.
                requested = missing_modules[0]
                recovered = normalized_result[returned_paths[0]]
                pywikibot.warning('The module "{0[name]}" ("{0[path]}") '
                                  'was returned as path even though "{1}" '
                                  'was requested'.format(recovered,
                                                         requested))
                recovered['path'] = requested
                # The name is the last component of the path
                recovered['name'] = requested.rsplit('+', 1)[-1]
                normalized_result = {requested: recovered}
            elif len(module_batch) > 1 and missing_modules:
                # Rerequest the missing ones separately
                pywikibot.log(f'Inconsistency in batch "{missing_modules}";'
                              ' rerequest separately')
                failed_modules.extend(missing_modules)

            # Remove all modules which weren't requested, we can't be sure that
            # they are valid
            for path in list(normalized_result):
                if path not in module_batch:
                    del normalized_result[path]

            self._paraminfo.update(normalized_result)
            for mod in normalized_result.values():
                self._generate_submodules(mod['path'])

    def _generate_submodules(self, module) -> None:
        """Check and generate submodules for the given module.

        :param module: path of a module already stored in the cache
        :raises RuntimeError: *module* is ``'query'`` but it has no
            ``generator`` parameter
        """
        parameters = self._paraminfo[module].get('parameters', [])
        submodules = set()
        # This is supplying submodules even if they aren't submodules
        # of the given module so skip those
        for param in parameters:
            if module == 'main' and param['name'] == 'format' \
               or 'submodules' not in param:
                continue

            for child, submodule in param['submodules'].items():
                # A path without '+' belongs to the main module
                if '+' in submodule:
                    parent = submodule.rsplit('+', 1)[0]
                else:
                    parent = 'main'
                if parent == module:
                    submodules.add(child)

        if submodules:
            self._add_submodules(module, submodules)
        if module == 'query':
            # Verify that submodules from generator are just a subset of the
            # prop/list/meta modules.
            for param in parameters:
                if param['name'] == 'generator':
                    break
            else:
                raise RuntimeError(
                    "'query' module has no 'generator' parameter")
            assert param['name'] == 'generator' \
                and submodules >= set(param['type'])

    def _normalize_modules(self, modules) -> set:
        """Add query+ to any query module name not also in action modules."""
        # Users will supply the wrong type, and expect it to work.
        modules = self._modules_to_set(modules)

        assert self._action_modules

        return {'query+' + mod
                if '+' not in mod and mod in self.query_modules
                and mod not in self._action_modules
                else mod
                for mod in modules}

    def normalize_modules(self, modules) -> set:
        """Convert the modules into module paths.

        Add query+ to any query module name not also in action modules.

        :param modules: module names as an iterable or as a string
            separated by ``|``
        :return: The modules converted into a module paths
        """
        self._init()
        return self._normalize_modules(modules)

    @staticmethod
    def normalize_paraminfo(data: Dict[str, Any]) -> Dict[str, Any]:
        """Convert API JSON into a new data structure with path as key.

        Modules flagged as ``missing`` are skipped. For duplicate paths,
        the value will be False.

        .. versionchanged:: 8.4
           ``normalize_paraminfo`` became a staticmethod.

        :param data: API response containing a ``paraminfo`` entry
        :return: dict using the module paths as keys
        """
        result_data = {}

        modules_data = data['paraminfo'].get('modules', [])
        for mod_data in modules_data:
            if 'missing' in mod_data:
                continue

            path = mod_data['path']
            if path not in result_data:
                result_data[path] = mod_data
            elif result_data[path] is not False:
                # Only warn first time
                result_data[path] = False
                pywikibot.warning(f'Path "{path}" is ambiguous.')
            else:
                pywikibot.log(f'Found another path "{path}"')

        return result_data

    def __getitem__(self, key):
        """Return a paraminfo module for the module path, caching it.

        Use the module path, such as 'query+x', to obtain the paraminfo for
        submodule 'x' in the query module.

        If the key does not include a '+' and is not present in the top level
        of the API, it will fallback to looking for the key 'query+x'.

        :raises KeyError: no paraminfo is available for *key*
        """
        self.fetch({key})
        if key in self._paraminfo:
            return self._paraminfo[key]
        if '+' not in key:
            return self._paraminfo['query+' + key]
        raise KeyError(key)

    def __contains__(self, key) -> bool:
        """Return whether the key is valid.

        The lookup goes through ``__getitem__`` and therefore may fetch
        the paraminfo for *key*.
        """
        try:
            self[key]
            return True
        except KeyError:
            return False

    def __len__(self) -> int:
        """Return number of cached modules."""
        return len(self._paraminfo)

    def parameter(
        self,
        module: str,
        param_name: str
    ) -> Optional[Dict[str, Any]]:
        """Get details about one modules parameter.

        Returns None if the parameter does not exist.

        :param module: API module name
        :param param_name: parameter name in the module
        :return: metadata that describes how the parameter may be used
        :raises ValueError: no paraminfo is available for *module*
        :raises RuntimeError: more than one parameter is named
            *param_name*
        """
        # Keep the module name in *module*; it is needed for the messages
        # below.
        try:
            module_info = self[module]
        except KeyError:
            raise ValueError(f"paraminfo for '{module}' not loaded")

        try:
            params = module_info['parameters']
        except KeyError:
            pywikibot.warning(f"module '{module}' has no parameters")
            return None

        param_data = [param for param in params
                      if param['name'] == param_name]

        if not param_data:
            return None

        if len(param_data) != 1:
            raise RuntimeError(f'parameter data length is either empty or not '
                               f'unique.\n{param_data}')
        return param_data[0]

    @property
    def module_paths(self):
        """Set of all modules using their paths."""
        # Load the submodules of all action modules available
        self.fetch(self.action_modules)
        modules = set(self.action_modules)
        for parent_module in self._modules:
            submodules = self.submodules(parent_module, path=True)
            assert not submodules & modules
            modules |= submodules
        return modules

    @property
    def action_modules(self):
        """Set of all action modules."""
        self._init()
        return self._action_modules

    @property
    def query_modules(self):
        """Set of all query module names without query+ path prefix."""
        return self.submodules('query')

    def submodules(self, name: str, path: bool = False) -> Set[str]:
        """Set of all submodules.

        :param name: The name of the parent module.
        :param path: Whether the path and not the name is returned.
        :return: The names or paths of the submodules.
        """
        if name not in self._modules:
            self.fetch([name])
        submodules = self._modules[name]
        if path:
            # prefix submodules
            submodules = {f'{name}+{mod}' for mod in submodules}
        return submodules

    @property
    def prefix_map(self) -> Dict[str, str]:
        """Mapping of module to its prefix for all modules with a prefix.

        This loads paraminfo for all modules. A copy of the cached
        mapping is returned.
        """
        if not self._prefix_map:
            self._prefix_map = {
                module: prefix
                for module, prefix in self.attributes('prefix').items()
                if prefix
            }
        return self._prefix_map.copy()

    def attributes(self, attribute: str,
                   modules: Optional[set] = None) -> Dict[str, Any]:
        """Mapping of modules with an attribute to the attribute value.

        It will include all modules which have that attribute set, also if that
        attribute is empty or set to False.

        :param attribute: attribute name
        :param modules: modules to include. If None (default), it'll load all
            modules including all submodules using the paths.
        :return: dict using modules as keys
        """
        if modules is None:
            modules = self.module_paths
        self.fetch(modules)

        return {mod: self[mod][attribute]
                for mod in modules if attribute in self[mod]}

    @classproperty
    @deprecated(since='8.4.0')
    def paraminfo_keys(cls) -> FrozenSet[str]:
        """Return module types.

        .. deprecated:: 8.4.0
        """
        return frozenset(['modules'])

    @property
    @deprecated(since='8.4.0')
    def preloaded_modules(self) -> Union[FrozenSet[str], Set[str]]:
        """Return set of preloaded modules.

        .. deprecated:: 8.4.0
        """
        return self._preloaded_modules