Source code for pywikibot.data.api._paraminfo

"""Object representing API parameter information."""
#
# (C) Pywikibot team, 2014-2024
#
# Distributed under the terms of the MIT license.
#
from __future__ import annotations

from collections.abc import Container, Sized
from typing import Any

import pywikibot
from pywikibot import config
from pywikibot.backports import Iterable, batched
from pywikibot.tools import classproperty, deprecated, remove_last_args


__all__ = ['ParamInfo']



[docs]
class ParamInfo(Sized, Container):

    """API parameter information data object.

    Provides cache aware fetching of parameter information.

    .. seealso:: :api:`Parameter information`
    """

    # Module names fetch() tolerates besides the action modules while the
    # query submodules are not yet known.
    root_modules = frozenset(['main'])
    # Modules which are always preloaded; __init__ extends them with the
    # modules supplied by the caller.
    init_modules = frozenset(['main', 'paraminfo'])
    # Parameters of the 'query' module whose submodules are collected
    # by _init().
    param_modules = ('list', 'meta', 'prop')

    @remove_last_args(['modules_only_mode'])
    def __init__(self,
                 site,
                 preloaded_modules: set[str] | None = None) -> None:
        """Set up an empty parameter information cache for a site.

        Nothing is requested from the API here; the caches are filled
        lazily by the fetching methods.

        .. deprecated:: 8.4
           the *modules_only_mode* parameter

        :param site: the site whose API is asked for parameter
            information
        :param preloaded_modules: API modules to preload
        """
        # Start from the class-wide defaults and add the caller's wishes.
        preload = self.init_modules
        if preloaded_modules:
            preload = preload | set(preloaded_modules)
        self._preloaded_modules = preload

        self.site = site

        # Raw server responses, keyed by module path.
        self._paraminfo = {}

        # Derived data which is computed on demand.
        self._prefixes = {}
        self._prefix_map = {}
        self._with_limits = None

        # Top level (action) modules.
        self._action_modules = frozenset()
        # Submodules per parent module; filled in _init() and enlarged
        # while fetching.
        self._modules = {}
        self._limit = None

    def _add_submodules(self, name: str,
                        modules: set[str] | dict[str, str]) -> None:
        """Add the modules to the internal cache."""
        assert '+' not in name
        if name == 'main':
            # The main module behaves differently as it has no prefix
            if self._action_modules:
                assert modules == self._action_modules
            else:
                self._action_modules = modules
        elif name in self._modules:
            # update required to updates from dict and set
            self._modules[name].update(modules)
        else:
            self._modules[name] = modules

    def _init(self):
        assert ('query' in self._modules) is ('main' in self._paraminfo)

        # Skip if ParamInfo is already initialized
        if 'query' in self._modules:
            return

        # Assume that it will be desirable to prefetch 'query'
        self._preloaded_modules |= {'query'}

        self._fetch(self._preloaded_modules)

        main_modules_param = self.parameter('main', 'action')
        assert main_modules_param
        assert 'type' in main_modules_param
        assert isinstance(main_modules_param['type'], list)
        assert self._action_modules == set(main_modules_param['type'])
        assert 'query' in self._modules
        assert 'query' in self._paraminfo

        # Retrieve all query submodules
        self._limit = 50
        for param in self.param_modules:
            query_modules_param = self.parameter('query', param)
            self._limit = min(query_modules_param['limit'], self._limit)
            self._add_submodules('query', query_modules_param['submodules'])

    @staticmethod
    def _modules_to_set(modules: Iterable | str) -> set[str]:
        """Return modules as a set."""
        if isinstance(modules, str):
            return set(modules.split('|'))
        return set(modules)


[docs]
    def fetch(self, modules: Iterable | str) -> None:
        """Fetch paraminfo for multiple modules.

        No exception is raised when paraminfo for a module does not
        exist. ``paraminfo[module]`` to cause an exception if a module
        does not exist.

        :param modules: API modules to load
        """
        if 'main' not in self._paraminfo:
            # The first request should be 'paraminfo', so that
            # query modules can be prefixed with 'query+'
            self._init()

        modules = self._modules_to_set(modules)

        if self._action_modules:
            # The query module may be added before the action modules have been
            if 'query' in self._modules:
                # It does fetch() while initializing, and this method can't be
                # called before it's initialized.
                modules = self._normalize_modules(modules)
            else:
                # We do know the valid action modules and require a subset
                assert not modules - self._action_modules - self.root_modules

        self._fetch(modules)


    def _fetch(self, modules: set | frozenset) -> None:
        """
        Fetch paraminfo for multiple modules without initializing beforehand.

        :param modules: API modules to load and which haven't been loaded yet.
        """
        def module_generator():
            """A generator yielding batches of modules."""
            # T340617: self._limit is not set for the first modules
            # which is frozenset({'paraminfo', 'query', 'main'})
            for batch in batched(sorted(modules), self._limit or 50):
                for failed_module in failed_modules:
                    yield [failed_module]
                failed_modules.clear()
                yield list(batch)

        modules -= set(self._paraminfo)
        if not modules:
            return

        assert 'query' in self._modules or 'paraminfo' not in self._paraminfo

        # If something went wrong in a batch it can add each module to the
        # batch and the generator will on the next iteration yield each module
        # separately
        failed_modules = []

        # This can be further optimised, by grouping them in more stable
        # subsets, which are unlikely to change. i.e. first request core
        # modules which have been a stable part of the API for a long time.
        # Also detecting extension based modules may help.
        for module_batch in module_generator():
            params = {
                'action': 'paraminfo',
                'modules': module_batch,
            }

            # Request need ParamInfo to determine use_get
            request = self.site._request(expiry=config.API_config_expiry,
                                         use_get=True,
                                         parameters=params)
            result = request.submit()

            normalized_result = self.normalize_paraminfo(result)
            for path in list(normalized_result):
                if normalized_result[path] is False:
                    del normalized_result[path]

            # Sometimes the name/path of the module is not actually the name
            # which was requested, so we need to manually determine which
            # (wrongly named) module uses which actual name. See also T105478
            missing_modules = [m for m in module_batch
                               if m not in normalized_result]
            if len(missing_modules) == 1 and len(normalized_result) == 1:
                # Okay it's possible to recover
                normalized_result = next(iter(normalized_result.values()))
                pywikibot.warning('The module "{0[name]}" ("{0[path]}") '
                                  'was returned as path even though "{1}" '
                                  'was requested'.format(normalized_result,
                                                         missing_modules[0]))
                normalized_result['path'] = missing_modules[0]
                normalized_result['name'] = missing_modules[0].rsplit('+')[0]
                normalized_result = {missing_modules[0]: normalized_result}
            elif len(module_batch) > 1 and missing_modules:
                # Rerequest the missing ones separately
                pywikibot.log(f'Inconsistency in batch "{missing_modules}";'
                              ' rerequest separately')
                failed_modules.extend(missing_modules)

            # Remove all modules which weren't requested, we can't be sure that
            # they are valid
            for path in list(normalized_result):
                if path not in module_batch:
                    del normalized_result[path]

            self._paraminfo.update(normalized_result)
            for mod in normalized_result.values():
                self._generate_submodules(mod['path'])

    def _generate_submodules(self, module) -> None:
        """Check and generate submodules for the given module."""
        parameters = self._paraminfo[module].get('parameters', [])
        submodules = set()

        # This is supplying submodules even if they aren't submodules
        # of the given module so skip those
        for param in parameters:
            if module == 'main' and param['name'] == 'format' \
               or 'submodules' not in param:
                continue

            for child, submodule in param['submodules'].items():
                if '+' in submodule:
                    parent = submodule.rsplit('+', 1)[0]
                else:
                    parent = 'main'
                if parent == module:
                    submodules.add(child)

        if submodules:
            self._add_submodules(module, submodules)

        if module == 'query':
            # Verify that submodules from generator are just a subset of the
            # prop/list/meta modules.
            for param in parameters:
                if param['name'] == 'generator':
                    break
            else:
                raise RuntimeError(
                    "'query' module has no 'generator' parameter")

            assert param['name'] == 'generator' \
                and submodules >= set(param['type'])

    def _normalize_modules(self, modules) -> set:
        """Add query+ to any query module name not also in action modules."""
        # Users will supply the wrong type, and expect it to work.
        modules = self._modules_to_set(modules)

        assert self._action_modules

        return {'query+' + mod
                if '+' not in mod and mod in self.query_modules
                and mod not in self._action_modules
                else mod
                for mod in modules}


[docs]
    def normalize_modules(self, modules) -> set:
        """Convert the modules into module paths.

        Add query+ to any query module name not also in action modules.

        :return: The modules converted into a module paths
        """
        self._init()
        return self._normalize_modules(modules)



[docs]
    @staticmethod
    def normalize_paraminfo(data: dict[str, Any]) -> dict[str, Any]:
        """Convert API JSON into a new data structure with path as key.

        For duplicate paths, the value will be False.

        .. versionchanged:: 8.4
           ``normalize_paraminfo`` became a staticmethod.
        """
        result_data = {}
        modules_data = data['paraminfo'].get('modules', [])
        for mod_data in modules_data:
            if 'missing' in mod_data:
                continue

            path = mod_data['path']
            if path not in result_data:
                result_data[path] = mod_data
            elif result_data[path] is not False:
                # Only warn first time
                result_data[path] = False
                pywikibot.warning(f'Path "{path}" is ambiguous.')
            else:
                pywikibot.log(f'Found another path "{path}"')

        return result_data


    def __getitem__(self, key):
        """Return a paraminfo module for the module path, caching it.

        Use the module path, such as 'query+x', to obtain the paraminfo
        for submodule 'x' in the query module.

        If the key does not include a '+' and is not present in the top
        level of the API, it will fallback to looking for the key
        'query+x'.
        """
        self.fetch({key})
        if key in self._paraminfo:
            return self._paraminfo[key]
        if '+' not in key:
            return self._paraminfo['query+' + key]
        raise KeyError(key)

    def __contains__(self, key) -> bool:
        """Return whether the key is valid."""
        try:
            self[key]
            return True
        except KeyError:
            return False

    def __len__(self) -> int:
        """Return number of cached modules."""
        return len(self._paraminfo)


[docs]
    def parameter(
        self,
        module: str,
        param_name: str
    ) -> dict[str, Any] | None:
        """Get details about one modules parameter.

        Returns None if the parameter does not exist.

        :param module: API module name
        :param param_name: parameter name in the module
        :return: metadata that describes how the parameter may be used
        """
        try:
            module = self[module]
        except KeyError:
            raise ValueError(f"paraminfo for '{module}' not loaded")

        try:
            params = module['parameters']
        except KeyError:
            pywikibot.warning(f"module '{module}' has no parameters")
            return None

        param_data = [param for param in params if param['name'] == param_name]

        if not param_data:
            return None

        if len(param_data) != 1:
            raise RuntimeError(f'parameter data length is eiter empty or not '
                               f'unique.\n{param_data}')
        return param_data[0]


    @property
    def module_paths(self):
        """Set of all modules using their paths."""
        # Load the submodules of all action modules available
        self.fetch(self.action_modules)
        modules = set(self.action_modules)
        for parent_module in self._modules:
            submodules = self.submodules(parent_module, path=True)
            assert not submodules & modules
            modules |= submodules
        return modules

    @property
    def action_modules(self):
        """Set of all action modules."""
        self._init()
        return self._action_modules

    @property
    def query_modules(self):
        """Set of all query module names without query+ path prefix."""
        return self.submodules('query')


[docs]
    def submodules(self, name: str, path: bool = False) -> set[str]:
        """Set of all submodules.

        :param name: The name of the parent module.
        :param path: Whether the path and not the name is returned.
        :return: The names or paths of the submodules.
        """
        if name not in self._modules:
            self.fetch([name])
        submodules = self._modules[name]
        if path:
            # prefix submodules
            submodules = {f'{name}+{mod}' for mod in submodules}
        return submodules


    @property
    def prefix_map(self) -> dict[str, str]:
        """Mapping of module to its prefix for all modules with a prefix.

        This loads paraminfo for all modules.
        """
        if not self._prefix_map:
            self._prefix_map = {
                module: prefix
                for module, prefix in self.attributes('prefix').items()
                if prefix
            }
        return self._prefix_map.copy()


[docs]
    def attributes(self, attribute: str,
                   modules: set | None = None) -> dict[str, Any]:
        """Mapping of modules with an attribute to the attribute value.

        It will include all modules which have that attribute set, also
        if that attribute is empty or set to False.

        :param attribute: attribute name
        :param modules: modules to include. If None (default), it'll
            load all modules including all submodules using the paths.
        :return: dict using modules as keys
        """
        if modules is None:
            modules = self.module_paths
        self.fetch(modules)

        return {mod: self[mod][attribute]
                for mod in modules if attribute in self[mod]}


    @classproperty
    @deprecated(since='8.4.0')
    def paraminfo_keys(cls) -> frozenset[str]:
        """Return module types.

        The result always consists of the single entry ``'modules'``.

        .. deprecated:: 8.4
        """
        return frozenset(('modules',))

    @property
    @deprecated(since='8.4.0')
    def preloaded_modules(self) -> frozenset[str] | set[str]:
        """Return the modules which are loaded during initialization.

        .. deprecated:: 8.4
        """
        preloaded = self._preloaded_modules
        return preloaded