"""GeneratorFactory module wich handles pagegenerators options."""
#
# (C) Pywikibot team, 2008-2024
#
# Distributed under the terms of the MIT license.
#
from __future__ import annotations
import itertools
import re
import sys
from datetime import timedelta
from functools import partial
from itertools import zip_longest
from typing import TYPE_CHECKING, Any
import pywikibot
from pywikibot import i18n
from pywikibot.backports import Callable, Iterable, Sequence, removeprefix
from pywikibot.bot import ShowingListOption
from pywikibot.data import api
from pywikibot.exceptions import (
ArgumentDeprecationWarning,
UnknownExtensionError,
)
from pywikibot.pagegenerators._filters import (
CategoryFilterPageGenerator,
ItemClaimFilterPageGenerator,
NamespaceFilterPageGenerator,
QualityFilterPageGenerator,
RedirectFilterPageGenerator,
RegexBodyFilterPageGenerator,
RegexFilterPageGenerator,
SubpageFilterGenerator,
)
from pywikibot.pagegenerators._generators import (
CategorizedPageGenerator,
GoogleSearchPageGenerator,
LanguageLinksPageGenerator,
LiveRCPageGenerator,
LogeventsPageGenerator,
MySQLPageGenerator,
NewimagesPageGenerator,
NewpagesPageGenerator,
PagePilePageGenerator,
PrefixingPageGenerator,
RecentChangesPageGenerator,
SubCategoriesPageGenerator,
SupersetPageGenerator,
TextIOPageGenerator,
UserContributionsGenerator,
WikibaseSearchItemPageGenerator,
WikidataSPARQLPageGenerator,
)
from pywikibot.tools import issue_deprecation_warning, strtobool
from pywikibot.tools.collections import DequeGenerator
from pywikibot.tools.itertools import (
filter_unique,
intersect_generators,
roundrobin_generators,
)
if TYPE_CHECKING:
from typing_extensions import Literal
from pywikibot.site import BaseSite, Namespace
# Type of the page iterables returned by the ``_handle_*`` generator handlers.
HANDLER_GEN_TYPE = Iterable[pywikibot.page.BasePage]
# Type of ``GeneratorFactory.claimfilter_list``: a list of 4-tuples as
# appended by ``GeneratorFactory._onlyif_onlyifnot_handler``.
GEN_FACTORY_CLAIM_TYPE = list[tuple[str, str, dict[str, str], bool]]
# A handler generator, or None when no generator is available.
OPT_GENERATOR_TYPE = HANDLER_GEN_TYPE | None
# This is the function that will be used to de-duplicate page iterators.
# The uniqueness key is built from the values of ``page._cmpkey()``.
_filter_unique_pages = partial(
filter_unique, key=lambda page: '{}:{}:{}'.format(*page._cmpkey()))
[docs]
class GeneratorFactory:
"""Process command line arguments and return appropriate page generator.
This factory is responsible for processing command line arguments
that are used by many scripts and that determine which pages to work on.
.. note:: GeneratorFactory must be instantiated after global
arguments are parsed except if site parameter is given.
"""
def __init__(self, site: BaseSite | None = None,
positional_arg_name: str | None = None,
enabled_options: Iterable[str] | None = None,
disabled_options: Iterable[str] | None = None) -> None:
"""Initializer.
:param site: Site for generator results
:param positional_arg_name: generator to use for positional args,
which do not begin with a hyphen
:param enabled_options: only enable options given by this Iterable.
This is priorized over disabled_options
:param disabled_options: disable these given options and let them
be handled by scripts options handler
"""
self.gens: list[Iterable[pywikibot.page.BasePage]] = []
self._namespaces: list[str] | frozenset[Namespace] = []
self.limit: int | None = None
self.qualityfilter_list: list[int] = []
self.articlefilter_list: list[str] = []
self.articlenotfilter_list: list[str] = []
self.titlefilter_list: list[str] = []
self.titlenotfilter_list: list[str] = []
self.claimfilter_list: GEN_FACTORY_CLAIM_TYPE = []
self.catfilter_list: list[pywikibot.Category] = []
self.intersect = False
self.subpage_max_depth: int | None = None
self.redirectfilter: bool | None = None
self._site = site
self._positional_arg_name = positional_arg_name
self._sparql: str | None = None
self.nopreload = False
self._validate_options(enabled_options, disabled_options)
self.is_preloading: bool | None = None
"""Return whether Page objects are preloaded. You may use this
instance variable after :meth:`getCombinedGenerator` is called
e.g.::
gen_factory = GeneratorFactory()
print(gen_factory.is_preloading) # None
gen = gen_factory.getCombinedGenerator()
print(gen_factory.is_preloading) # True or False
Otherwise the value is undefined and gives None.
.. versionadded:: 7.3
"""
def _validate_options(self,
enable: Iterable[str] | None,
disable: Iterable[str] | None) -> None:
"""Validate option restrictions."""
msg = '{!r} is not a valid pagegenerators option to be '
enable = enable or []
disable = disable or []
self.enabled_options = set(enable)
self.disabled_options = set(disable)
for opt in enable:
if not hasattr(self, '_handle_' + opt):
pywikibot.warning((msg + 'enabled').format(opt))
self.enabled_options.remove(opt)
for opt in disable:
if not hasattr(self, '_handle_' + opt):
pywikibot.warning((msg + 'disabled').format(opt))
self.disabled_options.remove(opt)
if self.enabled_options and self.disabled_options:
pywikibot.warning('Ignoring disabled option because enabled '
'options are set.')
self.disabled_options = set()
@property
def site(self) -> pywikibot.site.BaseSite:
"""Generator site.
The generator site should not be accessed until after the global
arguments have been handled, otherwise the default Site may be changed
by global arguments, which will cause this cached value to be stale.
:return: Site given to initializer, otherwise the default Site at the
time this property is first accessed.
"""
if self._site is None:
self._site = pywikibot.Site()
return self._site
@property
def namespaces(self) -> frozenset[pywikibot.site.Namespace]:
"""List of Namespace parameters.
Converts int or string namespaces to Namespace objects and
change the storage to immutable once it has been accessed.
The resolving and validation of namespace command line arguments
is performed in this method, as it depends on the site property
which is lazy loaded to avoid being cached before the global
arguments are handled.
:return: namespaces selected using arguments
:raises KeyError: a namespace identifier was not resolved
:raises TypeError: a namespace identifier has an inappropriate
type such as NoneType or bool
"""
if isinstance(self._namespaces, list):
self._namespaces = frozenset(
self.site.namespaces.resolve(self._namespaces))
return self._namespaces
[docs]
def getCombinedGenerator(self, # noqa: N802
gen: OPT_GENERATOR_TYPE = None,
preload: bool = False) -> OPT_GENERATOR_TYPE:
"""Return the combination of all accumulated generators.
Only call this after all arguments have been parsed.
.. versionchanged:: 7.3
set the instance variable :attr:`is_preloading` to True or False.
.. versionchanged:: 8.0
if ``limit`` option is set and multiple generators are given,
pages are yieded in a :func:`roundrobin
<tools.itertools.roundrobin_generators>` way.
:param gen: Another generator to be combined with
:param preload: preload pages using PreloadingGenerator
unless self.nopreload is True
"""
if gen:
self.gens.insert(0, gen)
for i, gen_item in enumerate(self.gens):
if self.namespaces:
if (isinstance(gen_item, api.QueryGenerator)
and gen_item.support_namespace()):
gen_item.set_namespace(self.namespaces)
# QueryGenerator does not support namespace param.
else:
self.gens[i] = NamespaceFilterPageGenerator(
gen_item, self.namespaces, self.site)
if self.limit:
try:
gen_item.set_maximum_items(self.limit) # type: ignore[attr-defined] # noqa: E501
except AttributeError:
self.gens[i] = itertools.islice(gen_item, self.limit)
if not self.gens:
if any((self.titlefilter_list,
self.titlenotfilter_list,
self.articlefilter_list,
self.articlenotfilter_list,
self.claimfilter_list,
self.catfilter_list,
self.qualityfilter_list,
self.subpage_max_depth is not None,
self.redirectfilter is not None)):
pywikibot.warning('filter(s) specified but no generators.')
return None
if len(self.gens) == 1:
dupfiltergen = self.gens[0]
if hasattr(self, '_single_gen_filter_unique'):
dupfiltergen = _filter_unique_pages(dupfiltergen)
if self.intersect:
pywikibot.warning(
'"-intersect" ignored as only one generator is specified.')
elif self.intersect:
# By definition no duplicates are possible.
dupfiltergen = intersect_generators(*self.gens)
else:
combine = roundrobin_generators if self.limit else itertools.chain
dupfiltergen = _filter_unique_pages(combine(*self.gens))
# Add on subpage filter generator
if self.subpage_max_depth is not None:
dupfiltergen = SubpageFilterGenerator(
dupfiltergen, self.subpage_max_depth)
if self.redirectfilter is not None:
# Generator expects second parameter true to exclude redirects, but
# our logic is true to assert it is a redirect, false when it isn't
dupfiltergen = RedirectFilterPageGenerator(
dupfiltergen, not self.redirectfilter)
if self.claimfilter_list:
for claim in self.claimfilter_list:
dupfiltergen = ItemClaimFilterPageGenerator(dupfiltergen,
claim[0], claim[1],
claim[2], claim[3])
if self.qualityfilter_list:
dupfiltergen = QualityFilterPageGenerator(
dupfiltergen, self.qualityfilter_list)
if self.titlefilter_list:
dupfiltergen = RegexFilterPageGenerator(
dupfiltergen, self.titlefilter_list)
if self.titlenotfilter_list:
dupfiltergen = RegexFilterPageGenerator(
dupfiltergen, self.titlenotfilter_list, 'none')
if self.catfilter_list:
dupfiltergen = CategoryFilterPageGenerator(
dupfiltergen, self.catfilter_list)
self.is_preloading = not self.nopreload and bool(
preload or self.articlefilter_list or self.articlenotfilter_list)
if self.is_preloading:
if isinstance(dupfiltergen, DequeGenerator):
preloadgen = pywikibot.pagegenerators.DequePreloadingGenerator
else:
preloadgen = pywikibot.pagegenerators.PreloadingGenerator
dupfiltergen = preloadgen(dupfiltergen)
if self.articlefilter_list:
dupfiltergen = RegexBodyFilterPageGenerator(
dupfiltergen, self.articlefilter_list)
if self.articlenotfilter_list:
dupfiltergen = RegexBodyFilterPageGenerator(
dupfiltergen, self.articlenotfilter_list, 'none')
return dupfiltergen
[docs]
def getCategory(self, category: str  # noqa: N802
                ) -> tuple[pywikibot.Category, str | None]:
    """Return Category and start as defined by category.

    The start part is separated from the category name by ``|`` or
    ``#``. The user is prompted if *category* is empty.

    :param category: category name with start parameter
    :return: the Category and the start value (None if not given)
    """
    if not category:
        category = i18n.input('pywikibot-enter-category-name')

    name, _, start_part = category.replace('#', '|').partition('|')
    startfrom: str | None = start_part or None

    # Insert "Category:" before category name to avoid parsing problems in
    # Link.parse() when categoryname contains ":";
    # Part before ":" might be interpreted as an interwiki prefix
    first_part = name.split(':', 1)[0]  # whole word if ":" not present
    if first_part not in self.site.namespaces[14]:
        name = f'{self.site.namespace(14)}:{name}'

    link = pywikibot.Link(name, source=self.site, default_namespace=14)
    return pywikibot.Category(link), startfrom
[docs]
def getCategoryGen(self, category: str, # noqa: N802
recurse: int | bool = False,
content: bool = False,
gen_func: Callable | None = None) -> Any:
"""Return generator based on Category defined by category and gen_func.
:param category: category name with start parameter
:param recurse: if not False or 0, also iterate articles in
subcategories. If an int, limit recursion to this number of
levels. (Example: recurse=1 will iterate articles in first-level
subcats, but no deeper.)
:param content: if True, retrieve the content of the current version
of each page (default False)
"""
if gen_func is None:
raise ValueError('getCategoryGen requires a gen_func argument')
cat, startfrom = self.getCategory(category)
return gen_func(cat,
start=startfrom,
recurse=recurse,
content=content)
@staticmethod
def _parse_log_events(
    logtype: str,
    user: str | None = None,
    start: str | None = None,
    end: str | None = None,
) -> Iterable[pywikibot.page.BasePage] | None:
    """Parse the -logevent argument information.

    .. deprecated:: 9.2
       the *start* parameter as total amount of pages.

    :param logtype: A valid logtype
    :param user: A username associated to the log events. Ignored if
        empty string or None.
    :param start: Timestamp to start listing from. This must be
        convertible into Timestamp matching '%Y%m%d%H%M%S'.
    :param end: Timestamp to end listing at. This must be
        convertible into a Timestamp matching '%Y%m%d%H%M%S'.
    :return: The generator or None if invalid 'start/total' or 'end'
        value.
    """
    def parse_start(
        start: str | None
    ) -> tuple[pywikibot.Timestamp | None, int | None]:
        """Parse start and return (start, total)."""
        if not start:
            return None, None

        # Values of at least 8 characters are treated as timestamps,
        # shorter ones as the deprecated total number.
        if len(start) >= 8:
            return pywikibot.Timestamp.fromtimestampformat(start), None

        # 'user or ""' keeps the literal text "None" out of the hint
        # when no user was given.
        instead = (f'-limit option like "-logevents:{logtype}'
                   f'{"," if user else ""}{user or ""} -limit:{start}"')
        issue_deprecation_warning('-logevents with total argument',
                                  instead,
                                  warning_class=ArgumentDeprecationWarning,
                                  since='9.2.0')
        return None, int(start)

    try:
        start_, total = parse_start(start)
    except ValueError as err:
        pywikibot.error(
            f'{err}. Start parameter has wrong format!')
        return None

    if total is not None and total < 0:
        pywikibot.error(f'Total number of log ({start}) events must be a'
                        ' positive int.')
        return None

    if end is None:
        end_ = None
    else:
        try:
            end_ = pywikibot.Timestamp.fromtimestampformat(end)
        except ValueError as err:
            pywikibot.error(
                f'{err}. End parameter has wrong format!')
            return None

    if start_ or end_:
        pywikibot.info(
            'Fetching log events in range: {} - {}.'
            .format(end or 'beginning of time', start or 'now'))

    # 'user or None', because user might be an empty string when
    # 'foo,,bar' was used.
    return LogeventsPageGenerator(logtype, user or None, total=total,
                                  start=start_, end=end_)
def _handle_filelinks(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-filelinks` argument.

    Prompts for the file name if *value* is empty and prepends
    ``Image:`` unless the value already starts with the site's name
    for namespace 6.

    :param value: file title
    :return: result of ``FilePage.using_pages()`` for that file
    """
    title = value or i18n.input('pywikibot-enter-file-links-processing')
    file_prefix = self.site.namespace(6) + ':'
    if not title.startswith(file_prefix):
        title = 'Image:' + title
    return pywikibot.FilePage(self.site, title).using_pages()
def _handle_linter(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-linter` argument.

    *value* has the form ``categories/lint_from``. The categories part
    may be empty (all categories), one of ``low``, ``medium`` or
    ``high``, a comma separated list of category names, or ``show`` to
    print the available categories and exit.

    :param value: linter option value, may be empty
    :raises UnknownExtensionError: the site has no Linter extension
    :raises AssertionError: an unknown lint category was given
    """
    if not self.site.has_extension('Linter'):
        raise UnknownExtensionError(
            '-linter needs a site with Linter extension.')

    cats = self.site.siteinfo.get('linter')  # Get linter categories.
    valid_cats = [c for _list in cats.values() for c in _list]

    cat, _, from_part = (value or '').partition('/')
    lint_from: str | None = from_part or None

    if cat == 'show':  # Display categories of lint errors.
        _i = ' ' * 4
        _2i = 2 * _i
        parts = ['Available categories of lint errors:\n']
        for prio, members in cats.items():
            parts.append(f'{_i}{prio}\n')
            parts.extend(f'{_2i}{c}\n' for c in members)
        pywikibot.info(''.join(parts))
        sys.exit(0)

    if not cat:
        lint_cats = valid_cats
    elif cat in ['low', 'medium', 'high']:
        lint_cats = cats[cat]
    else:
        lint_cats = cat.split(',')
        # NOTE(review): assert is skipped under ``python -O``; kept as is
        # to preserve the exception type.
        assert set(lint_cats) <= set(valid_cats), \
            f'Invalid category of lint errors: {cat}'

    return self.site.linter_pages(
        lint_categories='|'.join(lint_cats), namespaces=self.namespaces,
        lint_from=lint_from)
def _handle_querypage(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-querypage` argument.

    With a value, return ``site.querypage(value)``. Without a value
    (None), print the available special pages in two columns together
    with the limits and exit.

    :param value: name of the special page or None
    """
    if value is not None:
        return self.site.querypage(value)

    # Display special pages.
    page_param = self.site._paraminfo.parameter('query+querypage',
                                                'page')
    names = sorted(page_param['type'])
    limit = self.site._paraminfo.parameter('query+querypage',
                                           'limit')

    left_column = names[::2]
    right_column = names[1::2]
    max_w = max(len(p) for p in left_column) + 4

    rows = [' {a:<{max_w}}{b}\n'.format(a=a, b=b, max_w=max_w)
            for a, b in zip_longest(left_column, right_column,
                                    fillvalue='')]
    footer = ('\nMaximum number of pages to return is {max} '
              '({highmax} for bots).\n'.format_map(limit))
    pywikibot.info('Available special pages:\n' + ''.join(rows) + footer)
    sys.exit(0)
def _handle_url(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-url` argument.

    :param value: the URL; the user is prompted if it is empty
    :return: a ``TextIOPageGenerator`` for the URL and the factory site
    """
    url = value or pywikibot.input('Please enter the URL:')
    return TextIOPageGenerator(url, site=self.site)
def _handle_unusedfiles(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-unusedfiles` argument.

    :param value: passed as ``total`` to ``site.unusedfiles`` after
        conversion to int; None if empty
    """
    total = _int_none(value)
    return self.site.unusedfiles(total=total)
def _handle_lonelypages(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-lonelypages` argument.

    :param value: passed as ``total`` to ``site.lonelypages`` after
        conversion to int; None if empty
    """
    total = _int_none(value)
    return self.site.lonelypages(total=total)
def _handle_unwatched(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-unwatched` argument.

    :param value: passed as ``total`` to ``site.unwatchedpage`` after
        conversion to int; None if empty
    """
    total = _int_none(value)
    return self.site.unwatchedpage(total=total)
def _handle_wantedpages(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-wantedpages` argument.

    :param value: passed as ``total`` to ``site.wantedpages`` after
        conversion to int; None if empty
    """
    total = _int_none(value)
    return self.site.wantedpages(total=total)
def _handle_wantedfiles(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-wantedfiles` argument.

    :param value: passed as ``total`` to ``site.wantedfiles`` after
        conversion to int; None if empty
    """
    total = _int_none(value)
    return self.site.wantedfiles(total=total)
def _handle_wantedtemplates(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-wantedtemplates` argument.

    :param value: passed as ``total`` to ``site.wantedtemplates`` after
        conversion to int; None if empty
    """
    total = _int_none(value)
    return self.site.wantedtemplates(total=total)
def _handle_wantedcategories(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-wantedcategories` argument.

    :param value: passed as ``total`` to ``site.wantedcategories`` after
        conversion to int; None if empty
    """
    total = _int_none(value)
    return self.site.wantedcategories(total=total)
def _handle_property(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-property` argument.

    The user is prompted for a property name if *value* is empty. If
    the name is ``?`` or is not one of ``site.get_property_names()``,
    the user has to choose one from the list of known names.

    :param value: page property name
    :return: result of ``site.pages_with_property`` for the name
    """
    # Bound unconditionally: it is also needed for the choice below when
    # a given value is not a known property name.
    question = 'Which property name to be used?'
    if not value:
        value = pywikibot.input(question + ' (List [?])')

    pnames = self.site.get_property_names()
    # also use the default by <enter> key
    if value == '?' or value not in pnames:
        _, value = pywikibot.input_choice(question,
                                          ShowingListOption(pnames))
    return self.site.pages_with_property(value)
def _handle_usercontribs(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-usercontribs` argument.

    Also sets ``_single_gen_filter_unique`` so that
    :meth:`getCombinedGenerator` de-duplicates the pages even if this
    is the only generator.

    :param value: passed as first argument to
        ``UserContributionsGenerator``
    """
    contribs = UserContributionsGenerator(
        value, site=self.site, _filter_unique=None)
    self._single_gen_filter_unique = True
    return contribs
def _handle_withoutinterwiki(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-withoutinterwiki` argument.

    :param value: passed as ``total`` to ``site.withoutinterwiki`` after
        conversion to int; None if empty
    """
    total = _int_none(value)
    return self.site.withoutinterwiki(total=total)
def _handle_interwiki(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-interwiki` argument.

    :param value: page title; the user is prompted if it is empty
    :return: a ``LanguageLinksPageGenerator`` for that page
    """
    title = value or i18n.input('pywikibot-enter-page-processing')
    link = pywikibot.Link(title, self.site)
    return LanguageLinksPageGenerator(pywikibot.Page(link))
def _handle_randomredirect(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-randomredirect` argument.

    :param value: passed as ``total`` to ``site.randompages`` after
        conversion to int; None if empty
    """
    # partial workaround for bug T119940
    # to use -namespace/ns with -randomredirect, -ns must be given
    # before -randomredirect
    # otherwise default namespace is 0
    selected = self.namespaces
    return self.site.randompages(total=_int_none(value),
                                 namespaces=selected if selected else 0,
                                 redirects=True)
def _handle_random(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-random` argument.

    :param value: passed as ``total`` to ``site.randompages`` after
        conversion to int; None if empty
    """
    # partial workaround for bug T119940
    # to use -namespace/ns with -random, -ns must be given
    # before -random
    # otherwise default namespace is 0
    selected = self.namespaces
    return self.site.randompages(total=_int_none(value),
                                 namespaces=selected if selected else 0)
def _handle_recentchanges(self, value: str) -> HANDLER_GEN_TYPE:
"""Handle `-recentchanges` argument."""
rcstart = None
rcend = None
rctag = None
total = None
params = value.split(',') if value else []
if params and not params[0].isdigit():
rctag = params.pop(0)
if len(params) > 2:
raise ValueError('More than two parameters passed.')
if len(params) == 2:
offset = float(params[0])
duration = float(params[1])
if offset < 0 or duration < 0:
raise ValueError('Negative valued parameters passed.')
ts_time = self.site.server_time()
rcstart = ts_time - timedelta(minutes=offset)
rcend = rcstart - timedelta(minutes=duration)
elif len(params) == 1:
total = int(params[0])
self._single_gen_filter_unique = True
return RecentChangesPageGenerator(
namespaces=self.namespaces, total=total, start=rcstart, end=rcend,
site=self.site, tag=rctag)
def _handle_liverecentchanges(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-liverecentchanges` argument.

    Preloading is switched off for the whole factory.

    :param value: passed as ``total`` to ``LiveRCPageGenerator`` after
        conversion to int; None if empty
    """
    self.nopreload = True
    total = _int_none(value)
    return LiveRCPageGenerator(site=self.site, total=total)
def _handle_file(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-file` argument.

    :param value: local file name; the user is prompted if it is empty
    :return: a ``TextIOPageGenerator`` for the file and the factory site
    """
    filename = value or pywikibot.input(
        'Please enter the local file name:')
    return TextIOPageGenerator(filename, site=self.site)
def _handle_namespaces(self, value: str) -> Literal[True]:
"""Handle `-namespaces` argument."""
if isinstance(self._namespaces, frozenset):
raise RuntimeError('-namespace/ns option must be provided before '
'-newpages/-random/-randomredirect/-linter')
if not value:
value = pywikibot.input('What namespace are you filtering on?')
not_key = 'not:'
if value.startswith(not_key):
value = removeprefix(value, not_key)
resolve = self.site.namespaces.resolve
not_ns = set(resolve(value.split(',')))
if not self._namespaces:
self._namespaces = list(
set(self.site.namespaces.values()) - not_ns)
else:
self._namespaces = list(
set(resolve(self._namespaces)) - not_ns)
else:
self._namespaces += value.split(',')
return True
# ``-ns`` and ``-namespace`` are handled exactly like ``-namespaces``.
_handle_ns = _handle_namespaces
_handle_namespace = _handle_namespaces
def _handle_limit(self, value: str) -> Literal[True]:
    """Handle `-limit` argument.

    :param value: the limit, stored as int in :attr:`limit`; the user
        is prompted if it is empty
    """
    answer = value or pywikibot.input('What is the limit value?')
    self.limit = _int_none(answer)
    return True
def _handle_category(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-category` argument.

    :param value: category name with optional start parameter
    :return: non-recursive ``CategorizedPageGenerator`` result
    """
    make_gen = CategorizedPageGenerator
    return self.getCategoryGen(value, recurse=False, gen_func=make_gen)
# ``-cat`` is handled exactly like ``-category``.
_handle_cat = _handle_category
def _handle_catr(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-catr` argument.

    :param value: category name with optional start parameter
    :return: recursive ``CategorizedPageGenerator`` result
    """
    make_gen = CategorizedPageGenerator
    return self.getCategoryGen(value, recurse=True, gen_func=make_gen)
def _handle_subcats(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-subcats` argument.

    :param value: category name with optional start parameter
    :return: non-recursive ``SubCategoriesPageGenerator`` result
    """
    make_gen = SubCategoriesPageGenerator
    return self.getCategoryGen(value, recurse=False, gen_func=make_gen)
def _handle_subcatsr(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-subcatsr` argument.

    :param value: category name with optional start parameter
    :return: recursive ``SubCategoriesPageGenerator`` result
    """
    make_gen = SubCategoriesPageGenerator
    return self.getCategoryGen(value, recurse=True, gen_func=make_gen)
def _handle_catfilter(self, value: str) -> Literal[True]:
    """Handle `-catfilter` argument.

    :param value: category name; the resulting Category is appended to
        :attr:`catfilter_list`
    """
    category = self.getCategory(value)[0]
    self.catfilter_list.append(category)
    return True
def _handle_page(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-page` argument.

    :param value: page title; the user is prompted if it is empty
    :return: a list holding the single Page
    """
    title = value or pywikibot.input('What page do you want to use?')
    link = pywikibot.Link(title, self.site)
    return [pywikibot.Page(link)]
def _handle_pageid(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-pageid` argument.

    :param value: passed to ``site.load_pages_from_pageids``; the user
        is prompted if it is empty
    """
    pageids = value or pywikibot.input('What pageid do you want to use?')
    return self.site.load_pages_from_pageids(pageids)
def _handle_uncatfiles(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-uncatfiles` argument.

    :param value: ignored
    :return: result of ``site.uncategorizedimages()``
    """
    site = self.site
    return site.uncategorizedimages()
def _handle_uncatcat(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-uncatcat` argument.

    :param value: ignored
    :return: result of ``site.uncategorizedcategories()``
    """
    site = self.site
    return site.uncategorizedcategories()
def _handle_uncat(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-uncat` argument.

    :param value: ignored
    :return: result of ``site.uncategorizedpages()``
    """
    site = self.site
    return site.uncategorizedpages()
def _handle_ref(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-ref` argument.

    :param value: page title; the user is prompted if it is empty
    :return: result of ``getReferences()`` of that page
    """
    title = value or pywikibot.input(
        'Links to which page should be processed?')
    link = pywikibot.Link(title, self.site)
    return pywikibot.Page(link).getReferences()
def _handle_links(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-links` argument.

    :param value: page title; the user is prompted if it is empty
    :return: result of ``linkedPages()`` of that page
    """
    title = value or pywikibot.input(
        'Links from which page should be processed?')
    link = pywikibot.Link(title, self.site)
    return pywikibot.Page(link).linkedPages()
def _handle_weblink(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-weblink` argument.

    :param value: passed to ``site.exturlusage``; the user is prompted
        if it is empty
    """
    url = value or pywikibot.input(
        'Pages with which weblink should be processed?')
    return self.site.exturlusage(url)
def _handle_transcludes(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-transcludes` argument.

    :param value: page title, resolved with default namespace 10; the
        user is prompted if it is empty
    :return: result of ``getReferences(only_template_inclusion=True)``
        of that page
    """
    title = value or pywikibot.input(
        'Pages that transclude which page should be processed?')
    link = pywikibot.Link(title, default_namespace=10, source=self.site)
    return pywikibot.Page(link).getReferences(only_template_inclusion=True)
def _handle_start(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-start` argument.

    :param value: title to start at; ``!`` is used if it is empty
    :return: result of ``site.allpages`` for the title and namespace of
        the start link, with ``filterredir=False``
    """
    first = pywikibot.Link(value or '!', self.site)
    return self.site.allpages(
        start=first.title, namespace=first.namespace, filterredir=False)
def _handle_prefixindex(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-prefixindex` argument.

    :param value: passed as ``prefix`` to ``PrefixingPageGenerator``;
        the user is prompted if it is empty
    """
    prefix = value or pywikibot.input(
        'What page names are you looking for?')
    return PrefixingPageGenerator(prefix=prefix, site=self.site)
def _handle_newimages(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-newimages` argument.

    :param value: passed as ``total`` to ``NewimagesPageGenerator``
        after conversion to int; None if empty
    """
    total = _int_none(value)
    return NewimagesPageGenerator(total=total, site=self.site)
def _handle_newpages(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-newpages` argument.

    :param value: passed as ``total`` to ``NewpagesPageGenerator`` after
        conversion to int; None if empty
    """
    # partial workaround for bug T69249
    # to use -namespace/ns with -newpages, -ns must be given
    # before -newpages
    # otherwise default namespace is 0
    selected = self.namespaces
    return NewpagesPageGenerator(
        namespaces=selected if selected else 0,
        total=_int_none(value),
        site=self.site)
def _handle_unconnectedpages(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-unconnectedpages` argument.

    :param value: passed as ``total`` to ``site.unconnected_pages``
        after conversion to int; None if empty
    """
    total = _int_none(value)
    return self.site.unconnected_pages(total=total)
def _handle_imagesused(
    self,
    value: str,
) -> Iterable[pywikibot.FilePage]:
    """Handle `-imagesused` argument.

    :param value: page title; the user is prompted if it is empty
    :return: result of ``imagelinks()`` of that page
    """
    title = value or pywikibot.input(
        'Images on which page should be processed?')
    link = pywikibot.Link(title, self.site)
    return pywikibot.Page(link).imagelinks()
def _handle_searchitem(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-searchitem` argument.

    The text to search for is the part after the last colon. The
    language is the part before the colon, but only if the value
    contains exactly one colon.

    :param value: ``[language:]text``; the user is prompted if empty
    """
    if not value:
        value = pywikibot.input('Text to look for:')

    *leading, text = value.split(':')
    # Exactly one leading part means the value was "language:text".
    lang = leading[0] if len(leading) == 1 else None
    return WikibaseSearchItemPageGenerator(
        text, language=lang, site=self.site)
def _handle_search(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-search` argument.

    :param value: passed to ``site.search``; the user is prompted if it
        is empty
    """
    query = value or pywikibot.input('What do you want to search for?')
    # In order to be useful, all namespaces are required
    return self.site.search(query, namespaces=[])
@staticmethod
def _handle_google(value: str) -> HANDLER_GEN_TYPE:
    """Handle `-google` argument.

    :param value: passed unchanged to ``GoogleSearchPageGenerator``
    """
    search_gen = GoogleSearchPageGenerator(value)
    return search_gen
def _handle_titleregex(self, value: str) -> Literal[True]:
"""Handle `-titleregex` argument."""
if not value:
value = pywikibot.input(
'What page names are you looking for?')
self.titlefilter_list.append(value)
return True
def _handle_titleregexnot(self, value: str) -> Literal[True]:
"""Handle `-titleregexnot` argument."""
if not value:
value = pywikibot.input(
'All pages except which ones?')
self.titlenotfilter_list.append(value)
return True
def _handle_grep(self, value: str) -> Literal[True]:
"""Handle `-grep` argument."""
if not value:
value = pywikibot.input('Which pattern do you want to grep?')
self.articlefilter_list.append(value)
return True
def _handle_grepnot(self, value: str) -> Literal[True]:
"""Handle `-grepnot` argument."""
if not value:
value = pywikibot.input('Which pattern do you want to skip?')
self.articlenotfilter_list.append(value)
return True
def _handle_ql(self, value: str) -> Literal[True]:
    """Handle `-ql` argument.

    The levels are stored in :attr:`qualityfilter_list`. If a level is
    below 0 or above 4, a warning listing ``site.proofread_levels`` is
    printed; the given levels are stored nevertheless.

    :param value: comma separated list of integer quality levels
    :raises UnknownExtensionError: the site has no ProofreadPage
        extension
    """
    if not self.site.has_extension('ProofreadPage'):
        raise UnknownExtensionError(
            'Ql filtering needs a site with ProofreadPage extension.')

    levels = [int(item) for item in value.split(',')]
    in_range = min(levels) >= 0 and max(levels) <= 4
    if not in_range:  # Invalid input ql.
        valid_ql = ', '.join(
            f'{level}: {name}'
            for level, name in self.site.proofread_levels.items())
        pywikibot.warning(
            f'Acceptable values for -ql are:\n {valid_ql}')

    self.qualityfilter_list = levels
    return True
def _handle_onlyif(self, value: str) -> Literal[True]:
"""Handle `-onlyif` argument."""
return self._onlyif_onlyifnot_handler(value, False)
def _handle_onlyifnot(self, value: str) -> Literal[True]:
"""Handle `-onlyifnot` argument."""
return self._onlyif_onlyifnot_handler(value, True)
def _onlyif_onlyifnot_handler(self, value: str, ifnot: bool
) -> Literal[True]:
"""Handle `-onlyif` and `-onlyifnot` arguments."""
if not value:
value = pywikibot.input('Which claim do you want to filter?')
p = re.compile(r'(?<!\\),') # Match "," only if there no "\" before
temp = [] # Array to store split argument
for arg in p.split(value):
key, value = arg.replace(r'\,', ',').split('=', 1)
temp.append((key, value))
self.claimfilter_list.append(
(temp[0][0], temp[0][1], dict(temp[1:]), ifnot))
return True
def _handle_sparqlendpoint(self, value: str) -> Literal[True]:
"""Handle `-sparqlendpoint` argument."""
if not value:
value = pywikibot.input('SPARQL endpoint:')
self._sparql = value
return True
def _handle_sparql(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-sparql` argument.

    :param value: query passed to ``WikidataSPARQLPageGenerator``
        together with the stored endpoint; prompted for if empty
    """
    query = value or pywikibot.input('SPARQL query:')
    return WikidataSPARQLPageGenerator(
        query, site=self.site, endpoint=self._sparql)
def _handle_mysqlquery(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-mysqlquery` argument.

    :param value: query passed to ``MySQLPageGenerator``; the user is
        prompted if it is empty
    """
    query = value or pywikibot.input('Mysql query string:')
    return MySQLPageGenerator(query, site=self.site)
def _handle_supersetquery(self, value: str) -> HANDLER_GEN_TYPE:
    """Handle `-supersetquery` argument.

    :param value: query passed to ``SupersetPageGenerator``; the user
        is prompted if it is empty
    """
    query = value or pywikibot.input('Superset SQL query string:')
    return SupersetPageGenerator(query, site=self.site)
def _handle_intersect(self, value: str) -> Literal[True]:
"""Handle `-intersect` argument."""
self.intersect = True
return True
def _handle_subpage(self, value: str) -> Literal[True]:
"""Handle `-subpage` argument."""
if not value:
value = pywikibot.input(
'Maximum subpage depth:')
self.subpage_max_depth = int(value)
return True
def _handle_logevents(self, value: str) -> HANDLER_GEN_TYPE | None:
    """Handle `-logevents` argument.

    :param value: comma separated parameters which are passed
        positionally to :meth:`_parse_log_events`; the first one is
        the log type
    :raises NotImplementedError: the log type is not one of
        ``site.logtypes``
    """
    params = value.split(',')
    logtype = params[0]
    if logtype not in self.site.logtypes:
        raise NotImplementedError(
            f'Invalid -logevents parameter "{logtype}"')
    return self._parse_log_events(*params)
def _handle_redirect(self, value: str) -> Literal[True]:
    """Handle `-redirect` argument.

    .. versionadded:: 8.5

    :param value: truth value parsed by ``strtobool`` and stored in
        :attr:`redirectfilter`; an empty value counts as ``true``
    """
    # True by default
    self.redirectfilter = strtobool(value if value else 'true')
    return True
def _handle_pagepile(self, value: str) -> HANDLER_GEN_TYPE:
"""Handle `-pagepile` argument.
.. versionadded:: 9.0
"""
if not value.isnumeric():
raise ValueError(
f'PagePile id must be an int. It was given "{value}"')
return PagePilePageGenerator(int(value))
[docs]
def handle_args(self, args: Iterable[str]) -> list[str]:
"""Handle command line arguments and return the rest as a list.
.. versionadded:: 6.0
.. versionchanged:: 7.3
Prioritize -namespaces options to solve problems with several
generators like -newpages/-random/-randomredirect/-linter
"""
ordered_args = [arg for arg in args
if arg.startswith(('-ns', '-namespace'))]
ordered_args += [arg for arg in args
if not arg.startswith(('-ns', '-namespace'))]
return [arg for arg in ordered_args if not self.handle_arg(arg)]
[docs]
def handle_arg(self, arg: str) -> bool:
"""Parse one argument at a time.
If it is recognized as an argument that specifies a generator, a
generator is created and added to the accumulation list, and the
function returns true. Otherwise, it returns false, so that caller
can try parsing the argument. Call getCombinedGenerator() after all
arguments have been parsed to get the final output generator.
.. versionadded:: 6.0
renamed from ``handleArg``
:param arg: Pywikibot argument consisting of -name:value
:return: True if the argument supplied was recognised by the factory
"""
value: str | None = None
if not arg.startswith('-') and self._positional_arg_name:
value = arg
arg = '-' + self._positional_arg_name
else:
arg, _, value = arg.partition(':')
if not value:
value = None
opt = arg[1:]
if opt in self.disabled_options:
return False
if self.enabled_options and opt not in self.enabled_options:
return False
handler = getattr(self, '_handle_' + opt, None)
if not handler:
return False
handler_result = handler(value)
if isinstance(handler_result, bool):
return handler_result
if handler_result:
self.gens.append(handler_result)
return True
return False
def _int_none(v: str | None) -> int | None:
"""Return None if v is None or '' else return int(v)."""
return None if not v else int(v)