Source code for page._toolforge
"""Object representing interface to toolforge tools.
.. versionadded:: 7.7
"""
#
# (C) Pywikibot team, 2022-2023
#
# Distributed under the terms of the MIT license.
#
from __future__ import annotations
import collections
import re
import pywikibot
from pywikibot import config
[docs]
class WikiBlameMixin:

    """Page mixin for main authorship.

    .. versionadded:: 7.7
    """

    #: Supported wikipedia site codes
    WIKIBLAME_CODES = 'als', 'bar', 'de', 'en', 'it', 'nds', 'sco'

    def _check_wh_supported(self) -> None:
        """Check if WikiHistory is supported.

        The checks run in this order: site family, site code,
        namespace, page existence. The first failing check raises.

        :raise NotImplementedError: the site family is not wikipedia,
            the site code is not in :attr:`WIKIBLAME_CODES`, or the
            page is not in the main namespace
        :raise pywikibot.exceptions.NoPageError: The page does not exist
        """
        if self.site.family.name != 'wikipedia':
            raise NotImplementedError(
                'main_authors method is implemented for wikipedia family only')

        if self.site.code not in self.WIKIBLAME_CODES:
            raise NotImplementedError(
                'main_authors method is not implemented for wikipedia:{}'
                .format(self.site.code))

        if self.namespace() != pywikibot.site.Namespace.MAIN:
            raise NotImplementedError(
                'main_authors method is implemented for main namespace only')

        if not self.exists():
            raise pywikibot.exceptions.NoPageError(self)

    def main_authors(self, *,
                     onlynew: bool | None = None) -> collections.Counter:
        """Retrieve the 5 topmost main authors of an article.

        This method uses WikiHistory to retrieve the text based main
        authorship.

        Sample:

        >>> import pywikibot
        >>> site = pywikibot.Site('wikipedia:nds')
        >>> page = pywikibot.Page(site, 'Python (Programmeerspraak)')
        >>> auth = page.main_authors(onlynew=False)
        >>> auth
        Counter({'RebeccaBreu': 99, 'Slomox': 1})

        .. note:: Only implemented for main namespace pages.
        .. note:: Only wikipedias of :attr:`WIKIBLAME_CODES` are supported.
        .. seealso::
           - https://wikihistory.toolforge.org
           - https://de.wikipedia.org/wiki/Wikipedia:Technik/Cloud/wikihistory

        :param onlynew: If False, use the cached values. If True,
            calculate the Counter data which can take some time; it may
            fail with TimeoutError after ``config.max_retries``. If None
            it calculates new data like for True but uses data from
            cache if new data cannot be calculated in meantime.
        :return: Percentage share of authorship for each username, as
            parsed from the ``percent`` value of the WikiHistory result
        :raise NotImplementedError: unsupported site or unsupported namespace
        :raise pywikibot.exceptions.NoPageError: The page does not exist
        :raise pywikibot.exceptions.TimeoutError: Maximum retries exceeded
        """
        baseurl = 'https://wikihistory.toolforge.org'
        # Captures the linked user name and the integer in front of the
        # '&' that follows the opening parenthesis.
        pattern = (r'><bdi>(?P<author>.+?)</bdi></a>\s'
                   r'\((?P<percent>\d{1,3})&')

        self._check_wh_supported()
        url = baseurl + '/wiki/getauthors.php?wiki={}wiki&page_id={}'.format(
            self.site.code, self.pageid)
        if onlynew:
            url += '&onlynew=1'

        max_retries = config.max_retries
        for current_retries in range(max_retries):
            r = pywikibot.comms.http.fetch(url)
            if r.status_code != 200:
                r.raise_for_status()

            if 'Timeout' not in r.text:  # window.setTimeout in result
                return collections.Counter(
                    {user: int(cnt)
                     for user, cnt in re.findall(pattern, r.text)})

            # No further request follows the last attempt, so do not
            # announce a retry or sleep the longest back-off delay just
            # to raise TimeoutError afterwards.
            if current_retries >= max_retries - 1:
                break

            # Exponential back-off: the delay doubles with each attempt.
            delay = config.retry_wait * 2 ** current_retries
            pywikibot.warning('WikiHistory timeout.\n'
                              'Waiting {:.1f} seconds before retrying.'
                              .format(delay))
            pywikibot.sleep(delay)

            # With onlynew=None the final attempt is made with the
            # onlynew flag added to the request.
            if onlynew is None and current_retries >= max_retries - 2:
                url += '&onlynew=1'

        raise pywikibot.exceptions.TimeoutError(
            'Maximum retries attempted without success.')