Source code for page._toolforge

"""Object representing interface to toolforge tools.

.. versionadded:: 7.7
"""
#
# (C) Pywikibot team, 2022-2025
#
# Distributed under the terms of the MIT license.
#
from __future__ import annotations

import collections
import re
from http import HTTPStatus
from warnings import warn

import pywikibot
from pywikibot.tools import deprecated, deprecated_args, remove_last_args


class WikiBlameMixin:

    """Page mixin for main authorship.

    .. versionadded:: 7.7
    """

    #: Supported wikipedia site codes
    WIKIBLAME_CODES = 'als', 'bar', 'de', 'en', 'it', 'nds', 'sco'

    def _check_wh_supported(self) -> None:
        """Check if WikiHistory is supported for this page.

        The checks run in this order: site family, site code, namespace
        and finally page existence.

        :raise NotImplementedError: the site family is not 'wikipedia',
            the site code is not in :attr:`WIKIBLAME_CODES` or the
            namespace number is not one of 0, 4, 10, 12, 14 or 100.
        :raise NoPageError: The page does not exist.
        """
        if self.site.family.name != 'wikipedia':
            raise NotImplementedError(
                'main_authors method is implemented for wikipedia family only')

        if (code := self.site.code) not in self.WIKIBLAME_CODES:
            raise NotImplementedError(
                f'main_authors method is not implemented for wikipedia:{code}')

        if (ns := self.namespace()) not in (0, 4, 10, 12, 14, 100):
            raise NotImplementedError(
                f'main_authors method is not implemented for {ns} namespace')

        if not self.exists():
            raise pywikibot.exceptions.NoPageError(self)

    @deprecated('authorship', since='9.3.0')
    @deprecated_args(onlynew=None)  # since 9.2.0
    def main_authors(self) -> collections.Counter[str]:
        """Retrieve the 5 topmost main authors of an article.

        Sample:

        >>> import pywikibot
        >>> site = pywikibot.Site('wikipedia:de')
        >>> page = pywikibot.Page(site, 'Project:Pywikibot')
        >>> auth = page.main_authors()
        >>> auth.most_common(1)
        [('DrTrigon', 37)]

        .. deprecated:: 9.3
           use :meth:`authorship` instead.

        .. seealso:: :meth:`authorship` for further information

        :return: Percentage of edits for each username
        :raise NotImplementedError: unsupported site or unsupported
            namespace.
        :raise NoPageError: The page does not exist.
        :raise TimeoutError: WikiHistory timeout
        """
        # authorship() maps user -> (chars, percent); only the percentage
        # is kept here, truncated to an integer.
        return collections.Counter(
            {user: int(pct) for user, (_, pct) in self.authorship(5).items()})

    @remove_last_args(['revid', 'date'])  # since 10.1.0
    def authorship(
        self,
        n: int | None = None,
        *,
        min_chars: int = 0,
        min_pct: float = 0.0,
        max_pct_sum: float | None = None,
    ) -> dict[str, tuple[int, float]]:
        """Retrieve authorship attribution of an article.

        This method uses WikiHistory to retrieve the authors measured by
        character count.

        Sample:

        >>> import pywikibot
        >>> site = pywikibot.Site('wikipedia:en')
        >>> page = pywikibot.Page(site, 'Pywikibot')
        >>> auth = page.authorship()  # doctest: +SKIP
        >>> auth  # doctest: +SKIP
        {'1234qwer1234qwer4': (68, 100.0)}

        .. important:: Only implemented for pages in Main, Project,
           Category and Template namespaces and only wikipedias of
           :attr:`WIKIBLAME_CODES` are supported.

        .. versionadded:: 9.3
           XTools is used to retrieve authors. This method replaces
           :meth:`main_authors`.

        .. versionchanged:: 10.1
           WikiHistory is used to retrieve authors due to :phab:`T392694`.
           Here are the differences between these two implementations:

        .. tabs::

           .. tab:: WikiHistory

              .. versionadded:: 10.1

              - Implemented from version 7.7 until 9.2 (with
                :meth:`main_authors` method) and from 10.1.
              - Main, Project, Category and Template namespaces are
                supported
              - Only 'als', 'bar', 'de', 'en', 'it', 'nds' and 'sco'
                Wikipedias are supported.
              - Revision ID *revid* or revision *date* is not supported.
                Always the latest revision is used.
              - Only the top 5 authors are given.
              - No additional parsing library is required.

              .. seealso::
                 - https://wikihistory.toolforge.org
                 - https://de.wikipedia.org/wiki/WP:HT/wikihistory

           .. tab:: XTools

              .. versionremoved:: 10.1

              - Implemented from version 9.3 until 10.0.
              - Only Main namespace is supported.
              - Only 'ar', 'de', 'en', 'es', 'eu', 'fr', 'hu', 'id', 'it',
                'ja', 'nl', 'pl', 'pt' and 'tr' Wikipedias are supported.
              - Revision ID *revid* or revision *date* is supported to
                get authorship for this revision.
              - All authors can be given.
              - wikitextparser parsing library is required.

              .. seealso::
                 - https://xtools.wmcloud.org/authorship/
                 - https://www.mediawiki.org/wiki/XTools/Authorship
                 - https://www.mediawiki.org/wiki/WikiWho

        :param n: Only return the first *n* or fewer authors. A warning
            is issued if *n* is greater than 5.
        :param min_chars: Only return authors with at least *min_chars*
            chars changes.
        :param min_pct: Only return authors with at least *min_pct*
            percentage edits.
        :param max_pct_sum: Only return authors until the percentage sum
            reached *max_pct_sum*; the author reaching that sum is
            still included.
        :return: Character count and percentage of edits for each
            username.
        :raise NotImplementedError: unsupported site or unsupported
            namespace.
        :raise NoPageError: The page does not exist.
        :raise TimeoutError: WikiHistory timeout
        """
        if n and n > 5:
            warn('Only the first 5 authors can be given.', stacklevel=2)

        baseurl = 'https://wikihistory.toolforge.org'
        # Captures the author name and the integer percentage from each
        # entry of the fetched response text.
        pattern = (r'><bdi>(?P<author>.+?)</bdi></a>\s'
                   r'\((?P<percent>\d{1,3})&')

        self._check_wh_supported()

        # Try onlynew=1 first; if the response reports a timeout, wait
        # and retry once with onlynew=0 before giving up.
        for onlynew in (1, 0):
            url = baseurl + (f'/wiki/getauthors.php?wiki={self.site.code}wiki'
                             f'&page_id={self.pageid}&onlynew={onlynew}')
            response = pywikibot.comms.http.fetch(url)
            if response.status_code != HTTPStatus.OK:
                response.raise_for_status()

            if 'Timeout' not in response.text:
                break

            pywikibot.sleep(pywikibot.config.retry_wait)
        else:
            raise pywikibot.exceptions.TimeoutError('WikiHistory Timeout')

        # Character counts are derived from the percentage and the length
        # of the current page text.
        length = len(self.text)

        result: list[tuple[str, int, float]] = []
        pct_sum = 0.0
        matches = re.findall(pattern, response.text)
        for rank, (user, cnt) in enumerate(matches, start=1):
            chars = length * int(cnt) // 100
            percent = float(cnt)

            # The scan stops at the first author failing a limit instead
            # of skipping it: this relies on the authors being listed in
            # ranked order.
            if ((n and rank > n)
                    or chars < min_chars
                    or percent < min_pct):
                break

            result.append((user, chars, percent))
            pct_sum += percent
            if max_pct_sum and pct_sum >= max_pct_sum:
                break

        return {user: (chars, percent) for user, chars, percent in result}