# Source code for data.wikistats

"""Objects representing WikiStats API."""
#
# (C) Pywikibot team, 2014-2024
#
# Distributed under the terms of the MIT license.
#
from __future__ import annotations

from csv import DictReader
from io import StringIO
from typing import Any

import pywikibot
from pywikibot.comms import http


# Module-level cache shared by every WikiStats instance: maps a WikiStats
# table name to the list of rows parsed from that table's CSV dump.
_data: dict[str, list[Any]] = {}


class WikiStats:

    """Light wrapper around WikiStats data, caching responses and data.

    The methods accept a Pywikibot family name as the WikiStats table
    name, mapping the names before calling the WikiStats API.

    .. versionchanged:: 9.0
       tables are cached globally instead of per instance.
    """

    # Pywikibot family name -> WikiStats table name
    FAMILY_MAPPING = {
        'wikipedia': 'wikipedias',
        'wikiquote': 'wikiquotes',
        'wikisource': 'wikisources',
        'wiktionary': 'wiktionaries',
    }

    MISC_SITES_TABLE = 'mediawikis'

    WMF_MULTILANG_TABLES = {
        'wikipedias', 'wiktionaries', 'wikisources', 'wikinews',
        'wikibooks', 'wikiquotes', 'wikivoyage', 'wikiversity',
    }

    OTHER_MULTILANG_TABLES = {
        'uncyclomedia',
        'rodovid',
        'wikifur',
        'wikitravel',
        'scoutwiki',
        'opensuse',
        'metapedias',
        'lxde',
        'pardus',
        'gentoo',
    }

    OTHER_TABLES = {
        # Farms
        'wikia',
        'wikkii',
        'wikisite',
        'editthis',
        'orain',
        'shoutwiki',
        'referata',

        # Single purpose/manager sets
        'wmspecials',
        'gamepedias',
        'w3cwikis',
        'neoseeker',
        'sourceforge',
    }

    ALL_TABLES = ({MISC_SITES_TABLE} | WMF_MULTILANG_TABLES
                  | OTHER_MULTILANG_TABLES | OTHER_TABLES)

    # Every name accepted without a warning: family names and table names
    ALL_KEYS = set(FAMILY_MAPPING) | ALL_TABLES

    def __init__(self, url: str = 'https://wikistats.wmcloud.org/') -> None:
        """Initializer.

        :param url: base URL of the WikiStats service
        """
        self.url = url

    def get(self, table: str) -> list:
        """Get a list of a table of data.

        The rows are fetched as a CSV dump on first use and afterwards
        served from the module-level cache.

        :param table: table of data to fetch; either a WikiStats table
            name or a Pywikibot family name listed in FAMILY_MAPPING
        :return: one dict per CSV row, keyed by the CSV header names
        """
        # Resolve the family name *before* looking into the cache: data
        # is stored under the WikiStats table name, so looking up the
        # unmapped family name would never hit and refetch every time.
        name = self.FAMILY_MAPPING.get(table, table)
        if name in _data:
            return _data[name]

        if table not in self.ALL_KEYS:
            pywikibot.warning('WikiStats unknown table ' + table)

        # Strip a trailing slash from the base URL to avoid '//api.php'.
        base = self.url.rstrip('/')
        url = f'{base}/api.php?action=dump&table={name}&format=csv'
        response = http.fetch(url)
        rows = list(DictReader(StringIO(response.text)))
        _data[name] = rows
        return rows

    def get_dict(self, table: str) -> dict:
        """Get dictionary of a table of data.

        :param table: table of data to fetch
        :return: the rows of the table keyed by their 'prefix' value
        """
        return {row['prefix']: row for row in self.get(table)}

    def sorted(self, table: str, key: str,
               reverse: bool | None = None) -> list:
        """Return the rows of a table sorted by the given key.

        Whether the key is numeric is decided from the first row only.

        :param table: name of table of data
        :param key: data table key
        :param reverse: If set to True the sorting order is reversed.
            If None the sorting order for numeric keys are reversed
            whereas alphanumeric keys are sorted in normal way.
        :return: The sorted table; an empty list if the table has no
            rows
        """
        rows = self.get(table)
        if not rows:
            # nothing to inspect for the key type and nothing to sort
            return []

        # take the first entry to determine the sorting key
        numeric = rows[0][key].isdigit()
        if reverse is None:
            # numeric keys default to descending, other keys to ascending
            reverse = numeric

        if numeric:
            def sort_key(row):
                return int(row[key])
        else:
            def sort_key(row):
                return row[key]

        # 'sorted' is the builtin here; the class scope is not searched
        # from inside a method body.
        return sorted(rows, key=sort_key, reverse=reverse)

    def languages_by_size(self, table: str) -> list:
        """Return ordered list of languages by size from WikiStats.

        :param table: table of data to fetch
        :return: the 'prefix' value of every row, in dump order
        """
        # This assumes they appear in order of size in the WikiStats dump.
        return [row['prefix'] for row in self.get(table)]