Source code for scripts.category_graph

#!/usr/bin/env python3
r"""Visualizes category hierarchy.

Generates a graphical representation of the category hierarchy in
dot, svg and html5 formats.

Usage:

    pwb.py category_graph [-style STYLE] [-depth DEPTH] [-from FROM] [-to TO]

actions:

-from [FROM]  Category name to scan, default is main category, "?" to ask.

optional arguments:

-to TO        base file name to save, "?" to ask

-style STYLE  graphviz style definitions in dot format (see below)

-depth DEPTH  maximal hierarchy depth. 2 by default

-downsize K   font size divider for subcategories. 4 by default. Use 1
              for the same font size

.. seealso:: https://graphviz.org/doc/info/attrs.html
   for graphviz style definitions.

Example
-------

Visualizes main category:

    pwb.py -v category_graph -from

Extended example with style settings:

    pwb.py category_graph -from Life -downsize 1.5 \
    -style 'graph[rankdir=BT ranksep=0.5] node[shape=circle style=filled \
    fillcolor=green] edge[style=dashed penwidth=3]'


.. versionadded:: 8.0
"""
#
# (C) Pywikibot team, 2022-2024
#
# Distributed under the terms of the MIT license.
#
from __future__ import annotations

import argparse
import glob
from collections import defaultdict
from contextlib import suppress
from pathlib import Path

import pywikibot
from pywikibot import config
from pywikibot.bot import SingleSiteBot, suggest_help


try:
    import pydot
except ImportError as e:
    # Keep the exception in place of the module: main() checks
    # isinstance(pydot, ImportError) to report the missing dependency
    # through suggest_help() instead of failing at import time.
    pydot = e


class CategoryGraphBot(SingleSiteBot):

    """Bot to create graph of the category structure."""

    @staticmethod
    def setup_args(ap: argparse.ArgumentParser) -> None:
        """Declare the command line options of this script.

        Every option takes an optional value (``nargs='?'``), so an
        option given without a value is parsed as ``None``. ``-from``
        has no default at all (``argparse.SUPPRESS``), which lets the
        caller detect that it was not given.

        :param ap: the parser the options are added to
        """
        ap.add_argument('-from', nargs='?', default=argparse.SUPPRESS)
        ap.add_argument('-to', nargs='?', default='')
        ap.add_argument('-style', nargs='?', default='')
        ap.add_argument('-depth', nargs='?', default=2)
        ap.add_argument('-downsize', nargs='?', default=4)

    def __init__(self, args: argparse.Namespace) -> None:
        """Initializer.

        May prompt the user for the category title and for the output
        base name when the corresponding option is ``'?'``.

        :param args: parsed options as declared by :meth:`setup_args`;
            the ``from`` entry must be present
        """
        super().__init__()
        self.args = args

        # 'from' is a Python keyword, hence the lookup through vars()
        cat_title = vars(args)['from']
        if not cat_title:
            cat_title = 'Main topic classifications'
        if cat_title == '?':
            cat_title = pywikibot.input(
                'For which category do you want to create a graph?')

        pywikibot.info(f'Scanning {cat_title!r}')
        self.cat = pywikibot.Category(self.site, cat_title)

        # base name (without extension) of the files written by run()
        self.to = args.to
        if self.to == '?':
            self.to = pywikibot.input(
                'Please enter the name of the file '
                'where the tree should be saved,\n'
                'or press enter to use category name:')
        if not self.to:
            self.to = cat_title.replace(' ', '_')

        # child node name -> list of parent node names
        self.rev = defaultdict(list)
        # parent node name -> list of child node names
        self.fw = defaultdict(list)
        # names of nodes at which scanning stopped
        self.leaves = set()
        # number of nodes added to the graph so far
        self.counter = 0

        font = 'fontname="Helvetica,Arial,sans-serif"'
        # '-style' given without a value is parsed as None
        user_style = args.style or ''
        style = f'graph [rankdir=LR ranksep=2 concentrate=true {font}] ' \
                f'node [newrank=true shape=plaintext {font}] ' \
                f'edge [arrowhead=open labeldistance=3 ' \
                f'labelfontcolor="#00000080" {font}] ' + user_style
        self.dot = pydot.graph_from_dot_data(f'digraph {{{style}}}')[0]
        self.dot.set_name(f'"{cat_title}"')

    def scan_level(self, cat, level, hue=None) -> None:
        """Recursively add a category and its subcategories to the graph.

        A node is added for *cat*; unless *level* is zero or the node
        limit is reached, an edge is added to every subcategory and the
        subcategory is scanned with ``level - 1``.

        :param cat: the Category of the node we're currently opening.
        :param level: remaining depth, decreasing from ``-depth`` to
            zero while descending (opposite of depth).
        :param hue: hue used for the edge colors of this branch; if
            None, every subcategory gets a hue of its own.
        """
        title = cat.title(with_ns=False)
        size = float(self.args.downsize) ** level
        subcats = sorted(cat.subcategories())
        columns = len(subcats) // 5 + 1

        # -depth is a str when given on the command line and an int when
        # defaulted; normalize it before comparing with the int level.
        try:
            at_root = level == int(self.args.depth)
        except (TypeError, ValueError):
            at_root = False

        def make_node():
            """Create the graph node of the current category."""
            # no-break spaces inside each subcategory title
            subs = ', '.join(c.title(with_ns=False).replace(' ', '\xa0')
                             for c in subcats)
            return pydot.Node(title,
                              label=rf'"{title}\n{len(subcats)} C"',
                              tooltip=title + '\n\n' + subs,
                              URL=cat.full_url(),
                              fontsize=int(10 * size))

        def make_edge(index, subcat, h):
            """Create the edge from the current category to *subcat*."""
            target = subcat.title(with_ns=False)
            color = str(round(h, 2))
            # distribute the graph to depth, except directly below the
            # start category
            minlen = 1 if at_root else index % columns + 1
            return pydot.Edge(title,
                              target,
                              tooltip=title + ' ⟶ ' + target,
                              headlabel=title,
                              minlen=minlen,
                              penwidth=round(size / 2, 2),
                              arrowsize=round(size / 4, 2),
                              color=color + ' 1 0.7',
                              labelfontsize=int(3 * size),
                              labelfontcolor=color + ' 1 0.5')

        if config.verbose_output:
            pywikibot.info('Adding ' + title)

        node = make_node()
        self.dot.add_node(node)
        self.counter += 1
        if not level or self.counter >= 1e4:
            # because graphviz crashes on huge graphs
            if self.counter == 1e4:
                pywikibot.warning('Number of nodes reached limit')
            self.leaves.add(node.get_name())
            return

        for n, subcat in enumerate(subcats):
            # generating different hue for color per each root branch
            h = hue if hue is not None else (11 / 18 * n) % 1
            e = make_edge(n, subcat, h)
            self.dot.add_edge(e)
            # repeat recursively
            self.scan_level(subcat, level - 1, h)
            # track graph's structure to reduce too big graph
            self.rev[e.get_destination()].append(e.get_source())
            self.fw[e.get_source()].append(e.get_destination())

    def run(self) -> None:
        """Scan the category tree and save it as .gv, .svg and .html.

        If more than 1000 nodes were added, leaves that are the only
        child of a single parent are removed first, walking upwards as
        long as the parent becomes childless too.
        """
        self.scan_level(self.cat, int(self.args.depth))

        # reduce too big graph
        if self.counter > 1000:
            pywikibot.warning('Removing standalone subcategories '
                              'because graph is too big')
            for n in self.leaves:
                while len(self.rev[n]) == 1:
                    parent = self.rev[n][0]
                    if config.verbose_output:
                        pywikibot.info('Removing ' + n)
                    self.dot.del_edge(parent, n)
                    self.dot.del_node(n)
                    self.fw[parent].remove(n)
                    if self.fw[parent]:
                        # the parent still has other children
                        break
                    n = parent

        pywikibot.info('Saving results')
        pywikibot.info(self.to + '.gv')
        self.dot.write(self.to + '.gv', encoding='utf-8')
        pywikibot.info(self.to + '.svg')
        self.dot.write_svg(self.to + '.svg', encoding='utf-8')
        pywikibot.info(self.to + '.html')
        header = ('<head><meta charset="UTF-8"/>'
                  '<title>' + self.cat.title(with_ns=False)
                  + '</title> </head>\n'
                  '<div style="position:absolute;">'
                  'Zoom and drag with mouse. '
                  'Nodes are links to Wikipedia.'
                  '</div>\n'
                  '<script '
                  'src="https://unpkg.com/panzoom@9.4.0/dist/panzoom.min.js" '
                  'query="#graph0" name="pz"></script>\n'
                  '<style> svg { height:100%; width:100%; } </style>\n')
        # the html file is the header followed by the inline svg
        with open(self.to + '.html', mode='wb') as o:
            o.write(header.encode())
            o.write(self.dot.create('dot', 'svg', encoding='utf-8'))
def main(*args: str) -> None:
    """Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    :param args: command line arguments
    """
    ap = argparse.ArgumentParser(add_help=False)
    CategoryGraphBot.setup_args(ap)
    # forward the given arguments; an empty tuple falls back to sys.argv
    local_args = pywikibot.handle_args(args)
    options, rest = ap.parse_known_args(local_args)

    if suggest_help(
        missing_action='from' not in options,
        unknown_parameters=rest,
        missing_dependencies=(['pydot'] if isinstance(pydot, ImportError)
                              else [])
    ):
        return

    # Create the bot first: it resolves the real base file name (the
    # '?' prompt or the category title fallback). The raw option may be
    # '', '?' or None and must not be used as a glob pattern.
    bot = CategoryGraphBot(options)
    file_path = bot.to

    # If file exists, ask user if ok to overwrite. Otherwise, make
    # the file, including directories unless it is top level.
    if glob.glob(glob.escape(file_path) + '.*'):
        choice = pywikibot.input_yn(f'Files exist for {file_path}. Overwrite?',
                                    'n', automatic_quit=False)
        if not choice:
            pywikibot.info('Exiting...')
            return
    else:
        dir_path = Path(file_path)
        with suppress(FileNotFoundError):
            dir_path.parent.mkdir(parents=True, exist_ok=True)

    bot.run()
# Run main() only when the file is executed as a script, not on import.
if __name__ == '__main__': main()