"""Module with the Graphviz drawing calls."""## (C) Pywikibot team, 2006-2024## Distributed under the terms of the MIT license.#from__future__importannotationsimportitertoolsimportthreadingfromcollectionsimportCounterimportpywikibotfrompywikibotimportconfigtry:importpydotPYDOT_ERROR=NoneexceptImportErrorase:PYDOT_ERROR=e
class GraphSavingThread(threading.Thread):

    """Background worker that renders a graph to disk.

    Rendering a graph can take extremely long, which is why the work is
    pushed onto a separate thread.

    TODO: Find out whether several renderer threads running in parallel
    slow the system down too much. Consider adding a mechanism to kill
    a thread that takes too long.
    """

    def __init__(self, graph: pydot.Dot,
                 origin: pywikibot.page.Page) -> None:
        """Initializer.

        :param graph: the graph that :meth:`run` writes out
        :param origin: the page passed to ``getFilename`` to name the
            output files
        """
        super().__init__()
        self.origin = origin
        self.graph = graph

    def run(self) -> None:
        """Write the graph once per configured output format.

        The formats are taken from ``config.interwiki_graph_formats``;
        the outcome of every write is reported via ``pywikibot.info``.
        """
        for fmt in config.interwiki_graph_formats:
            target = config.datafilepath(
                'interwiki-graphs/' + getFilename(self.origin, fmt))
            saved = self.graph.write(target, prog='dot', format=fmt)
            prefix = ('Graph saved as ' if saved
                      else 'Graph could not be saved as ')
            pywikibot.info(prefix + target)
class Subject:

    """Record of a page and its translations on multiple wikis."""

    def __init__(self, origin: pywikibot.page.Page | None = None) -> None:
        """Initializer.

        :param origin: the page on the 'origin' wiki
        """
        # Keep hold of the page everything starts from.
        self.origin = origin

        # found_in maps each page to the list of pages on which it was
        # found. Nothing linking to the origin page has been seen yet,
        # so the origin starts with an empty list; without an origin
        # the mapping itself starts empty.
        self.found_in: dict[pywikibot.Page, list[pywikibot.Page]] = (
            {origin: []} if origin else {})
class GraphDrawer:

    """Graphviz (dot) code creator."""

    def __init__(self, subject: pywikibot.interwiki_graph.Subject) -> None:
        """Initializer.

        :param subject: page data to graph
        :raises ImportError: if pydot is not installed
        """
        if PYDOT_ERROR:
            msg = f'pydot is not installed: {PYDOT_ERROR}.'
            raise ImportError(msg)
        self.graph: pydot.Dot | None = None
        self.subject = subject
        # Recomputed by createGraph(). It starts out empty so that
        # addNode() never reads an attribute that does not exist yet.
        self.octagon_sites: set[pywikibot.site.BaseSite] = set()

    @staticmethod
    def getLabel(page: pywikibot.page.Page) -> str:
        """Get label for page.

        :param page: the page to label
        :return: the double-quoted string ``"<site code>:<title>"``
        """
        return f'"{page.site.code}:{page.title()}"'

    def _octagon_site_set(self) -> set[pywikibot.site.BaseSite]:
        """Build the set of sites with more than one valid page.

        A page counts as valid if it exists and is not a redirect.

        :return: sites that hold at least two valid pages
        """
        # Only track sites of normal pages
        each_site = (page.site for page in self.subject.found_in
                     if page.exists() and not page.isRedirectPage())

        # most_common() yields the counts in descending order, so the
        # scan can stop at the first site that occurs only once.
        return {site for site, _ in itertools.takewhile(
            lambda item: item[1] > 1,
            Counter(each_site).most_common())}

    def addNode(self, page: pywikibot.page.Page) -> None:
        """Add a node for page.

        The fill colour marks missing pages (red), redirects (blue) and
        disambiguation pages (orange). A green bold outline marks a
        page whose namespace differs from the origin page's, and an
        octagonal shape marks a page whose site is in
        ``octagon_sites``.

        :param page: the page to add to the graph
        """
        assert self.graph is not None
        node = pydot.Node(self.getLabel(page), shape='rectangle')
        node.set_URL(f'"http://{page.site.hostname()}'
                     f'{page.site.get_address(page.title(as_url=True))}"')
        node.set_style('filled')
        node.set_fillcolor('white')
        node.set_fontsize('11')
        if not page.exists():
            node.set_fillcolor('red')
        elif page.isRedirectPage():
            node.set_fillcolor('blue')
        elif page.isDisambig():
            node.set_fillcolor('orange')
        if page.namespace() != self.subject.origin.namespace():
            node.set_color('green')
            node.set_style('filled,bold')
        if page.site in self.octagon_sites:
            # mark conflict by octagonal node
            node.set_shape('octagon')
        self.graph.add_node(node)

    def addDirectedEdge(self, page: pywikibot.page.Page,
                        refPage: pywikibot.page.Page | None) -> None:
        """Add a directed edge from refPage to page.

        If the opposite edge is already in the graph it is turned into
        a bidirectional edge instead; a duplicate edge is reported as
        an error and not added.

        :param page: the link target
        :param refPage: the referring page, or None if *page* was given
            as a hint; nothing is added in that case
        """
        assert self.graph is not None
        # if page was given as a hint, referrers would be [None]
        if refPage is None:
            return

        sourceLabel = self.getLabel(refPage)
        targetLabel = self.getLabel(page)
        oppositeEdge = self.graph.get_edge(targetLabel, sourceLabel)
        if oppositeEdge:
            oppositeEdge = oppositeEdge[0]
            oppositeEdge.set_dir('both')
        # workaround for sf.net bug 401: prevent duplicate edges
        # (it is unclear why duplicate edges occur)
        # https://sourceforge.net/p/pywikipediabot/bugs/401/
        elif self.graph.get_edge(sourceLabel, targetLabel):
            pywikibot.error(
                f'Tried to create duplicate edge from {refPage} to {page}')
            # duplicate edges would be bad because then get_edge() would
            # give a list of edges, not a single edge when we handle the
            # opposite edge.
        else:
            # add edge; it is only built here, where it is really needed
            edge = pydot.Edge(sourceLabel, targetLabel)
            if refPage.site == page.site:
                edge.set_color('blue')
            elif not page.exists():
                # mark dead links
                edge.set_color('red')
            elif refPage.isDisambig() != page.isDisambig():
                # mark links between disambiguation and
                # non-disambiguation pages
                edge.set_color('orange')
            # a namespace mismatch overrides any colour chosen above
            if refPage.namespace() != page.namespace():
                edge.set_color('green')
            self.graph.add_edge(edge)

    def saveGraphFile(self) -> None:
        """Write graphs to the data directory.

        The writing is handed to a :class:`GraphSavingThread`, which is
        started here and not joined.
        """
        assert self.graph is not None
        thread = GraphSavingThread(self.graph, self.subject.origin)
        thread.start()

    def createGraph(self) -> None:
        """Create graph of the interwiki links.

        For more info see https://meta.wikimedia.org/wiki/Interwiki_graphs
        """
        pywikibot.info(f'Preparing graph for {self.subject.origin.title()}')
        # create empty graph
        self.graph = pydot.Dot()
        self.octagon_sites = self._octagon_site_set()
        for page in self.subject.found_in:
            # a node for each found page
            self.addNode(page)
        # mark start node by pointing there from a black dot.
        firstLabel = self.getLabel(self.subject.origin)
        self.graph.add_node(pydot.Node('start', shape='point'))
        self.graph.add_edge(pydot.Edge('start', firstLabel))
        for page, referrers in self.subject.found_in.items():
            for refPage in referrers:
                self.addDirectedEdge(page, refPage)
        self.saveGraphFile()
def getFilename(page: pywikibot.page.Page,
                extension: str | None = None) -> str:
    """Build a filename that is unique for the page.

    :param page: page the filename is derived from
    :param extension: file extension; left off when empty or None
    :return: filename of <family>-<lang>-<page>.<ext>
    """
    site = page.site
    parts = (site.family.name, site.code, page.title(as_filename=True))
    stem = '-'.join(parts)
    return f'{stem}.{extension}' if extension else stem