#!/usr/bin/env python3
"""Correct all redirect links in featured pages or only one page of each wiki.

Can be used with:

-always           The bot won't ask for confirmation when putting a page

-featured         Run over featured pages (for some Wikimedia wikis only)

-overwrite        Usually only the link is changed ([[Foo]] -> [[Bar|Foo]]).
                  This parameter sets the script to completely overwrite the
                  link text ([[Foo]] -> [[Bar]]).

-ignoremoves      Do not try to solve deleted pages after page move.

&params;
"""
#
# (C) Pywikibot team, 2004-2023
#
# Distributed under the terms of the MIT license.
#
from __future__ import annotations

import re
from concurrent.futures import ThreadPoolExecutor, as_completed
from contextlib import suppress

import pywikibot
from pywikibot import pagegenerators
from pywikibot.bot import (
    AutomaticTWSummaryBot,
    ExistingPageBot,
    SingleSiteBot,
    suggest_help,
)
from pywikibot.exceptions import (
    CircularRedirectError,
    InterwikiRedirectPageError,
    InvalidPageError,
    InvalidTitleError,
    NoMoveTargetError,
)
from pywikibot.textlib import does_text_contain_section, isDisabled
from pywikibot.tools import first_lower
from pywikibot.tools import first_upper as firstcap


# This is required for the text that is shown when you run this script
# with the parameter -help. The key must match the placeholder used at the
# end of the module docstring above.
docuReplacements = {'&params;': pagegenerators.parameterHelp}  # noqa: N816

# Featured articles categories
FEATURED_ARTICLES = 'Q4387444'
class FixingRedirectBot(SingleSiteBot,
                        ExistingPageBot,
                        AutomaticTWSummaryBot):

    """Bot that rewrites links which lead to redirects or moved pages.

    For every page linked from the current page a replacement target is
    looked up with :meth:`get_target`. Links to pages that have such a
    target are rewritten in the page text by :meth:`replace_links` and the
    resulting text is passed to ``put_current``.
    """

    # These attributes are not read inside this class; presumably they are
    # evaluated by the pywikibot bot base classes -- confirm there.
    use_redirects = False
    ignore_save_related_errors = True
    ignore_server_errors = True
    summary_key = 'fixing_redirects-fixing'
    # Script specific options with their defaults. They are read below as
    # ``self.opt.overwrite`` and ``self.opt.ignoremoves``.
    update_options = {
        'overwrite': False,
        'ignoremoves': False,
    }

    def replace_links(self, text, linked_page, target_page):
        """Point every link to *linked_page* in *text* at *target_page*.

        :param text: wikitext to work on
        :param linked_page: page whose links should be replaced
        :param target_page: page the rewritten links should lead to
        :return: the text with all matching links rewritten
        """
        site = pywikibot.Site()
        trail = site.linktrail()

        # Matches [[title#section|label]] plus the link trail characters
        # that directly follow the closing brackets.
        link_pattern = re.compile(
            r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?'
            r'(\|(?P<label>[^\]]*))?\]\](?P<linktrail>'
            + trail + ')')

        offset = 0
        while True:
            found = link_pattern.search(text, pos=offset)
            if not found:
                # No further link: the page is finished.
                return text

            # Resume one character behind the start of this hit so the
            # same link is not found again.
            offset = found.start() + 1
            title = found['title']

            try:
                interwiki = site.isInterwikiLink(title)
            except InvalidTitleError:
                continue  # skip invalid title

            # Skip links without a title (links to a section of the same
            # page), interwiki links and links at a position that
            # isDisabled() reports as disabled.
            if (title.strip() == ''
                    or interwiki
                    or isDisabled(text, found.start())):
                continue

            # Only links that resolve to linked_page are of interest.
            candidate = pywikibot.Page(target_page.site, title)
            try:
                candidate.title()
            except InvalidTitleError as error:
                pywikibot.error(error)
                continue
            if candidate != linked_page:
                continue

            # The link looks like [[title|label]]trail or, when there is
            # no label, like [[title]]trail.
            link_text = found['label'] or title
            section = found['section'] or ''
            if section and target_page.section():
                # Both the link and the target carry a section.
                pywikibot.warning(f'Source section {section} and target '
                                  f'section {target_page} found. Skipping.')
                continue

            trailing = found['linktrail']
            if trailing:
                link_text += trailing

            # remove a leading ":" from the visible text
            if link_text.startswith(':'):
                link_text = link_text[1:]

            # Let the new title follow the capitalisation of the old
            # visible text: keep it as is for an upper case letter or a
            # digit, otherwise lower the first character.
            initial = link_text[0]
            new_page_title = target_page.title()
            if not (initial.isupper() or initial.isdigit()):
                new_page_title = first_lower(new_page_title)

            # remove a leading ":" from the new title
            if new_page_title[0] == ':':
                new_page_title = new_page_title[1:]

            if ((new_page_title == link_text and not section)
                    or self.opt.overwrite):
                newlink = f'[[{new_page_title}]]'
            elif (len(new_page_title) <= len(link_text)
                  and (firstcap(link_text[:len(new_page_title)])
                       == firstcap(new_page_title))
                  and not re.sub(trail, '',
                                 link_text[len(new_page_title):])
                  and not section):
                # The visible text is the new title followed only by link
                # trail characters, so no piped link is needed.
                length = len(new_page_title)
                newlink = f'[[{link_text[:length]}]]{link_text[length:]}'
            else:
                newlink = f'[[{new_page_title}{section}|{link_text}]]'

            text = ''.join(
                (text[:found.start()], newlink, text[found.end():]))

    def get_target(self, page):
        """Find the page that links to *page* should point to instead.

        A missing page is resolved through ``page.moved_target()`` unless
        the ``ignoremoves`` option is set; a redirect page is resolved
        through ``page.getRedirectTarget()``.

        :param page: the linked page to examine
        :return: tuple of *page* and its target; the target is None if
            there is nothing to replace
        """
        # Namespaces 2 and 3 (User / User talk in the standard MediaWiki
        # numbering -- confirm for the wiki in use).
        user_namespaces = (2, 3)

        target = None
        if not page.exists():
            if not self.opt.ignoremoves:
                with suppress(NoMoveTargetError,
                              CircularRedirectError,
                              InvalidTitleError):
                    target = page.moved_target()
        elif page.isRedirectPage():
            try:
                target = page.getRedirectTarget()
            except (CircularRedirectError,
                    InvalidTitleError,
                    InterwikiRedirectPageError):
                pass
            except RuntimeError as error:
                pywikibot.error(error)
            else:
                # Drop a target whose section is not found in its text.
                section = target.section()
                if section and not does_text_contain_section(target.text,
                                                             section):
                    pywikibot.warning(
                        f'Section #{section} not found on page '
                        f'{target.title(as_link=True, with_section=False)}')
                    target = None

        if target is None:
            return page, None
        # Do not lead links from other namespaces into namespaces 2 or 3.
        if (target.namespace() in user_namespaces
                and page.namespace() not in user_namespaces):
            return page, None
        return page, target

    def treat_page(self) -> None:
        """Change all redirects from the current page to actual links.

        The targets of all linked pages are looked up concurrently in a
        thread pool. Replacements are applied to the text in the order
        the lookups complete and the result is passed to ``put_current``.
        """
        try:
            content = self.current_page.text
        except InvalidPageError as error:
            pywikibot.error(error)
            return

        with ThreadPoolExecutor() as pool:
            pending = set()
            for linked in self.current_page.linkedPages():
                pending.add(pool.submit(self.get_target, linked))

            for finished in as_completed(pending):
                source, destination = finished.result()
                if destination:
                    content = self.replace_links(content, source,
                                                 destination)
        self.put_current(content)
def main(*args: str) -> None:
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    :param args: command line arguments
    """
    # Options that are passed on to the bot with the leading "-" removed.
    bot_flags = ('-always', '-ignoremoves', '-overwrite')

    # Process global args first, then let the generator factory pick the
    # arguments it handles; what is left over is inspected below.
    gen_factory = pagegenerators.GeneratorFactory()
    remaining = gen_factory.handle_args(pywikibot.handle_args(args))

    featured = False
    options = {}
    unknown = []
    for arg in remaining:
        if arg == '-featured':
            featured = True
        elif arg in bot_flags:
            options[arg[1:]] = True
        else:
            unknown.append(arg)

    # NOTE(review): the return value is discarded here, so processing
    # continues after unknown parameters were passed to suggest_help --
    # confirm that this is intended.
    suggest_help(unknown_parameters=unknown)

    site = pywikibot.Site()
    if site.sitename == 'wikipedia:nl':
        pywikibot.info(
            '<<lightred>>There is consensus on the Dutch Wikipedia that '
            'bots should not be used to fix redirects.')
        return

    if not featured:
        gen = gen_factory.getCombinedGenerator(preload=True)
    else:
        # Look up the featured articles item for this site; without it
        # the -featured option cannot be used.
        ref = site.page_from_repository(FEATURED_ARTICLES)
        gen = None
        if ref is not None:
            gen = ref.articles(namespaces=0, content=True)
        if not gen:
            suggest_help(
                unknown_parameters=['-featured'],
                additional_text='Option is not available for this site.')
            return

    if not gen:
        suggest_help(missing_generator=True)
        return

    bot = FixingRedirectBot(generator=gen, **options)
    bot.run()