#!/usr/bin/env python3"""This script creates new items on Wikidata based on certain criteria.* When was the (Wikipedia) page created?* When was the last edit on the page?* Does the page contain interwikis?This script understands various command-line arguments:-lastedit The minimum number of days that has passed since the page was last edited.-pageage The minimum number of days that has passed since the page was created.-touch Do a null edit on every page which has a Wikibase item. Be careful, this option can trigger edit rates or captchas if your account is not autoconfirmed."""## (C) Pywikibot team, 2014-2024## Distributed under the terms of the MIT license.#from__future__importannotationsfromdatetimeimporttimedeltafromtextwrapimportfillimportpywikibotfrompywikibotimportpagegeneratorsfrompywikibot.botimportWikidataBotfrompywikibot.exceptionsimport(LockedPageError,NoCreateError,NoPageError,PageSaveRelatedError,)DELETION_TEMPLATES=('Q4847311','Q6687153','Q21528265')
class NewItemRobot(WikidataBot):

    """A bot to create new items.

    Pages are skipped (see :meth:`skip_page`) when they were edited or
    created too recently, are category redirects, carry language links
    or transclude one of the skipping templates. For the remaining
    pages a new item is requested through ``create_item_for_page``.
    """

    # Both flags are consumed by the WikidataBot base class, which is
    # not part of this file; see its documentation for their meaning.
    use_redirect = False
    treat_missing_item = True

    update_options = {
        'always': True,
        'lastedit': 7,
        'pageage': 21,
        # Can be False, 'newly' (pages linked to newly created items)
        # or True (touch all pages)
        'touch': 'newly',
    }

    def __init__(self, **kwargs) -> None:
        """Only accepts options defined in available_options."""
        super().__init__(**kwargs)
        # Cache of skipping templates, keyed by site; filled lazily by
        # get_skipping_templates().
        self._skipping_templates = {}

    def setup(self) -> None:
        """Set up the age limits and report them to the user.

        ``pageAgeBefore`` and ``lastEditBefore`` are the timestamps a
        page's creation and last edit must precede for the page to be
        treated.
        """
        super().setup()

        # Read the server clock once so that both limits are relative
        # to the same instant and only one request is needed.
        now = self.repo.server_time()
        self.pageAgeBefore = now - timedelta(days=self.opt.pageage)
        self.lastEditBefore = now - timedelta(days=self.opt.lastedit)
        pywikibot.info(
            f'Page age is set to {self.opt.pageage} days so only pages created'
            f'\nbefore {self.pageAgeBefore.isoformat()} will be considered.\n'
            f'\nLast edit is set to {self.opt.lastedit} days so only pages '
            f'last edited\nbefore {self.lastEditBefore.isoformat()} will be'
            ' considered.\n'
        )

    @staticmethod
    def _touch_page(page) -> None:
        """Do a null edit on *page* and log an error if it fails.

        Failures are reported with ``pywikibot.error`` and are not
        re-raised.

        :param page: the page to touch
        """
        try:
            pywikibot.info('Doing a null edit on the page.')
            page.touch()
        except (NoCreateError, NoPageError):
            pywikibot.error(f'Page {page.title(as_link=True)} does not exist.')
        except LockedPageError:
            pywikibot.error(f'Page {page.title(as_link=True)} is locked.')
        except PageSaveRelatedError as e:
            pywikibot.error(f'Page {page} not saved:\n{e.args}')

    def _callback(self, page, exc) -> None:
        """Touch *page* after item creation if no exception occurred.

        Nothing is done when *exc* is set or the ``touch`` option is
        false.

        :param page: the page the item was created for
        :param exc: the exception handed to the callback, or None
        """
        if exc is None and self.opt.touch:
            self._touch_page(page)

    def get_skipping_templates(self, site) -> set[pywikibot.Page]:
        """Get templates which lead the page to be skipped.

        If the script is used for multiple sites, hold the skipping
        templates as attribute.

        :param site: the site to look up the templates for
        :return: the templates linked to the ``DELETION_TEMPLATES``
            items on *site*, together with the redirects to them
        """
        if site in self._skipping_templates:
            return self._skipping_templates[site]

        skipping_templates = set()
        pywikibot.info(f'Retrieving skipping templates for site {site}...')
        for item in DELETION_TEMPLATES:
            template = site.page_from_repository(item)

            # No page was returned for this item on the given site.
            if template is None:
                continue

            skipping_templates.add(template)
            # also add redirect templates
            skipping_templates.update(
                template.getReferences(follow_redirects=False,
                                       with_template_inclusion=False,
                                       filter_redirects=True,
                                       namespaces=site.namespaces.TEMPLATE))
        self._skipping_templates[site] = skipping_templates
        return skipping_templates

    def skip_templates(self, page) -> str:
        """Check whether the page is to be skipped due to skipping template.

        :param page: treated page
        :type page: pywikibot.Page
        :return: the title (without namespace) of the first template
            which leads to skip, or an empty string if there is none
        """
        skipping_templates = self.get_skipping_templates(page.site)
        for template, _ in page.templatesWithParams():
            if template in skipping_templates:
                return template.title(with_ns=False)
        return ''

    def skip_page(self, page) -> bool:
        """Skip pages which are unwanted to treat.

        After the base class check, a page is skipped if its last edit
        is newer than ``lastEditBefore``, its first revision is newer
        than ``pageAgeBefore``, it is a category redirect, it has
        language links, or it contains a skipping template.

        :param page: the page to check
        :return: True if the page must be skipped
        """
        if super().skip_page(page):
            return True

        if page.latest_revision.timestamp > self.lastEditBefore:
            pywikibot.info(
                f'Last edit on {page} was on {page.latest_revision.timestamp}.'
                f'\nToo recent. Skipping.')
            return True

        if page.oldest_revision.timestamp > self.pageAgeBefore:
            pywikibot.info(
                f'Page creation of {page} on {page.oldest_revision.timestamp} '
                f'is too recent. Skipping.')
            return True

        if page.isCategoryRedirect():
            pywikibot.info(f'{page} is a category redirect. Skipping.')
            return True

        if page.langlinks():
            # FIXME: Implement this
            pywikibot.info(
                f'Found language links (interwiki links) for {page}.\n'
                f"Haven't implemented that yet so skipping.")
            return True

        template = self.skip_templates(page)
        if template:
            pywikibot.info(f'{page} contains {{{{{template}}}}}. Skipping.')
            return True

        return False

    def treat_page_and_item(self, page, item) -> None:
        """Treat page/item.

        If *item* exists already, only a null edit is done, and only
        when the ``touch`` option is exactly True. Otherwise a new item
        is created for *page*; the page is touched afterwards through
        :meth:`_callback`.

        :param page: the page to treat
        :param item: the item connected to *page*, if any
        """
        if item and item.exists():
            pywikibot.info(f'{page} already has an item: {item}.')
            # 'newly' must not trigger a touch here, hence the identity
            # comparison instead of a truth test.
            if self.opt.touch is True:
                self._touch_page(page)
            return

        self.create_item_for_page(
            page, callback=lambda _, exc: self._callback(page, exc))
def main(*args: str) -> None:
    """Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    ``-pageage:N`` and ``-lastedit:N`` are stored as integer options.
    Any other argument not handled by the generator factory is turned
    into a boolean option named after the argument without its leading
    dash, in lower case.

    :param args: command line arguments
    """
    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)
    gen = pagegenerators.GeneratorFactory()

    options = {}
    for arg in local_args:
        if arg.startswith(('-pageage:', '-lastedit:')):
            key, val = arg.split(':', 1)
            try:
                options[key[1:]] = int(val)
            except ValueError:
                # Report a readable error instead of a traceback for
                # values like '-pageage:abc' or '-pageage:'.
                pywikibot.error(
                    f'Invalid value {val!r} for {key}: an integer number '
                    f'of days is required.')
                return
        elif gen.handle_arg(arg):
            pass
        else:
            options[arg[1:].lower()] = True

    generator = gen.getCombinedGenerator(preload=True)
    if not generator:
        pywikibot.bot.suggest_help(missing_generator=True)
        return

    bot = NewItemRobot(generator=generator, **options)
    if not bot.site.logged_in():
        bot.site.login()
    user = pywikibot.User(bot.site, bot.site.username())

    # The group check only matters for the default 'newly' mode; an
    # explicit -touch (True) is kept as given.
    if bot.opt.touch == 'newly':
        groups = user.groups()  # look the groups up only once
        if 'autoconfirmed' not in groups and 'confirmed' not in groups:
            pywikibot.warning(fill(
                f'You are logged in as {user.username}, an account that is not in '
                f'the autoconfirmed group on {bot.site.sitename}. Script will not '
                f'touch pages linked to newly created items to avoid triggering '
                f'edit rates or captchas. Use -touch param to force this.'))
            bot.opt.touch = False
    bot.run()