Source code for scripts.newitem

#!/usr/bin/env python3
"""
This script creates new items on Wikidata based on certain criteria.

* When was the (Wikipedia) page created?
* When was the last edit on the page?
* Does the page contain interwikis?

This script understands various command-line arguments:

-lastedit         The minimum number of days that has passed since the page was
                  last edited.

-pageage          The minimum number of days that has passed since the page was
                  created.

-touch            Do a null edit on every page which has a Wikibase item.
                  Be careful, this option can trigger edit rate limits or
                  captchas if your account is not autoconfirmed.

"""
#
# (C) Pywikibot team, 2014-2023
#
# Distributed under the terms of the MIT license.
#
from __future__ import annotations

from datetime import timedelta
from textwrap import fill

import pywikibot
from pywikibot import pagegenerators
from pywikibot.bot import WikidataBot
from pywikibot.exceptions import (
    LockedPageError,
    NoCreateError,
    NoPageError,
    PageSaveRelatedError,
)


# Wikidata item IDs. For each of them the template page linked on the
# working site is looked up with ``site.page_from_repository``; a page
# using such a template (or a redirect to it) is skipped by the bot.
# Presumably these items are the deletion-request templates -- confirm the
# individual items on Wikidata.
DELETION_TEMPLATES = (
    'Q4847311',
    'Q6687153',
    'Q21528265',
)


class NewItemRobot(WikidataBot):
    """A bot to create new items.

    A page is only given a new item when it passes :meth:`skip_page`:
    it must not have been edited or created too recently, must not be a
    category redirect, must not carry language links and must not use
    one of the skipping templates.
    """

    # Switches read by the base class. Presumably: redirects are not
    # followed, and pages without an item are still passed to
    # treat_page_and_item -- confirm against WikidataBot.
    use_redirect = False
    treat_missing_item = True
    update_options = {
        'always': True,
        'lastedit': 7,
        'pageage': 21,
        # Can be False, newly (pages linked to newly created items)
        # or True (touch all pages)
        'touch': 'newly',
    }

    def __init__(self, **kwargs) -> None:
        """Only accepts options defined in available_options."""
        super().__init__(**kwargs)
        # Cache of skipping templates, keyed by site.
        self._skipping_templates = {}

    def setup(self) -> None:
        """Compute the page age and last edit thresholds and report them.

        Sets ``pageAgeBefore`` and ``lastEditBefore`` to the repository
        server time minus the ``pageage`` resp. ``lastedit`` option given
        in days.
        """
        super().setup()

        self.pageAgeBefore = (
            self.repo.server_time() - timedelta(days=self.opt.pageage)
        )
        self.lastEditBefore = (
            self.repo.server_time() - timedelta(days=self.opt.lastedit)
        )

        page_age_msg = ('Page age is set to {} days so only pages created'
                        '\nbefore {} will be considered.\n')
        pywikibot.info(page_age_msg.format(
            self.opt.pageage, self.pageAgeBefore.isoformat()))

        last_edit_msg = (
            'Last edit is set to {} days so only pages last edited'
            '\nbefore {} will be considered.\n')
        pywikibot.info(last_edit_msg.format(
            self.opt.lastedit, self.lastEditBefore.isoformat()))

    @staticmethod
    def _touch_page(page) -> None:
        """Do a null edit on *page*; failures are logged, not raised."""
        try:
            pywikibot.info('Doing a null edit on the page.')
            page.touch()
        except (NoCreateError, NoPageError):
            pywikibot.error(f'Page {page.title(as_link=True)} does not exist.')
        except LockedPageError:
            pywikibot.error(f'Page {page.title(as_link=True)} is locked.')
        except PageSaveRelatedError as e:
            pywikibot.error(f'Page {page} not saved:\n{e.args}')

    def _callback(self, page, exc) -> None:
        """Touch *page* after item creation succeeded and touch is set."""
        if exc is not None or not self.opt.touch:
            return
        self._touch_page(page)

    def get_skipping_templates(self, site) -> set[pywikibot.Page]:
        """Get templates which lead the page to be skipped.

        The result is stored per site in an attribute, so that the
        templates are retrieved only once per site if the script is used
        for multiple sites.

        :param site: the site to retrieve the templates for
        :return: the templates linked to the items in DELETION_TEMPLATES
            on that site together with the redirects to them
        """
        try:
            return self._skipping_templates[site]
        except KeyError:
            pass  # not retrieved for this site yet

        found = set()
        pywikibot.info(f'Retrieving skipping templates for site {site}...')
        for item_id in DELETION_TEMPLATES:
            template = site.page_from_repository(item_id)
            if template is not None:
                found.add(template)
                # also add redirect templates
                redirects = template.getReferences(
                    follow_redirects=False,
                    with_template_inclusion=False,
                    filter_redirects=True,
                    namespaces=site.namespaces.TEMPLATE)
                found.update(redirects)

        self._skipping_templates[site] = found
        return found

    def skip_templates(self, page) -> str:
        """Check whether the page is to be skipped due to skipping template.

        :param page: treated page
        :type page: pywikibot.Page
        :return: the title (without namespace) of the first template
            which leads to skip, or an empty string if there is none
        """
        skipping_templates = self.get_skipping_templates(page.site)
        matching_titles = (
            template.title(with_ns=False)
            for template, _ in page.templatesWithParams()
            if template in skipping_templates
        )
        return next(matching_titles, '')

    def skip_page(self, page) -> bool:
        """Skip pages which are unwanted to treat.

        :param page: the page to be checked
        :return: True if the page must not be treated
        """
        if super().skip_page(page):
            return True

        # edited too recently
        if page.latest_revision.timestamp > self.lastEditBefore:
            pywikibot.info(
                f'Last edit on {page} was on {page.latest_revision.timestamp}.'
                f'\nToo recent. Skipping.')
            return True

        # created too recently
        if page.oldest_revision.timestamp > self.pageAgeBefore:
            pywikibot.info(
                f'Page creation of {page} on {page.oldest_revision.timestamp} '
                f'is too recent. Skipping.')
            return True

        if page.isCategoryRedirect():
            pywikibot.info(f'{page} is a category redirect. Skipping.')
            return True

        if page.langlinks():
            # FIXME: Implement this
            pywikibot.info(
                f'Found language links (interwiki links) for {page}.\n'
                f"Haven't implemented that yet so skipping.")
            return True

        template = self.skip_templates(page)
        if not template:
            return False

        pywikibot.info(f'{page} contains {{{{{template}}}}}. Skipping.')
        return True

    def treat_page_and_item(self, page, item) -> None:
        """Treat page/item.

        Create an item for *page* if there is none yet. A page which
        already has an item is only touched, and only if the touch
        option is exactly True.

        :param page: the page to be treated
        :param item: the item of the page, if any
        """
        def touch_after_creation(_, exc) -> None:
            self._callback(page, exc)

        if not item or not item.exists():
            self.create_item_for_page(page, callback=touch_after_creation)
            return

        pywikibot.info(f'{page} already has an item: {item}.')
        if self.opt.touch is True:
            self._touch_page(page)
def main(*args: str) -> None:
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    ``-pageage:`` and ``-lastedit:`` take an integer value. Every other
    argument is offered to the page generator factory first; what the
    factory does not handle is passed to the bot as an option set to
    True. If no generator results, a help hint is shown and nothing else
    is done.

    :param args: command line arguments
    """
    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)
    gen = pagegenerators.GeneratorFactory()

    int_option_prefixes = ('-pageage:', '-lastedit:')
    options = {}
    for arg in local_args:
        if arg.startswith(int_option_prefixes):
            name, _, value = arg.partition(':')
            options[name[1:]] = int(value)
            continue
        if not gen.handle_arg(arg):
            options[arg[1:].lower()] = True

    generator = gen.getCombinedGenerator(preload=True)
    if not generator:
        pywikibot.bot.suggest_help(missing_generator=True)
        return

    bot = NewItemRobot(generator=generator, **options)
    if not bot.site.logged_in():
        bot.site.login()
    user = pywikibot.User(bot.site, bot.site.username())

    # Only the default 'newly' mode is switched off for accounts which are
    # neither autoconfirmed nor confirmed; an explicit -touch is kept.
    if (bot.opt.touch == 'newly'
            and 'autoconfirmed' not in user.groups()
            and 'confirmed' not in user.groups()):
        pywikibot.warning(fill(
            f'You are logged in as {user.username}, an account that is not in '
            f'the autoconfirmed group on {bot.site.sitename}. Script will not '
            f'touch pages linked to newly created items to avoid triggering '
            f'edit rates or captchas. Use -touch param to force this.'))
        bot.opt.touch = False

    bot.run()
# Invoke the bot only when this file is executed as a script.
if __name__ == '__main__':
    main()