# Source code for scripts.newitem

# -*- coding: utf-8 -*-
"""
This script creates new items on Wikidata based on certain criteria.

* When was the (Wikipedia) page created?
* When was the last edit on the page?
* Does the page contain interwikis?

This script understands various command-line arguments:

-lastedit         The minimum number of days that have passed since the page
                  was last edited.

-pageage          The minimum number of days that have passed since the page
                  was created.

-touch            Do a null edit on every page which has a wikibase item.
                  Be careful, this option can trigger edit rates or captchas
                  if your account is not autoconfirmed.
"""

# (C) Pywikibot team, 2014-2020
# Distributed under the terms of the MIT license.
from datetime import timedelta
from textwrap import fill
from typing import Set

import pywikibot
from pywikibot import pagegenerators
from pywikibot.bot import NoRedirectPageBot, WikidataBot
from pywikibot.exceptions import (LockedPage, NoCreateError, NoPage,
                                  PageNotSaved)

# Wikidata item IDs. Each one is resolved to the matching local template via
# site.page_from_repository(); pages using such a template (or a redirect to
# it) are skipped by the bot.
DELETION_TEMPLATES = (
    'Q4847311',
    'Q6687153',
    'Q21528265',
)

class NewItemRobot(WikidataBot, NoRedirectPageBot):

    """A bot to create new items.

    Pages are skipped when they were edited or created too recently, are
    category redirects, carry language links, or use one of the templates
    listed in DELETION_TEMPLATES (or a redirect to one of them).
    """

    treat_missing_item = True

    def __init__(self, generator, **kwargs) -> None:
        """Only accepts options defined in availableOptions.

        @param generator: the pages to work on; stored as self.generator
        """
        self.availableOptions.update({
            'always': True,
            'lastedit': 7,
            'pageage': 21,
            # 'touch' can be False, 'newly' (pages linked to newly created
            # items) or True (touch all pages).
            'touch': 'newly',
        })

        super().__init__(**kwargs)
        self.generator = generator
        self.pageAge = self.getOption('pageage')
        self.lastEdit = self.getOption('lastedit')
        # Cache of skipping templates, keyed by site.
        self._skipping_templates = {}

    def setup(self) -> None:
        """Compute the age cutoffs and report them."""
        super().setup()
        # Sample the server time once so both cutoffs are measured from the
        # same reference instant.
        now = self.repo.server_time()
        self.pageAgeBefore = now - timedelta(days=self.pageAge)
        self.lastEditBefore = now - timedelta(days=self.lastEdit)
        pywikibot.output('Page age is set to {0} days so only pages created'
                         '\nbefore {1} will be considered.\n'
                         .format(self.pageAge,
                                 self.pageAgeBefore.isoformat()))
        pywikibot.output(
            'Last edit is set to {0} days so only pages last edited'
            '\nbefore {1} will be considered.\n'
            .format(self.lastEdit, self.lastEditBefore.isoformat()))

    @staticmethod
    def _touch_page(page) -> None:
        """Do a null edit on the page, reporting failures as errors."""
        try:
            pywikibot.output('Doing a null edit on the page.')
            page.touch()
        except (NoCreateError, NoPage):
            pywikibot.error('Page {0} does not exist.'.format(
                page.title(as_link=True)))
        except LockedPage:
            pywikibot.error('Page {0} is locked.'.format(
                page.title(as_link=True)))
        except PageNotSaved:
            pywikibot.error('Page {0} not saved.'.format(
                page.title(as_link=True)))

    def _callback(self, page, exc) -> None:
        """Touch the page after item creation succeeded.

        Nothing happens when exc is set or the 'touch' option is falsy.
        """
        if exc is None and self.getOption('touch'):
            self._touch_page(page)

    def get_skipping_templates(self, site) -> Set[pywikibot.Page]:
        """Get templates which lead the page to be skipped.

        If the script is used for multiple sites, hold the skipping templates
        as attribute.

        @param site: the site to look the templates up on
        @return: the local deletion templates and the redirects to them
        """
        if site in self._skipping_templates:
            return self._skipping_templates[site]

        skipping_templates = set()
        pywikibot.output('Retrieving skipping templates for site {}...'
                         .format(site))
        for item in DELETION_TEMPLATES:
            template = site.page_from_repository(item)

            # No local template is connected to this item on this site.
            if template is None:
                continue

            skipping_templates.add(template)
            # also add redirect templates
            skipping_templates.update(
                template.getReferences(follow_redirects=False,
                                       with_template_inclusion=False,
                                       filter_redirects=True,
                                       namespaces=site.namespaces.TEMPLATE))
        self._skipping_templates[site] = skipping_templates
        return skipping_templates

    def skip_templates(self, page) -> str:
        """Check whether the page is to be skipped due to skipping template.

        @param page: treated page
        @type page: pywikibot.Page
        @return: the template which leads to skip, or an empty string
        """
        # NOTE(review): the argument was missing from the extracted source;
        # page.site is assumed because get_skipping_templates() takes a site
        # -- confirm against upstream.
        skipping_templates = self.get_skipping_templates(page.site)
        for template, _ in page.templatesWithParams():
            if template in skipping_templates:
                return template.title(with_ns=False)
        return ''

    def skip_page(self, page) -> bool:
        """Skip pages which are unwanted to treat."""
        if page.editTime() > self.lastEditBefore:
            pywikibot.output(
                'Last edit on {page} was on {page.latest_revision.timestamp}.'
                '\nToo recent. Skipping.'.format(page=page))
            return True

        if page.oldest_revision.timestamp > self.pageAgeBefore:
            pywikibot.output(
                'Page creation of {page} on {page.oldest_revision.timestamp} '
                'is too recent. Skipping.'.format(page=page))
            return True

        if page.isCategoryRedirect():
            pywikibot.output('{} is a category redirect. Skipping.'
                             .format(page))
            return True

        if page.langlinks():
            # FIXME: Implement this
            pywikibot.output(
                'Found language links (interwiki links) for {}.\n'
                "Haven't implemented that yet so skipping."
                .format(page))
            return True

        template = self.skip_templates(page)
        if template:
            pywikibot.output('%s contains {{%s}}. Skipping.'
                             % (page, template))
            return True

        return super().skip_page(page)

    def treat_page_and_item(self, page, item) -> None:
        """Treat page/item.

        A page whose item already exists is only touched, and only when the
        'touch' option is exactly True. Otherwise a new item is created for
        the page.
        """
        if item and item.exists():
            pywikibot.output('{0} already has an item: {1}.'
                             .format(page, item))
            if self.getOption('touch') is True:
                self._touch_page(page)
            return

        self.create_item_for_page(
            page, callback=lambda _, exc: self._callback(page, exc))
def main(*args) -> None:
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: str
    """
    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)
    gen = pagegenerators.GeneratorFactory()

    options = {}
    for arg in local_args:
        if arg.startswith(('-pageage:', '-lastedit:')):
            # '-pageage:21' -> options['pageage'] = 21
            key, val = arg.split(':', 1)
            options[key[1:]] = int(val)
        elif gen.handleArg(arg):
            # Consumed by the generator factory.
            pass
        else:
            # Any other flag is a boolean switch, e.g. '-touch'.
            options[arg[1:].lower()] = True

    generator = gen.getCombinedGenerator(preload=True)
    if not generator:
        return

    bot = NewItemRobot(generator, **options)

    # NOTE(review): the extracted source lost several tokens in the rest of
    # this function. The login check, the arguments of pywikibot.User(), the
    # second format() argument and the final bot.run() are reconstructed --
    # confirm against upstream.
    if not bot.site.logged_in():
        bot.site.login()
    user = pywikibot.User(bot.site, bot.site.username())

    if (bot.getOption('touch') == 'newly'
            and 'autoconfirmed' not in user.groups()):
        pywikibot.warning(fill(
            'You are logged in as {}, an account that is '
            'not in the autoconfirmed group on {}. Script '
            'will not touch pages linked to newly created '
            'items to avoid triggering edit rates or '
            'captchas. Use -touch param to force this.'
            .format(user.username, bot.site)))
        bot.options['touch'] = False

    bot.run()
# Run the bot only when executed as a script, not when imported.
if __name__ == '__main__':
    main()