Source code for scripts.interwikidata

#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Script to handle interwiki links based on Wikibase.

This script connects pages to Wikibase items using language links on the page.
If multiple language links are present, and they are connected to different
items, the bot skips. After connecting the page to an item, language links
can be removed from the page.

These command line parameters can be used to specify which pages to work on:

&params;

Furthermore, the following command line parameters are supported:

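-clean            Remove the language links from the page text once the page
                  is connected to an item that covers all of them.

-create           Create a new item if neither the page nor any of the pages
                  it links to has one.

-merge            Merge the item of the page into the item shared by the
                  pages it links to.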

-summary:         Use your own edit summary for cleaning the page.
"""

# (C) Pywikibot team, 2015-2020
#
# Distributed under the terms of the MIT license.
#
from typing import Set, Union

import pywikibot

from pywikibot import pagegenerators, output, warning
from pywikibot.bot import ExistingPageBot, SingleSiteBot, suggest_help

# Maps the '&params;' placeholder used in the module docstring to the help
# text of the page generators. This is required for the text that is shown
# when you run this script with the parameter -help.
docuReplacements = {'&params;': pagegenerators.parameterHelp}  # noqa: N816

# Numbers of the namespaces the bot works on: main, project, template,
# category. IWBot.treat_page skips pages outside of them unless the
# 'ignore_ns' option is set.
NAMESPACES = (0, 4, 10, 14)

# TODO: Some templates on pages, like csd, inuse and afd templates,
# should cause the bot to skip the page, see T134497


class IWBot(ExistingPageBot, SingleSiteBot):

    """
    The bot for interwiki.

    For every treated page the language links found in the page text are
    used to locate (or, on request, create or merge) the matching item in
    the data repository of the site. Once the page is connected, the
    language links can be removed from the page text.

    Options (all passed as keyword arguments):

    - clean: remove the language links from the page after connecting it
    - create: create a new item when no linked page has one
    - merge: merge the item of the page into the item of the linked pages
    - summary: edit summary used when cleaning the page
    - ignore_ns: do not restrict the bot to NAMESPACES (tests only)
    """

    def __init__(self, **kwargs) -> None:
        """
        Initialize the bot.

        @raises ValueError: the site has no data repository
        """
        self.availableOptions.update({
            'clean': False,
            'create': False,
            'merge': False,
            'summary': None,
            'ignore_ns': False,  # used by interwikidata_tests only
        })
        super().__init__(**kwargs)
        if not self.site.has_data_repository:
            raise ValueError('{site} does not have a data repository, '
                             'use interwiki.py instead.'.format(
                                 site=self.site))
        self.repo = self.site.data_repository()
        # Fall back to the translated default summary when none (or an
        # empty one) was given.
        if not self.getOption('summary'):
            self.options['summary'] = pywikibot.i18n.twtranslate(
                self.site, 'interwikidata-clean-summary')

    def treat_page(self) -> None:
        """
        Connect the current page to an item and clean it if requested.

        Pages outside of NAMESPACES (unless 'ignore_ns' is set) and pages
        without language links are skipped.
        """
        page = self.current_page
        if (page.namespace() not in NAMESPACES
                and not self.getOption('ignore_ns')):
            output('{page} is not in allowed namespaces, skipping'
                   .format(page=page.title(as_link=True)))
            return

        self.iwlangs = pywikibot.textlib.getLanguageLinks(
            page.text, insite=page.site)
        if not self.iwlangs:
            output('No interlanguagelinks on {page}'.format(
                page=page.title(as_link=True)))
            return

        try:
            item = pywikibot.ItemPage.fromPage(page)
        except pywikibot.NoPage:
            item = None

        if item is None:
            # try_to_add() returns None when there is nothing to add the
            # page to, and False on a conflict; only the former may lead
            # to a new item.
            item = self.try_to_add()
            if self.getOption('create') and item is None:
                item = self.create_item()
        elif self.getOption('merge'):
            item = self.try_to_merge(item)

        # item is None or False here when connecting the page failed.
        if item and self.getOption('clean'):
            self.current_item = item
            self.clean_page()

    def create_item(self) -> pywikibot.ItemPage:
        """
        Create item in repo for current_page.

        The new item gets a sitelink and a label for the current page and
        for every existing page found in self.iwlangs.

        @return: the newly created item
        """
        own_dbname = self.site.dbName()
        own_title = self.current_page.title()
        data = {
            'sitelinks': {
                own_dbname: {'site': own_dbname, 'title': own_title},
            },
            'labels': {
                self.site.lang: {'language': self.site.lang,
                                 'value': own_title},
            },
        }
        for site, page in self.iwlangs.items():
            if not page.exists():
                continue
            dbname = site.dbName()
            title = page.title()
            data['sitelinks'][dbname] = {'site': dbname, 'title': title}
            data['labels'][site.lang] = {'language': site.lang,
                                         'value': title}
        summary = ('Bot: New item with sitelink(s) from '
                   + self.current_page.title(as_link=True, insite=self.repo))

        item = pywikibot.ItemPage(self.repo)
        item.editEntity(data, new='item', summary=summary)
        output('Created item {item}'.format(item=item.getID()))
        return item

    def handle_complicated(self) -> bool:
        """
        Handle pages when they have interwiki conflict.

        When this method returns True it means the conflict has been
        resolved and it's okay to clean old interwiki links.
        This method should change self.current_item and fix conflicts.
        Change it in subclasses; this implementation resolves nothing.

        @return: always False here
        """
        return False

    def clean_page(self) -> None:
        """
        Clean interwiki links from the page.

        The links are only removed if every linked site has a sitelink in
        self.current_item, or if handle_complicated() reports that the
        conflict was resolved.
        """
        if not self.iwlangs:
            return
        linked_dbnames = {iw_site.dbName() for iw_site in self.iwlangs}
        # Any linked site missing from the item would lose its link.
        if linked_dbnames - set(self.current_item.sitelinks.keys()):
            if not self.handle_complicated():
                warning('Interwiki conflict in {}, skipping...'
                        .format(self.current_page.title(as_link=True)))
                return
        output('Cleaning up the page')
        new_text = pywikibot.textlib.removeLanguageLinks(
            self.current_page.text, site=self.current_page.site)
        self.put_current(new_text, summary=self.getOption('summary'))

    def get_items(self) -> Set[pywikibot.ItemPage]:
        """
        Return all items of pages linked through the interwiki.

        Linked pages that do not exist or have no item are reported and
        left out.
        """
        wd_data = set()
        for iw_page in self.iwlangs.values():
            if not iw_page.exists():
                warning('Interwiki {} does not exist, skipping...'
                        .format(iw_page.title(as_link=True)))
                continue
            try:
                wd_data.add(pywikibot.ItemPage.fromPage(iw_page))
            except pywikibot.NoPage:
                output('Interwiki {} does not have an item'
                       .format(iw_page.title(as_link=True)))
        return wd_data

    def try_to_add(self) -> Union[pywikibot.ItemPage, bool, None]:
        """
        Add current page in repo.

        @return: the item the page was added to, None if no linked page
            has an item, or False on an interwiki conflict
        """
        wd_data = self.get_items()
        if not wd_data:
            # will create a new item with interwiki
            return None
        if len(wd_data) > 1:
            warning('Interwiki conflict in {}, skipping...'
                    .format(self.current_page.title(as_link=True)))
            return False
        item = next(iter(wd_data))
        # The item already has a sitelink for this site, i.e. to some
        # other page.
        if self.current_page.site.dbName() in item.sitelinks:
            warning('Interwiki conflict in {}, skipping...'
                    .format(item.title(as_link=True)))
            return False
        output('Adding link to ' + item.title())
        item.setSitelink(self.current_page, summary='Added ' + (
            self.current_page.title(as_link=True, insite=item.site)))
        return item

    def try_to_merge(self, item) -> Union[pywikibot.ItemPage, bool, None]:
        """
        Merge two items.

        @param item: the item currently connected to the page
        @return: the item the page is connected to afterwards, None if no
            linked page has an item, or False if the linked pages belong
            to different items or the merge failed
        """
        wd_data = self.get_items()
        if not wd_data:
            # todo: add links to item
            return None
        if len(wd_data) > 1:
            warning('Interwiki conflict in {}, skipping...'
                    .format(self.current_page.title(as_link=True)))
            return False
        target_item = next(iter(wd_data))
        if target_item.getID() == item.getID():
            # The page and its interwiki links already share one item.
            # There is nothing to merge, and trying to merge an item into
            # itself would only end in the failure branch below and block
            # the cleanup of a correctly connected page.
            return target_item
        try:
            item.mergeInto(target_item)
        except pywikibot.data.api.APIError:
            # warning already printed by the API
            return False
        # Reload the target so that it reflects the merged content.
        target_item.get(force=True)
        return target_item
def main(*args) -> None:
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    Arguments handled by the generator factory select the pages to work
    on. Every other argument starting with '-' is passed to the bot as an
    option; '-summary:text' carries a value, all other options are
    switches. Arguments that are neither are reported and ignored.

    @param args: command line arguments
    @type args: str
    """
    local_args = pywikibot.handle_args(args)
    gen_factory = pagegenerators.GeneratorFactory()
    options = {}
    for arg in local_args:
        if gen_factory.handleArg(arg):
            continue
        name, _, value = arg.partition(':')
        if not name.startswith('-'):
            # Without the leading dash there is no option name; using None
            # as a key would make IWBot(**options) fail with a TypeError.
            warning('Ignoring unknown argument "{}"'.format(arg))
            continue
        option = name[1:]
        if option == 'summary':
            options[option] = value
        else:
            options[option] = True

    site = pywikibot.Site()

    generator = gen_factory.getCombinedGenerator(preload=True)
    if generator:
        bot = IWBot(generator=generator, site=site, **options)
        bot.run()
    else:
        suggest_help(missing_generator=True)
# Run the bot only when the file is executed as a script.
if __name__ == '__main__':
    main()