#!/usr/bin/python3
r"""
Script to delete files that are also present on Wikimedia Commons.
Do not run this script on Wikimedia Commons itself. It works based on
a given array of templates defined below.
Files are downloaded and compared. If the files match, it can be deleted on
the source wiki. If multiple versions of the file exist, the script will not
delete. If the SHA1 comparison is not equal, the script will not delete.
A sysop rights on the local wiki is required if you want all features of
this script to work properly.
This script understands various command-line arguments:
-always run automatically, do not ask any questions. All files
that qualify for deletion are deleted. Reduced screen
output.
-replace replace links if the files are equal and the file names
differ
-replacealways replace links if the files are equal and the file names
differ without asking for confirmation
-replaceloose Do loose replacements. This will replace all occurrences
of the name of the file (and not just explicit file
syntax). This should work to catch all instances of the
file, including where it is used as a template parameter
or in galleries. However, it can also make more mistakes.
-replaceonly Use this if you do not have a local sysop rights, but do
wish to replace links from the NowCommons template.
Example
-------
python pwb.py nowcommons -replaceonly -replaceloose -replacealways -replace
.. note:: This script is a
:py:obj:`ConfigParserBot <pywikibot.bot.ConfigParserBot>`. All options
can be set within a settings file which is scripts.ini by default.
"""
#
# (C) Pywikibot team, 2006-2022
#
# Distributed under the terms of the MIT license.
#
import sys
from itertools import chain
import pywikibot
from pywikibot import i18n
from pywikibot import pagegenerators as pg
from pywikibot.bot import Bot, ConfigParserBot
from pywikibot.exceptions import IsRedirectPageError, NoPageError
from pywikibot.tools import filter_unique
from scripts.image import ImageRobot as ImageBot
nowcommons = {
'_default': [
'NowCommons'
],
'ar': [
'الآن كومنز',
'الآن كومونز',
],
'de': [
'NowCommons',
'NC',
'Nowcommons',
'Now Commons',
'NowCommons/Mängel',
'NC/M',
],
'en': [
'NowCommons',
'Ncd',
],
'eo': [
'Nun en komunejo',
'NowCommons',
],
'fa': [
'موجود در انبار',
'NowCommons',
],
'fr': [
'Image sur Commons',
'DoublonCommons',
'Déjà sur Commons',
'Maintenant sur commons',
'Désormais sur Commons',
'NC',
'NowCommons',
'Nowcommons',
'Sharedupload',
'Sur Commons',
'Sur Commons2',
],
'he': [
'גם בוויקישיתוף'
],
'hu': [
'Azonnali-commons',
'NowCommons',
'Nowcommons',
'NC'
],
'ia': [
'OraInCommons'
],
'it': [
'NowCommons',
],
'ja': [
'NowCommons',
],
'ko': [
'공용중복',
'공용 중복',
'NowCommons',
'Now Commons',
'Nowcommons',
],
'nds-nl': [
'NoenCommons',
'NowCommons',
],
'nl': [
'NuCommons',
'Nucommons',
'NowCommons',
'Nowcommons',
'NCT',
'Nct',
],
'ro': [
'NowCommons'
],
'ru': [
'NowCommons',
'NCT',
'Nowcommons',
'Now Commons',
'Db-commons',
'Перенесено на Викисклад',
'На Викискладе',
],
'sr': [
'NowCommons',
'На Остави',
],
'zh': [
'NowCommons',
'Nowcommons',
'NCT',
],
}
namespace_in_template = [
'en',
'ia',
'it',
'ja',
'ko',
'lt',
'ro',
'zh',
]
[docs]class NowCommonsDeleteBot(Bot, ConfigParserBot):
"""Bot to delete migrated files.
.. versionchanged:: 7.0
NowCommonsDeleteBot is a ConfigParserBot
"""
update_options = {
'replace': False,
'replacealways': False,
'replaceloose': False,
'replaceonly': False,
}
def __init__(self, **kwargs) -> None:
"""Initializer."""
super().__init__(**kwargs)
self.site = pywikibot.Site()
if not self.site.has_image_repository:
sys.exit('There must be a file repository to run this script')
self.commons = self.site.image_repository()
if self.site == self.commons:
sys.exit(
'You cannot run this bot on file repository like Commons.')
self.summary = i18n.twtranslate(self.site,
'imagetransfer-nowcommons_notice')
[docs] def nc_templates_list(self):
"""Return nowcommons templates."""
if self.site.lang in nowcommons:
return nowcommons[self.site.lang]
return nowcommons['_default']
@property
def nc_templates(self):
"""A set of now commons template Page instances."""
if not hasattr(self, '_nc_templates'):
self._nc_templates = {pywikibot.Page(self.site, title, ns=10)
for title in self.nc_templates_list()}
return self._nc_templates
@property
def generator(self):
"""Generator method."""
gens = (t.getReferences(follow_redirects=True, namespaces=[6],
only_template_inclusion=True)
for t in self.nc_templates)
gen = chain(*gens)
gen = filter_unique(gen, key=lambda p: '{}:{}:{}'.format(*p._cmpkey()))
gen = pg.PreloadingGenerator(gen)
return gen
[docs] def find_file_on_commons(self, local_file_page):
"""Find filename on Commons."""
for template_name, params in local_file_page.templatesWithParams():
if template_name not in self.nc_templates:
continue
if not params:
file_on_commons = local_file_page.title(with_ns=False)
elif self.site.lang in namespace_in_template:
skip = False
file_on_commons = None
for par in params:
val = par.split('=')
if len(val) == 1 and not skip:
file_on_commons = par[par.find(':') + 1:]
break
if val[0].strip() == '1':
file_on_commons = \
val[1].strip()[val[1].strip().find(':') + 1:]
break
skip = True
if not file_on_commons:
file_on_commons = local_file_page.title(with_ns=False)
else:
val = params[0].split('=')
if len(val) == 1:
file_on_commons = params[0].strip()
else:
file_on_commons = val[1].strip()
return file_on_commons
[docs] def run(self) -> None:
"""Run the bot."""
commons = self.commons
comment = self.summary
for page in self.generator:
self.current_page = page
try:
local_file_page = pywikibot.FilePage(self.site, page.title())
if local_file_page.file_is_shared():
pywikibot.output('File is already on Commons.')
continue
sha1 = local_file_page.latest_file_info.sha1
file_on_commons = self.find_file_on_commons(local_file_page)
if not file_on_commons:
pywikibot.output('NowCommons template not found.')
continue
commons_file_page = pywikibot.FilePage(commons, 'File:'
+ file_on_commons)
if (local_file_page.title(with_ns=False)
!= commons_file_page.title(with_ns=False)):
using_pages = list(local_file_page.using_pages())
if using_pages and using_pages != [local_file_page]:
pywikibot.output(
'"<<lightred>>{}<<default>>" is still used in {} '
'pages.'.format(
local_file_page.title(with_ns=False),
len(using_pages)))
if self.opt.replace:
pywikibot.output(
'Replacing "<<lightred>>{}<<default>>" by '
'"<<lightgreen>>{}<<default>>".'.format(
local_file_page.title(with_ns=False),
commons_file_page.title(with_ns=False)))
bot = ImageBot(
local_file_page.usingPages(),
local_file_page.title(with_ns=False),
commons_file_page.title(with_ns=False),
always=self.opt.replacealways,
loose=self.opt.replaceloose)
bot.run()
# If the image is used with the urlname the
# previous function won't work
is_used = bool(list(pywikibot.FilePage(
self.site,
page.title()).using_pages(total=1)))
if is_used and self.opt.replaceloose:
bot = ImageBot(
local_file_page.usimgPages(),
local_file_page.title(with_ns=False,
as_url=True),
commons_file_page.title(with_ns=False),
always=self.opt.replacealways,
loose=self.opt.replaceloose)
bot.run()
# refresh because we want the updated list
using_pages = len(list(pywikibot.FilePage(
self.site, page.title()).using_pages()))
else:
pywikibot.output('Please change them manually.')
continue
pywikibot.output(
'No page is using "<<lightgreen>>{}<<default>>" '
'anymore.'.format(
local_file_page.title(with_ns=False)))
commons_text = commons_file_page.get()
if not self.opt.replaceonly:
if sha1 == commons_file_page.latest_file_info.sha1:
pywikibot.output(
'The file is identical to the one on Commons.')
if len(local_file_page.get_file_history()) > 1:
pywikibot.output(
'This file has a version history. Please '
'delete it manually after making sure that '
'the old versions are not worth keeping.')
continue
if self.opt.always is False:
format_str = (
'\n\n>>>> Description on '
'<<<lightpurple>>{}<<default>> <<<<\n'
)
pywikibot.output(format_str.format(page.title()))
pywikibot.output(local_file_page.get())
pywikibot.output(
format_str.format(commons_file_page.title()))
pywikibot.output(commons_text)
if pywikibot.input_yn(
'Does the description on Commons contain '
'all required source and license\n'
'information?',
default=False, automatic_quit=False):
local_file_page.delete(
'{} [[:commons:File:{}]]'
.format(comment, file_on_commons),
prompt=False)
else:
local_file_page.delete(
comment + ' [[:commons:File:{}]]'
.format(file_on_commons),
prompt=False)
else:
pywikibot.output('The file is not identical to '
'the one on Commons.')
except (NoPageError, IsRedirectPageError) as e:
pywikibot.output(str(e[0]))
continue
else:
self.counter['read'] += 1
if not self.counter['read']:
pywikibot.output('No transcluded files found for {}.'
.format(self.nc_templates_list()[0]))
self.exit()
[docs]def main(*args: str) -> None:
"""
Process command line arguments and invoke bot.
If args is an empty list, sys.argv is used.
:param args: command line arguments
"""
options = {}
for arg in pywikibot.handle_args(args):
if arg == '-replacealways':
options['replace'] = True
options['replacealways'] = True
elif arg.startswith('-') and arg[1:] in ('always',
'replace',
'replaceloose',
'replaceonly'):
options[arg[1:]] = True
bot = NowCommonsDeleteBot(**options)
bot.run()
if __name__ == '__main__':
main()