#!/usr/bin/python3
"""Tests for archivebot scripts."""
#
# (C) Pywikibot team, 2014-2022
#
# Distributed under the terms of the MIT license.
#
import unittest
from contextlib import suppress
from datetime import datetime
import pywikibot
from pywikibot.exceptions import Error
from pywikibot.textlib import TimeStripper
from scripts import archivebot
from tests.aspects import TestCase
# Lower bound on the number of discussion threads TestArchiveBot expects to
# find on ``user talk:xqt``, keyed by Wikipedia site code.  Entries are
# grouped by the first letter of the code; insertion order is unchanged.
THREADS = {
    'als': 4, 'ar': 1,
    'bar': 0, 'bg': 0, 'bjn': 1, 'bs': 0,
    'ca': 5, 'ckb': 2, 'cs': 0,
    'de': 1,
    'en': 25, 'eo': 2, 'es': 13,
    'fa': 2, 'fr': 25, 'frr': 2,
    'hi': 0, 'hr': 2, 'hu': 5,
    'id': 3, 'it': 25,
    'ja': 4,
    'la': 0, 'lt': 1,
    'nl': 9, 'nn': 0, 'no': 0,
    'pdc': 25, 'pfl': 3, 'pl': 8, 'pt': 0,
    'ro': 1, 'ru': 20,
    'scn': 2, 'simple': 1, 'sr': 0, 'sv': 5,
    'th': 1, 'tr': 7,
    'ug': 0, 'uk': 1, 'uz': 1,
    'vi': 1,
    'zh': 4, 'zh-yue': 2,
}

# Same kind of lower bound, used by TestArchiveBotAfterDateUpdate against
# ``user talk:mpaa`` for the sites whose date format changed on the wiki.
THREADS_WITH_UPDATED_FORMAT = {
    'eo': 1,
    'pdc': 1,
}
class TestArchiveBotFunctionsWithSites(TestCase):

    """Check archivebot helpers whose result depends on the site."""

    # Sites addressed by key through ``self.get_site(<key>)`` below.
    sites = {
        'enwiki': {
            'family': 'wikipedia',
            'code': 'en',
        },
        'frwikt': {
            'family': 'wiktionary',
            'code': 'fr',
        },
        'jawiki': {
            'family': 'wikipedia',
            'code': 'ja',
        }
    }

    def test_str2localized_duration_English(self):
        """Durations are rendered in English for the 'enwiki' site."""
        enwiki = self.get_site('enwiki')
        # (shorthand duration, expected localized text)
        expectations = (
            ('0s', '0 seconds'),
            ('1d', '1 day'),
            ('10h', '10 hours'),
        )
        for shorthand, localized in expectations:
            self.assertEqual(
                archivebot.str2localized_duration(enwiki, shorthand),
                localized)

    def test_str2localized_duration_French(self):
        """Durations are rendered in French for the 'frwikt' site."""
        frwikt = self.get_site('frwikt')
        expectations = (
            ('10d', '10 jours'),
            ('1y', '1 an'),
        )
        for shorthand, localized in expectations:
            self.assertEqual(
                archivebot.str2localized_duration(frwikt, shorthand),
                localized)

    def test_str2localized_duration_Japanese(self):
        """Durations are rendered in Japanese for the 'jawiki' site."""
        jawiki = self.get_site('jawiki')
        localized = archivebot.str2localized_duration(jawiki, '4000s')
        self.assertEqual(localized, '4000 秒')
class TestArchiveBotFunctions(TestCase):

    """Check the site-independent helper functions of archivebot."""

    # NOTE(review): presumably marks these tests as needing no network
    # access -- confirm against tests.aspects.
    net = False

    def test_str2size(self):
        """Valid size shorthands are parsed into (value, unit) tuples."""
        # (input string, expected result of archivebot.str2size)
        expectations = (
            ('0', (0, 'B')),
            ('3000', (3000, 'B')),
            ('4 K', (4096, 'B')),
            ('1 M', (1_048_576, 'B')),
            ('2T', (2, 'T')),
            ('2 000', (2000, 'B')),
            ('2 000B', (2000, 'B')),
            ('2 000 B', (2000, 'B')),
        )
        for shorthand, parsed in expectations:
            self.assertEqual(archivebot.str2size(shorthand), parsed)

    def test_str2size_failures(self):
        """Invalid size shorthands raise MalformedConfigError."""
        malformed = ('4 KK', 'K4', '4X', '1 234 56', '1234 567')
        for shorthand in malformed:
            with self.assertRaises(archivebot.MalformedConfigError):
                archivebot.str2size(shorthand)
class TestArchiveBot(TestCase):

    """Test archivebot script on 40+ Wikipedia sites.

    ``sites`` holds one Wikipedia entry per language code in ``THREADS``;
    ``THREADS[code]`` is the minimum number of threads that must be found
    on ``user talk:xqt`` of that site.
    """

    family = 'wikipedia'
    sites = {code: {'family': 'wikipedia', 'code': code} for code in THREADS}
    cached = True
    expected_failures = ['ar', 'scn', 'th']

    def test_archivebot(self, code=None):
        """Test archivebot for one site.

        :param code: site code used to look up the site and the expected
            minimum thread count in ``THREADS``
        """
        site = self.get_site(code)
        page = pywikibot.Page(site, 'user talk:xqt')
        # No archiver is attached; only parsing of the page is exercised.
        talk = archivebot.DiscussionPage(page, None)

        self.assertIsInstance(talk.archives, dict)
        self.assertIsInstance(talk.archived_threads, int)
        # assertIsNone reports the offending value if the check fails.
        self.assertIsNone(talk.archiver)
        self.assertIsInstance(talk.header, str)
        self.assertIsInstance(talk.timestripper, TimeStripper)

        self.assertIsInstance(talk.threads, list)
        found = len(talk.threads)
        expected = THREADS[code]
        self.assertGreaterEqual(
            found, expected,
            f'{found} Threads found on {talk},\n{expected} or more expected')

        for thread in talk.threads:
            # The tail of the thread content is part of the subTest
            # parameters, so a failure report shows where the thread ends.
            with self.subTest(thread=thread.title,
                              content=thread.content[-72:]):
                self.assertIsInstance(thread, archivebot.DiscussionThread)
                self.assertIsInstance(thread.title, str)
                self.assertIsInstance(thread.ts, TimeStripper)
                self.assertEqual(thread.ts, talk.timestripper)
                self.assertIsInstance(thread.code, str)
                self.assertEqual(thread.code, talk.timestripper.site.code)
                self.assertIsInstance(thread.content, str)
                self.assertIsInstance(thread.timestamp, datetime)

    # FIXME: see TestArchiveBotAfterDateUpdate()
    # 'ar': Uses Arabic acronym for TZ
    # 'eo': changed month name setting in wiki from Sep to sep
    #       Localisation updates from https://translatewiki.net.
    #       Change-Id: I3d9b14ae3a5d77fea9694ef113b0180e5677c39e
    #       ref: mediawiki languages/i18n/eo.json
    #       for new entries it should work
    # 'pdc': changed month name setting in wiki over time (?)
    #       in old posts in talk page, February is "Feb.", site message
    #       gives <message name="feb" xml:space="preserve">Han.</message>.
    #       for new entries it should work
    # 'th': year is 2552 while regex assumes 19..|20.., might be fixed
class TestArchiveBotAfterDateUpdate(TestCase):

    """Test archivebot script on failures on Wikipedia sites.

    If failure is due to updated date format on wiki, test pages with
    new format only.

    ``sites`` holds one Wikipedia entry per language code in
    ``THREADS_WITH_UPDATED_FORMAT``; the mapped value is the minimum
    number of threads that must be found on ``user talk:mpaa``.
    """

    family = 'wikipedia'
    sites = {code: {'family': 'wikipedia', 'code': code}
             for code in THREADS_WITH_UPDATED_FORMAT}
    cached = True

    def test_archivebot(self, code=None):
        """Test archivebot for one site.

        :param code: site code used to look up the site and the expected
            minimum thread count in ``THREADS_WITH_UPDATED_FORMAT``
        """
        site = self.get_site(code)
        page = pywikibot.Page(site, 'user talk:mpaa')
        # No archiver is attached; only parsing of the page is exercised.
        talk = archivebot.DiscussionPage(page, None)

        self.assertIsInstance(talk.archives, dict)
        self.assertIsInstance(talk.archived_threads, int)
        # assertIsNone reports the offending value if the check fails.
        self.assertIsNone(talk.archiver)
        self.assertIsInstance(talk.header, str)
        self.assertIsInstance(talk.timestripper, TimeStripper)

        self.assertIsInstance(talk.threads, list)
        found = len(talk.threads)
        expected = THREADS_WITH_UPDATED_FORMAT[code]
        self.assertGreaterEqual(
            found, expected,
            f'{found} Threads found on {talk},\n{expected} or more expected')

        for thread in talk.threads:
            # The tail of the thread content is part of the subTest
            # parameters, so a failure report shows where the thread ends.
            with self.subTest(thread=thread.title,
                              content=thread.content[-72:]):
                self.assertIsInstance(thread, archivebot.DiscussionThread)
                self.assertIsInstance(thread.title, str)
                self.assertIsInstance(thread.ts, TimeStripper)
                self.assertEqual(thread.ts, talk.timestripper)
                self.assertIsInstance(thread.code, str)
                self.assertEqual(thread.code, talk.timestripper.site.code)
                self.assertIsInstance(thread.content, str)
                self.assertIsInstance(thread.timestamp, datetime)
class TestDiscussionPageObject(TestCase):

    """Exercise the DiscussionPage object of archivebot."""

    cached = True
    family = 'wikipedia'
    code = 'test'

    def load_page(self, title: str):
        """Create and load a DiscussionPage and keep it in ``self.page``.

        :param title: title of the wiki page to wrap
        """
        wiki_page = pywikibot.Page(self.site, title)
        config_tmpl = pywikibot.Page(self.site, 'User:MiszaBot/config')
        archiver = archivebot.PageArchiver(
            page=wiki_page, template=config_tmpl, salt='')
        discussion = archivebot.DiscussionPage(wiki_page, archiver)
        discussion.load_page()
        self.page = discussion

    def testThreadsWithSubsections(self):
        """Only top-level sections are recognized as threads.

        The loaded page, Talk:For-pywikibot-archivebot/testcase2, must
        have::

            {{User:MiszaBot/config
            |archive = Talk:Main_Page/archive
            |algo = old(30d)
            }}
            = Front matter =
            placeholder
            == A ==
            foo bar
            === A1 ===
            foo bar bar
            ==== A11 ====
            foo
            == B ==
            foo bar bar bar
        """
        self.load_page('Talk:For-pywikibot-archivebot/testcase2')
        titles = [thread.title for thread in self.page.threads]
        self.assertEqual(titles, ['A', 'B'])

    def test_is_full_method(self):
        """Check DiscussionPage.is_full against several size limits.

        The statement order matters: ``full`` is reset to False by hand
        before each check that must be evaluated afresh.
        """
        self.load_page('Talk:For-pywikibot-archivebot')
        talk = self.page
        self.assertEqual(talk.archiver.maxsize, 2_096_128)
        self.assertEqual(talk.size(), 181)

        # Limits are (value, unit) tuples.
        # NOTE(review): 'B' and 'T' presumably mean bytes and threads --
        # confirm against archivebot.str2size.
        self.assertTrue(talk.is_full((100, 'B')))

        talk.full = False
        self.assertFalse(talk.is_full((1000, 'B')))

        talk.full = False
        self.assertFalse(talk.is_full((3, 'T')))

        talk.full = False
        self.assertTrue(talk.is_full((2, 'T')))
        self.assertTrue(talk.is_full((3, 'T')))  # page.full is kept

        talk.full = False
        talk.archiver.maxsize = 100
        self.assertTrue(talk.is_full((1000, 'B')))  # maxsize is used
class TestPageArchiverObject(TestCase):

    """Exercise construction of the PageArchiver object."""

    cached = True
    family = 'wikipedia'
    code = 'test'

    def testLoadConfigInTemplateNamespace(self):
        """Config is loaded when TEMPLATE_PAGE is in the Template ns.

        Talk:For-pywikibot-archivebot-01 must have::

            {{Pywikibot_archivebot
            |archive = Talk:Main_Page/archive
            |algo = old(30d)
            }}
        """
        site = self.get_site()
        talk_page = pywikibot.Page(site, 'Talk:For-pywikibot-archivebot-01')

        # TEMPLATE_PAGE assumed in ns=10 if ns is not explicit.
        templates = (
            pywikibot.Page(site, 'Template:Pywikibot_archivebot'),
            pywikibot.Page(site, 'Pywikibot_archivebot', ns=10),
        )
        # Both spellings of the template title must be accepted.
        for template in templates:
            try:
                archivebot.PageArchiver(talk_page, template, '')
            except Error as exc:
                self.fail(f'PageArchiver() raised {exc}!')

    def testLoadConfigInOtherNamespace(self):
        """Config is loaded only with the explicit non-Template ns.

        Talk:For-pywikibot-archivebot must have::

            {{User:MiszaBot/config
            |archive = Talk:Main_Page/archive
            |algo = old(30d)
            }}
        """
        site = self.get_site()
        talk_page = pywikibot.Page(site, 'Talk:For-pywikibot-archivebot')
        prefixed_tmpl = pywikibot.Page(site, 'User:MiszaBot/config', ns=10)
        bare_tmpl = pywikibot.Page(site, 'MiszaBot/config', ns=10)

        # TEMPLATE_PAGE assumed in ns=10 if ns is not explicit.
        try:
            archivebot.PageArchiver(talk_page, prefixed_tmpl, '')
        except Error as exc:
            self.fail(f'PageArchiver() raised {exc}!')

        # Without the "User:" prefix the config is expected to be missing.
        with self.assertRaises(archivebot.MissingConfigError):
            archivebot.PageArchiver(talk_page, bare_tmpl, '')
if __name__ == '__main__':  # pragma: no cover
    # Run the tests when the file is executed as a script.  By default
    # unittest.main() ends by raising SystemExit; it is suppressed here.
    with suppress(SystemExit):
        unittest.main()