#!/usr/bin/env python3"""This bot uploads text from djvu files onto pages in the "Page" namespace... note:: It is intended to be used for Wikisource.The following parameters are supported:-index: name of the index page (without the Index: prefix)-djvu: path to the djvu file, it shall be: .. hlist:: * path to a file name * dir where a djvu file name as index is located optional, by default is current dir '.'-pages:<start>-<end>,...<start>-<end>,<start>-<end> Page range to upload; optional, :samp:`start=1`, :samp:`end={djvu file number of images}`. Page ranges can be specified as:: A-B -> pages A until B A- -> pages A until number of images A -> just page A -B -> pages 1 until BThis script is a :class:`ConfigParserBot <bot.ConfigParserBot>`. Thefollowing options can be set within a settings file which is scripts.iniby default:-summary: [str] Custom edit summary. Use quotes if edit summary contains spaces.-force Overwrites existing text optional, default False.-always Do not bother asking to confirm any of the changes."""## (C) Pywikibot team, 2008-2024## Distributed under the terms of the MIT license.#from__future__importannotationsimportos.pathimportpywikibotfrompywikibotimporti18nfrompywikibot.botimportSingleSiteBotfrompywikibot.exceptionsimportNoPageErrorfrompywikibot.proofreadpageimportProofreadPagefrompywikibot.tools.djvuimportDjVuFile
class DjVuTextBot(SingleSiteBot):

    """A bot that uploads text-layer from djvu files to Page:namespace.

    Works only on sites with Proofread Page extension installed.

    .. versionchanged:: 7.0
       CheckerBot is a ConfigParserBot
    """

    # NOTE(review): the versionchanged entry above mentions "CheckerBot"
    # and ConfigParserBot although this class derives from SingleSiteBot;
    # it looks copied from another script -- confirm and correct.

    update_options = {
        'force': False,
        'summary': '',
    }

    def __init__(
        self,
        djvu,
        index,
        pages: tuple | list | None = None,
        **kwargs
    ) -> None:
        """Initializer.

        :param djvu: djvu from where to fetch the text layer
        :type djvu: DjVuFile object
        :param index: index page in the Index: namespace
        :type index: Page object
        :param pages: page intervals to upload; either a single
            ``(start, end)`` pair or a sequence of such pairs. If empty
            or None, all pages ``1 .. djvu.number_of_images()`` are used.
        """
        super().__init__(**kwargs)
        self._djvu = djvu
        self._index = index
        self._prefix = self._index.title(with_ns=False)
        self._page_ns = self.site._proofread_page_ns.custom_name

        # page_number_gen() unpacks every item of self._pages as
        # (start, end), so always store a list of pairs. A bare pair of
        # ints (including the default range) must be wrapped first.
        if not pages:
            self._pages = [(1, self._djvu.number_of_images())]
        elif (isinstance(pages, (tuple, list)) and len(pages) == 2
              and all(isinstance(number, int) for number in pages)):
            self._pages = [tuple(pages)]
        else:
            self._pages = list(pages)

        # Get edit summary message if it's empty.
        if not self.opt.summary:
            self.opt.summary = i18n.twtranslate(self._index.site,
                                                'djvutext-creating')

    def page_number_gen(self):
        """Generate pages numbers from specified page intervals.

        Intervals are processed in sorted order and every page number is
        yielded at most once, even if intervals overlap or are nested.
        """
        last = 0  # first page number not yet covered by a previous interval
        for start, end in sorted(self._pages):
            start = max(last, start)
            # Never move the high-water mark backwards: an interval nested
            # in an earlier one must not re-open already yielded pages.
            last = max(last, end + 1)
            yield from range(start, last)

    @property
    def generator(self):
        """Generate pages from specified page interval.

        Each yielded ProofreadPage is titled
        ``<page namespace>:<index title>/<page number>`` and carries the
        number of the corresponding djvu page in ``page_number``.
        """
        for page_number in self.page_number_gen():
            title = f'{self._page_ns}:{self._prefix}/{page_number}'
            page = ProofreadPage(self._index.site, title)
            page.page_number = page_number  # remember page number in djvu file
            yield page

    def treat(self, page) -> None:
        """Process one page.

        Replace the body of *page* with the text of the matching djvu
        page and save it. An already existing page is only overwritten
        if the ``force`` option is set; otherwise a notice is printed.

        :param page: page yielded by :attr:`generator`; its
            ``page_number`` attribute selects the djvu page
        """
        old_text = page.text

        # Overwrite body of the page with content from djvu
        page.body = self._djvu.get_page(page.page_number)
        new_text = page.text

        if page.exists() and not self.opt.force:
            pywikibot.info(
                f'Page {page} already exists, not adding!\n'
                'Use -force option to overwrite the output page.')
        else:
            self.userPut(page, old_text, new_text, summary=self.opt.summary)
def main(*args: str) -> None:
    """Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    The function returns early, after printing an error or help hint,
    if ``-index`` is missing, the djvu path does not exist, the djvu
    file has no text layer, ``-pages`` cannot be parsed or the site
    lacks the ProofreadPage extension.

    :param args: command line arguments
    :raises NoPageError: the index page does not exist on the site
    """
    index = None
    djvu_path = '.'  # default djvu file directory
    pages = '1-'
    options = {}

    # Parse command line arguments.
    local_args = pywikibot.handle_args(args)
    for arg in local_args:
        opt, _, value = arg.partition(':')
        if opt == '-index':
            index = value
        elif opt == '-djvu':
            djvu_path = value
        elif opt == '-pages':
            pages = value
        elif opt == '-summary':
            options['summary'] = value
        elif opt in ('-force', '-always'):
            options[opt[1:]] = True
        else:
            pywikibot.info('Unknown argument ' + arg)

    # index is mandatory.
    if not index:
        pywikibot.bot.suggest_help(missing_parameters=['-index'])
        return

    # If djvu_path is not a file, build djvu_path from dir+index.
    djvu_path = os.path.expanduser(djvu_path)
    djvu_path = os.path.abspath(djvu_path)
    if not os.path.exists(djvu_path):
        pywikibot.error('No such file or directory: ' + djvu_path)
        return

    if os.path.isdir(djvu_path):
        djvu_path = os.path.join(djvu_path, index)

    # Create the DjVuFile wrapper.
    # NOTE(review): a path built from dir+index is not re-checked here;
    # presumably DjVuFile validates it itself -- confirm.
    djvu = DjVuFile(djvu_path)

    if not djvu.has_text():
        pywikibot.error(f'No text layer in djvu file {djvu.file}')
        return

    # Parse pages param into a list of (start, end) tuples.
    intervals = []
    for page_interval in pages.split(','):
        start, sep, end = page_interval.partition('-')
        try:
            first = int(start or 1)
            last = int(end) if end else None
        except ValueError:
            # Report malformed ranges instead of dying with a traceback.
            pywikibot.error(
                f'Invalid page range {page_interval!r} in -pages:{pages}')
            return

        if not sep:
            last = first  # 'A' -> just page A
        elif last is None:
            last = djvu.number_of_images()  # 'A-' -> until the last image
        intervals.append((first, last))

    site = pywikibot.Site()
    if not site.has_extension('ProofreadPage'):
        pywikibot.error(f'Site {site} must have ProofreadPage extension.')
        return

    index_page = pywikibot.Page(site, index, ns=site.proofread_index_ns)

    if not index_page.exists():
        # Pass the page object, not the bare title string, to the
        # page-related exception.
        raise NoPageError(index_page)

    pywikibot.info(f'uploading text from {djvu.file} to {index_page}')

    bot = DjVuTextBot(djvu, index_page, pages=intervals, site=site, **options)
    bot.run()