#!/usr/bin/env python3
"""Script that preloads site and user info for all sites of given family.

The following parameters are supported:

-worker:<num>     The number of parallel tasks to be run. Default is the
                  number of processors on the machine

**Usage:**

    python pwb.py preload_sites [{<family>}] [-worker:{<num>}]

To force preloading, change the global expiry values to 0:

    python pwb.py -API_config_expiry:0 -API_uinfo_expiry:0 \
    preload_sites [{<family>}]

or run the :mod:`cache<scripts.maintenance.cache>` script previously:

    python pwb.py cache -delete

.. versionchanged:: 7.4
   script was moved to the framework scripts folder.
"""
#
# (C) Pywikibot team, 2021-2024
#
# Distributed under the terms of the MIT license.
#
from __future__ import annotations

from concurrent.futures import ThreadPoolExecutor, wait
from datetime import datetime

import pywikibot
from pywikibot.backports import removeprefix
from pywikibot.family import Family


try:  # Python 3.13
    from os import process_cpu_count  # type: ignore[attr-defined]
except ImportError:
    # Older Python versions only provide cpu_count; expose it under the
    # newer name so the rest of the module can use a single identifier.
    from os import cpu_count as process_cpu_count


#: supported families by this script
families_list = [
    'wikibooks',
    'wikinews',
    'wikipedia',
    'wikiquote',
    'wikisource',
    'wikiversity',
    'wikivoyage',
    'wiktionary',
]

# Ignore sites from preloading
# example: {'wikiversity': ['beta'], }
exceptions: dict[str, list[str]] = {}
def preload_family(family: str, executor: ThreadPoolExecutor) -> None:
    """Preload all sites of a single family file.

    Every site code of the family that is neither listed for this family
    in the module-level ``exceptions`` mapping nor marked obsolete by the
    family is submitted to *executor* as a separate task. The call
    blocks until all submitted tasks have finished. Tasks that raised
    are reported as warnings; they do not abort the other sites.

    .. versionchanged:: 9.2
       use a separate worker thread for each site.

    :param family: name of the family whose sites are preloaded
    :param executor: thread pool the per-site tasks are submitted to
    """
    def create_page(code: str, family: str) -> None:
        """Preload siteinfo and userinfo."""
        site = pywikibot.Site(code, family)
        pywikibot.Page(site, 'Main Page')

    msg = 'Preloading sites of {} family{}'
    pywikibot.info(msg.format(family, '...'))

    # Load the family once and reuse it for both attribute lookups.
    fam = Family.load(family)
    codes = fam.codes
    obsolete = fam.obsolete

    # Filter instead of calling codes.remove() so the collection handed
    # out by the family object is never modified in place.
    ignored = set(exceptions.get(family, ()))

    # Map each future to its site code so failures can be attributed.
    futures = {
        executor.submit(create_page, code, family): code
        for code in codes
        if code not in ignored and code not in obsolete
    }
    wait(futures)

    # wait() never re-raises exceptions of the tasks; surface them here
    # instead of dropping them silently.
    for future, code in futures.items():
        error = future.exception()
        if error is not None:
            pywikibot.warning(
                f'Preloading {family}:{code} failed: {error!r}')

    pywikibot.info(msg.format(family, ' completed.'))
def preload_families(families: list[str] | set[str],
                     worker: int | None) -> None:
    """Preload all sites of all given family files.

    One ``preload_family`` task is submitted per family to a shared
    thread pool, and the call blocks until all of them have finished.
    Families whose task raised are reported as errors; they do not abort
    the remaining families. The elapsed time is printed at the end.

    .. versionchanged:: 7.3
       Default of worker is calculated like for Python 3.8 but preserves
       at least one worker for each element in families_list for better
       performance.

    :param families: names of the families to preload
    :param worker: requested number of worker threads; if ``None`` a
        default derived from the processor count is used
    """
    start = datetime.now()
    if worker is None:
        # Python 3.13 default
        worker = min(32, (process_cpu_count() or 1) + 4)

    # Every preload_family task occupies a worker while it waits for the
    # per-site tasks it submitted to the same executor, so the pool must
    # be larger than the number of families; two workers per family are
    # reserved. The lower bound of 1 keeps ThreadPoolExecutor from
    # raising ValueError when no family is given and worker is <= 0.
    worker = max(len(families) * 2, worker, 1)
    pywikibot.info(
        f'Using {worker} workers to process {len(families)} families')

    with ThreadPoolExecutor(worker) as executor:
        # Map each future to its family so failures can be attributed.
        futures = {
            executor.submit(preload_family, family, executor): family
            for family in families
        }
        wait(futures)

    # wait() never re-raises exceptions of the tasks; surface them here
    # instead of dropping them silently.
    for future, family in futures.items():
        error = future.exception()
        if error is not None:
            pywikibot.error(
                f'Preloading {family} family failed: {error!r}')

    pywikibot.info(f'Loading time used: {datetime.now() - start}')