"""Classes for detecting a MediaWiki site."""## (C) Pywikibot team, 2010-2024## Distributed under the terms of the MIT license.#from__future__importannotationsimportjsonimportrefromcontextlibimportsuppressfromhtml.parserimportHTMLParserfromhttpimportHTTPStatusfromurllib.parseimporturljoin,urlparsefromrequests.exceptionsimportRequestExceptionimportpywikibotfrompywikibot.backportsimportremovesuffixfrompywikibot.comms.httpimportfetchfrompywikibot.exceptionsimportClientError,ServerErrorfrompywikibot.toolsimportMediaWikiVersiontry:fromrequestsimportJSONDecodeErrorexceptImportError:# requests < 2.27.0fromjsonimportJSONDecodeErrorSERVER_DB_ERROR_MSG= \
'<h1>Sorry! This site is experiencing technical difficulties.</h1>'MIN_VERSION=MediaWikiVersion('1.27')
class MWSite:

    """Minimal wiki site class.

    The instance is built from the HTML of a wiki page plus the siteinfo
    API and exposes ``fromurl``, ``version``, ``server``, ``scriptpath``,
    ``articlepath`` and ``private_wiki``.
    """

    def __init__(self, fromurl, **kwargs) -> None:
        """Initializer.

        :param fromurl: URL of a page of the wiki; a trailing ``$1`` is
            stripped before the page is fetched
        :param kwargs: passed through unchanged to ``fetch``
        :raises pywikibot.exceptions.ServerError: a server error occurred
            while loading the site
        :raises Timeout: a timeout occurred while loading the site
        :raises RuntimeError: Version not found or version less than 1.27
        """
        fromurl = removesuffix(fromurl, '$1')

        r = fetch(fromurl, **kwargs)
        check_response(r)

        if fromurl != r.url:
            pywikibot.log(f'{fromurl} redirected to {r.url}')
            fromurl = r.url

        self.fromurl = fromurl

        wp = WikiHTMLPageParser(fromurl)
        wp.feed(r.text)

        self.version = wp.version
        self.server = wp.server
        self.scriptpath = wp.scriptpath
        self.articlepath = None
        # _parse_site() may fail (which is only logged below) before it
        # assigns private_wiki; give it a default so the articlepath
        # check cannot end in an AttributeError.
        self.private_wiki = False

        if self.api:
            try:
                self._parse_site()
            except (ServerError, RequestException):
                raise
            except Exception as e:
                pywikibot.log(f'MW detection failed: {e!r}')

            if not self.version:
                self._fetch_old_version()

        if not self.api:
            raise RuntimeError(f'Unsupported url: {self.fromurl}')

        if not self.version or self.version < MIN_VERSION:
            raise RuntimeError(f'Unsupported version: {self.version}')

        if not self.articlepath:
            if not self.private_wiki:
                raise RuntimeError(
                    f'Unable to determine articlepath: {self.fromurl}')

            if self.api == self.fromurl:
                raise RuntimeError(
                    'Unable to determine articlepath because the wiki is '
                    'private. Use the Main Page URL instead of the API.')

            # Private wiki: derive the article path from the given URL by
            # replacing its last path component with the $1 placeholder.
            self.articlepath = self.fromurl.rsplit('/', 1)[0] + '/$1'

    def __repr__(self) -> str:
        return f'{type(self).__name__}("{self.fromurl}")'

    @property
    def langs(self):
        """Build interwikimap.

        :return: the entries of the local interwiki map that carry a
            ``language`` key
        :raises RuntimeError: the API response contains an error
        """
        response = fetch(
            self.api
            + '?action=query&meta=siteinfo&siprop=interwikimap'
              '&sifilteriw=local&format=json')
        iw = response.json()
        error = iw.get('error')
        if error:
            raise RuntimeError(f"{error['code']} - {error['info']}")

        return [wiki for wiki in iw['query']['interwikimap']
                if 'language' in wiki]

    def _fetch_old_version(self) -> None:
        """Extract the version from API help with ?version enabled.

        Does nothing if a version is already known. Failing to obtain
        the version text is logged and otherwise ignored.
        """
        if self.version is not None:
            return

        try:
            r = fetch(self.api + '?version&format=json')
            try:
                d = r.json()
            except JSONDecodeError:
                # Fallback for old versions which didn't wrap help in json
                d = {'error': {'*': r.text}}

            stripped = (line.strip()
                        for line in d['error']['*'].split('\n'))
            # The first line starting with 'MediaWiki' holds the version
            # as its second word.
            version = next(line for line in stripped
                           if line.startswith('MediaWiki')).split()[1]
        except Exception as e:
            # Best effort only: the caller reports a missing version.
            pywikibot.log(f'MW version detection failed: {e!r}')
        else:
            self.version = MediaWikiVersion(version)

    def _parse_site(self) -> None:
        """Parse siteinfo data.

        Sets ``private_wiki`` and ``version``; unless the version is
        below ``MIN_VERSION`` it also sets ``server``, ``scriptpath``,
        ``articlepath`` and ``lang``.
        """
        response = fetch(self.api + '?action=query&meta=siteinfo&format=json')
        check_response(response)
        # remove preleading newlines and Byte Order Mark (BOM), see T128992
        content = response.text.strip().lstrip('\uFEFF')
        info = json.loads(content)
        self.private_wiki = ('error' in info
                             and info['error']['code'] == 'readapidenied')
        if self.private_wiki:
            # user config is not loaded because PYWIKIBOT_NO_USER_CONFIG
            # is set to '2' by generate_family_file.py.
            # Prepare a temporary config for login.
            username = pywikibot.input(
                'Private wiki detected. Login is required.\n'
                'Please enter your username?')
            # Setup a dummy family so that we can create a site object
            fam = pywikibot.family.AutoFamily(
                'temporary_family', self.server + self.scriptpath)
            site = pywikibot.Site(fam.code, fam, username)
            site.version = lambda: str(self.version)
            # Now the site object is able to login
            info = site.siteinfo
        else:
            info = info['query']['general']

        self.version = MediaWikiVersion.from_generator(info['generator'])
        if self.version < MIN_VERSION:
            return

        self.server = urljoin(self.fromurl, info['server'])
        for item in ['scriptpath', 'articlepath', 'lang']:
            setattr(self, item, info[item])

    def __eq__(self, other):
        """Return True if equal to other.

        Two sites are equal if server and scriptpath concatenate to the
        same string. Objects which are not :class:`MWSite` instances are
        left to Python's default comparison handling.
        """
        if not isinstance(other, MWSite):
            return NotImplemented

        return (self.server + self.scriptpath
                == other.server + other.scriptpath)

    def __hash__(self):
        """Get hashable representation."""
        return hash(self.server + self.scriptpath)

    @property
    def api(self) -> str | None:
        """Get api URL, or None if server or scriptpath is unknown."""
        if self.server is None or self.scriptpath is None:
            return None

        return self.server + self.scriptpath + '/api.php'

    @property
    def iwpath(self):
        """Get article path URL."""
        return self.server + self.articlepath
class WikiHTMLPageParser(HTMLParser):

    """Wiki HTML page parser.

    After feeding a page, ``generator`` and ``version`` come from the
    ``<meta name="generator">`` tag, and ``server`` and ``scriptpath``
    come from links to ``api.php``, ``load.php`` or
    ``opensearch_desc.php``. Each stays ``None`` if nothing was found.
    """

    def __init__(self, url) -> None:
        """Initializer.

        :param url: URL of the page to be parsed; its scheme and host
            complete relative or protocol-relative links
        """
        super().__init__(convert_charrefs=True)
        self.url = urlparse(url)
        self.generator = None
        self.version = None
        self._parsed_url = None
        self.server = None
        self.scriptpath = None

    def set_api_url(self, url) -> None:
        """Set api_url.

        Derive ``server`` and ``scriptpath`` from *url* if it points to
        ``api.php``, ``load.php`` or ``opensearch_desc.php``; any other
        URL, including an empty one, is ignored.

        :param url: value of a ``href`` or ``src`` attribute
        :raises AssertionError: *url* conflicts with the script URL
            found earlier
        """
        if not url:
            # attribute without a value, e.g. <script src>
            return

        url = url.split('.php', 1)[0]
        try:
            value, script_name = url.rsplit('/', 1)
        except ValueError:
            return

        if script_name not in ('api', 'load', 'opensearch_desc'):
            return

        # Nothing is recorded about the version here: this class has no
        # set_version() method, so calling one raised AttributeError for
        # every load.php link.
        if script_name == 'load' and self._parsed_url:
            # A Resource Loader link is less reliable than other links.
            # Resource Loader can load resources from a different site.
            # e.g. http://kino.skripov.com/index.php/$1
            # loads resources from http://megawiki.net/
            return

        new_parsed_url = urlparse(value)
        if self._parsed_url:
            assert new_parsed_url.path == self._parsed_url.path

        if not new_parsed_url.scheme or not new_parsed_url.netloc:
            # complete relative links with the parts of the page URL
            new_parsed_url = urlparse(
                f'{new_parsed_url.scheme or self.url.scheme}://'
                f'{new_parsed_url.netloc or self.url.netloc}'
                f'{new_parsed_url.path}')
        elif self._parsed_url:
            # allow upgrades to https, but not downgrades
            if self._parsed_url.scheme == 'https' \
               and new_parsed_url.scheme != self._parsed_url.scheme:
                return

            # allow http://www.brickwiki.info/ vs http://brickwiki.info/
            if (new_parsed_url.netloc in self._parsed_url.netloc
                    or self._parsed_url.netloc in new_parsed_url.netloc):
                return

            assert new_parsed_url == self._parsed_url, \
                f'{self._parsed_url} != {new_parsed_url}'

        self._parsed_url = new_parsed_url
        self.server = f'{self._parsed_url.scheme}://{self._parsed_url.netloc}'
        self.scriptpath = self._parsed_url.path

    def handle_starttag(self, tag, attrs) -> None:
        """Handle an opening tag.

        :param tag: lower-cased tag name
        :param attrs: list of ``(name, value)`` pairs; *value* is None
            for an attribute given without a value
        """
        attrs = dict(attrs)
        if tag == 'meta':
            if attrs.get('name') == 'generator':
                content = attrs.get('content')
                if content is None:
                    # generator tag without a usable content attribute
                    return

                self.generator = content
                with suppress(ValueError):
                    self.version = MediaWikiVersion.from_generator(
                        self.generator)
        elif tag == 'link' and 'rel' in attrs and 'href' in attrs:
            if attrs['rel'] in ('EditURI', 'stylesheet', 'search'):
                self.set_api_url(attrs['href'])
        elif tag == 'script' and 'src' in attrs:
            self.set_api_url(attrs['src'])
def check_response(response):
    """Raise ClientError or ServerError depending on http status.

    A status code of 500 or above raises ServerError and one from 400
    up to 499 raises ClientError. A 200 response whose text contains
    ``SERVER_DB_ERROR_MSG`` raises ServerError as well. Otherwise the
    function returns None.

    .. versionadded:: 3.0
    .. versionchanged:: 7.0
       Raise a generic :class:`exceptions.ServerError` if http status
       code is not IANA-registered but unofficial code
    .. versionchanged:: 8.1
       Raise a :class:`exceptions.ClientError` if status code is 4XX
    """
    # Highest threshold first, so a 5XX code is never reported as 4XX.
    error_classes = (
        (HTTPStatus.INTERNAL_SERVER_ERROR, ServerError, 'Server'),
        (HTTPStatus.BAD_REQUEST, ClientError, 'Client'),
    )
    for threshold, exc_class, label in error_classes:
        if not response.status_code >= threshold:
            continue

        try:
            known = HTTPStatus(response.status_code)
        except ValueError as exc:
            # Code unknown to HTTPStatus: take the three digits from the
            # ValueError text for a generic message.
            found = re.search(r'\d{3}', exc.args[0], flags=re.ASCII)
            if found is None:
                raise
            message = f'Generic {label} Error ({found.group()})'
        else:
            message = f'({known}) {known.description}'

        raise exc_class(message)

    if response.status_code != HTTPStatus.OK:
        return

    # A successful status can still carry the database failure page.
    if SERVER_DB_ERROR_MSG in response.text:
        raise ServerError('Server cannot access the database')