"""SPARQL Query interface."""## (C) Pywikibot team, 2016-2024## Distributed under the terms of the MIT license.#from__future__importannotationsfromtextwrapimportfillfromurllib.parseimportquotefromrequests.exceptionsimportTimeoutfrompywikibotimportSitefrompywikibot.backportsimportremoveprefixfrompywikibot.commsimporthttpfrompywikibot.dataimportWaitingMixinfrompywikibot.exceptionsimportError,NoUsernameErrortry:fromrequestsimportJSONDecodeErrorexceptImportError:# requests < 2.27.0fromjsonimportJSONDecodeErrorDEFAULT_HEADERS={'cache-control':'no-cache','Accept':'application/sparql-results+json'}
class SparqlQuery(WaitingMixin):

    """SPARQL Query class.

    This class allows to run SPARQL queries against any SPARQL endpoint.

    .. versionchanged:: 8.4
       inherited from :class:`data.WaitingMixin` which provides a
       :meth:`data.WaitingMixin.wait` method.
    """

    def __init__(self,
                 endpoint: str | None = None,
                 entity_url: str | None = None,
                 repo=None,
                 max_retries: int | None = None,
                 retry_wait: float | None = None) -> None:
        """Create endpoint.

        :param endpoint: SPARQL endpoint URL
        :param entity_url: URL prefix for any entities returned in a
            query.
        :param repo: The Wikibase site which we want to run queries on.
            If provided this overrides any value in endpoint and
            entity_url. Defaults to Wikidata.
        :type repo: pywikibot.site.DataSite
        :param max_retries: (optional) Maximum number of times to retry
            after errors, defaults to config.max_retries.
        :param retry_wait: (optional) Minimum time in seconds to wait
            after an error, defaults to config.retry_wait seconds
            (doubles each retry until config.retry_max is reached).
        :raises NotImplementedError: the repo cannot report its SPARQL
            endpoint
        :raises Error: the repo has no SPARQL endpoint, or an endpoint
            was given without an entity_url
        """
        # default to Wikidata
        if not repo and not endpoint:
            repo = Site('wikidata')

        if repo:
            try:
                self.endpoint = repo.sparql_endpoint
                self.entity_url = repo.concept_base_uri
            except NotImplementedError as exc:
                # Re-raise with an actionable message but keep the
                # original exception attached as the cause.
                raise NotImplementedError(
                    'Wiki version must be 1.28-wmf.23 or newer to '
                    'automatically extract the sparql endpoint. '
                    'Please provide the endpoint and entity_url '
                    'parameters instead of a repo.') from exc
            if not self.endpoint:
                raise Error(
                    f'The site {repo} does not provide a sparql endpoint.')
        else:
            if not entity_url:
                raise Error('If initialised with an endpoint the entity_url '
                            'must be provided.')
            self.endpoint = endpoint
            self.entity_url = entity_url

        self.last_response = None

        # Only override the inherited retry settings when explicitly given.
        if max_retries is not None:
            self.max_retries = max_retries
        if retry_wait is not None:
            self.retry_wait = retry_wait

    def get_last_response(self):
        """Return last received response.

        :return: Response object from last request or None
        """
        return self.last_response

    def select(self,
               query: str,
               full_data: bool = False,
               headers: dict[str, str] | None = None
               ) -> list[dict[str, str]] | None:
        """Run SPARQL query and return the result.

        The response is assumed to be in format defined by:
        https://www.w3.org/TR/2013/REC-sparql11-results-json-20130321/

        :param query: Query text
        :param full_data: Whether return full data objects or only values
        :param headers: HTTP headers to send with the request; a copy of
            ``DEFAULT_HEADERS`` is used if omitted
        :return: One dict per result row, keyed by the variable names
            listed in the response head. A variable missing from a row
            maps to None. With *full_data* the values are objects built
            from ``VALUE_TYPES``, otherwise the plain ``'value'``
            entries. None is returned if there is no parsed response or
            it has no ``'results'`` key.
        :raises ValueError: *full_data* is set and a binding has a type
            that is not in ``VALUE_TYPES``
        """
        if headers is None:
            # Copy so the shared module-level default can never be
            # modified by code further down the call chain.
            headers = dict(DEFAULT_HEADERS)

        data = self.query(query, headers=headers)
        if not data or 'results' not in data:
            return None

        result = []
        qvars = data['head']['vars']
        for row in data['results']['bindings']:
            values = {}
            for var in qvars:
                if var not in row:
                    # var is not available (OPTIONAL is probably used)
                    values[var] = None
                elif full_data:
                    if row[var]['type'] not in VALUE_TYPES:
                        raise ValueError(
                            f"Unknown type: {row[var]['type']}")
                    valtype = VALUE_TYPES[row[var]['type']]
                    values[var] = valtype(row[var],
                                          entity_url=self.entity_url)
                else:
                    values[var] = row[var]['value']
            result.append(values)
        return result

    def query(self, query: str, headers: dict[str, str] | None = None):
        """Run SPARQL query and return parsed JSON result.

        .. versionchanged:: 8.5
           :exc:`exceptions.NoUsernameError` is raised if the response
           looks like the user is not logged in.

        :param query: Query text
        :param headers: HTTP headers to send with the request; a copy of
            ``DEFAULT_HEADERS`` is used if omitted
        :return: The decoded JSON response, or None if the response body
            is not valid JSON
        :raises NoUsernameError: User not logged in
        """
        if headers is None:
            # Copy so the shared module-level default can never be
            # modified by code further down the call chain.
            headers = dict(DEFAULT_HEADERS)

        # force cleared
        self.last_response = None

        url = f'{self.endpoint}?query={quote(query)}'
        # Keep retrying on timeouts; self.wait() comes from WaitingMixin
        # and is what paces (and presumably eventually aborts) the loop.
        while True:
            try:
                self.last_response = http.fetch(url, headers=headers)
                break
            except Timeout:
                self.wait()

        try:
            return self.last_response.json()
        except JSONDecodeError:
            # There is no proper error given but server returns HTML page
            # in case login isn't valid so try to guess what the problem
            # is and notify user instead of silently ignoring it.
            # This could be made more reliable by fixing the backend.
            # Note: only raise error when response starts with HTML,
            # not in case the response otherwise might have it in between
            #
            # Decode leniently: only ASCII markers are inspected below, so
            # a body that is not valid UTF-8 must not abort the check.
            strcontent = self.last_response.content.decode(errors='replace')
            if (strcontent.startswith('<!DOCTYPE html>')
                and 'https://commons-query.wikimedia.org' in url
                and ('Special:UserLogin' in strcontent
                     or 'Special:OAuth' in strcontent)):
                raise NoUsernameError(fill(
                    'User not logged in. You need to log in to Wikimedia '
                    'Commons and give OAUTH permission. Open '
                    'https://commons-query.wikimedia.org with browser to '
                    'login and give permission.'
                ))
            return None

    def ask(self, query: str,
            headers: dict[str, str] | None = None) -> bool:
        """Run SPARQL ASK query and return boolean result.

        :param query: Query text
        :param headers: HTTP headers to send with the request; a copy of
            ``DEFAULT_HEADERS`` is used if omitted
        :return: The ``'boolean'`` entry of the parsed response
        :raises TypeError: the response could not be parsed, i.e.
            :meth:`query` returned None
        """
        if headers is None:
            # Copy so the shared module-level default can never be
            # modified by code further down the call chain.
            headers = dict(DEFAULT_HEADERS)
        data = self.query(query, headers=headers)
        return data['boolean']

    def get_items(self, query, item_name: str = 'item', result_type=set):
        """Retrieve items which satisfy given query.

        Items are returned as Wikibase IDs.

        :param query: Query string. Must contain ?{item_name} as one of
            the projected values.
        :param item_name: Name of the value to extract
        :param result_type: type of the iterable in which
              SPARQL results are stored (default set)
        :type result_type: iterable
        :return: item ids, e.g. Q1234
        :rtype: same as result_type
        """
        # full_data is required because getID() is called on each value.
        res = self.select(query, full_data=True)
        if res:
            return result_type(r[item_name].getID() for r in res)
        # No rows (or no usable response): return an empty container.
        return result_type()
class SparqlNode:

    """Common base for the node types found in SPARQL results."""

    def __init__(self, value) -> None:
        """Store the raw node value.

        :param value: value of the node as found in the result binding
        """
        self.value = value

    def __str__(self) -> str:
        """Return the stored value unchanged."""
        return self.value
class URI(SparqlNode):

    """URI node of a SPARQL result."""

    def __init__(self, data: dict, entity_url, **kwargs) -> None:
        """Build the node from a result binding.

        :param data: binding dict; its ``'value'`` entry becomes the
            node value
        :param entity_url: URL prefix stripped by :meth:`getID`
        :param kwargs: accepted and ignored
        """
        super().__init__(data.get('value'))
        self.entity_url = entity_url

    def getID(self):  # noqa: N802
        """Get ID of Wikibase object identified by the URI.

        :return: ID of Wikibase object, e.g. Q1234, or None if the
            value does not start with the entity URL prefix
        """
        prefix = self.entity_url
        if not self.value.startswith(prefix):
            return None
        return removeprefix(self.value, prefix)

    def __repr__(self) -> str:
        """Return the value wrapped in angle brackets."""
        return '<' + self.value + '>'
class Literal(SparqlNode):

    """RDF literal node of a SPARQL result."""

    def __init__(self, data: dict, **kwargs) -> None:
        """Build the node from a result binding.

        :param data: binding dict; ``'value'``, ``'datatype'`` and
            ``'xml:lang'`` are read from it, each may be absent
        :param kwargs: accepted and ignored
        """
        super().__init__(data.get('value'))
        self.type = data.get('datatype')
        self.language = data.get('xml:lang')

    def __repr__(self) -> str:
        """Return the value, tagged with its datatype or language.

        A datatype takes precedence over a language tag; with neither
        the bare value is returned.
        """
        if self.type:
            suffix = '^^' + self.type
        elif self.language:
            suffix = '@' + self.language
        else:
            return self.value
        return self.value + suffix
class Bnode(SparqlNode):

    """Blank node of a SPARQL result."""

    def __init__(self, data: dict, **kwargs) -> None:
        """Build the node from a result binding.

        :param data: binding dict; its ``'value'`` entry becomes the
            node value
        :param kwargs: accepted and ignored
        """
        super().__init__(data.get('value'))

    def __repr__(self) -> str:
        """Return the value prefixed with ``_:``."""
        return '_:' + self.value