Merge branch 'master' of gitlab.com:matteodelabre/wikimedica-disease-search
This commit is contained in:
commit
63f3aaa7bc
|
@ -0,0 +1,131 @@
|
|||
import requests
|
||||
import rdflib
|
||||
|
||||
# RDF export of the WikiMedica concept page "Signes et symptômes"
# (percent-encoded URL, assembled from two adjacent string literals).
SIGNES_ET_SYMPTOMES_WIKIMEDICA_PAGE = (
    'http://wikimedi.ca/wiki/Sp%C3%A9cial:'
    'Export_RDF/Concept:Signes_et_sympt%C3%B4mes'
)

# Namespace IRI bound to the ``property:`` prefix in SPARQL queries.
PREFIX_PROPERTY = "http://wikimedi.ca/wiki/Special:URIResolver/Property-3A"

# Public SPARQL endpoint of Wikidata.
WIKIDATA_SPARQL_ENDPOINT = "https://query.wikidata.org/sparql"

# Value sent in the User-Agent header of every outgoing HTTP request.
USER_AGENT = (
    "WikimedicaDiseaseSearch/0.1 "
    "(https://gitlab.com/matteodelabre/wikimedica-disease-search)"
)

# One HTTP session shared by all requests. Its header mapping is replaced
# outright, so User-Agent is the only header set explicitly here.
session = requests.Session()
session.headers = {'User-Agent': USER_AGENT}
|
||||
|
||||
def request_wikimedica(page, request):
    """
    Run a SPARQL query against the RDF export of a WikiMedica page.

    The page is downloaded, parsed into an in-memory rdflib graph, and the
    query is then evaluated locally on that graph.

    :param page: URL of the WikiMedica RDF export to load
    :param request: SPARQL query evaluated on the parsed graph
    :return: list with one dict per result row, mapping each bound variable
        name to its value
    :raises requests.HTTPError: if the server answers with an error status
    """
    # The context manager releases the streamed connection even when the
    # status check or the parser raises.
    with session.get(page, stream=True) as response:
        # Fail on an HTTP error instead of handing an error page to the
        # RDF parser.
        response.raise_for_status()

        # ``raw`` exposes the body as transferred; ask urllib3 to undo any
        # Content-Encoding so the parser never sees compressed bytes.
        response.raw.decode_content = True

        graph = rdflib.Graph()
        # The format is stated explicitly because rdflib's default for an
        # unnamed stream differs between major versions.
        # NOTE(review): assumes the export is RDF/XML, as produced by
        # Semantic MediaWiki's RDF export page -- confirm for this wiki.
        graph.parse(response.raw, format='xml')

    return [row.asdict() for row in graph.query(request)]
|
||||
|
||||
|
||||
def request_endpoint(endpoint, request):
    """
    Send a SPARQL query to an HTTP endpoint and return the decoded reply.

    :param endpoint: URL of the SPARQL endpoint
    :param request: SPARQL query text, sent as the ``query`` URL parameter
    :return: the response body decoded from JSON
    :raises Exception: if the HTTP status code is anything other than 200
    """
    query_params = {'format': 'json', 'query': request}
    response = session.get(endpoint, params=query_params)
    status = response.status_code

    if status == 200:
        return response.json()

    raise Exception('Erreur {}'.format(status))
|
||||
|
||||
|
||||
# Alignment statistics, printed at the end of the run.
nb_wikimedica_entity = 0
nb_align_by_property = 0
nb_align_by_name = 0
nb_align_conflict = 0


def _sparql_string(text):
    """Escape *text* for use inside a double-quoted SPARQL string literal."""
    return (
        text.replace('\\', '\\\\')
        .replace('"', '\\"')
        .replace('\n', '\\n')
        .replace('\r', '\\r')
    )


# Get the name and URI of every entity described on the WikiMedica page.
request_name_uri = """
SELECT ?name ?uri
WHERE {
    ?el rdfs:label ?name .
    ?el rdfs:isDefinedBy ?uri
}
"""

# Read the ``Wikidata_id`` property of an entity page. The query does not
# depend on the entity, so it is built once rather than on every iteration.
request_prop_Wikidata_id = """
PREFIX property: <{}>
SELECT ?wikidata_id
WHERE {{
    ?el property:Wikidata_id ?wikidata_id .
}}
""".format(PREFIX_PROPERTY)

entitys_dic = request_wikimedica(
    SIGNES_ET_SYMPTOMES_WIKIMEDICA_PAGE,
    request_name_uri
)

nb_wikimedica_entity = len(entitys_dic)

# Try to align each WikiMedica entity with a Wikidata entity.
for entity in entitys_dic:
    print()
    print("="*10 + " " + entity['name'] + " " + "="*10)
    print(entity['uri'])

    # First choice: the explicit Wikidata_id property on the entity page.
    prop_Wikidata_id = request_wikimedica(
        entity['uri'],
        request_prop_Wikidata_id
    )

    if len(prop_Wikidata_id) == 1:
        nb_align_by_property = nb_align_by_property + 1
        # NOTE(review): this stores the property value as returned by the
        # wiki, while the name-based branch stores a full entity URI --
        # confirm both forms are acceptable downstream.
        entity.update(prop_Wikidata_id[0])
        print("align with 'property:Wikidata_id'")
    elif len(prop_Wikidata_id) > 1:
        print("Attention : il y a plus de 1 lien 'property:wikidata_id'")

    # Fallback: search Wikidata by the French label of the entity.
    else:
        # DISTINCT is required: the two label spellings can be identical and
        # an entity can have several of the listed types, and either case
        # would repeat the same entity and be miscounted as a conflict.
        request_search_by_name = """
        SELECT DISTINCT ?entity
        WHERE {{
            ?entity rdfs:label ?entityLabel;
                    wdt:P31 ?type.
            VALUES ?entityLabel {{"{}"@fr "{}"@fr}}
            VALUES ?type {{wd:Q12136 wd:Q1441305 wd:Q169872}}
        }}
        """.format(
            # Escape the name so quotes or backslashes in a label cannot
            # break out of the string literal.
            _sparql_string(entity['name'].lower()),
            _sparql_string(entity['name'].capitalize())
        )

        list_wikidata_pages_uri = (request_endpoint(
            WIKIDATA_SPARQL_ENDPOINT,
            request_search_by_name
        ))['results']['bindings']

        if len(list_wikidata_pages_uri) == 1:
            nb_align_by_name = nb_align_by_name + 1
            # Each binding is a dict keyed by variable name; the URI string
            # itself lives under ['entity']['value'].
            wikidata_uri = list_wikidata_pages_uri[0]['entity']['value']
            entity['wikidata_id'] = rdflib.term.URIRef(wikidata_uri)
            print("align with 'name', 1 response")
            print(wikidata_uri)
        elif len(list_wikidata_pages_uri) > 1:
            nb_align_conflict = nb_align_conflict + 1
            print("align with 'name', multi response")
            print(list_wikidata_pages_uri)

print()
print("nb_wikimedica_entity : ", nb_wikimedica_entity)
print("nb_align_by_property : ", nb_align_by_property)
print("nb_align_by_name : ", nb_align_by_name)
print("nb_align_conflict : ", nb_align_conflict)
|
Loading…
Reference in New Issue