2019-11-26 16:19:21 +00:00
|
|
|
from fetch import wikidata, wikimedica
|
2019-11-26 06:05:31 +00:00
|
|
|
import requests
|
|
|
|
import rdflib
|
|
|
|
|
|
|
|
# RDF export URL of the Wikimedica "Signes et symptômes" concept page
# (percent-encoded; split only to keep the lines short).
SIGNES_ET_SYMPTOMES_WIKIMEDICA_PAGE = (
    "http://wikimedi.ca/wiki/"
    "Sp%C3%A9cial:Export_RDF/"
    "Concept:Signes_et_sympt%C3%B4mes"
)

# Base URI under which Wikimedica properties are resolved; interpolated as
# the `property:` prefix of the SPARQL queries built in this script.
PREFIX_PROPERTY = (
    "http://wikimedi.ca/wiki/Special:URIResolver/"
    "Property-3A"
)

# Value of the User-Agent HTTP header identifying this tool.
USER_AGENT = (
    "WikimedicaDiseaseSearch/0.1 "
    "(https://gitlab.com/matteodelabre/wikimedica-disease-search)"
)
|
|
|
|
|
|
|
|
# HTTP session carrying this tool's User-Agent.
session = requests.Session()
# Merge into the session's default headers instead of replacing them:
# assigning a plain dict would drop the defaults set by `requests`
# (Accept, Accept-Encoding, Connection) and lose the case-insensitive
# header mapping.
session.headers.update({'User-Agent': USER_AGENT})
|
|
|
|
|
|
|
|
|
|
|
|
# Run statistics, all starting at zero:
#   nb_wikimedica_entity       - number of entities fetched from Wikimedica
#   nb_align_by_property       - entities aligned through `Wikidata_id`
#   nb_align_by_name           - entities aligned through a unique label match
#   nb_multi_align_possibility - entities whose label matched several items
nb_wikimedica_entity = nb_align_by_property = 0
nb_align_by_name = nb_multi_align_possibility = 0
|
2019-11-26 06:05:31 +00:00
|
|
|
|
|
|
|
# SPARQL query selecting, for every element of the export, its label
# (?name) and the URI it is defined by (?uri).
request_name_uri = """
SELECT ?name ?uri
WHERE {
?el rdfs:label ?name .
?el rdfs:isDefinedBy ?uri
}
"""

# Run the query against the "Signes et symptômes" RDF export.
# NOTE(review): the result is presumably a list with one mapping per row
# (the alignment loop reads entity['name'] and entity['uri']) - confirm
# against fetch.wikimedica.request.
entitys_dic = wikimedica.request(
    SIGNES_ET_SYMPTOMES_WIKIMEDICA_PAGE, request_name_uri)

# Total number of Wikimedica entities to align.
nb_wikimedica_entity = len(entitys_dic)
|
|
|
|
|
|
|
|
# Try to align every Wikimedica entity with a Wikidata item.
#
# Strategy, per entity:
#   1. read the explicit `Wikidata_id` property from the entity's RDF export;
#   2. if there is none, search Wikidata for an item whose French label is
#      the entity name and whose `wdt:P31` is one of the listed classes.
# Only an unambiguous (single) answer is stored in entity['wikidata_id'].

# Query reading the `Wikidata_id` property. It does not depend on the
# entity, so it is built once instead of on every iteration.
request_prop_Wikidata_id = """
PREFIX property: <{}>
SELECT ?wikidata_id
WHERE {{
?el property:Wikidata_id ?wikidata_id .
}}
""".format(PREFIX_PROPERTY)

# Template of the Wikidata label search; the two placeholders receive the
# lower-cased and the capitalised form of the entity name.
request_search_by_name_template = """
SELECT ?entity
WHERE {{
?entity rdfs:label ?entityLabel;
wdt:P31 ?type.
VALUES ?entityLabel {{"{}"@fr "{}"@fr}}
VALUES ?type {{wd:Q12136 wd:Q1441305 wd:Q169872}}
}}
"""

for entity in entitys_dic:
    # Pre-processing of the name: keep only the text before the first "(".
    entity['name'] = entity['name'].split("(")[0].strip()
    name = entity['name']

    print()
    print("="*10 + " " + name + " " + "="*10)
    print(entity['uri'])

    # First choice: use the Wikidata_id property of the entity's own page.
    prop_Wikidata_id = wikimedica.request(
        entity['uri'],
        request_prop_Wikidata_id
    )

    if len(prop_Wikidata_id) == 1:
        nb_align_by_property += 1
        # Copies the row's `wikidata_id` binding into the entity.
        entity.update(prop_Wikidata_id[0])
        print("align with 'property:Wikidata_id'")
    elif len(prop_Wikidata_id) > 1:
        # Ambiguous property: report it and leave the entity unaligned.
        print("Attention : il y a plus de 1 lien 'property:wikidata_id'")

    # Fallback: search Wikidata by the name of the entity.
    else:
        # Escape backslashes and double quotes so that the label remains a
        # valid SPARQL string literal whatever the page title contains.
        labels = [
            label.replace('\\', '\\\\').replace('"', '\\"')
            for label in (name.lower(), name.capitalize())
        ]
        request_search_by_name = request_search_by_name_template.format(
            *labels)

        list_wikidata_pages_uri = (
            wikidata.request(request_search_by_name)
        )['results']['bindings']

        if len(list_wikidata_pages_uri) == 1:
            nb_align_by_name += 1
            # Each binding is a mapping; the item URI is under
            # ['entity']['value']. Passing the whole binding to URIRef
            # would store the mapping's repr instead of the URI.
            wikidata_uri = list_wikidata_pages_uri[0]['entity']['value']
            entity['wikidata_id'] = rdflib.term.URIRef(wikidata_uri)
            print("align with 'name', 1 response")
            print(wikidata_uri)
        elif len(list_wikidata_pages_uri) > 1:
            # Several candidates: count the case, do not pick one.
            nb_multi_align_possibility += 1
            print("align with 'name', multi response")
            print(list_wikidata_pages_uri)
|
|
|
|
|
|
|
|
# Final report: one line per counter, after a blank separator line.
# "no align" is every entity that was aligned neither by property nor by
# a unique name match.
nb_no_align = nb_wikimedica_entity - nb_align_by_property - nb_align_by_name

summary = (
    ("nb wikimedica_entity : ", nb_wikimedica_entity),
    ("nb align by property : ", nb_align_by_property),
    ("nb align by name : ", nb_align_by_name),
    ("nb multi align possibility : ", nb_multi_align_possibility),
    ("nb no align : ", nb_no_align),
)

print()
for label, count in summary:
    print(label, count)
|