diff --git a/data/alignement.py b/data/alignement.py index 77e9e98..65a7474 100644 --- a/data/alignement.py +++ b/data/alignement.py @@ -1,6 +1,7 @@ from fetch import wikidata, wikimedica import requests import rdflib +import json SIGNES_ET_SYMPTOMES_WIKIMEDICA_PAGE = ( 'http://wikimedi.ca/wiki/Sp%C3%A9cial:' @@ -22,10 +23,10 @@ nb_multi_align_possibility = 0 # get name and URI from WikiMedica request_name_uri = """ - SELECT ?name ?uri + SELECT ?name ?wikimedica_uri WHERE { ?el rdfs:label ?name . - ?el rdfs:isDefinedBy ?uri + ?el rdfs:isDefinedBy ?wikimedica_uri } """ @@ -41,10 +42,11 @@ for entity in entitys_dic: # Pre-traitment on name entity['name'] = (entity['name'].split("(")[0]).strip() name = entity['name'] + wikimedica_uri = entity['wikimedica_uri'] print() print("="*10 + " " + name + " " + "="*10) - print(entity['uri']) + print(wikimedica_uri) # use Wikidata_id property request_prop_Wikidata_id = """ @@ -56,7 +58,7 @@ for entity in entitys_dic: """.format(PREFIX_PROPERTY) prop_Wikidata_id = wikimedica.request( - entity['uri'], + wikimedica_uri, request_prop_Wikidata_id ) @@ -85,10 +87,14 @@ for entity in entitys_dic: if len(list_wikidata_pages_uri) == 1: nb_align_by_name = nb_align_by_name + 1 - wikidata_uri = list_wikidata_pages_uri[0] - entity['wikidata_id'] = rdflib.term.URIRef(wikidata_uri) + + entity['wikidata_id'] = ( + list_wikidata_pages_uri[0]['entity']['value'] + ).split("/")[-1] + print("align with 'name', 1 response") - print(wikidata_uri['entity']['value']) + print(entity['wikidata_id']) + elif len(list_wikidata_pages_uri) > 1: nb_multi_align_possibility = nb_multi_align_possibility + 1 print("align with 'name', multi response") @@ -101,3 +107,8 @@ print("nb align by name : ", nb_align_by_name) print("nb multi align possibility : ", nb_multi_align_possibility) print("nb no align : ", nb_wikimedica_entity - nb_align_by_property - nb_align_by_name) + +# export result in file +with open("./data/alignment_result.json", "w") as result_file: + json_content = json.dumps(entitys_dic, sort_keys=True, indent=4) + result_file.write(json_content)