#!/usr/bin/env python
"""Load disease relations from Wikidata into Neo4j, then align with Wikimedica.

Running this file:

1. connects to the Neo4j instance at ``NEO4J_URI``;
2. inserts one node per disease and per symptom / risk factor returned by the
   Wikidata query, plus the relationship between them (``create_graph``);
3. annotates nodes listed in ``ALIGNEMENT_FILE_PATH`` with data fetched from
   Wikimedica (``align_with_wikimedica``);
4. closes the connection.
"""
import json

from neo4j import GraphDatabase

from fetch import wikidata, wikimedica

NEO4J_URI = "bolt://localhost:7687"
NEO4J_USR = "neo4j"
NEO4J_PSW = "test"
ALIGNEMENT_FILE_PATH = "data/alignment_result.json"

# Wikidata property id -> node label used in the graph for the target entity.
_LINK_ID_TO_TYPE = {
    "P780": "Sign_symsymptoms",
    "P1542": "Sign_symsymptoms",
    "P5642": "Risk_factor",
}

# Every (disease, property, target) triple whose property is one of the ids
# handled by define_link_from_type. Property ids are case-sensitive IRIs, so
# they must be written with an upper-case "P".
_DISEASE_LINKS_QUERY = """
    SELECT ?maladie ?maladieLabel ?link ?linkLabel
           ?signe_symptome ?signe_symptomeLabel
    WHERE {
        ?maladie ?link ?signe_symptome.
        SERVICE wikibase:label { bd:serviceParam wikibase:language "fr"}.
        VALUES ?link {wdt:P780 wdt:P1542 wdt:P5642}
    }
    ORDER BY ?maladie
"""


def define_link_from_type(link_id):
    """
    Define the type of a link from its ID
    :param link_id: Wikidata property id of the link (e.g. "P780")
    :return: "Sign_symsymptoms" for P780 and P1542, "Risk_factor" for P5642
    :raises ValueError: if link_id is not one of the ids above
    """
    link_type = _LINK_ID_TO_TYPE.get(link_id)
    if link_type is None:
        # ValueError is a subclass of Exception, so handlers written against
        # the previous generic Exception keep working.
        raise ValueError("Error : unknow link id: " + link_id)
    return link_type


def create_graph():
    """
    Build and insert graph from wikidata to neo4j

    Creates indexes on the ``id`` property of the three node labels, runs
    the Wikidata query, and for each returned binding merges the disease
    node, the symptom / risk-factor node and the relationship between them.
    Uses the module-level ``driver``.

    :raises ValueError: if a binding carries a link id that
        define_link_from_type does not know
    """
    # Create indexes
    with driver.session() as session:
        session.run("CREATE INDEX ON :Disease(id);")
        session.run("CREATE INDEX ON :Sign_symsymptoms(id);")
        session.run("CREATE INDEX ON :Risk_factor(id);")

    # Get all diseases, links, symptoms
    request_disease_links = (
        wikidata.request(_DISEASE_LINKS_QUERY)
    )['results']['bindings']

    disease_type = "Disease"

    # One session for the whole import instead of one per result row.
    with driver.session() as session:
        for link in request_disease_links:
            # Entity values are URIs; the id is the last path segment.
            disease_id = link["maladie"]["value"].split("/")[-1]
            disease_label = link["maladieLabel"]["value"].lower()

            link_id = link["link"]["value"].split("/")[-1]
            link_label = link["linkLabel"]["value"].lower()

            # The same lookup gives both the target node label and, with a
            # "link_" prefix, the relationship type.
            signe_symptome_type = define_link_from_type(link_id)
            link_type = "link_" + signe_symptome_type

            signe_symptome_id = link["signe_symptome"]["value"].split("/")[-1]
            # NOTE(review): only the text after the last "/" of the label is
            # kept, unlike the disease label above -- confirm this is intended.
            signe_symptome_label = (
                link["signe_symptomeLabel"]["value"].split("/")[-1].lower()
            )

            # Labels and relationship types are concatenated into the query
            # text; they only ever come from the fixed values above, never
            # from the fetched data. All data values are passed as parameters.

            # add diseases
            session.run(
                "MERGE (d:" + disease_type
                + " {id:$disease_id, label:$disease_label})",
                disease_id=disease_id,
                disease_label=disease_label,
            )

            # add symptoms
            session.run(
                "MERGE (s:" + signe_symptome_type
                + " {id:$signe_symptome_id, label:$signe_symptome_label})",
                signe_symptome_id=signe_symptome_id,
                signe_symptome_label=signe_symptome_label,
            )

            # add link
            session.run(
                "MATCH (d:" + disease_type + " {id:$disease_id})"
                "MATCH (s:" + signe_symptome_type + " {id:$signe_symptome_id})"
                "MERGE (d)-[l:" + link_type
                + " {id:$link_id, label:$link_label}]->(s)",
                link_id=link_id,
                link_label=link_label,
                disease_id=disease_id,
                signe_symptome_id=signe_symptome_id,
            )


def align_with_wikimedica():
    """
    Align neo4j graph and Wikimedica

    Loads the JSON alignment file at ``ALIGNEMENT_FILE_PATH`` (expected to be
    a list of objects). For every entry that has a ``wikidata_id`` key, the
    value returned by ``wikimedica.get_web_page(entry['wikimedica_uri'])`` is
    stored in the ``wikimedia_id`` property of every node whose ``id`` equals
    that Wikidata id. Entries without ``wikidata_id`` are skipped.
    Uses the module-level ``driver``.
    """
    with open(ALIGNEMENT_FILE_PATH, "r") as align_file:
        align = json.load(align_file)

    # One session for the whole alignment instead of one per entry.
    with driver.session() as session:
        for entity in align:
            if 'wikidata_id' not in entity:
                continue

            wikidata_id = entity['wikidata_id']
            wikimedica_page = wikimedica.get_web_page(entity['wikimedica_uri'])
            session.run(
                "MATCH (d {id:$wikidata_id})"
                "SET d.wikimedia_id = $wikimedica_uri",
                wikidata_id=wikidata_id,
                wikimedica_uri=wikimedica_page,
            )


# Connection with Neo4j
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USR, NEO4J_PSW))

try:
    create_graph()
    align_with_wikimedica()
finally:
    # Close Neo4j connection, even when one of the steps above failed
    driver.close()