#!/usr/bin/env python
"""Import disease / symptom / risk-factor links from Wikidata into Neo4j."""

from fetch import wikidata
from neo4j import GraphDatabase

NEO4J_URI = "bolt://localhost:7687"
NEO4J_USR = "neo4j"
NEO4J_PSW = "test"


def define_link_from_type(link_id):
    """
    Define the type of a link from its Wikidata property ID

    :param link_id: id of link (e.g. "P780")
    :return: node type corresponding to the link
    :raises ValueError: if the link id is not one of the handled properties
    """
    if link_id in ["P780", "P1542"]:
        return "Sign_symsymptoms"
    elif link_id == "P5642":
        return "Risk_factor"
    else:
        # ValueError is a subclass of Exception, so callers catching the
        # previous generic Exception keep working.
        raise ValueError("Error : unknow link id: " + link_id)


def create_graph():
    """
    Build and insert graph from wikidata to neo4j

    Relies on the module-level ``driver`` being initialised before the call.
    For every (disease, link, target) row returned by the SPARQL query, the
    disease node, the target node and the relationship between them are
    merged into the database.
    """
    # Create indexes
    # NOTE(review): "CREATE INDEX ON :Label(prop)" is the legacy syntax;
    # confirm it is accepted by the Neo4j server version in use.
    with driver.session() as session:
        session.run("CREATE INDEX ON :Disease(id);")
        session.run("CREATE INDEX ON :Sign_symsymptoms(id);")
        session.run("CREATE INDEX ON :Risk_factor(id);")

    # Get all diseases, links, symptoms
    # Property IRIs are case-sensitive: it must be wdt:P1542, not wdt:p1542,
    # otherwise that property never matches any triple.
    request_disease_links = (wikidata.request("""
        SELECT ?maladie ?maladieLabel ?link ?linkLabel ?signe_symptome ?signe_symptomeLabel
        WHERE {
            ?maladie ?link ?signe_symptome.
            SERVICE wikibase:label { bd:serviceParam wikibase:language "fr"}.
            VALUES ?link {wdt:P780 wdt:P1542 wdt:P5642}
        }
        ORDER BY ?maladie
        """
    ))['results']['bindings']

    # One session is reused for all rows instead of opening one per row.
    with driver.session() as session:
        for link in request_disease_links:
            # Entity / property ids are the last path segment of their URI.
            disease_id = link["maladie"]["value"].split("/")[-1]
            disease_label = link["maladieLabel"]["value"].lower()
            disease_type = "Disease"

            link_id = link["link"]["value"].split("/")[-1]
            link_label = link["linkLabel"]["value"].lower()

            # The target node type and the relationship type both derive
            # from the link id; resolve it once.
            signe_symptome_type = define_link_from_type(link_id)
            link_type = "link_" + signe_symptome_type

            signe_symptome_id = link["signe_symptome"]["value"].split("/")[-1]
            # NOTE(review): the label is also split on "/", unlike the
            # disease label; a label containing "/" is truncated to its last
            # segment. Kept as-is; confirm this is intended.
            signe_symptome_label = (
                link["signe_symptomeLabel"]["value"].split("/")[-1].lower()
            )

            # Node labels / relationship types cannot be query parameters in
            # Cypher, hence the concatenation; the values come only from the
            # fixed strings above, never from the query results.
            # NOTE(review): MERGE matches on both id and label, so the same
            # id with a different label would create a second node.

            # add diseases
            session.run(
                "MERGE (d:" + disease_type
                + " {id:$disease_id, label:$disease_label})",
                disease_id=disease_id,
                disease_label=disease_label,
            )

            # add symptoms
            session.run(
                "MERGE (s:" + signe_symptome_type
                + " {id:$signe_symptome_id, label:$signe_symptome_label})",
                signe_symptome_id=signe_symptome_id,
                signe_symptome_label=signe_symptome_label,
            )

            # add link
            session.run(
                "MATCH (d:" + disease_type + " {id:$disease_id}) "
                "MATCH (s:" + signe_symptome_type + " {id:$signe_symptome_id}) "
                "MERGE (d)-[l:" + link_type
                + " {id:$link_id, label:$link_label}]->(s)",
                link_id=link_id,
                link_label=link_label,
                disease_id=disease_id,
                signe_symptome_id=signe_symptome_id,
            )


# Connection with Neo4j
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USR, NEO4J_PSW))
try:
    create_graph()
finally:
    # Close Neo4j connection, even if the import failed part-way
    driver.close()