#!/usr/bin/env python
# Recovered from a git format-patch whose line breaks had been collapsed.
#   commit : 0d149bc9e7173c616f345a344ebc3d04d9d9061f
#   author : =?UTF-8?q?R=C3=A9mi=20C=C3=A9r=C3=A8s?=
#   date   : Wed, 27 Nov 2019 00:38:13 -0500
#   subject: [PATCH] build_graph: set up the graph construction algorithm
#   file   : data/build_graph.py (new file, mode 100755)
"""Build the disease graph in Neo4j from Wikidata.

The script asks Wikidata (through the project's ``fetch.wikidata`` helper)
for every triple whose predicate is P780, P1542 or P5642, then merges into
Neo4j:

* the subject as a ``Disease`` node,
* the object as a node whose label depends on the predicate
  (see :func:`define_link_from_type`),
* a relationship ``link_<node label>`` from the disease to that node.

Everything runs at module level, so importing this module has the same
effect as executing it.
"""
from fetch import wikidata
from neo4j import GraphDatabase


NEO4J_URI = "bolt://localhost:7687"
NEO4J_USR = "neo4j"
NEO4J_PSW = "test"

# Neo4j node label for the target of a link, keyed by Wikidata property ID.
# The label spellings are part of the stored graph schema; do not "fix" them
# here without migrating existing data and the queries that read it.
_NODE_TYPE_BY_LINK_ID = {
    "P780": "Sign_symsymptoms",
    "P1542": "Sign_symsymptoms",
    "P5642": "Risk_factor",
}

# All (disease, link, target) triples for the supported properties, with
# French labels. Property IDs are case-sensitive in SPARQL prefixed names:
# "wdt:p1542" matches nothing, it has to be "wdt:P1542".
_DISEASE_LINKS_QUERY = """
    SELECT ?maladie ?maladieLabel ?link ?linkLabel ?signe_symptome ?signe_symptomeLabel
    WHERE {
        ?maladie ?link ?signe_symptome.
        SERVICE wikibase:label { bd:serviceParam wikibase:language "fr"}.
        VALUES ?link {wdt:P780 wdt:P1542 wdt:P5642}
    }
    ORDER BY ?maladie
"""


def define_link_from_type(link_id):
    """
    Define the type of a link from its ID

    :param link_id: Wikidata property ID of the link (e.g. ``"P780"``)
    :return: Neo4j node label used for the target of that link
    :raises ValueError: if the ID is not one of the supported properties
    """
    node_type = _NODE_TYPE_BY_LINK_ID.get(link_id)
    if node_type is None:
        raise ValueError("Error : unknow link id: " + link_id)
    return node_type


def _last_path_segment(uri):
    """Return what follows the last "/" of *uri* (the entity/property ID)."""
    return uri.split("/")[-1]


def _store_link(session, link):
    """Merge one SPARQL result row (disease, link, target) into Neo4j."""
    disease_id = _last_path_segment(link["maladie"]["value"])
    disease_label = link["maladieLabel"]["value"].lower()
    disease_type = "Disease"

    link_id = _last_path_segment(link["link"]["value"])
    link_label = link["linkLabel"]["value"].lower()

    # The node label and the relationship type both derive from the link ID.
    signe_symptome_type = define_link_from_type(link_id)
    link_type = "link_" + signe_symptome_type

    signe_symptome_id = _last_path_segment(link["signe_symptome"]["value"])
    # NOTE(review): unlike the disease label, this label is also cut at its
    # last "/". Kept as in the original; it looks copied from the ID line
    # above -- confirm it is intended before removing it.
    signe_symptome_label = _last_path_segment(
        link["signe_symptomeLabel"]["value"]
    ).lower()

    # Labels and relationship types cannot be passed as Cypher parameters,
    # hence the string concatenation. The concatenated values only ever come
    # from the constants above, never from the query results.

    # add disease
    session.run(
        "MERGE (d:" + disease_type + " {id:$disease_id, label:$disease_label})",
        disease_id=disease_id,
        disease_label=disease_label,
    )

    # add symptom / risk factor
    session.run(
        "MERGE (s:" + signe_symptome_type
        + " {id:$signe_symptome_id, label:$signe_symptome_label})",
        signe_symptome_id=signe_symptome_id,
        signe_symptome_label=signe_symptome_label,
    )

    # add link (clauses are separated by explicit spaces)
    session.run(
        "MATCH (d:" + disease_type + " {id:$disease_id}) "
        "MATCH (s:" + signe_symptome_type + " {id:$signe_symptome_id}) "
        "MERGE (d)-[l:" + link_type + " {id:$link_id, label:$link_label}]->(s)",
        link_id=link_id,
        link_label=link_label,
        disease_id=disease_id,
        signe_symptome_id=signe_symptome_id,
    )


# Connection with Neo4j
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USR, NEO4J_PSW))

try:
    # Create indexes (in their own session, before any data is written)
    with driver.session() as session:
        session.run("CREATE INDEX ON :Disease(id);")
        session.run("CREATE INDEX ON :Sign_symsymptoms(id);")
        session.run("CREATE INDEX ON :Risk_factor(id);")

    # Get all diseases, links, symptoms
    request_disease_links = (
        wikidata.request(_DISEASE_LINKS_QUERY)
    )['results']['bindings']

    # One session for the whole import instead of one per result row.
    with driver.session() as session:
        for link in request_disease_links:
            _store_link(session, link)
finally:
    # Close Neo4j connection, even if the import failed part-way.
    driver.close()