From ea0be79129eeb9071d611f141b1521c2a01d4de2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20C=C3=A9r=C3=A8s?= Date: Wed, 27 Nov 2019 00:52:21 -0500 Subject: [PATCH] =?UTF-8?q?build=5Fgraph:=20r=C3=A9organisation=20du=20cod?= =?UTF-8?q?e=20en=20fonction?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- data/build_graph.py | 120 +++++++++++++++++++++++--------------------- 1 file changed, 64 insertions(+), 56 deletions(-) diff --git a/data/build_graph.py b/data/build_graph.py index 470e3e3..a3e794e 100755 --- a/data/build_graph.py +++ b/data/build_graph.py @@ -7,9 +7,6 @@ NEO4J_URI = "bolt://localhost:7687" NEO4J_USR = "neo4j" NEO4J_PSW = "test" -# Conection with Neo4j -driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USR, NEO4J_PSW)) - def define_link_from_type(link_id): @@ -28,62 +25,73 @@ def define_link_from_type(link_id): -# Create indexes -with driver.session() as session: - session.run("CREATE INDEX ON :Disease(id);") - session.run("CREATE INDEX ON :Sign_symsymptoms(id);") - session.run("CREATE INDEX ON :Risk_factor(id);") - -# Get all diseases, links, symptoms -request_disease_links = (wikidata.request(""" - SELECT ?maladie ?maladieLabel ?link ?linkLabel ?signe_symptome ?signe_symptomeLabel - WHERE { - ?maladie ?link ?signe_symptome. - SERVICE wikibase:label { bd:serviceParam wikibase:language "fr"}. - VALUES ?link {wdt:P780 wdt:p1542 wdt:P5642} - } - ORDER BY ?maladie -""" -))['results']['bindings'] - -for link in request_disease_links: - disease_id = link["maladie"]["value"].split("/")[-1] - disease_label = link["maladieLabel"]["value"].lower() - disease_type = "Disease" - - link_id = link["link"]["value"].split("/")[-1] - link_label = link["linkLabel"]["value"].lower() - link_type = "link_"+define_link_from_type(link_id) - - signe_symptome_id = link["signe_symptome"]["value"].split("/")[-1] - signe_symptome_label = link["signe_symptomeLabel"]["value"].split("/")[-1].lower() - signe_symptome_type = define_link_from_type(link_id) - +def create_graph(): + """ + Build and insert graph from wikidata to neo4j + """ + # Create indexes with driver.session() as session: - # add dieadiseases - session.run( - "MERGE (d:" + disease_type + " {id:$disease_id, label:$disease_label})", - disease_id=disease_id, - disease_label=disease_label, - ) + session.run("CREATE INDEX ON :Disease(id);") + session.run("CREATE INDEX ON :Sign_symsymptoms(id);") + session.run("CREATE INDEX ON :Risk_factor(id);") - # add symptoms - session.run( - "MERGE (s:" + signe_symptome_type + " {id:$signe_symptome_id, label:$signe_symptome_label})", - signe_symptome_id=signe_symptome_id, - signe_symptome_label=signe_symptome_label, - ) + # Get all diseases, links, symptoms + request_disease_links = (wikidata.request(""" + SELECT ?maladie ?maladieLabel ?link ?linkLabel ?signe_symptome ?signe_symptomeLabel + WHERE { + ?maladie ?link ?signe_symptome. + SERVICE wikibase:label { bd:serviceParam wikibase:language "fr"}. + VALUES ?link {wdt:P780 wdt:p1542 wdt:P5642} + } + ORDER BY ?maladie + """ + ))['results']['bindings'] - # add link - session.run( - "MATCH (d:" + disease_type + " {id:$disease_id})" - "MATCH (s:" + signe_symptome_type + " {id:$signe_symptome_id})" - "MERGE (d)-[l:" + link_type + " {id:$link_id, label:$link_label}]->(s)", - link_id=link_id, - link_label=link_label, - disease_id=disease_id, - signe_symptome_id=signe_symptome_id - ) + for link in request_disease_links: + disease_id = link["maladie"]["value"].split("/")[-1] + disease_label = link["maladieLabel"]["value"].lower() + disease_type = "Disease" + + link_id = link["link"]["value"].split("/")[-1] + link_label = link["linkLabel"]["value"].lower() + link_type = "link_"+define_link_from_type(link_id) + + signe_symptome_id = link["signe_symptome"]["value"].split("/")[-1] + signe_symptome_label = link["signe_symptomeLabel"]["value"].split("/")[-1].lower() + signe_symptome_type = define_link_from_type(link_id) + + with driver.session() as session: + # add dieadiseases + session.run( + "MERGE (d:" + disease_type + " {id:$disease_id, label:$disease_label})", + disease_id=disease_id, + disease_label=disease_label, + ) + + # add symptoms + session.run( + "MERGE (s:" + signe_symptome_type + " {id:$signe_symptome_id, label:$signe_symptome_label})", + signe_symptome_id=signe_symptome_id, + signe_symptome_label=signe_symptome_label, + ) + + # add link + session.run( + "MATCH (d:" + disease_type + " {id:$disease_id})" + "MATCH (s:" + signe_symptome_type + " {id:$signe_symptome_id})" + "MERGE (d)-[l:" + link_type + " {id:$link_id, label:$link_label}]->(s)", + link_id=link_id, + link_label=link_label, + disease_id=disease_id, + signe_symptome_id=signe_symptome_id + ) + + + +# Conection with Neo4j +driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USR, NEO4J_PSW)) + +create_graph() # Close Neo4j connection driver.close()