build_graph: réorganisation du code en fonction

This commit is contained in:
Rémi Cérès 2019-11-27 00:52:21 -05:00
parent 0d149bc9e7
commit ea0be79129
1 changed files with 64 additions and 56 deletions

View File

@ -7,9 +7,6 @@ NEO4J_URI = "bolt://localhost:7687"
NEO4J_USR = "neo4j" NEO4J_USR = "neo4j"
NEO4J_PSW = "test" NEO4J_PSW = "test"
# Connection with Neo4j
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USR, NEO4J_PSW))
def define_link_from_type(link_id): def define_link_from_type(link_id):
@ -28,62 +25,73 @@ def define_link_from_type(link_id):
# Create indexes def create_graph():
with driver.session() as session: """
session.run("CREATE INDEX ON :Disease(id);") Build and insert graph from wikidata to neo4j
session.run("CREATE INDEX ON :Sign_symsymptoms(id);") """
session.run("CREATE INDEX ON :Risk_factor(id);") # Create indexes
# Get all diseases, links, symptoms
request_disease_links = (wikidata.request("""
SELECT ?maladie ?maladieLabel ?link ?linkLabel ?signe_symptome ?signe_symptomeLabel
WHERE {
?maladie ?link ?signe_symptome.
SERVICE wikibase:label { bd:serviceParam wikibase:language "fr"}.
VALUES ?link {wdt:P780 wdt:p1542 wdt:P5642}
}
ORDER BY ?maladie
"""
))['results']['bindings']
for link in request_disease_links:
disease_id = link["maladie"]["value"].split("/")[-1]
disease_label = link["maladieLabel"]["value"].lower()
disease_type = "Disease"
link_id = link["link"]["value"].split("/")[-1]
link_label = link["linkLabel"]["value"].lower()
link_type = "link_"+define_link_from_type(link_id)
signe_symptome_id = link["signe_symptome"]["value"].split("/")[-1]
signe_symptome_label = link["signe_symptomeLabel"]["value"].split("/")[-1].lower()
signe_symptome_type = define_link_from_type(link_id)
with driver.session() as session: with driver.session() as session:
# add diseases session.run("CREATE INDEX ON :Disease(id);")
session.run( session.run("CREATE INDEX ON :Sign_symsymptoms(id);")
"MERGE (d:" + disease_type + " {id:$disease_id, label:$disease_label})", session.run("CREATE INDEX ON :Risk_factor(id);")
disease_id=disease_id,
disease_label=disease_label,
)
# add symptoms # Get all diseases, links, symptoms
session.run( request_disease_links = (wikidata.request("""
"MERGE (s:" + signe_symptome_type + " {id:$signe_symptome_id, label:$signe_symptome_label})", SELECT ?maladie ?maladieLabel ?link ?linkLabel ?signe_symptome ?signe_symptomeLabel
signe_symptome_id=signe_symptome_id, WHERE {
signe_symptome_label=signe_symptome_label, ?maladie ?link ?signe_symptome.
) SERVICE wikibase:label { bd:serviceParam wikibase:language "fr"}.
VALUES ?link {wdt:P780 wdt:p1542 wdt:P5642}
}
ORDER BY ?maladie
"""
))['results']['bindings']
# add link for link in request_disease_links:
session.run( disease_id = link["maladie"]["value"].split("/")[-1]
"MATCH (d:" + disease_type + " {id:$disease_id})" disease_label = link["maladieLabel"]["value"].lower()
"MATCH (s:" + signe_symptome_type + " {id:$signe_symptome_id})" disease_type = "Disease"
"MERGE (d)-[l:" + link_type + " {id:$link_id, label:$link_label}]->(s)",
link_id=link_id, link_id = link["link"]["value"].split("/")[-1]
link_label=link_label, link_label = link["linkLabel"]["value"].lower()
disease_id=disease_id, link_type = "link_"+define_link_from_type(link_id)
signe_symptome_id=signe_symptome_id
) signe_symptome_id = link["signe_symptome"]["value"].split("/")[-1]
signe_symptome_label = link["signe_symptomeLabel"]["value"].split("/")[-1].lower()
signe_symptome_type = define_link_from_type(link_id)
with driver.session() as session:
# add diseases
session.run(
"MERGE (d:" + disease_type + " {id:$disease_id, label:$disease_label})",
disease_id=disease_id,
disease_label=disease_label,
)
# add symptoms
session.run(
"MERGE (s:" + signe_symptome_type + " {id:$signe_symptome_id, label:$signe_symptome_label})",
signe_symptome_id=signe_symptome_id,
signe_symptome_label=signe_symptome_label,
)
# add link
session.run(
"MATCH (d:" + disease_type + " {id:$disease_id})"
"MATCH (s:" + signe_symptome_type + " {id:$signe_symptome_id})"
"MERGE (d)-[l:" + link_type + " {id:$link_id, label:$link_label}]->(s)",
link_id=link_id,
link_label=link_label,
disease_id=disease_id,
signe_symptome_id=signe_symptome_id
)
# Connection with Neo4j
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USR, NEO4J_PSW))
create_graph()
# Close Neo4j connection # Close Neo4j connection
driver.close() driver.close()