From ea0be79129eeb9071d611f141b1521c2a01d4de2 Mon Sep 17 00:00:00 2001
From: Rémi Cérès
Date: Wed, 27 Nov 2019 00:52:21 -0500
Subject: [PATCH 1/3] build_graph: reorganize the code into a function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 data/build_graph.py | 120 +++++++++++++++++++++++---------------------
 1 file changed, 64 insertions(+), 56 deletions(-)

diff --git a/data/build_graph.py b/data/build_graph.py
index 470e3e3..a3e794e 100755
--- a/data/build_graph.py
+++ b/data/build_graph.py
@@ -7,9 +7,6 @@
 NEO4J_URI = "bolt://localhost:7687"
 NEO4J_USR = "neo4j"
 NEO4J_PSW = "test"
 
-# Connection with Neo4j
-driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USR, NEO4J_PSW))
-
 
 def define_link_from_type(link_id):
@@ -28,62 +25,73 @@ def define_link_from_type(link_id):
 
 
 
-# Create indexes
-with driver.session() as session:
-    session.run("CREATE INDEX ON :Disease(id);")
-    session.run("CREATE INDEX ON :Sign_symsymptoms(id);")
-    session.run("CREATE INDEX ON :Risk_factor(id);")
-
-# Get all diseases, links, symptoms
-request_disease_links = (wikidata.request("""
-    SELECT ?maladie ?maladieLabel ?link ?linkLabel ?signe_symptome ?signe_symptomeLabel
-    WHERE {
-        ?maladie ?link ?signe_symptome.
-        SERVICE wikibase:label { bd:serviceParam wikibase:language "fr"}.
-        VALUES ?link {wdt:P780 wdt:P1542 wdt:P5642}
-    }
-    ORDER BY ?maladie
-"""
-))['results']['bindings']
-
-for link in request_disease_links:
-    disease_id = link["maladie"]["value"].split("/")[-1]
-    disease_label = link["maladieLabel"]["value"].lower()
-    disease_type = "Disease"
-
-    link_id = link["link"]["value"].split("/")[-1]
-    link_label = link["linkLabel"]["value"].lower()
-    link_type = "link_"+define_link_from_type(link_id)
-
-    signe_symptome_id = link["signe_symptome"]["value"].split("/")[-1]
-    signe_symptome_label = link["signe_symptomeLabel"]["value"].split("/")[-1].lower()
-    signe_symptome_type = define_link_from_type(link_id)
-
+def create_graph():
+    """
+    Build the graph from Wikidata and insert it into Neo4j
+    """
+    # Create indexes
     with driver.session() as session:
-        # add diseases
-        session.run(
-            "MERGE (d:" + disease_type + " {id:$disease_id, label:$disease_label})",
-            disease_id=disease_id,
-            disease_label=disease_label,
-        )
+        session.run("CREATE INDEX ON :Disease(id);")
+        session.run("CREATE INDEX ON :Sign_symsymptoms(id);")
+        session.run("CREATE INDEX ON :Risk_factor(id);")
 
-        # add symptoms
-        session.run(
-            "MERGE (s:" + signe_symptome_type + " {id:$signe_symptome_id, label:$signe_symptome_label})",
-            signe_symptome_id=signe_symptome_id,
-            signe_symptome_label=signe_symptome_label,
-        )
+    # Get all diseases, links, symptoms
+    request_disease_links = (wikidata.request("""
+        SELECT ?maladie ?maladieLabel ?link ?linkLabel ?signe_symptome ?signe_symptomeLabel
+        WHERE {
+            ?maladie ?link ?signe_symptome.
+            SERVICE wikibase:label { bd:serviceParam wikibase:language "fr"}.
+            VALUES ?link {wdt:P780 wdt:P1542 wdt:P5642}
+        }
+        ORDER BY ?maladie
+    """
+    ))['results']['bindings']
 
-        # add link
-        session.run(
-            "MATCH (d:" + disease_type + " {id:$disease_id})"
-            "MATCH (s:" + signe_symptome_type + " {id:$signe_symptome_id})"
-            "MERGE (d)-[l:" + link_type + " {id:$link_id, label:$link_label}]->(s)",
-            link_id=link_id,
-            link_label=link_label,
-            disease_id=disease_id,
-            signe_symptome_id=signe_symptome_id
-        )
+    for link in request_disease_links:
+        disease_id = link["maladie"]["value"].split("/")[-1]
+        disease_label = link["maladieLabel"]["value"].lower()
+        disease_type = "Disease"
+
+        link_id = link["link"]["value"].split("/")[-1]
+        link_label = link["linkLabel"]["value"].lower()
+        link_type = "link_"+define_link_from_type(link_id)
+
+        signe_symptome_id = link["signe_symptome"]["value"].split("/")[-1]
+        signe_symptome_label = link["signe_symptomeLabel"]["value"].split("/")[-1].lower()
+        signe_symptome_type = define_link_from_type(link_id)
+
+        with driver.session() as session:
+            # add diseases
+            session.run(
+                "MERGE (d:" + disease_type + " {id:$disease_id, label:$disease_label})",
+                disease_id=disease_id,
+                disease_label=disease_label,
+            )
+
+            # add symptoms
+            session.run(
+                "MERGE (s:" + signe_symptome_type + " {id:$signe_symptome_id, label:$signe_symptome_label})",
+                signe_symptome_id=signe_symptome_id,
+                signe_symptome_label=signe_symptome_label,
+            )
+
+            # add link
+            session.run(
+                "MATCH (d:" + disease_type + " {id:$disease_id})"
+                "MATCH (s:" + signe_symptome_type + " {id:$signe_symptome_id})"
+                "MERGE (d)-[l:" + link_type + " {id:$link_id, label:$link_label}]->(s)",
+                link_id=link_id,
+                link_label=link_label,
+                disease_id=disease_id,
+                signe_symptome_id=signe_symptome_id
+            )
+
+
+
+# Connection with Neo4j
+driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USR, NEO4J_PSW))
+
+create_graph()
 
 # Close Neo4j connection
 driver.close()

From 824b97dc2ea3ea9a6e17bd765e589d6cd8e5b443 Mon Sep 17 00:00:00 2001
From: Rémi Cérès
Date: Wed, 27 Nov 2019 01:27:31 -0500
Subject: [PATCH 2/3] build_graph: add alignment between neo4j and wikimedica

---
 data/build_graph.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/data/build_graph.py b/data/build_graph.py
index a3e794e..7276de4 100755
--- a/data/build_graph.py
+++ b/data/build_graph.py
@@ -1,12 +1,15 @@
 #!/usr/bin/env python
 from fetch import wikidata
 from neo4j import GraphDatabase
+import json
 
 
 
 NEO4J_URI = "bolt://localhost:7687"
 NEO4J_USR = "neo4j"
 NEO4J_PSW = "test"
 
+ALIGNEMENT_FILE_PATH = "data/alignment_result.json"
+
 
 def define_link_from_type(link_id):
@@ -88,10 +91,29 @@ def create_graph():
 
 
 
+def align_with_wikimedica():
+    """
+    Align the neo4j graph with WikiMedica
+    """
+    with open(ALIGNEMENT_FILE_PATH, "r") as align_file:
+        align = json.loads(align_file.read())
+
+    for entity in align:
+        if 'wikidata_id' in entity:
+            with driver.session() as session:
+                session.run(
+                    "MATCH (d {id:$wikidata_id})"
+                    "SET d.wikimedia_id = $wikimedica_uri",
+                    wikidata_id=entity['wikidata_id'],
+                    wikimedica_uri=entity['wikimedica_uri'],
+                )
+
+
 # Connection with Neo4j
 driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USR, NEO4J_PSW))
 
 create_graph()
+align_with_wikimedica()
 
 # Close Neo4j connection
 driver.close()

From 5bebba3e606ba6e7372ea3b0e2a18dc29263e521 Mon Sep 17 00:00:00 2001
From: Rémi Cérès
Date: Wed, 27 Nov 2019 03:24:01 -0500
Subject: [PATCH 3/3] build_graph: transform the URIs into pages

---
 data/build_graph.py      | 11 ++++++++---
 data/fetch/wikimedica.py | 11 +++++++----
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/data/build_graph.py b/data/build_graph.py
index 7276de4..6359864 100755
--- a/data/build_graph.py
+++ b/data/build_graph.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
-from fetch import wikidata
+from fetch import wikidata, wikimedica
 from neo4j import GraphDatabase
 import json
 
@@ -100,15 +100,20 @@ def align_with_wikimedica():
 
     for entity in align:
         if 'wikidata_id' in entity:
+
             with driver.session() as session:
+                wikidata_id = entity['wikidata_id']
+                wikimedica_page = wikimedica.get_web_page(entity['wikimedica_uri'])
+
                 session.run(
                     "MATCH (d {id:$wikidata_id})"
                     "SET d.wikimedia_id = $wikimedica_uri",
-                    wikidata_id=entity['wikidata_id'],
-                    wikimedica_uri=entity['wikimedica_uri'],
+                    wikidata_id=wikidata_id,
+                    wikimedica_uri=wikimedica_page,
                 )
 
 
+
 # Connection with Neo4j
 driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USR, NEO4J_PSW))
 
diff --git a/data/fetch/wikimedica.py b/data/fetch/wikimedica.py
index 65d991f..0fdb1fa 100644
--- a/data/fetch/wikimedica.py
+++ b/data/fetch/wikimedica.py
@@ -1,18 +1,21 @@
 from .http import session
 import rdflib
 
 
-def request(page, request):
+def request(uri, request):
     """
-    Performs a SPARQL query on a WikiMedica page
+    Performs a SPARQL query from a WikiMedica URI
 
-    :param page: target WikiMedica page
+    :param uri: target WikiMedica URI
     :param request: SPARQL query to apply
     :return: endpoint response as a list of dictionaries
     """
-    data = session.get(page, stream=True)
+    data = session.get(uri, stream=True)
     g = rdflib.Graph()
     g.parse(data.raw)
     qres = g.query(request)
 
     return [row.asdict() for row in qres]
+
+def get_web_page(uri):
+    return uri.replace("/Special:ExportRDF", "")
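
For reference, here is a minimal sketch of how the pieces touched by this series fit together for a single alignment entry. It is an illustration only: the entry below is hypothetical (not taken from the real data/alignment_result.json) and the wikimedi.ca path layout is an assumption; like build_graph.py, it is meant to be run from the data/ directory so that the fetch package is importable.

# Hedged sketch: the entry is made up; only the string transform comes from
# data/fetch/wikimedica.py's get_web_page() introduced in the last patch.
from fetch import wikimedica

entry = {
    "wikidata_id": "Q0000000",  # hypothetical Wikidata item id
    "wikimedica_uri": "https://wikimedi.ca/index.php/Special:ExportRDF/Some_Page",  # assumed URL shape
}

if "wikidata_id" in entry:
    # get_web_page() strips the RDF-export segment, so the value written to the
    # node is the regular page rather than the Special:ExportRDF URI.
    page = wikimedica.get_web_page(entry["wikimedica_uri"])
    print(entry["wikidata_id"], "->", page)
    # prints: Q0000000 -> https://wikimedi.ca/index.php/Some_Page

This is presumably the point of the third patch: the wikimedica_uri values in the alignment file point at the RDF export used for fetching, while the property stored on the Neo4j node becomes a directly browsable page link.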