Merge branch 'master' of gitlab.com:matteodelabre/wikimedica-disease-search
This commit is contained in:
		
						commit
						e551df60ec
					
				|  | @ -1,14 +1,14 @@ | |||
| #!/usr/bin/env python | ||||
| from fetch import wikidata | ||||
| from fetch import wikidata, wikimedica | ||||
| from neo4j import GraphDatabase | ||||
| import json | ||||
| 
 | ||||
| 
 | ||||
| NEO4J_URI = "bolt://localhost:7687" | ||||
| NEO4J_USR = "neo4j" | ||||
| NEO4J_PSW = "test" | ||||
| 
 | ||||
| # Connection with Neo4j | ||||
| driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USR, NEO4J_PSW)) | ||||
| ALIGNEMENT_FILE_PATH = "data/alignment_result.json" | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
|  | @ -28,62 +28,97 @@ def define_link_from_type(link_id): | |||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| # Create indexes | ||||
| with driver.session() as session: | ||||
|         session.run("CREATE INDEX ON :Disease(id);") | ||||
|         session.run("CREATE INDEX ON :Sign_symsymptoms(id);") | ||||
|         session.run("CREATE INDEX ON :Risk_factor(id);") | ||||
| 
 | ||||
| # Get all diseases, links, symptoms | ||||
| request_disease_links = (wikidata.request(""" | ||||
|     SELECT ?maladie ?maladieLabel ?link ?linkLabel ?signe_symptome ?signe_symptomeLabel | ||||
|     WHERE { | ||||
|     ?maladie ?link ?signe_symptome. | ||||
|     SERVICE wikibase:label { bd:serviceParam wikibase:language "fr"}. | ||||
|     VALUES ?link {wdt:P780 wdt:p1542 wdt:P5642} | ||||
|     } | ||||
|     ORDER BY ?maladie | ||||
| """ | ||||
| ))['results']['bindings'] | ||||
| 
 | ||||
| for link in request_disease_links: | ||||
|     disease_id = link["maladie"]["value"].split("/")[-1] | ||||
|     disease_label = link["maladieLabel"]["value"].lower() | ||||
|     disease_type = "Disease" | ||||
| 
 | ||||
|     link_id = link["link"]["value"].split("/")[-1] | ||||
|     link_label = link["linkLabel"]["value"].lower() | ||||
|     link_type = "link_"+define_link_from_type(link_id) | ||||
| 
 | ||||
|     signe_symptome_id = link["signe_symptome"]["value"].split("/")[-1] | ||||
|     signe_symptome_label = link["signe_symptomeLabel"]["value"].split("/")[-1].lower() | ||||
|     signe_symptome_type = define_link_from_type(link_id) | ||||
| 
 | ||||
| def create_graph(): | ||||
|     """ | ||||
|     Build and insert graph from wikidata to neo4j | ||||
|     """ | ||||
|     # Create indexes | ||||
|     with driver.session() as session: | ||||
|         # add diseases | ||||
|         session.run( | ||||
|             "MERGE (d:" + disease_type + " {id:$disease_id, label:$disease_label})", | ||||
|             disease_id=disease_id, | ||||
|             disease_label=disease_label, | ||||
|         ) | ||||
|             session.run("CREATE INDEX ON :Disease(id);") | ||||
|             session.run("CREATE INDEX ON :Sign_symsymptoms(id);") | ||||
|             session.run("CREATE INDEX ON :Risk_factor(id);") | ||||
| 
 | ||||
|         # add symptoms | ||||
|         session.run( | ||||
|             "MERGE (s:" + signe_symptome_type + " {id:$signe_symptome_id, label:$signe_symptome_label})", | ||||
|             signe_symptome_id=signe_symptome_id, | ||||
|             signe_symptome_label=signe_symptome_label, | ||||
|         ) | ||||
|     # Get all diseases, links, symptoms | ||||
|     request_disease_links = (wikidata.request(""" | ||||
|         SELECT ?maladie ?maladieLabel ?link ?linkLabel ?signe_symptome ?signe_symptomeLabel | ||||
|         WHERE { | ||||
|         ?maladie ?link ?signe_symptome. | ||||
|         SERVICE wikibase:label { bd:serviceParam wikibase:language "fr"}. | ||||
|         VALUES ?link {wdt:P780 wdt:p1542 wdt:P5642} | ||||
|         } | ||||
|         ORDER BY ?maladie | ||||
|     """ | ||||
|     ))['results']['bindings'] | ||||
| 
 | ||||
|         # add link | ||||
|         session.run( | ||||
|             "MATCH (d:" + disease_type + " {id:$disease_id})" | ||||
|             "MATCH (s:" + signe_symptome_type + " {id:$signe_symptome_id})" | ||||
|             "MERGE (d)-[l:" + link_type + " {id:$link_id, label:$link_label}]->(s)", | ||||
|             link_id=link_id, | ||||
|             link_label=link_label, | ||||
|             disease_id=disease_id, | ||||
|             signe_symptome_id=signe_symptome_id | ||||
|         ) | ||||
|     for link in request_disease_links: | ||||
|         disease_id = link["maladie"]["value"].split("/")[-1] | ||||
|         disease_label = link["maladieLabel"]["value"].lower() | ||||
|         disease_type = "Disease" | ||||
| 
 | ||||
|         link_id = link["link"]["value"].split("/")[-1] | ||||
|         link_label = link["linkLabel"]["value"].lower() | ||||
|         link_type = "link_"+define_link_from_type(link_id) | ||||
| 
 | ||||
|         signe_symptome_id = link["signe_symptome"]["value"].split("/")[-1] | ||||
|         signe_symptome_label = link["signe_symptomeLabel"]["value"].split("/")[-1].lower() | ||||
|         signe_symptome_type = define_link_from_type(link_id) | ||||
| 
 | ||||
|         with driver.session() as session: | ||||
|             # add diseases | ||||
|             session.run( | ||||
|                 "MERGE (d:" + disease_type + " {id:$disease_id, label:$disease_label})", | ||||
|                 disease_id=disease_id, | ||||
|                 disease_label=disease_label, | ||||
|             ) | ||||
| 
 | ||||
|             # add symptoms | ||||
|             session.run( | ||||
|                 "MERGE (s:" + signe_symptome_type + " {id:$signe_symptome_id, label:$signe_symptome_label})", | ||||
|                 signe_symptome_id=signe_symptome_id, | ||||
|                 signe_symptome_label=signe_symptome_label, | ||||
|             ) | ||||
| 
 | ||||
|             # add link | ||||
|             session.run( | ||||
|                 "MATCH (d:" + disease_type + " {id:$disease_id})" | ||||
|                 "MATCH (s:" + signe_symptome_type + " {id:$signe_symptome_id})" | ||||
|                 "MERGE (d)-[l:" + link_type + " {id:$link_id, label:$link_label}]->(s)", | ||||
|                 link_id=link_id, | ||||
|                 link_label=link_label, | ||||
|                 disease_id=disease_id, | ||||
|                 signe_symptome_id=signe_symptome_id | ||||
|             ) | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| def align_with_wikimedica(): | ||||
|     """ | ||||
|     Attach Wikimedica page URIs to the matching nodes of the neo4j graph. | ||||
|     Reads the JSON alignment file at ALIGNEMENT_FILE_PATH and, for every | ||||
|     entry that has a 'wikidata_id' key, sets the `wikimedia_id` property of | ||||
|     the node(s) whose `id` equals that value to the result of | ||||
|     wikimedica.get_web_page() applied to the entry's 'wikimedica_uri'. | ||||
|     Entries without a 'wikidata_id' key are skipped. | ||||
|     """ | ||||
|     with open(ALIGNEMENT_FILE_PATH, "r") as align_file: | ||||
|         # presumably a JSON array of objects; verify against the alignment producer | ||||
|         align = json.loads(align_file.read()) | ||||
|          | ||||
|         for entity in align: | ||||
|             if 'wikidata_id' in entity: | ||||
|                  | ||||
|                 # A new session is opened for each aligned entity. | ||||
|                 with driver.session() as session: | ||||
|                     wikidata_id = entity['wikidata_id'] | ||||
|                     # NOTE(review): despite its name, this holds the value derived | ||||
|                     # from the entry's 'wikimedica_uri', not a Wikidata page. | ||||
|                     wikidata_page = wikimedica.get_web_page(entity['wikimedica_uri']) | ||||
| 
 | ||||
|                     # The MATCH has no label, so any node with this id is updated. | ||||
|                     # NOTE(review): the property is spelled `wikimedia_id` (not | ||||
|                     # "wikimedica") - confirm which spelling readers of the graph expect. | ||||
|                     session.run( | ||||
|                         "MATCH (d {id:$wikidata_id})" | ||||
|                         "SET d.wikimedia_id = $wikimedica_uri", | ||||
|                         wikidata_id=wikidata_id, | ||||
|                         wikimedica_uri=wikidata_page, | ||||
|                     ) | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| # Connection with Neo4j | ||||
| driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USR, NEO4J_PSW)) | ||||
| 
 | ||||
| create_graph() | ||||
| align_with_wikimedica() | ||||
| 
 | ||||
| # Close Neo4j connection | ||||
| driver.close() | ||||
|  |  | |||
|  | @ -1,18 +1,20 @@ | |||
| from .http import session | ||||
| import rdflib | ||||
| 
 | ||||
| def request(page, request): | ||||
| def request(uri, request): | ||||
|     """ | ||||
|     Effectue une requête SPARQL sur une page de WikiMedica | ||||
|     Effectue une requête SPARQL depuis une uri de WikiMedica | ||||
| 
 | ||||
|     :param page: Page de WikiMedica ciblé | ||||
|     :param uri: URI de WikiMedica ciblée | ||||
|     :param request: Requête SPARQL appliquée | ||||
|     :return: Réponse du point d'accès sous forme d'un tableau de dictionnaires | ||||
|     """ | ||||
|     data = session.get(page, stream=True) | ||||
|     data = session.get(uri, stream=True) | ||||
| 
 | ||||
|     g = rdflib.Graph() | ||||
|     g.parse(data.raw) | ||||
|     qres = g.query(request) | ||||
| 
 | ||||
|     return [row.asdict() for row in qres] | ||||
| 
 | ||||
| def get_web_page(uri): | ||||
|     """ | ||||
|     Return `uri` with every occurrence of "/Special:ExportRDF" removed. | ||||
|     Presumably this turns a WikiMedica RDF export URI into the URI of the | ||||
|     regular web page - TODO confirm against the WikiMedica URL scheme. | ||||
|     :param uri: URI string to convert | ||||
|     :return: The same string without the "/Special:ExportRDF" segment | ||||
|     """ | ||||
|     return uri.replace("/Special:ExportRDF", "") | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue