Merge branch 'master' of gitlab.com:matteodelabre/wikimedica-disease-search
This commit is contained in:
commit
e551df60ec
|
@ -1,14 +1,14 @@
|
|||
#!/usr/bin/env python
|
||||
from fetch import wikidata
|
||||
from fetch import wikidata, wikimedica
|
||||
from neo4j import GraphDatabase
|
||||
import json
|
||||
|
||||
|
||||
NEO4J_URI = "bolt://localhost:7687"
|
||||
NEO4J_USR = "neo4j"
|
||||
NEO4J_PSW = "test"
|
||||
|
||||
# Connection with Neo4j
|
||||
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USR, NEO4J_PSW))
|
||||
ALIGNEMENT_FILE_PATH = "data/alignment_result.json"
|
||||
|
||||
|
||||
|
||||
|
@ -28,62 +28,97 @@ def define_link_from_type(link_id):
|
|||
|
||||
|
||||
|
||||
def create_graph():
    """
    Build and insert graph from wikidata to neo4j

    Steps:
      1. Create the Neo4j indexes on the ``id`` property.
      2. Ask Wikidata for every (disease, link, target) triple whose
         property is one of P780, P1542 or P5642, with French labels.
      3. For each triple, MERGE the disease node, the target node and the
         relationship between them.

    Relies on the module-level ``driver`` and on ``define_link_from_type``
    defined earlier in this file.
    """
    # Create indexes
    with driver.session() as session:
        session.run("CREATE INDEX ON :Disease(id);")
        # NOTE(review): "Sign_symsymptoms" looks like a typo; confirm it
        # matches the label returned by define_link_from_type().
        session.run("CREATE INDEX ON :Sign_symsymptoms(id);")
        session.run("CREATE INDEX ON :Risk_factor(id);")

    # Get all diseases, links, symptoms
    # Property identifiers are case-sensitive: it must be wdt:P1542, the
    # lowercase form "wdt:p1542" never matches any statement.
    request_disease_links = (wikidata.request("""
        SELECT ?maladie ?maladieLabel ?link ?linkLabel ?signe_symptome ?signe_symptomeLabel
        WHERE {
            ?maladie ?link ?signe_symptome.
            SERVICE wikibase:label { bd:serviceParam wikibase:language "fr"}.
            VALUES ?link {wdt:P780 wdt:P1542 wdt:P5642}
        }
        ORDER BY ?maladie
        """
    ))['results']['bindings']

    disease_type = "Disease"

    # One session for the whole import instead of one per result row.
    with driver.session() as session:
        for link in request_disease_links:
            # Entity/property URIs end with the identifier (last path segment).
            disease_id = link["maladie"]["value"].split("/")[-1]
            disease_label = link["maladieLabel"]["value"].lower()

            link_id = link["link"]["value"].split("/")[-1]
            link_label = link["linkLabel"]["value"].lower()

            # The same helper result names both the target node label and,
            # prefixed with "link_", the relationship type.
            signe_symptome_type = define_link_from_type(link_id)
            link_type = "link_" + signe_symptome_type

            signe_symptome_id = link["signe_symptome"]["value"].split("/")[-1]
            signe_symptome_label = link["signe_symptomeLabel"]["value"].split("/")[-1].lower()

            # Node labels and relationship types cannot be passed as Cypher
            # parameters, hence the string concatenation; values stay
            # parameterized.

            # add diseases
            session.run(
                "MERGE (d:" + disease_type + " {id:$disease_id, label:$disease_label})",
                disease_id=disease_id,
                disease_label=disease_label,
            )

            # add symptoms
            session.run(
                "MERGE (s:" + signe_symptome_type + " {id:$signe_symptome_id, label:$signe_symptome_label})",
                signe_symptome_id=signe_symptome_id,
                signe_symptome_label=signe_symptome_label,
            )

            # add link
            session.run(
                "MATCH (d:" + disease_type + " {id:$disease_id})"
                "MATCH (s:" + signe_symptome_type + " {id:$signe_symptome_id})"
                "MERGE (d)-[l:" + link_type + " {id:$link_id, label:$link_label}]->(s)",
                link_id=link_id,
                link_label=link_label,
                disease_id=disease_id,
                signe_symptome_id=signe_symptome_id
            )
|
||||
|
||||
|
||||
|
||||
def align_with_wikimedica():
    """
    Align neo4j graph and Wikidata

    Reads the alignment file at ``ALIGNEMENT_FILE_PATH`` and, for every
    entry that has a ``wikidata_id``, stores the Wikimedica page URL on the
    Neo4j node whose ``id`` equals that Wikidata identifier.

    Assumes the file holds an iterable of dict-like entries that carry
    ``wikimedica_uri`` whenever ``wikidata_id`` is present -- TODO confirm
    against the alignment producer.
    """
    with open(ALIGNEMENT_FILE_PATH, "r", encoding="utf-8") as align_file:
        align = json.load(align_file)

    # One session for the whole alignment instead of one per entity.
    with driver.session() as session:
        for entity in align:
            if 'wikidata_id' not in entity:
                continue

            wikidata_id = entity['wikidata_id']
            # Human-readable page URL derived from the RDF export URI.
            wikimedica_page = wikimedica.get_web_page(entity['wikimedica_uri'])

            # NOTE(review): the property is spelled "wikimedia_id" (not
            # "wikimedica_id"); kept as-is because readers of the graph may
            # depend on that name.
            session.run(
                "MATCH (d {id:$wikidata_id})"
                "SET d.wikimedia_id = $wikimedica_uri",
                wikidata_id=wikidata_id,
                wikimedica_uri=wikimedica_page,
            )
|
||||
|
||||
|
||||
|
||||
# Connection with Neo4j
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USR, NEO4J_PSW))

try:
    create_graph()
    align_with_wikimedica()
finally:
    # Close Neo4j connection, even when one of the steps above fails
    driver.close()
|
||||
|
|
|
@ -1,18 +1,20 @@
|
|||
from .http import session
|
||||
import rdflib
|
||||
|
||||
def request(uri, request):
    """
    Run a SPARQL query on the RDF data fetched from a WikiMedica URI.

    :param uri: targeted WikiMedica URI
    :param request: SPARQL query to apply
    :return: query result as a list of dictionaries (one per result row)
    """
    # Stream the response so the raw body can be handed to the RDF parser.
    data = session.get(uri, stream=True)

    g = rdflib.Graph()
    g.parse(data.raw)
    qres = g.query(request)

    return [row.asdict() for row in qres]
|
||||
|
||||
def get_web_page(uri):
    """
    Return *uri* with every "/Special:ExportRDF" segment removed.

    :param uri: URI string, typically an RDF export URI
    :return: the same string without the "/Special:ExportRDF" part;
        unchanged when that part is absent
    """
    return uri.replace("/Special:ExportRDF", "")
|
||||
|
|
Loading…
Reference in New Issue