Merge branch 'master' of gitlab.com:matteodelabre/wikimedica-disease-search

This commit is contained in:
Mattéo Delabre 2019-11-27 03:33:19 -05:00
commit e551df60ec
Signed by: matteo
GPG Key ID: AE3FBD02DC583ABB
2 changed files with 98 additions and 61 deletions

View File

@ -1,14 +1,14 @@
#!/usr/bin/env python
from fetch import wikidata
from fetch import wikidata, wikimedica
from neo4j import GraphDatabase
import json
NEO4J_URI = "bolt://localhost:7687"
NEO4J_USR = "neo4j"
NEO4J_PSW = "test"
# Connection with Neo4j
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USR, NEO4J_PSW))
ALIGNEMENT_FILE_PATH = "data/alignment_result.json"
@ -28,14 +28,18 @@ def define_link_from_type(link_id):
# Create indexes
with driver.session() as session:
def create_graph():
"""
Build and insert graph from wikidata to neo4j
"""
# Create indexes
with driver.session() as session:
session.run("CREATE INDEX ON :Disease(id);")
session.run("CREATE INDEX ON :Sign_symsymptoms(id);")
session.run("CREATE INDEX ON :Risk_factor(id);")
# Get all diseases, links, symptoms
request_disease_links = (wikidata.request("""
# Get all diseases, links, symptoms
request_disease_links = (wikidata.request("""
SELECT ?maladie ?maladieLabel ?link ?linkLabel ?signe_symptome ?signe_symptomeLabel
WHERE {
?maladie ?link ?signe_symptome.
@ -43,10 +47,10 @@ request_disease_links = (wikidata.request("""
VALUES ?link {wdt:P780 wdt:p1542 wdt:P5642}
}
ORDER BY ?maladie
"""
))['results']['bindings']
"""
))['results']['bindings']
for link in request_disease_links:
for link in request_disease_links:
disease_id = link["maladie"]["value"].split("/")[-1]
disease_label = link["maladieLabel"]["value"].lower()
disease_type = "Disease"
@ -85,5 +89,36 @@ for link in request_disease_links:
signe_symptome_id=signe_symptome_id
)
def align_with_wikimedica():
    """
    Attach Wikimedica page links to the matching nodes of the Neo4j graph.

    Reads the alignment entries stored as JSON in ALIGNEMENT_FILE_PATH
    (expected to be an array of objects). For every entry that carries a
    'wikidata_id', the nodes whose ``id`` equals that identifier get their
    ``wikimedia_id`` property set to the web page address derived from the
    entry's 'wikimedica_uri'. Entries without a 'wikidata_id' are skipped.

    :raises KeyError: if an entry has a 'wikidata_id' but no 'wikimedica_uri'
    """
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(ALIGNEMENT_FILE_PATH, "r", encoding="utf-8") as align_file:
        align = json.load(align_file)

    # A single session serves every update instead of opening one per entity.
    with driver.session() as session:
        for entity in align:
            if 'wikidata_id' not in entity:
                continue
            wikimedica_page = wikimedica.get_web_page(entity['wikimedica_uri'])
            # NOTE(review): the property is spelled "wikimedia_id" (not
            # "wikimedica_id"); kept as-is because readers of the graph may
            # rely on that exact name -- confirm before renaming.
            session.run(
                "MATCH (d {id:$wikidata_id})"
                "SET d.wikimedia_id = $wikimedica_uri",
                wikidata_id=entity['wikidata_id'],
                wikimedica_uri=wikimedica_page,
            )
# Connection with Neo4j
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USR, NEO4J_PSW))
try:
    # Build the graph first, then enrich its nodes with Wikimedica links
    create_graph()
    align_with_wikimedica()
finally:
    # Close the Neo4j connection even when one of the steps above fails
    driver.close()

View File

@ -1,18 +1,20 @@
from .http import session
import rdflib
def request(uri, request):
    """
    Run a SPARQL query against the RDF data served at a WikiMedica URI.

    The resource is downloaded, parsed into an in-memory rdflib graph, and
    the query is evaluated on that graph.

    :param uri: WikiMedica URI to fetch
    :param request: SPARQL query to evaluate
    :return: query results as a list of dictionaries, one per result row
    """
    data = session.get(uri, stream=True)
    try:
        g = rdflib.Graph()
        # Parse straight from the raw response stream
        g.parse(data.raw)
    finally:
        # With stream=True the connection stays checked out until the
        # response is closed (assumes a requests-style session object).
        data.close()

    qres = g.query(request)
    return [row.asdict() for row in qres]
def get_web_page(uri):
    """
    Return ``uri`` with every "/Special:ExportRDF" segment removed.

    Presumably converts a WikiMedica RDF export address into the address
    of the corresponding web page -- confirm against callers.

    :param uri: address to clean up
    :return: the address without any "/Special:ExportRDF" occurrence
    """
    export_marker = "/Special:ExportRDF"
    # Cutting on the marker and gluing the pieces back drops each occurrence
    pieces = uri.split(export_marker)
    return "".join(pieces)