#!/usr/bin/env python
"""Build a disease / symptom / risk-factor graph in Neo4j from Wikidata.

The script queries Wikidata for diseases and their related entities, weights
each disease with Wikipedia page-view statistics, stores the result in Neo4j
and finally annotates the nodes using a Wikimedica alignment file.
"""
import collections
import json
import urllib.parse

from neo4j import GraphDatabase

from fetch import wikidata, wikimedica, mediawiki_api, wikipedia_pageviews

NEO4J_URI = "bolt://localhost:7687"
NEO4J_USR = "neo4j"
NEO4J_PSW = "test"

ALIGNEMENT_FILE_PATH = "data/alignment_result.json"

# Wikidata property id -> Neo4j node label of the entity the property points to.
_NODE_LABEL_BY_PROPERTY = {
    "P780": "Sign_symsymptoms",
    "P1542": "Sign_symsymptoms",
    "P5642": "Risk_factor",
}

# NOTE(review): the object of `schema:isPartOf` was missing from the query
# (most likely an IRI stripped as if it were an HTML tag). French Wikipedia is
# assumed because labels are requested in "fr" -- confirm.
_DISEASE_LINKS_QUERY = """
SELECT ?maladie ?maladieLabel ?link ?linkLabel ?signe_symptome ?signe_symptomeLabel ?wikipediaArticle
WHERE {
    ?maladie ?link ?signe_symptome.
    OPTIONAL {
        ?wikipediaArticle schema:about ?maladie;
                          schema:isPartOf <https://fr.wikipedia.org/>.
    }
    SERVICE wikibase:label { bd:serviceParam wikibase:language "fr"}.
    VALUES ?link {wdt:P780 wdt:P1542 wdt:P5642}
}
"""


def define_link_from_type(link_id):
    """
    Define the type of a link from its Wikidata property id.

    :param link_id: Wikidata property id of the link (e.g. "P780")
    :return: Neo4j label associated with that property
    :raises ValueError: if the property id is not one of the handled ones
    """
    try:
        return _NODE_LABEL_BY_PROPERTY[link_id]
    except KeyError:
        raise ValueError("Error : unknow link id: " + link_id) from None


def get_score_visitor(wikipedia_page_uri):
    """
    Compute the smoothed relative page views of a Wikipedia article.

    Views of the canonical article and of all its redirects are summed per
    date, divided by the views of the whole project for the same date, then
    passed through ``wikipedia_pageviews.mean`` and
    ``wikipedia_pageviews.smooth`` (window of 10).

    :param wikipedia_page_uri: full URI of the Wikipedia article
    :return: value returned by ``wikipedia_pageviews.smooth``
    """
    uri_parts = wikipedia_page_uri.split("/")
    project = uri_parts[2]  # host part, e.g. "fr.wikipedia.org"
    requested_title = urllib.parse.unquote(uri_parts[-1])

    site = mediawiki_api.instanciate(project)
    project_views = wikipedia_pageviews.get_aggregate(project)

    article = mediawiki_api.article_canonical(site, requested_title)
    redirects = mediawiki_api.article_redirects(site, article)

    # Readers may land on any redirect, so every title counts for the article.
    total_views = sum(
        (wikipedia_pageviews.get_article(project, page) for page in redirects + [article]),
        start=collections.Counter()
    )
    relative_views = {
        date: total_view / project_views[date]
        for date, total_view in total_views.items()
    }

    mean_views = wikipedia_pageviews.mean(relative_views)
    return wikipedia_pageviews.smooth(mean_views, 10)


def create_graph():
    """
    Build and insert graph from wikidata to neo4j

    Every row returned by the SPARQL query describes one
    (disease)-[link]->(symptom or risk factor) triple. Diseases without a
    Wikipedia article are still inserted, without ``weights`` and
    ``wikipedia_uri`` properties.
    """
    # Create indexes
    with driver.session() as session:
        session.run("CREATE INDEX ON :Disease(id);")
        session.run("CREATE INDEX ON :Sign_symsymptoms(id);")
        session.run("CREATE INDEX ON :Risk_factor(id);")

    # Get all diseases, links, symptoms
    request_disease_links = wikidata.request(_DISEASE_LINKS_QUERY)['results']['bindings']

    # A disease appears once per linked entity; computing its weights needs
    # several HTTP requests, so they are computed only once per article.
    weights_by_uri = {}

    for link in request_disease_links:
        disease_id = link["maladie"]["value"].split("/")[-1]
        disease_label = link["maladieLabel"]["value"].lower()
        disease_type = "Disease"

        # The article is OPTIONAL in the query, hence possibly unbound.
        wikipedia_uri = link.get("wikipediaArticle", {}).get("value")
        if wikipedia_uri is None:
            weights = None
        else:
            if wikipedia_uri not in weights_by_uri:
                weights_by_uri[wikipedia_uri] = list(get_score_visitor(wikipedia_uri))
            weights = weights_by_uri[wikipedia_uri]

        link_id = link["link"]["value"].split("/")[-1]
        link_label = link["linkLabel"]["value"].lower()
        signe_symptome_type = define_link_from_type(link_id)
        link_type = "link_" + signe_symptome_type

        signe_symptome_id = link["signe_symptome"]["value"].split("/")[-1]
        signe_symptome_label = link["signe_symptomeLabel"]["value"].split("/")[-1].lower()

        with driver.session() as session:
            # add diseases
            # MERGE only on the id: null values are not allowed in a MERGE
            # pattern, and merging on every property would duplicate the node
            # whenever the weights change. SET with null removes the property.
            session.run(
                "MERGE (d:" + disease_type + " {id:$disease_id}) "
                "SET d.label = $disease_label, d.weights = $weights, "
                "d.wikipedia_uri = $wikipedia_uri",
                disease_id=disease_id,
                disease_label=disease_label,
                weights=weights,
                wikipedia_uri=wikipedia_uri
            )

            # add symptoms
            session.run(
                "MERGE (s:" + signe_symptome_type + " {id:$signe_symptome_id, label:$signe_symptome_label})",
                signe_symptome_id=signe_symptome_id,
                signe_symptome_label=signe_symptome_label,
            )

            # add link
            session.run(
                "MATCH (d:" + disease_type + " {id:$disease_id}) "
                "MATCH (s:" + signe_symptome_type + " {id:$signe_symptome_id}) "
                "MERGE (d)-[l:" + link_type + " {id:$link_id, label:$link_label}]->(s)",
                link_id=link_id,
                link_label=link_label,
                disease_id=disease_id,
                signe_symptome_id=signe_symptome_id
            )


def align_with_wikimedica():
    """
    Align neo4j graph and Wikimedica

    Reads the alignment file and, for every entry carrying a ``wikidata_id``,
    stores the result of ``wikimedica.get_web_page`` on the matching node(s).
    """
    with open(ALIGNEMENT_FILE_PATH, "r", encoding="utf-8") as align_file:
        align = json.load(align_file)

    for entity in align:
        if 'wikidata_id' not in entity:
            continue

        wikidata_id = entity['wikidata_id']
        wikimedica_page = wikimedica.get_web_page(entity['wikimedica_uri'])

        with driver.session() as session:
            # NOTE(review): the property is named "wikimedia_id" although it
            # holds Wikimedica data; kept as-is because other code may read it.
            session.run(
                "MATCH (d {id:$wikidata_id}) "
                "SET d.wikimedia_id = $wikimedica_uri",
                wikidata_id=wikidata_id,
                wikimedica_uri=wikimedica_page,
            )


# Connection with Neo4j
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USR, NEO4J_PSW))

try:
    create_graph()
    align_with_wikimedica()
finally:
    # Close Neo4j connection, even when one of the steps above failed
    driver.close()