Merge branch 'master' of gitlab.com:matteodelabre/wikimedica-disease-search
This commit is contained in:
		
						commit
						57b78d2f54
					
				|  | @ -1,7 +1,9 @@ | |||
| #!/usr/bin/env python | ||||
| from fetch import wikidata, wikimedica | ||||
| from fetch import wikidata, wikimedica, mediawiki_api, wikipedia_pageviews | ||||
| from neo4j import GraphDatabase | ||||
| import json | ||||
| import collections | ||||
| import urllib.parse | ||||
| 
 | ||||
| 
 | ||||
| NEO4J_URI = "bolt://localhost:7687" | ||||
|  | @ -28,6 +30,35 @@ def define_link_from_type(link_id): | |||
| 
 | ||||
| 
 | ||||
| 
 | ||||
def _parse_wikipedia_uri(wikipedia_page_uri):
    """
    Split a Wikipedia page URI into its host and its decoded article title.

    :param wikipedia_page_uri: absolute page URI, for instance
        ``https://fr.wikipedia.org/wiki/Grippe``
    :return: ``(project, article)`` where ``project`` is the network
        location of the URI and ``article`` the percent-decoded title
    """
    parts = urllib.parse.urlsplit(wikipedia_page_uri)
    prefix = "/wiki/"

    if parts.path.startswith(prefix):
        # Keep everything after "/wiki/": titles may themselves contain
        # slashes, so taking only the last path segment would truncate them.
        raw_title = parts.path[len(prefix):]
    else:
        # Unknown path layout: fall back to the last path segment.
        raw_title = parts.path.split("/")[-1]

    return parts.netloc, urllib.parse.unquote(raw_title)


def get_score_visitor(wikipedia_page_uri):
    """
    Compute a smoothed, project-relative page-view score for an article.

    The views of the canonical article and of all of its redirects are
    summed per date, then each daily total is divided by the aggregate
    views of the whole project for the same date. The resulting series is
    passed through ``wikipedia_pageviews.mean`` and then
    ``wikipedia_pageviews.smooth``.

    :param wikipedia_page_uri: absolute URI of the Wikipedia page
    :return: the value returned by ``wikipedia_pageviews.smooth``
    """
    project, title = _parse_wikipedia_uri(wikipedia_page_uri)

    site = mediawiki_api.instanciate(project)
    project_views = wikipedia_pageviews.get_aggregate(project)

    # Resolve the title to its canonical form before looking up redirects
    article = mediawiki_api.article_canonical(site, title)
    redirects = mediawiki_api.article_redirects(site, article)

    # Per-page view counts are merged by adding them onto an empty Counter
    total_views = sum(
        (wikipedia_pageviews.get_article(project, page)
            for page in redirects + [article]),
        start=collections.Counter()
    )

    # NOTE(review): assumes project_views holds a non-zero entry for every
    # date present in total_views -- confirm against get_aggregate
    relative_views = {
        date: total_view / project_views[date]
        for date, total_view in total_views.items()
    }

    mean_views = wikipedia_pageviews.mean(relative_views)

    # NOTE(review): 10 is presumably the smoothing window size -- confirm
    return wikipedia_pageviews.smooth(mean_views, 10)
| 
 | ||||
| 
 | ||||
| 
 | ||||
| def create_graph(): | ||||
|     """ | ||||
|     Build and insert graph from wikidata to neo4j | ||||
|  | @ -40,13 +71,16 @@ def create_graph(): | |||
| 
 | ||||
|     # Get all diseases, links, symptoms | ||||
|     request_disease_links = (wikidata.request(""" | ||||
|         SELECT ?maladie ?maladieLabel ?link ?linkLabel ?signe_symptome ?signe_symptomeLabel | ||||
|         SELECT ?maladie ?maladieLabel ?link ?linkLabel ?signe_symptome ?signe_symptomeLabel ?wikipediaArticle | ||||
|         WHERE { | ||||
|         ?maladie ?link ?signe_symptome. | ||||
|         OPTIONAL { | ||||
|             ?wikipediaArticle schema:about ?maladie; | ||||
|             schema:isPartOf <https://fr.wikipedia.org/>. | ||||
|         } | ||||
|         SERVICE wikibase:label { bd:serviceParam wikibase:language "fr"}. | ||||
|         VALUES ?link {wdt:P780 wdt:p1542 wdt:P5642} | ||||
|         } | ||||
|         ORDER BY ?maladie | ||||
|     """ | ||||
|     ))['results']['bindings'] | ||||
| 
 | ||||
|  | @ -54,6 +88,8 @@ def create_graph(): | |||
|         disease_id = link["maladie"]["value"].split("/")[-1] | ||||
|         disease_label = link["maladieLabel"]["value"].lower() | ||||
|         disease_type = "Disease" | ||||
|         wikipedia_uri = link["wikipediaArticle"]["value"] | ||||
|         weights = list(get_score_visitor(wikipedia_uri)) | ||||
| 
 | ||||
|         link_id = link["link"]["value"].split("/")[-1] | ||||
|         link_label = link["linkLabel"]["value"].lower() | ||||
|  | @ -63,12 +99,16 @@ def create_graph(): | |||
|         signe_symptome_label = link["signe_symptomeLabel"]["value"].split("/")[-1].lower() | ||||
|         signe_symptome_type = define_link_from_type(link_id) | ||||
| 
 | ||||
| 
 | ||||
|         with driver.session() as session: | ||||
|             # add diseases | ||||
|             session.run( | ||||
|                 "MERGE (d:" + disease_type + " {id:$disease_id, label:$disease_label})", | ||||
|                 "MERGE (d:" + disease_type + | ||||
|                 " {id:$disease_id, label:$disease_label, weights:$weights, wikipedia_uri:$wikipedia_uri})", | ||||
|                 disease_id=disease_id, | ||||
|                 disease_label=disease_label, | ||||
|                 weights=weights, | ||||
|                 wikipedia_uri=wikipedia_uri | ||||
|             ) | ||||
| 
 | ||||
|             # add symptoms | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue