Merge branch 'master' of gitlab.com:matteodelabre/wikimedica-disease-search
This commit is contained in:
commit
57b78d2f54
|
@ -1,7 +1,9 @@
|
|||
#!/usr/bin/env python
|
||||
from fetch import wikidata, wikimedica
|
||||
from fetch import wikidata, wikimedica, mediawiki_api, wikipedia_pageviews
|
||||
from neo4j import GraphDatabase
|
||||
import json
|
||||
import collections
|
||||
import urllib.parse
|
||||
|
||||
|
||||
NEO4J_URI = "bolt://localhost:7687"
|
||||
|
@ -28,6 +30,35 @@ def define_link_from_type(link_id):
|
|||
|
||||
|
||||
|
||||
def get_score_visitor(wikipedia_page_uri):
    """
    Compute the smoothed relative page-view series of a Wikipedia article.

    The views of the article (under its canonical title) and of every page
    redirecting to it are summed per date, divided by the views of the whole
    project for the same date, then passed through wikipedia_pageviews.mean
    and wikipedia_pageviews.smooth.

    :param wikipedia_page_uri: full URI of the article, for example
        "https://fr.wikipedia.org/wiki/Grippe"
    :return: the value returned by wikipedia_pageviews.smooth(mean_views, 10)
        (presumably a sequence of weights -- confirm against that helper)
    """
    # The host part of the URI ("fr.wikipedia.org") identifies the project.
    project = wikipedia_page_uri.split("/")[2]

    # Page titles may themselves contain "/" (e.g. ".../wiki/AC/DC"), so keep
    # everything after the "/wiki/" prefix instead of only the last segment.
    # Fall back to the last segment for URIs that lack that prefix.
    _, marker, raw_title = wikipedia_page_uri.partition("/wiki/")
    if not marker:
        raw_title = wikipedia_page_uri.rsplit("/", 1)[-1]
    article = urllib.parse.unquote(raw_title)

    site = mediawiki_api.instanciate(project)
    project_views = wikipedia_pageviews.get_aggregate(project)

    # Work with the canonical title from here on.
    article = mediawiki_api.article_canonical(site, article)
    redirects = mediawiki_api.article_redirects(site, article)

    # Accumulate the per-date views of every redirect plus the article itself.
    # NOTE(review): relies on get_article returning something that can be
    # added to a Counter -- presumably a Counter keyed by date; confirm.
    total_views = sum(
        (
            wikipedia_pageviews.get_article(project, page)
            for page in redirects + [article]
        ),
        start=collections.Counter()
    )

    # Express the article's views as a share of the project's views per date.
    relative_views = {
        date: total_view / project_views[date]
        for date, total_view in total_views.items()
    }

    mean_views = wikipedia_pageviews.mean(relative_views)
    return wikipedia_pageviews.smooth(mean_views, 10)
|
||||
|
||||
|
||||
|
||||
def create_graph():
|
||||
"""
|
||||
Build and insert graph from wikidata to neo4j
|
||||
|
@ -40,13 +71,16 @@ def create_graph():
|
|||
|
||||
# Get all diseases, links, symptoms
|
||||
request_disease_links = (wikidata.request("""
|
||||
SELECT ?maladie ?maladieLabel ?link ?linkLabel ?signe_symptome ?signe_symptomeLabel
|
||||
SELECT ?maladie ?maladieLabel ?link ?linkLabel ?signe_symptome ?signe_symptomeLabel ?wikipediaArticle
|
||||
WHERE {
|
||||
?maladie ?link ?signe_symptome.
|
||||
OPTIONAL {
|
||||
?wikipediaArticle schema:about ?maladie;
|
||||
schema:isPartOf <https://fr.wikipedia.org/>.
|
||||
}
|
||||
SERVICE wikibase:label { bd:serviceParam wikibase:language "fr"}.
|
||||
VALUES ?link {wdt:P780 wdt:p1542 wdt:P5642}
|
||||
}
|
||||
ORDER BY ?maladie
|
||||
"""
|
||||
))['results']['bindings']
|
||||
|
||||
|
@ -54,6 +88,8 @@ def create_graph():
|
|||
disease_id = link["maladie"]["value"].split("/")[-1]
|
||||
disease_label = link["maladieLabel"]["value"].lower()
|
||||
disease_type = "Disease"
|
||||
wikipedia_uri = link["wikipediaArticle"]["value"]
|
||||
weights = list(get_score_visitor(wikipedia_uri))
|
||||
|
||||
link_id = link["link"]["value"].split("/")[-1]
|
||||
link_label = link["linkLabel"]["value"].lower()
|
||||
|
@ -63,12 +99,16 @@ def create_graph():
|
|||
signe_symptome_label = link["signe_symptomeLabel"]["value"].split("/")[-1].lower()
|
||||
signe_symptome_type = define_link_from_type(link_id)
|
||||
|
||||
|
||||
with driver.session() as session:
|
||||
# add diseases
|
||||
session.run(
|
||||
"MERGE (d:" + disease_type + " {id:$disease_id, label:$disease_label})",
|
||||
"MERGE (d:" + disease_type +
|
||||
" {id:$disease_id, label:$disease_label, weights:$weights, wikipedia_uri:$wikipedia_uri})",
|
||||
disease_id=disease_id,
|
||||
disease_label=disease_label,
|
||||
weights=weights,
|
||||
wikipedia_uri=wikipedia_uri
|
||||
)
|
||||
|
||||
# add symptoms
|
||||
|
|
Loading…
Reference in New Issue