Merge branch 'master' of gitlab.com:matteodelabre/wikimedica-disease-search

This commit is contained in:
Mattéo Delabre 2019-12-01 22:39:38 -05:00
commit 57b78d2f54
Signed by: matteo
GPG Key ID: AE3FBD02DC583ABB
1 changed file with 44 additions and 4 deletions

View File

@ -1,7 +1,9 @@
#!/usr/bin/env python #!/usr/bin/env python
from fetch import wikidata, wikimedica from fetch import wikidata, wikimedica, mediawiki_api, wikipedia_pageviews
from neo4j import GraphDatabase from neo4j import GraphDatabase
import json import json
import collections
import urllib.parse
NEO4J_URI = "bolt://localhost:7687" NEO4J_URI = "bolt://localhost:7687"
@ -28,6 +30,35 @@ def define_link_from_type(link_id):
def get_score_visitor(wikipedia_page_uri):
    """
    Compute a smoothed, relative page-view score for a Wikipedia article.

    The views of the article (under its canonical title) and of every page
    redirecting to it are added up per date, divided by the views of the
    whole project on the same date, and the result is passed through
    ``wikipedia_pageviews.mean`` and then ``wikipedia_pageviews.smooth``.

    :param wikipedia_page_uri: absolute URI of the article, expected to look
        like ``https://<project>/wiki/<title>``; the title may be
        percent-encoded and may itself contain ``/``.
    :return: the value produced by ``wikipedia_pageviews.smooth`` when called
        with the averaged relative views and ``10``.
    """
    # "<scheme>://<project>/<prefix>/<title>": cap the number of splits so
    # that a title containing "/" (e.g. ".../wiki/AC/DC") is kept whole
    # instead of being reduced to its last segment.
    uri_parts = wikipedia_page_uri.split("/", 4)
    project = uri_parts[2]
    article = urllib.parse.unquote(uri_parts[-1])

    site = mediawiki_api.instanciate(project)
    project_views = wikipedia_pageviews.get_aggregate(project)

    # Work with the canonical title so that redirects are looked up for the
    # actual target page.
    article = mediawiki_api.article_canonical(site, article)
    redirects = mediawiki_api.article_redirects(site, article)

    # Add up the views of the article and all its redirects.
    # NOTE(review): assumes get_article returns a Counter-compatible mapping
    # of date -> views -- TODO confirm. The start value is passed
    # positionally because the ``start=`` keyword requires Python 3.8+.
    total_views = sum(
        (
            wikipedia_pageviews.get_article(project, page)
            for page in redirects + [article]
        ),
        collections.Counter(),
    )

    # Express each date's views as a fraction of the project's views.
    relative_views = {
        date: total_view / project_views[date]
        for date, total_view in total_views.items()
    }

    mean_views = wikipedia_pageviews.mean(relative_views)
    return wikipedia_pageviews.smooth(mean_views, 10)
def create_graph(): def create_graph():
""" """
Build and insert graph from wikidata to neo4j Build and insert graph from wikidata to neo4j
@ -40,13 +71,16 @@ def create_graph():
# Get all diseases, links, symptoms # Get all diseases, links, symptoms
request_disease_links = (wikidata.request(""" request_disease_links = (wikidata.request("""
SELECT ?maladie ?maladieLabel ?link ?linkLabel ?signe_symptome ?signe_symptomeLabel SELECT ?maladie ?maladieLabel ?link ?linkLabel ?signe_symptome ?signe_symptomeLabel ?wikipediaArticle
WHERE { WHERE {
?maladie ?link ?signe_symptome. ?maladie ?link ?signe_symptome.
OPTIONAL {
?wikipediaArticle schema:about ?maladie;
schema:isPartOf <https://fr.wikipedia.org/>.
}
SERVICE wikibase:label { bd:serviceParam wikibase:language "fr"}. SERVICE wikibase:label { bd:serviceParam wikibase:language "fr"}.
VALUES ?link {wdt:P780 wdt:p1542 wdt:P5642} VALUES ?link {wdt:P780 wdt:p1542 wdt:P5642}
} }
ORDER BY ?maladie
""" """
))['results']['bindings'] ))['results']['bindings']
@ -54,6 +88,8 @@ def create_graph():
disease_id = link["maladie"]["value"].split("/")[-1] disease_id = link["maladie"]["value"].split("/")[-1]
disease_label = link["maladieLabel"]["value"].lower() disease_label = link["maladieLabel"]["value"].lower()
disease_type = "Disease" disease_type = "Disease"
wikipedia_uri = link["wikipediaArticle"]["value"]
weights = list(get_score_visitor(wikipedia_uri))
link_id = link["link"]["value"].split("/")[-1] link_id = link["link"]["value"].split("/")[-1]
link_label = link["linkLabel"]["value"].lower() link_label = link["linkLabel"]["value"].lower()
@ -63,12 +99,16 @@ def create_graph():
signe_symptome_label = link["signe_symptomeLabel"]["value"].split("/")[-1].lower() signe_symptome_label = link["signe_symptomeLabel"]["value"].split("/")[-1].lower()
signe_symptome_type = define_link_from_type(link_id) signe_symptome_type = define_link_from_type(link_id)
with driver.session() as session: with driver.session() as session:
# add diseases # add diseases
session.run( session.run(
"MERGE (d:" + disease_type + " {id:$disease_id, label:$disease_label})", "MERGE (d:" + disease_type +
" {id:$disease_id, label:$disease_label, weights:$weights, wikipedia_uri:$wikipedia_uri})",
disease_id=disease_id, disease_id=disease_id,
disease_label=disease_label, disease_label=disease_label,
weights=weights,
wikipedia_uri=wikipedia_uri
) )
# add symptoms # add symptoms