Merge branch 'master' of gitlab.com:matteodelabre/wikimedica-disease-search
This commit is contained in:
commit
57b78d2f54
|
@ -1,7 +1,9 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
from fetch import wikidata, wikimedica
|
from fetch import wikidata, wikimedica, mediawiki_api, wikipedia_pageviews
|
||||||
from neo4j import GraphDatabase
|
from neo4j import GraphDatabase
|
||||||
import json
|
import json
|
||||||
|
import collections
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
|
|
||||||
NEO4J_URI = "bolt://localhost:7687"
|
NEO4J_URI = "bolt://localhost:7687"
|
||||||
|
@ -28,6 +30,35 @@ def define_link_from_type(link_id):
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def get_score_visitor(wikipedia_page_uri):
    """
    Compute a smoothed, project-relative page-view series for an article.

    The views of the article and of every page redirecting to it are summed
    per key, each sum is divided by the aggregate views of the whole project
    for the same key, and the result is passed through
    ``wikipedia_pageviews.mean`` and then ``wikipedia_pageviews.smooth``.

    :param wikipedia_page_uri: full article URI, for example
        ``https://fr.wikipedia.org/wiki/Grippe``
    :return: whatever ``wikipedia_pageviews.smooth`` returns for the
        averaged relative views with a smoothing parameter of 10
    :raises IndexError: if the URI has fewer than three "/"-separated parts
        (no host component)
    """
    # Split into scheme, "", host, "wiki" and title. Capping the number of
    # splits keeps a title that itself contains "/" (e.g. "AC/DC") in one
    # piece instead of keeping only its last segment.
    uri_parts = wikipedia_page_uri.split("/", 4)
    project = uri_parts[2]
    article = urllib.parse.unquote(uri_parts[-1])

    site = mediawiki_api.instanciate(project)
    project_views = wikipedia_pageviews.get_aggregate(project)

    # From here on, only the canonical title is used
    article = mediawiki_api.article_canonical(site, article)
    redirects = mediawiki_api.article_redirects(site, article)

    # Adding onto an empty Counter merges the per-page counts key by key
    # (keys are presumably dates -- confirm against wikipedia_pageviews)
    total_views = sum(
        (wikipedia_pageviews.get_article(project, page)
            for page in redirects + [article]),
        start=collections.Counter()
    )

    # Normalize by the whole project's traffic for the same key
    relative_views = {
        date: total_view / project_views[date]
        for date, total_view in total_views.items()
    }

    mean_views = wikipedia_pageviews.mean(relative_views)
    return wikipedia_pageviews.smooth(mean_views, 10)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def create_graph():
|
def create_graph():
|
||||||
"""
|
"""
|
||||||
Build and insert graph from wikidata to neo4j
|
Build and insert graph from wikidata to neo4j
|
||||||
|
@ -40,13 +71,16 @@ def create_graph():
|
||||||
|
|
||||||
# Get all diseases, links, symptoms
|
# Get all diseases, links, symptoms
|
||||||
request_disease_links = (wikidata.request("""
|
request_disease_links = (wikidata.request("""
|
||||||
SELECT ?maladie ?maladieLabel ?link ?linkLabel ?signe_symptome ?signe_symptomeLabel
|
SELECT ?maladie ?maladieLabel ?link ?linkLabel ?signe_symptome ?signe_symptomeLabel ?wikipediaArticle
|
||||||
WHERE {
|
WHERE {
|
||||||
?maladie ?link ?signe_symptome.
|
?maladie ?link ?signe_symptome.
|
||||||
|
OPTIONAL {
|
||||||
|
?wikipediaArticle schema:about ?maladie;
|
||||||
|
schema:isPartOf <https://fr.wikipedia.org/>.
|
||||||
|
}
|
||||||
SERVICE wikibase:label { bd:serviceParam wikibase:language "fr"}.
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "fr"}.
|
||||||
VALUES ?link {wdt:P780 wdt:p1542 wdt:P5642}
|
VALUES ?link {wdt:P780 wdt:p1542 wdt:P5642}
|
||||||
}
|
}
|
||||||
ORDER BY ?maladie
|
|
||||||
"""
|
"""
|
||||||
))['results']['bindings']
|
))['results']['bindings']
|
||||||
|
|
||||||
|
@ -54,6 +88,8 @@ def create_graph():
|
||||||
disease_id = link["maladie"]["value"].split("/")[-1]
|
disease_id = link["maladie"]["value"].split("/")[-1]
|
||||||
disease_label = link["maladieLabel"]["value"].lower()
|
disease_label = link["maladieLabel"]["value"].lower()
|
||||||
disease_type = "Disease"
|
disease_type = "Disease"
|
||||||
|
wikipedia_uri = link["wikipediaArticle"]["value"]
|
||||||
|
weights = list(get_score_visitor(wikipedia_uri))
|
||||||
|
|
||||||
link_id = link["link"]["value"].split("/")[-1]
|
link_id = link["link"]["value"].split("/")[-1]
|
||||||
link_label = link["linkLabel"]["value"].lower()
|
link_label = link["linkLabel"]["value"].lower()
|
||||||
|
@ -63,12 +99,16 @@ def create_graph():
|
||||||
signe_symptome_label = link["signe_symptomeLabel"]["value"].split("/")[-1].lower()
|
signe_symptome_label = link["signe_symptomeLabel"]["value"].split("/")[-1].lower()
|
||||||
signe_symptome_type = define_link_from_type(link_id)
|
signe_symptome_type = define_link_from_type(link_id)
|
||||||
|
|
||||||
|
|
||||||
with driver.session() as session:
|
with driver.session() as session:
|
||||||
# add diseases
|
# add diseases
|
||||||
session.run(
|
session.run(
|
||||||
"MERGE (d:" + disease_type + " {id:$disease_id, label:$disease_label})",
|
"MERGE (d:" + disease_type +
|
||||||
|
" {id:$disease_id, label:$disease_label, weights:$weights, wikipedia_uri:$wikipedia_uri})",
|
||||||
disease_id=disease_id,
|
disease_id=disease_id,
|
||||||
disease_label=disease_label,
|
disease_label=disease_label,
|
||||||
|
weights=weights,
|
||||||
|
wikipedia_uri=wikipedia_uri
|
||||||
)
|
)
|
||||||
|
|
||||||
# add symptoms
|
# add symptoms
|
||||||
|
|
Loading…
Reference in New Issue