Intégration non fonctionnelle des scores de page

This commit is contained in:
Rémi Cérès 2019-11-27 04:16:05 -05:00
parent 67405c9004
commit a3d55bddaf
1 changed files with 44 additions and 4 deletions

View File

@ -1,7 +1,9 @@
#!/usr/bin/env python
from fetch import wikidata, wikimedica
from fetch import wikidata, wikimedica, mediawiki_api, wikipedia_pageviews
from neo4j import GraphDatabase
import json
import collections
import urllib.parse
NEO4J_URI = "bolt://localhost:7687"
@ -28,6 +30,35 @@ def define_link_from_type(link_id):
def get_score_visitor(wikipedia_page_uri):
    """
    Compute a page-view based score series for a Wikipedia article.

    The project host is taken from the third "/"-separated piece of the URI
    and the article title from the last piece (percent-decoded). The views of
    the canonical article and of every redirect pointing to it are added
    together, divided date by date by the project-wide aggregate, then passed
    through wikipedia_pageviews.mean and wikipedia_pageviews.smooth (with 10
    as second argument).

    Returns whatever wikipedia_pageviews.smooth returns.
    """
    uri_parts = wikipedia_page_uri.split("/")
    project = uri_parts[2]
    raw_title = urllib.parse.unquote(uri_parts[-1])

    site = mediawiki_api.instanciate(project)
    project_views = wikipedia_pageviews.get_aggregate(project)

    # Work on the canonical title so that redirects are looked up for the
    # real page rather than for an alias.
    article = mediawiki_api.article_canonical(site, raw_title)
    redirects = mediawiki_api.article_redirects(site, article)

    # Accumulate the per-date views of every alias plus the article itself.
    # NOTE(review): assuming get_article returns Counter objects, "+" keeps
    # only strictly positive totals, so zero-view dates are dropped - confirm
    # this is intended.
    total_views = collections.Counter()
    for page in redirects + [article]:
        total_views = total_views + wikipedia_pageviews.get_article(project, page)

    # Express each date's views as a share of the whole project's traffic.
    relative_views = {
        date: views / project_views[date]
        for date, views in total_views.items()
    }

    return wikipedia_pageviews.smooth(
        wikipedia_pageviews.mean(relative_views), 10
    )
def create_graph():
"""
Build and insert graph from wikidata to neo4j
@ -40,13 +71,16 @@ def create_graph():
# Get all diseases, links, symptoms
request_disease_links = (wikidata.request("""
SELECT ?maladie ?maladieLabel ?link ?linkLabel ?signe_symptome ?signe_symptomeLabel
SELECT ?maladie ?maladieLabel ?link ?linkLabel ?signe_symptome ?signe_symptomeLabel ?wikipediaArticle
WHERE {
?maladie ?link ?signe_symptome.
OPTIONAL {
?wikipediaArticle schema:about ?maladie;
schema:isPartOf <https://fr.wikipedia.org/>.
}
SERVICE wikibase:label { bd:serviceParam wikibase:language "fr"}.
VALUES ?link {wdt:P780 wdt:p1542 wdt:P5642}
}
ORDER BY ?maladie
"""
))['results']['bindings']
@ -54,6 +88,8 @@ def create_graph():
disease_id = link["maladie"]["value"].split("/")[-1]
disease_label = link["maladieLabel"]["value"].lower()
disease_type = "Disease"
wikipedia_uri = link["wikipediaArticle"]["value"]
weights = list(get_score_visitor(wikipedia_uri))
link_id = link["link"]["value"].split("/")[-1]
link_label = link["linkLabel"]["value"].lower()
@ -63,12 +99,16 @@ def create_graph():
signe_symptome_label = link["signe_symptomeLabel"]["value"].split("/")[-1].lower()
signe_symptome_type = define_link_from_type(link_id)
with driver.session() as session:
# add diseases
session.run(
"MERGE (d:" + disease_type + " {id:$disease_id, label:$disease_label})",
"MERGE (d:" + disease_type +
" {id:$disease_id, label:$disease_label, weights:$weights, wikipedia_uri:$wikipedia_uri})",
disease_id=disease_id,
disease_label=disease_label,
weights=weights,
wikipedia_uri=wikipedia_uri
)
# add symptoms