Intégration du module fetch
This commit is contained in:
parent
4c094699ee
commit
fec298b472
|
@ -1,3 +1,4 @@
|
||||||
|
from fetch import wikidata, wikimedica
|
||||||
import requests
|
import requests
|
||||||
import rdflib
|
import rdflib
|
||||||
|
|
||||||
|
@ -8,48 +9,16 @@ SIGNES_ET_SYMPTOMES_WIKIMEDICA_PAGE = (
|
||||||
|
|
||||||
PREFIX_PROPERTY = "http://wikimedi.ca/wiki/Special:URIResolver/Property-3A"
|
PREFIX_PROPERTY = "http://wikimedi.ca/wiki/Special:URIResolver/Property-3A"
|
||||||
|
|
||||||
WIKIDATA_SPARQL_ENDPOINT = "https://query.wikidata.org/sparql"
|
|
||||||
|
|
||||||
USER_AGENT = "WikimedicaDiseaseSearch/0.1 (https://gitlab.com/matteodelabre/wikimedica-disease-search)"
|
USER_AGENT = "WikimedicaDiseaseSearch/0.1 (https://gitlab.com/matteodelabre/wikimedica-disease-search)"
|
||||||
|
|
||||||
session = requests.Session()
|
session = requests.Session()
|
||||||
session.headers = {'User-Agent': USER_AGENT}
|
session.headers = {'User-Agent': USER_AGENT}
|
||||||
|
|
||||||
def request_wikimedica(page, request):
    """
    Run a SPARQL query against the RDF data served for a WikiMedica page.

    The page is downloaded through the shared HTTP session, parsed into an
    in-memory rdflib graph, and the query is evaluated on that graph.

    :param page: URL of the targeted WikiMedica page
    :param request: SPARQL query to evaluate on the page's graph
    :return: list with one dict per result row (built by ``row.asdict()``)
    """
    # A streamed response keeps its connection checked out until it is
    # closed; the context manager releases it even if parsing raises.
    with session.get(page, stream=True) as data:
        g = rdflib.Graph()
        g.parse(data.raw)

    # The graph is fully loaded at this point, so the query no longer
    # needs the HTTP response.
    qres = g.query(request)

    return [row.asdict() for row in qres]
|
|
||||||
|
|
||||||
|
|
||||||
def request_endpoint(endpoint, request):
    """
    Send a SPARQL query to an endpoint and return the decoded JSON reply.

    :param endpoint: URL of the SPARQL endpoint
    :param request: SPARQL query to send
    :return: JSON-decoded body of the response
    :raises Exception: if the HTTP status code is not 200
    """
    query_params = {'format': 'json', 'query': request}
    response = session.get(endpoint, params=query_params)

    # Only an exact 200 counts as success; any other status is an error.
    if response.status_code == 200:
        return response.json()

    raise Exception('Erreur {}'.format(response.status_code))
|
|
||||||
|
|
||||||
|
|
||||||
nb_wikimedica_entity = 0
|
nb_wikimedica_entity = 0
|
||||||
nb_align_by_property = 0
|
nb_align_by_property = 0
|
||||||
nb_align_by_name = 0
|
nb_align_by_name = 0
|
||||||
nb_align_conflict = 0
|
nb_multi_align_possibility = 0
|
||||||
|
|
||||||
# get name and URI from WikiMedica
|
# get name and URI from WikiMedica
|
||||||
request_name_uri = """
|
request_name_uri = """
|
||||||
|
@ -60,7 +29,7 @@ request_name_uri = """
|
||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
entitys_dic = request_wikimedica(
|
entitys_dic = wikimedica.request(
|
||||||
SIGNES_ET_SYMPTOMES_WIKIMEDICA_PAGE,
|
SIGNES_ET_SYMPTOMES_WIKIMEDICA_PAGE,
|
||||||
request_name_uri
|
request_name_uri
|
||||||
)
|
)
|
||||||
|
@ -69,10 +38,13 @@ nb_wikimedica_entity = len(entitys_dic)
|
||||||
|
|
||||||
# Try align entity with Wikidata
|
# Try align entity with Wikidata
|
||||||
for entity in entitys_dic:
|
for entity in entitys_dic:
|
||||||
print()
|
# Pre-processing of the name: keep only the text before the first "("
|
||||||
print("="*10 + " " + entity['name'] + " " + "="*10)
|
entity['name'] = (entity['name'].split("(")[0]).strip()
|
||||||
print(entity['uri'])
|
name = entity['name']
|
||||||
|
|
||||||
|
print()
|
||||||
|
print("="*10 + " " + name + " " + "="*10)
|
||||||
|
print(entity['uri'])
|
||||||
|
|
||||||
# use Wikidata_id property
|
# use Wikidata_id property
|
||||||
request_prop_Wikidata_id = """
|
request_prop_Wikidata_id = """
|
||||||
|
@ -83,7 +55,7 @@ for entity in entitys_dic:
|
||||||
}}
|
}}
|
||||||
""".format(PREFIX_PROPERTY)
|
""".format(PREFIX_PROPERTY)
|
||||||
|
|
||||||
prop_Wikidata_id = request_wikimedica(
|
prop_Wikidata_id = wikimedica.request(
|
||||||
entity['uri'],
|
entity['uri'],
|
||||||
request_prop_Wikidata_id
|
request_prop_Wikidata_id
|
||||||
)
|
)
|
||||||
|
@ -95,7 +67,6 @@ for entity in entitys_dic:
|
||||||
elif len(prop_Wikidata_id) > 1:
|
elif len(prop_Wikidata_id) > 1:
|
||||||
print("Attention : il y a plus de 1 lien 'property:wikidata_id'")
|
print("Attention : il y a plus de 1 lien 'property:wikidata_id'")
|
||||||
|
|
||||||
|
|
||||||
# use name of entity
|
# use name of entity
|
||||||
else:
|
else:
|
||||||
request_search_by_name = """
|
request_search_by_name = """
|
||||||
|
@ -106,12 +77,11 @@ for entity in entitys_dic:
|
||||||
VALUES ?entityLabel {{"{}"@fr "{}"@fr}}
|
VALUES ?entityLabel {{"{}"@fr "{}"@fr}}
|
||||||
VALUES ?type {{wd:Q12136 wd:Q1441305 wd:Q169872}}
|
VALUES ?type {{wd:Q12136 wd:Q1441305 wd:Q169872}}
|
||||||
}}
|
}}
|
||||||
""".format((entity['name']).lower(), (entity['name']).capitalize())
|
""".format(name.lower(), name.capitalize())
|
||||||
|
|
||||||
list_wikidata_pages_uri = (request_endpoint(
|
list_wikidata_pages_uri = (
|
||||||
WIKIDATA_SPARQL_ENDPOINT,
|
wikidata.request(request_search_by_name)
|
||||||
request_search_by_name
|
)['results']['bindings']
|
||||||
))['results']['bindings']
|
|
||||||
|
|
||||||
if len(list_wikidata_pages_uri) == 1:
|
if len(list_wikidata_pages_uri) == 1:
|
||||||
nb_align_by_name = nb_align_by_name + 1
|
nb_align_by_name = nb_align_by_name + 1
|
||||||
|
@ -120,12 +90,14 @@ for entity in entitys_dic:
|
||||||
print("align with 'name', 1 response")
|
print("align with 'name', 1 response")
|
||||||
print(wikidata_uri['entity']['value'])
|
print(wikidata_uri['entity']['value'])
|
||||||
elif len(list_wikidata_pages_uri) > 1:
|
elif len(list_wikidata_pages_uri) > 1:
|
||||||
nb_align_conflict = nb_align_conflict + 1
|
nb_multi_align_possibility = nb_multi_align_possibility + 1
|
||||||
print("align with 'name', multi response")
|
print("align with 'name', multi response")
|
||||||
print(list_wikidata_pages_uri)
|
print(list_wikidata_pages_uri)
|
||||||
|
|
||||||
print()
|
print()
|
||||||
print("nb_wikimedica_entity : ", nb_wikimedica_entity)
|
print("nb wikimedica_entity : ", nb_wikimedica_entity)
|
||||||
print("nb_align_by_property : ", nb_align_by_property)
|
print("nb align by property : ", nb_align_by_property)
|
||||||
print("nb_align_by_name : ", nb_align_by_name)
|
print("nb align by name : ", nb_align_by_name)
|
||||||
print("nb_align_conflict : ", nb_align_conflict)
|
print("nb multi align possibility : ", nb_multi_align_possibility)
|
||||||
|
print("nb no align : ", nb_wikimedica_entity -
|
||||||
|
nb_align_by_property - nb_align_by_name)
|
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,20 @@
|
||||||
|
from .http import session
|
||||||
|
|
||||||
|
WIKIDATA_SPARQL_ENDPOINT = "https://query.wikidata.org/sparql"
|
||||||
|
|
||||||
|
def request(request):
    """
    Send a SPARQL query to the Wikidata endpoint.

    :param request: SPARQL query to send
    :return: JSON-decoded body of the endpoint's response
    :raises Exception: if the HTTP status code is not 200
    """
    query_params = {'format': 'json', 'query': request}
    response = session.get(WIKIDATA_SPARQL_ENDPOINT, params=query_params)

    # Only an exact 200 counts as success; any other status is an error.
    if response.status_code == 200:
        return response.json()

    raise Exception('Erreur {}'.format(response.status_code))
|
|
@ -0,0 +1,18 @@
|
||||||
|
from .http import session
|
||||||
|
import rdflib
|
||||||
|
|
||||||
|
def request(page, request):
    """
    Run a SPARQL query against the RDF data served for a WikiMedica page.

    The page is downloaded through the shared HTTP session, parsed into an
    in-memory rdflib graph, and the query is evaluated on that graph.

    :param page: URL of the targeted WikiMedica page
    :param request: SPARQL query to evaluate on the page's graph
    :return: list with one dict per result row (built by ``row.asdict()``)
    """
    # A streamed response keeps its connection checked out until it is
    # closed; the context manager releases it even if parsing raises.
    with session.get(page, stream=True) as data:
        g = rdflib.Graph()
        g.parse(data.raw)

    # The graph is fully loaded at this point, so the query no longer
    # needs the HTTP response.
    qres = g.query(request)

    return [row.asdict() for row in qres]
|
Loading…
Reference in New Issue