import logging

import requests
import urllib3

# HTTP request session
session = requests.Session()

# Logger used to write the execution log
logger = logging.getLogger('fetch.http')
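# Note (added remark, not in the original source): these request lines are
# emitted at DEBUG level, so callers must configure logging to see them;
# a minimal demonstration appears at the end of this file.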


class LoggingAdapter(requests.adapters.HTTPAdapter):
    """
    Request adapter that logs every request, so that the number of
    accesses performed per second can be monitored.
    """
    def __init__(self):
        # Retry fetching each page up to ten times, progressively
        # increasing the delay between attempts (exponential backoff:
        # with backoff_factor=0.3, urllib3 waits roughly 0.3 s, 0.6 s,
        # 1.2 s, ... between successive retries)
        retry = urllib3.util.Retry(total=10, backoff_factor=0.3)
        super(LoggingAdapter, self).__init__(max_retries=retry)

    def send(
        self, request, stream=False, timeout=None, verify=True,
        cert=None, proxies=None
    ):
        # Log the method and URL of each request before dispatching it
        logger.debug('{} {}'.format(request.method, request.url))
        return super(LoggingAdapter, self).send(
            request, stream=stream, timeout=timeout,
            verify=verify, cert=cert, proxies=proxies
        )


adapter = LoggingAdapter()
session.mount('http://', adapter)
session.mount('https://', adapter)
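# Note (added remark, not in the original source): with this Retry
# configuration, connection and read errors are retried automatically,
# but HTTP error statuses (e.g. 500) are returned as-is unless a
# status_forcelist is also passed to urllib3.util.Retry.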

# Bot identification
session.headers['User-Agent'] = (
    'WikimedicaDiseaseSearch/0.1 '
    '(https://gitlab.com/matteodelabre/wikimedica-disease-search)'
)
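

if __name__ == '__main__':
    # Minimal usage sketch (assumption: not part of the original module;
    # the URL below is a placeholder). Enables debug logging so the
    # request lines from LoggingAdapter become visible, then fetches a
    # page through the configured session.
    logging.basicConfig(level=logging.DEBUG)
    response = session.get('https://example.org/')
    print(response.status_code)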