pageviews: Relatives au nombre de vues total/jour
This commit is contained in:
		
							parent
							
								
									9f3459fd24
								
							
						
					
					
						commit
						0faad66aa2
					
				|  | @ -5,20 +5,27 @@ import numpy | |||
| from scipy import stats | ||||
| 
 | ||||
| # Chemin racine pour les API Wikimedia | ||||
| wikimedia_base_path = 'https://wikimedia.org/api/rest_v1' | ||||
| pageviews_root = 'https://wikimedia.org/api/rest_v1' | ||||
| 
 | ||||
| # Patron d’accès à l’API pageviews de Wikimedia | ||||
| wikimedia_pageviews_path = '/' + '/'.join([ | ||||
| # Patron d’accès à l’API pageviews pour un article | ||||
| pageviews_article_endpoint = '/' + '/'.join([ | ||||
|     'metrics', 'pageviews', 'per-article', '{project}', | ||||
|     '{access}', '{agent}', '{article}', '{granularity}', | ||||
|     '{start}', '{end}' | ||||
| ]) | ||||
| 
 | ||||
| # Chemin d’accès à l’API pageviews pour un projet complet | ||||
| pageviews_project_endpoint = '/' + '/'.join([ | ||||
|     'metrics', 'pageviews', 'aggregate', '{project}', | ||||
|     '{access}', '{agent}', '{granularity}', | ||||
|     '{start}', '{end}' | ||||
| ]) | ||||
| 
 | ||||
| # Format de dates utilisée pour l’API Wikimedia | ||||
| wikimedia_date_format = '%Y%m%d' | ||||
| pageviews_date_format = '%Y%m%d' | ||||
| 
 | ||||
| # Date de première disponibilité des pageviews sur l’API Wikimedia | ||||
| wikimedia_pageviews_start = datetime(2015, 7, 1) | ||||
| pageviews_first_data = datetime(2015, 7, 1) | ||||
| 
 | ||||
| # Tableau contenant tous les jours de l’année de 1 à 365 | ||||
| year_all_days = numpy.arange(1, 366) | ||||
|  | @ -68,23 +75,56 @@ def smooth(views, scale): | |||
|     return pdf_matrix.dot(views) | ||||
| 
 | ||||
| 
 | ||||
| def get(project, article): | ||||
def get_aggregate(project):
    """
    Get the number of views per day for a whole project.

    Queries the pageviews "aggregate" endpoint with ``all-access`` access,
    ``user`` agent and ``daily`` granularity, from ``pageviews_first_data``
    up to today.

    :param project: Targeted project, substituted as-is into the
        endpoint URL.
    :return: Counter mapping each day to its number of views. Keys are the
        first 8 characters of each record's timestamp (presumably
        ``YYYYMMDD``, matching ``pageviews_date_format`` -- confirm
        against the API).
    :raises Exception: If the response status is not 200. The message is
        the ``detail`` field of the error body when available, otherwise
        a generic message carrying the status code.
    """
    res = session.get(pageviews_root + pageviews_project_endpoint.format(
        project=project,
        access='all-access',
        agent='user',
        granularity='daily',
        start=pageviews_first_data.strftime(pageviews_date_format),
        end=datetime.today().strftime(pageviews_date_format)
    ))

    # Check the status before trusting the body: an error response is not
    # guaranteed to carry a JSON payload.
    if res.status_code != 200:
        try:
            error_body = res.json()
        except ValueError:
            # Assumes `session` is a requests-like session whose decode
            # errors derive from ValueError -- TODO confirm.
            error_body = {}

        if isinstance(error_body, dict) and 'detail' in error_body:
            detail = error_body['detail']
            message = ', '.join(detail) if isinstance(detail, list) else detail
            raise Exception(message)

        raise Exception('Erreur {}'.format(res.status_code))

    data = res.json()

    # Build the resulting counter, keyed by the day part of the timestamp
    return collections.Counter({
        record['timestamp'][:8]: record['views']
        for record in data['items']
    })
| 
 | ||||
| 
 | ||||
| def get_article(project, article): | ||||
|     """ | ||||
|     Obtient le nombre de visites sur un article Wikipédia par jour. | ||||
| 
 | ||||
|     :param project: Projet Wikipédia ciblé. | ||||
|     :param article: Article ciblé dans le site. | ||||
|     :return: Compteur associant chaque jour à son nombre de visites. | ||||
|     """ | ||||
|     # Soumet une requête à l’API REST pour obtenir les vues de l’article | ||||
|     res = session.get(wikimedia_base_path + wikimedia_pageviews_path.format( | ||||
|     res = session.get(pageviews_root + pageviews_article_endpoint.format( | ||||
|         project=project, | ||||
|         article=article, | ||||
|         access='all-access', | ||||
|         agent='user', | ||||
|         granularity='daily', | ||||
|         start=wikimedia_pageviews_start.strftime(wikimedia_date_format), | ||||
|         end=datetime.today().strftime(wikimedia_date_format) | ||||
|         start=pageviews_first_data.strftime(pageviews_date_format), | ||||
|         end=datetime.today().strftime(pageviews_date_format) | ||||
|     )) | ||||
| 
 | ||||
|     data = res.json() | ||||
|  | @ -123,7 +163,7 @@ def mean(views): | |||
|         accumulator[datemonth] = [] | ||||
| 
 | ||||
|     for date_str, views in views.items(): | ||||
|         date = datetime.strptime(date_str, wikimedia_date_format) | ||||
|         date = datetime.strptime(date_str, pageviews_date_format) | ||||
| 
 | ||||
|         if not (date.month == 2 and date.day == 29): | ||||
|             datemonth = date.strftime(datemonth_format) | ||||
|  |  | |||
|  | @ -35,6 +35,8 @@ output = sys.argv[2] | |||
| articles = sys.argv[3:] | ||||
| 
 | ||||
| site = mediawiki_api.instanciate(project) | ||||
| project_views = wikipedia_pageviews.get_aggregate(project) | ||||
| 
 | ||||
| output_to_file = output != '-' | ||||
| 
 | ||||
| if output_to_file: | ||||
|  | @ -49,14 +51,22 @@ if output_to_file: | |||
| fig = pyplot.figure(figsize=(4.7, 3.3)) | ||||
| ax = fig.add_subplot(111) | ||||
| 
 | ||||
| # Configuration de l’abscisse pour afficher les jours de l’année | ||||
| ax.set_xlabel('Jours de l’année') | ||||
| ax.set_xticks([ | ||||
|     datetime(1, month, 1).toordinal() | ||||
|     for month in range(1, 13) | ||||
| ]) | ||||
| 
 | ||||
| ax.set_xticklabels(calendar.month_abbr[1:13]) | ||||
| 
 | ||||
| # Configuration de l’ordonnée pour être affichée en pourcentage | ||||
| ax.set_ylabel('Proportion de vues par jour') | ||||
| ax.yaxis.set_major_formatter( | ||||
|     matplotlib.ticker.FuncFormatter( | ||||
|         lambda y, _: '{:.5} ‰'.format(y * 1000).replace('.', ',') | ||||
|     ) | ||||
| ) | ||||
| 
 | ||||
| for article in articles: | ||||
|     canonical = mediawiki_api.article_canonical(site, article) | ||||
| 
 | ||||
|  | @ -66,21 +76,24 @@ for article in articles: | |||
|             .format(article, canonical) | ||||
|         ) | ||||
| 
 | ||||
|     del article | ||||
|     article = canonical | ||||
|     del canonical | ||||
| 
 | ||||
|     redirects = mediawiki_api.article_redirects(site, canonical) | ||||
|     mean_views = wikipedia_pageviews.mean(sum( | ||||
|         (wikipedia_pageviews.get(project, page) | ||||
|             for page in redirects + [canonical]), | ||||
|     redirects = mediawiki_api.article_redirects(site, article) | ||||
|     total_views = sum( | ||||
|         (wikipedia_pageviews.get_article(project, page) | ||||
|             for page in redirects + [article]), | ||||
|         start=collections.Counter() | ||||
|     )) | ||||
| 
 | ||||
|     ax.plot( | ||||
|         wikipedia_pageviews.smooth(mean_views, 10), | ||||
|         label=canonical | ||||
|     ) | ||||
| 
 | ||||
| ax.set_ylabel('Vues par jour') | ||||
|     relative_views = dict(( | ||||
|         (date, total_view / project_views[date]) | ||||
|         for date, total_view in total_views.items() | ||||
|     )) | ||||
| 
 | ||||
|     mean_views = wikipedia_pageviews.mean(relative_views) | ||||
|     smoothed_views = wikipedia_pageviews.smooth(mean_views, 10) | ||||
|     ax.plot(smoothed_views, label=article) | ||||
| 
 | ||||
| fig.legend(framealpha=1) | ||||
| fig.autofmt_xdate() | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue