youtube: Filter out YouTube Shorts from feeds

Author: Mattéo Delabre
Date: 2023-05-13 00:41:56 -04:00
Parent: f22a758e2e
Commit: 23c2b4b664
Signed by: matteo (GPG Key ID: AE3FBD02DC583ABB)

5 changed files with 68 additions and 30 deletions


@@ -1,5 +1,5 @@
 from ..feedformatter import Feed
-from ..util import parse_iso
+from ..util import parse_iso_date
 from .twitch import APIClient
@@ -34,7 +34,7 @@ def construct_rss(client: APIClient, login: str) -> str:
         item["title"] = "[⏺️ Live] " + stream.get("title", "Untitled Stream")
         item["link"] = user_url
         item["description"] = stream.get("game_name", "")
-        item["pubDate"] = parse_iso(stream["started_at"]).timetuple()
+        item["pubDate"] = parse_iso_date(stream["started_at"]).timetuple()
         feed.items.append(item)


@@ -1,4 +1,5 @@
-from datetime import datetime
+from datetime import datetime, timedelta
+import isodate
 import http
 import urllib
 import urllib.request
@@ -8,10 +9,14 @@ HTTPResponse = http.client.HTTPResponse
 HTTPException = http.client.HTTPException


-def parse_iso(iso: str) -> datetime:
+def parse_iso_date(iso: str) -> datetime:
     return datetime.fromisoformat(iso.replace("Z", "+00:00"))
+
+
+def parse_iso_duration(iso: str) -> timedelta:
+    return isodate.parse_duration(iso)


 def send_with_retry(request: HTTPRequest, retries: int) -> HTTPResponse:
     """
     Send an HTTP request and retry in case of failure.

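The two helpers above are small enough to exercise in isolation. A minimal sketch of their behavior on the kinds of strings the YouTube API returns (the sample timestamp and durations are illustrative, not taken from the commit):

from datetime import datetime, timedelta, timezone

import isodate


def parse_iso_date(iso: str) -> datetime:
    # fromisoformat() before Python 3.11 rejects a trailing "Z",
    # so it is rewritten as an explicit UTC offset first.
    return datetime.fromisoformat(iso.replace("Z", "+00:00"))


def parse_iso_duration(iso: str) -> timedelta:
    # isodate turns an ISO 8601 duration such as "PT1M10S" into a timedelta.
    return isodate.parse_duration(iso)


assert parse_iso_date("2023-05-13T00:41:56Z") == datetime(2023, 5, 13, 0, 41, 56, tzinfo=timezone.utc)
assert parse_iso_duration("PT1M10S") == timedelta(seconds=70)
assert parse_iso_duration("PT58S") < timedelta(seconds=70)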

@@ -1,8 +1,12 @@
+from datetime import timedelta
 from ..feedformatter import Feed
-from ..util import parse_iso
 from .youtube import APIClient
+
+# Minimum duration for videos to be listed in the feed
+MINIMUM_DURATION = timedelta(seconds=70)


 def construct_rss(client: APIClient, channel_id: str) -> str:
     """
     Build a RSS stream for a YouTube channel.
@@ -27,25 +31,16 @@ def construct_rss(client: APIClient, channel_id: str) -> str:
     feed.feed["ttl"] = "30"

     for video in videos:
-        item = {}
-
-        video_id = video["resourceId"]["videoId"]
-        link = f"https://www.youtube.com/watch?v={video_id}"
-        thumbnail = ""
-
-        for size in ("standard", "maxres", *video["thumbnails"].keys()):
-            if size in video["thumbnails"]:
-                thumbnail = video["thumbnails"][size]["url"]
-
-        item["guid"] = video["resourceId"]["videoId"]
-        item["title"] = video.get("title", "Untitled Video")
-        item["link"] = link
-        item["description"] = (
-            f'<a href="{link}"><img src="{thumbnail}" /></a><br><br>'
-            + video["description"]
-        )
-        item["pubDate"] = parse_iso(video["publishedAt"]).timetuple()
-
-        feed.items.append(item)
+        if video["duration"] >= MINIMUM_DURATION:
+            feed.items.append({
+                "guid": video["id"],
+                "title": video["title"],
+                "link": video["url"],
+                "description": (
+                    f'<a href="{video["url"]}"><img src="{video["thumbnail"]}" /></a><br><br>'
+                    + video["description"]
+                ),
+                "pubDate": video["published"].timetuple(),
+            })

     return feed.format_rss2_string()

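The filtering itself reduces to a timedelta comparison against the 70-second threshold (YouTube Shorts were capped at 60 seconds when this commit was written, which presumably explains the extra margin). A standalone sketch with hypothetical entries shaped like the API client's normalized output:

from datetime import timedelta

# Threshold from the diff above
MINIMUM_DURATION = timedelta(seconds=70)

# Hypothetical videos shaped like the API client's normalized output
videos = [
    {"title": "Full video", "duration": timedelta(minutes=12, seconds=3)},
    {"title": "A Short", "duration": timedelta(seconds=42)},
]

kept = [video for video in videos if video["duration"] >= MINIMUM_DURATION]
assert [video["title"] for video in kept] == ["Full video"]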

@@ -6,7 +6,7 @@ import urllib
 import urllib.request
 from typing import Any, Iterable, Tuple
 from cachetools import cached, TTLCache
-from ..util import send_with_retry
+from ..util import send_with_retry, parse_iso_date, parse_iso_duration

 HTTPError = urllib.error.HTTPError
@@ -123,13 +123,15 @@ class APIClient:
         :returns: list of latest videos
         :throws HTTPException: if the query fails
         """
+        # Query list of latest videos
         try:
-            response = self._query(
+            playlist_response = self._query(
                 url="https://youtube.googleapis.com/youtube/v3/playlistItems",
                 method="GET",
                 data=(
                     ("part", "snippet"),
                     ("part", "status"),
+                    ("part", "contentDetails"),
                     ("playlistId", playlist_id),
                     ("maxResults", 50),
                 )
@@ -140,9 +142,43 @@
             raise err

-        return [
+        # Filter only public videos
+        videos = [
             item["snippet"]
-            for item in response["items"]
+            for item in playlist_response["items"]
             if item["status"]["privacyStatus"] == "public"
             and item["snippet"]["resourceId"]["kind"] == "youtube#video"
         ]
+
+        # Retrieve video durations
+        videos_response = self._query(
+            url="https://youtube.googleapis.com/youtube/v3/videos",
+            method="GET",
+            data=(
+                *[("id", video["resourceId"]["videoId"]) for video in videos],
+                ("part", "contentDetails"),
+            ),
+        )
+
+        # Merge and normalize data
+        results = []
+
+        for video_item, detail_item in zip(videos, videos_response["items"]):
+            video_id = video_item["resourceId"]["videoId"]
+            thumbnail = ""
+
+            for size in ("standard", "maxres", *video_item["thumbnails"].keys()):
+                if size in video_item["thumbnails"]:
+                    thumbnail = video_item["thumbnails"][size]["url"]
+
+            results.append({
+                "id": video_id,
+                "title": video_item.get("title", "Untitled Video"),
+                "description": video_item["description"],
+                "url": f"https://www.youtube.com/watch?v={video_id}",
+                "thumbnail": thumbnail,
+                "published": parse_iso_date(video_item["publishedAt"]),
+                "duration": parse_iso_duration(detail_item["contentDetails"]["duration"]),
+            })
+
+        return results

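Two details of the new API-client code are easy to miss. The durations come from a second, batched videos.list request: assuming _query() passes its data tuples through urllib.parse.urlencode, the unpacked ("id", …) pairs would encode as repeated id= query parameters. And the merge pairs playlist snippets with duration details by position via zip, which relies on the videos endpoint returning items in the requested order. A small sketch of both points, using made-up video IDs:

import urllib.parse

# Made-up IDs standing in for the playlist results
video_ids = ["abc123DEFgh", "xyz789UVWrs"]

# Repeated ("id", ...) pairs encode as repeated id= parameters
query = urllib.parse.urlencode(
    (
        *[("id", video_id) for video_id in video_ids],
        ("part", "contentDetails"),
    )
)
assert query == "id=abc123DEFgh&id=xyz789UVWrs&part=contentDetails"

# The zip-based merge assumes response order matches request order
snippets = [{"title": "First"}, {"title": "Second"}]
details = [{"contentDetails": {"duration": "PT2M"}}, {"contentDetails": {"duration": "PT45S"}}]
merged = [{**snippet, **detail} for snippet, detail in zip(snippets, details)]
assert merged[0]["contentDetails"]["duration"] == "PT2M"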

@@ -2,7 +2,9 @@ cachetools==4.2.2
 click==8.0.1
 Flask==2.0.1
 gunicorn==20.1.0
+isodate==0.6.1
 itsdangerous==2.0.1
 Jinja2==3.0.1
 MarkupSafe==2.0.1
+six==1.16.0
 Werkzeug==2.0.1