youtube: Filter out YouTube Shorts from feeds

Mattéo Delabre 2023-05-13 00:41:56 -04:00
parent f22a758e2e
commit 23c2b4b664
Signed by: matteo
GPG Key ID: AE3FBD02DC583ABB
5 changed files with 68 additions and 30 deletions

View File

@@ -1,5 +1,5 @@
 from ..feedformatter import Feed
-from ..util import parse_iso
+from ..util import parse_iso_date
 from .twitch import APIClient
@@ -34,7 +34,7 @@ def construct_rss(client: APIClient, login: str) -> str:
         item["title"] = "[⏺️ Live] " + stream.get("title", "Untitled Stream")
         item["link"] = user_url
         item["description"] = stream.get("game_name", "")
-        item["pubDate"] = parse_iso(stream["started_at"]).timetuple()
+        item["pubDate"] = parse_iso_date(stream["started_at"]).timetuple()
         feed.items.append(item)
@@ -65,7 +65,7 @@ def construct_rss(client: APIClient, login: str) -> str:
         item["link"] = link
         item["description"] = f'<a href="{link}"><img src="{thumbnail}" /></a>'
-        item["pubDate"] = parse_iso(video["published_at"]).timetuple()
+        item["pubDate"] = parse_iso_date(video["published_at"]).timetuple()
         feed.items.append(item)

View File

@@ -1,4 +1,5 @@
-from datetime import datetime
+from datetime import datetime, timedelta
+import isodate
 import http
 import urllib
 import urllib.request
@@ -8,10 +9,14 @@ HTTPResponse = http.client.HTTPResponse
 HTTPException = http.client.HTTPException


-def parse_iso(iso: str) -> datetime:
+def parse_iso_date(iso: str) -> datetime:
     return datetime.fromisoformat(iso.replace("Z", "+00:00"))


+def parse_iso_duration(iso: str) -> timedelta:
+    return isodate.parse_duration(iso)
+
+
 def send_with_retry(request: HTTPRequest, retries: int) -> HTTPResponse:
     """
     Send an HTTP request and retry in case of failure.
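For reference, a minimal sketch of what the two helpers are expected to return, with illustrative inputs (assuming isodate 0.6.1, as pinned in the requirements below):

from datetime import datetime, timedelta
import isodate

def parse_iso_date(iso: str) -> datetime:
    # Same body as the helper above; fromisoformat() does not accept a
    # trailing "Z" before Python 3.11, hence the replace().
    return datetime.fromisoformat(iso.replace("Z", "+00:00"))

assert parse_iso_date("2023-05-13T00:41:56Z").utcoffset() == timedelta(0)
assert isodate.parse_duration("PT1M10S") == timedelta(seconds=70)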

View File

@@ -1,8 +1,12 @@
+from datetime import timedelta
 from ..feedformatter import Feed
-from ..util import parse_iso
 from .youtube import APIClient


+# Minimum duration for videos to be listed in the feed
+MINIMUM_DURATION = timedelta(seconds=70)
+
+
 def construct_rss(client: APIClient, channel_id: str) -> str:
     """
     Build a RSS stream for a YouTube channel.
@@ -27,25 +31,16 @@ def construct_rss(client: APIClient, channel_id: str) -> str:
     feed.feed["ttl"] = "30"

     for video in videos:
-        item = {}
-        video_id = video["resourceId"]["videoId"]
-        link = f"https://www.youtube.com/watch?v={video_id}"
-
-        thumbnail = ""
-
-        for size in ("standard", "maxres", *video["thumbnails"].keys()):
-            if size in video["thumbnails"]:
-                thumbnail = video["thumbnails"][size]["url"]
-
-        item["guid"] = video["resourceId"]["videoId"]
-        item["title"] = video.get("title", "Untitled Video")
-        item["link"] = link
-        item["description"] = (
-            f'<a href="{link}"><img src="{thumbnail}" /></a><br><br>'
-            + video["description"]
-        )
-        item["pubDate"] = parse_iso(video["publishedAt"]).timetuple()
-        feed.items.append(item)
+        if video["duration"] >= MINIMUM_DURATION:
+            feed.items.append({
+                "guid": video["id"],
+                "title": video["title"],
+                "link": video["url"],
+                "description": (
+                    f'<a href="{video["url"]}"><img src="{video["thumbnail"]}" /></a><br><br>'
+                    + video["description"]
+                ),
+                "pubDate": video["published"].timetuple(),
+            })

     return feed.format_rss2_string()
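The 70-second floor works because Shorts are capped at 60 seconds; a small sketch of the filter with made-up durations, in the normalized shape the client now returns:

from datetime import timedelta

MINIMUM_DURATION = timedelta(seconds=70)  # same constant as above

# Sample entries; values are invented for illustration.
videos = [
    {"title": "A Short", "duration": timedelta(seconds=58)},
    {"title": "A regular upload", "duration": timedelta(minutes=12)},
]

kept = [v["title"] for v in videos if v["duration"] >= MINIMUM_DURATION]
assert kept == ["A regular upload"]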

View File

@@ -6,7 +6,7 @@ import urllib
 import urllib.request
 from typing import Any, Iterable, Tuple
 from cachetools import cached, TTLCache
-from ..util import send_with_retry
+from ..util import send_with_retry, parse_iso_date, parse_iso_duration

 HTTPError = urllib.error.HTTPError
@@ -123,13 +123,15 @@ class APIClient:
         :returns: list of latest videos
         :throws HTTPException: if the query fails
         """
+        # Query list of latest videos
         try:
-            response = self._query(
+            playlist_response = self._query(
                 url="https://youtube.googleapis.com/youtube/v3/playlistItems",
                 method="GET",
                 data=(
                     ("part", "snippet"),
                     ("part", "status"),
+                    ("part", "contentDetails"),
                     ("playlistId", playlist_id),
                     ("maxResults", 50),
                 )
@@ -140,9 +142,43 @@ class APIClient:
             raise err

-        return [
+        # Filter only public videos
+        videos = [
             item["snippet"]
-            for item in response["items"]
+            for item in playlist_response["items"]
             if item["status"]["privacyStatus"] == "public"
             and item["snippet"]["resourceId"]["kind"] == "youtube#video"
         ]
+
+        # Retrieve video durations
+        videos_response = self._query(
+            url="https://youtube.googleapis.com/youtube/v3/videos",
+            method="GET",
+            data=(
+                *[("id", video["resourceId"]["videoId"]) for video in videos],
+                ("part", "contentDetails"),
+            ),
+        )
+
+        # Merge and normalize data
+        results = []
+
+        for video_item, detail_item in zip(videos, videos_response["items"]):
+            video_id = video_item["resourceId"]["videoId"]
+            thumbnail = ""
+
+            for size in ("standard", "maxres", *video_item["thumbnails"].keys()):
+                if size in video_item["thumbnails"]:
+                    thumbnail = video_item["thumbnails"][size]["url"]
+
+            results.append({
+                "id": video_id,
+                "title": video_item.get("title", "Untitled Video"),
+                "description": video_item["description"],
+                "url": f"https://www.youtube.com/watch?v={video_id}",
+                "thumbnail": thumbnail,
+                "published": parse_iso_date(video_item["publishedAt"]),
+                "duration": parse_iso_duration(detail_item["contentDetails"]["duration"]),
+            })
+
+        return results
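A sketch of the merge step on mock payloads (field layout follows the YouTube Data API v3; values are invented). Note that zip() pairs the two responses positionally, which assumes videos.list returns items in the order the ids were requested:

import isodate

# Mock playlistItems snippet (public videos only) and the matching
# videos.list response.
videos = [{
    "resourceId": {"videoId": "abc123"},
    "title": "Example",
    "description": "",
    "thumbnails": {"standard": {"url": "https://example.com/thumb.jpg"}},
    "publishedAt": "2023-05-13T00:41:56Z",
}]
videos_response = {"items": [{"contentDetails": {"duration": "PT58S"}}]}

for video_item, detail_item in zip(videos, videos_response["items"]):
    duration = isodate.parse_duration(detail_item["contentDetails"]["duration"])
    print(duration.total_seconds())  # 58.0, below the 70-second cutoff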

View File

@@ -2,7 +2,9 @@ cachetools==4.2.2
 click==8.0.1
 Flask==2.0.1
 gunicorn==20.1.0
+isodate==0.6.1
 itsdangerous==2.0.1
 Jinja2==3.0.1
 MarkupSafe==2.0.1
+six==1.16.0
 Werkzeug==2.0.1