diff --git a/.gitattributes b/.gitattributes deleted file mode 100644 index bdb0cab..0000000 --- a/.gitattributes +++ /dev/null @@ -1,17 +0,0 @@ -# Auto detect text files and perform LF normalization -* text=auto - -# Custom for Visual Studio -*.cs diff=csharp - -# Standard to msysgit -*.doc diff=astextplain -*.DOC diff=astextplain -*.docx diff=astextplain -*.DOCX diff=astextplain -*.dot diff=astextplain -*.DOT diff=astextplain -*.pdf diff=astextplain -*.PDF diff=astextplain -*.rtf diff=astextplain -*.RTF diff=astextplain diff --git a/.gitignore b/.gitignore index abfcf71..3b08151 100644 --- a/.gitignore +++ b/.gitignore @@ -1,104 +1,3 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] - -# C extensions -*.so - -# Distribution / packaging -.Python -venv/ -env/ -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -# lib/ -lib64/ -parts/ -sdist/ -var/ -*.egg-info/ -.installed.cfg -*.egg - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.coverage -.cache -nosetests.xml -coverage.xml - -# Translations -*.mo -*.pot - -# Django stuff: -*.log - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# ========================= -# Operating System Files -# ========================= - -# OSX -# ========================= - -.DS_Store -.AppleDouble -.LSOverride - -# Thumbnails -._* - -# Files that might appear on external disk -.Spotlight-V100 -.Trashes - -# Directories potentially created on remote AFP share -.AppleDB -.AppleDesktop -Network Trash Folder -Temporary Items -.apdisk - -# Windows -# ========================= - -# Windows image file caches -Thumbs.db -ehthumbs.db - -# Folder config file -Desktop.ini - -# Recycle Bin used on file shares -$RECYCLE.BIN/ - -# Windows Installer files -*.cab -*.msi -*.msm -*.msp - -# Windows shortcuts -*.lnk - -*.gcloudignore +.venv +.mypy_cache +config.ini diff --git a/LICENSE.md b/LICENSE.md index c4c01c8..8c98a5f 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,4 +1,6 @@ -Copyright 2015 Laszlo Zeke +Copyright 2021 Mattéo Delabre +Copyright 2021 Mattia Di Eleuterio +Copyright 2015 Laszlo Zeke Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/README.md b/README.md index 96bee0f..30e8812 100644 --- a/README.md +++ b/README.md @@ -1,25 +1,6 @@ -## Twitch RSS Webapp for Google App Engine -This project is a very small web application for serving RSS feed for broadcasts -in Twitch. It fetches data from [Twitch API](https://dev.twitch.tv/docs) and caches in Memcache. -The engine is webapp2. +# feedleware -A running version can be tried out at: -https://twitchrss.appspot.com/vod/twitch +This is a lightweight Python application that generates RSS feeds for services that don’t provide usable feeds. +Currently supported services: -There is also a VOD only endpoint if you don't want to see ongoing streams which are known to break some readers: -https://twitchrss.appspot.com/vodonly/twitch - -### Caching requests -This service caches requests from twitch for 10 minutes meaning that you will only get new answers once in -10 minutes. Please keep this in mind when polling the service. - -### Deployment -First you should set your own Twitch API client ID in the app.yaml. -See how to deploy on [Google App Engine](https://cloud.google.com/appengine/docs/standard/python3). - -### Other things -The project uses a slightly modified [Feedformatter](https://code.google.com/p/feedformatter/) to support -more tags and time zone in pubDate tag. - -### About -The project has been developed by László Zeke. +* Twitch diff --git a/TwitchRSS/app.yaml b/TwitchRSS/app.yaml deleted file mode 100644 index d936afa..0000000 --- a/TwitchRSS/app.yaml +++ /dev/null @@ -1,21 +0,0 @@ -runtime: python38 - -entrypoint: gunicorn -b :$PORT -k gthread --threads 2 twitchrss:app - -env_variables: - TWITCH_CLIENT_ID: __INSERT_TWITCH_CLIENT_ID_HERE__ - -handlers: -- url: /favicon\.ico - static_files: favicon.ico - upload: favicon\.ico - -- url: / - static_files: index.html - upload: index\.html - -- url: /.+ - script: auto - -automatic_scaling: - max_instances: 1 diff --git a/TwitchRSS/favicon.ico b/TwitchRSS/favicon.ico deleted file mode 100644 index c8d0516..0000000 Binary files a/TwitchRSS/favicon.ico and /dev/null differ diff --git a/TwitchRSS/index.html b/TwitchRSS/index.html deleted file mode 100644 index df27a5b..0000000 --- a/TwitchRSS/index.html +++ /dev/null @@ -1,17 +0,0 @@ - - - Twitch stream RSS generator - - -

- Twitch stream RSS generator -

-

- You can get RSS of broadcasts by subscribing to https://twitchrss.appspot.com/vod/<channel name>
- For example: https://twitchrss.appspot.com/vod/riotgames

- You can use the /vodonly handle to get only vods without ongoing streams. - Not endorsed by Twitch.tv, just a fun project.
- Project home -

- - diff --git a/TwitchRSS/requirements.txt b/TwitchRSS/requirements.txt deleted file mode 100644 index 5f6f59a..0000000 --- a/TwitchRSS/requirements.txt +++ /dev/null @@ -1,31 +0,0 @@ -appdirs==1.4.3 -CacheControl==0.12.6 -cachetools==4.1.1 -certifi==2019.11.28 -chardet==3.0.4 -click==7.1.2 -colorama==0.4.3 -contextlib2==0.6.0 -distlib==0.3.0 -distro==1.4.0 -Flask==1.1.2 -gunicorn==20.0.4 -html5lib==1.0.1 -idna==2.8 -ipaddr==2.2.0 -itsdangerous==1.1.0 -Jinja2==2.11.2 -lockfile==0.12.2 -MarkupSafe==1.1.1 -msgpack==0.6.2 -packaging==20.3 -pep517==0.8.2 -progress==1.5 -pyparsing==2.4.6 -pytoml==0.1.21 -requests==2.22.0 -retrying==1.3.3 -six==1.14.0 -urllib3==1.25.8 -webencodings==0.5.1 -Werkzeug==1.0.1 diff --git a/TwitchRSS/twitchrss.py b/TwitchRSS/twitchrss.py deleted file mode 100644 index 26b1ca5..0000000 --- a/TwitchRSS/twitchrss.py +++ /dev/null @@ -1,219 +0,0 @@ -# -# Copyright 2020 Laszlo Zeke -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -from cachetools import cached, TTLCache, LRUCache -from feedformatter import Feed -from flask import abort, Flask, request -from io import BytesIO -from os import environ -import datetime -import gzip -import time -import json -import logging -import re -import urllib - - -VOD_URL_TEMPLATE = 'https://api.twitch.tv/helix/videos?user_id=%s&type=all' -USERID_URL_TEMPLATE = 'https://api.twitch.tv/helix/users?login=%s' -VODCACHE_LIFETIME = 10 * 60 -USERIDCACHE_LIFETIME = 24 * 60 * 60 -CHANNEL_FILTER = re.compile("^[a-zA-Z0-9_]{2,25}$") -TWITCH_CLIENT_ID = environ.get("TWITCH_CLIENT_ID") -TWITCH_SECRET = environ.get("TWITCH_SECRET") -TWITCH_OAUTH_TOKEN = "" -TWITCH_OAUTH_EXPIRE_EPOCH = 0 -logging.basicConfig(level=logging.DEBUG if environ.get('DEBUG') else logging.INFO) - -if not TWITCH_CLIENT_ID: - raise Exception("Twitch API client id is not set.") -if not TWITCH_SECRET: - raise Exception("Twitch API secret env variable not set.") - - -app = Flask(__name__) - -def authorize(): - global TWITCH_OAUTH_TOKEN - global TWITCH_OAUTH_EXPIRE_EPOCH - - # return if token has not expired - if (TWITCH_OAUTH_EXPIRE_EPOCH >= round(time.time())): - return - - logging.debug("requesting a new oauth token") - data = { - 'client_id': TWITCH_CLIENT_ID, - 'client_secret': TWITCH_SECRET, - 'grant_type': 'client_credentials', - } - url = 'https://id.twitch.tv/oauth2/token' - request = urllib.request.Request(url, data=urllib.parse.urlencode(data).encode("utf-8"), method='POST') - retries = 0 - while retries < 3: - try: - result = urllib.request.urlopen(request, timeout=3) - r = json.loads(result.read().decode("utf-8")) - TWITCH_OAUTH_TOKEN = r['access_token'] - TWITCH_OAUTH_EXPIRE_EPOCH = int(r['expires_in']) + round(time.time()) - logging.debug("oauth token aquired") - return - except Exception as e: - logging.warning("Fetch exception caught: %s" % e) - retries += 1 - abort(503) - -@app.route('/vod/', methods=['GET', 'HEAD']) -def vod(channel): - if CHANNEL_FILTER.match(channel): - return get_inner(channel) - else: - abort(404) - - -@app.route('/vodonly/', methods=['GET', 'HEAD']) -def vodonly(channel): - if CHANNEL_FILTER.match(channel): - return get_inner(channel, add_live=False) - else: - abort(404) - - -def get_inner(channel, add_live=True): - userid_json = fetch_userid(channel) - if not userid_json: - abort(404) - - (channel_display_name, channel_id) = extract_userid(json.loads(userid_json)['data'][0]) - - channel_json = fetch_vods(channel_id) - if not channel_json: - abort(404) - - decoded_json = json.loads(channel_json)['data'] - rss_data = construct_rss(channel, decoded_json, channel_display_name, add_live) - headers = {'Content-Type': 'application/rss+xml'} - - if 'gzip' in request.headers.get("Accept-Encoding", ''): - headers['Content-Encoding'] = 'gzip' - rss_data = gzip.compress(rss_data) - - return rss_data, headers - - -@cached(cache=TTLCache(maxsize=3000, ttl=USERIDCACHE_LIFETIME)) -def fetch_userid(channel_name): - return fetch_json(channel_name, USERID_URL_TEMPLATE) - - -@cached(cache=TTLCache(maxsize=500, ttl=VODCACHE_LIFETIME)) -def fetch_vods(channel_id): - return fetch_json(channel_id, VOD_URL_TEMPLATE) - - -def fetch_json(id, url_template): - #update the oauth token - authorize() - - url = url_template % id - headers = { - 'Authorization': 'Bearer '+TWITCH_OAUTH_TOKEN, - 'Client-Id': TWITCH_CLIENT_ID, - 'Accept-Encoding': 'gzip' - } - request = urllib.request.Request(url, headers=headers) - retries = 0 - while retries < 3: - try: - result = urllib.request.urlopen(request, timeout=3) - logging.debug('Fetch from twitch for %s with code %s' % (id, result.getcode())) - if result.info().get('Content-Encoding') == 'gzip': - logging.debug('Fetched gzip content') - return gzip.decompress(result.read()) - return result.read() - except Exception as e: - logging.warning("Fetch exception caught: %s" % e) - retries += 1 - abort(503) - - -def extract_userid(user_info): - # Get the first id in the list - userid = user_info['id'] - username = user_info['display_name'] - if username and userid: - return username, userid - else: - logging.warning('Userid is not found in %s' % user_info) - abort(404) - - -def construct_rss(channel_name, vods_info, display_name, add_live=True): - feed = Feed() - - # Set the feed/channel level properties - feed.feed["title"] = "%s's Twitch video RSS" % display_name - feed.feed["link"] = "https://twitchrss.appspot.com/" - feed.feed["author"] = "Twitch RSS Generated" - feed.feed["description"] = "The RSS Feed of %s's videos on Twitch" % display_name - feed.feed["ttl"] = '10' - - # Create an item - try: - if vods_info: - for vod in vods_info: - item = {} - - # @madiele: in twitch new API the current stream now it's not bundled in the same request - # maybe to be re-implemented later on - - #if vod["status"] == "recording": - # if not add_live: - # continue - # link = "http://www.twitch.tv/%s" % channel_name - # item["title"] = "%s - LIVE" % vod['title'] - # item["category"] = "live" - #else: - link = vod['url'] - item["title"] = vod['title'] - item["category"] = vod['type'] - - item["link"] = link - item["description"] = "" % (link, vod['thumbnail_url'].replace("%{width}", "512").replace("%{height}","288")) - - #@madiele: for some reason the new API does not have the game field anymore... - #if vod.get('game'): - # item["description"] += "
" + vod['game'] - - if vod.get('description'): - item["description"] += "
" + vod['description'] - d = datetime.datetime.strptime(vod['created_at'], '%Y-%m-%dT%H:%M:%SZ') - item["pubDate"] = d.timetuple() - item["guid"] = vod['id'] - #if vod["status"] == "recording": # To show a different news item when recording is over - # item["guid"] += "_live" - feed.items.append(item) - except KeyError as e: - logging.warning('Issue with json: %s\nException: %s' % (vods_info, e)) - abort(404) - - return feed.format_rss2_string() - - -# For debug -if __name__ == "__main__": - app.run(host='127.0.0.1', port=8080, debug=True) diff --git a/feedleware/__init__.py b/feedleware/__init__.py new file mode 100644 index 0000000..70c120f --- /dev/null +++ b/feedleware/__init__.py @@ -0,0 +1,41 @@ +import configparser +import logging +import sys +from os import environ +from flask import Flask +from . import twitch + + +logger = logging.getLogger(__name__) +blueprints = { + "twitch": twitch, +} + + +def create_app(): + """Read the app configuration and instantiate service blueprints.""" + debug = environ.get("FLASK_ENV", "production") == "development" + level = logging.DEBUG if debug else logging.INFO + logging.basicConfig(level=level) + + config_path = environ.get("FEEDLEWARE_CONFIG") + + if not config_path: + print( + "Please set the FEEDLEWARE_CONFIG environment variable", + file=sys.stderr, + ) + sys.exit(1) + + config = configparser.ConfigParser() + config.read(config_path) + app = Flask(__name__) + + for section in config.sections(): + if section in blueprints: + blueprint = blueprints[section].create_blueprint(config[section]) + app.register_blueprint(blueprint, url_prefix="/" + section) + else: + logger.warning("Unknown service '%s'", section) + + return app diff --git a/TwitchRSS/feedformatter.py b/feedleware/feedformatter.py similarity index 99% rename from TwitchRSS/feedformatter.py rename to feedleware/feedformatter.py index 8a8caa5..58f1c3b 100644 --- a/TwitchRSS/feedformatter.py +++ b/feedleware/feedformatter.py @@ -176,9 +176,9 @@ def _format_datetime(feed_type, time): time = _convert_datetime(time) # Then, convert that to the appropriate string - if feed_type is "rss2": + if feed_type == "rss2": return strftime("%a, %d %b %Y %H:%M:%S UT", time) - elif feed_type is "atom": + elif feed_type == "atom": return strftime("%Y-%m-%dT%H:%M:%S", time) + _get_tz_offset(); def _atomise_link(link): diff --git a/feedleware/twitch/__init__.py b/feedleware/twitch/__init__.py new file mode 100644 index 0000000..55241b9 --- /dev/null +++ b/feedleware/twitch/__init__.py @@ -0,0 +1,21 @@ +from flask import abort, Blueprint +from .twitch import APIClient, NoSuchUser +from .feed import construct_rss + + +def create_blueprint(config): + """Create a Twitch endpoint blueprint.""" + client = APIClient(config["client_id"], config["secret"]) + twitch = Blueprint("twitch", __name__) + + @twitch.route("/", methods=["GET", "HEAD"]) + def vod(login: str): + try: + return ( + construct_rss(client, login), + {"Content-Type": "application/rss+xml"}, + ) + except NoSuchUser: + abort(404) + + return twitch diff --git a/feedleware/twitch/feed.py b/feedleware/twitch/feed.py new file mode 100644 index 0000000..d4b3009 --- /dev/null +++ b/feedleware/twitch/feed.py @@ -0,0 +1,73 @@ +from datetime import datetime +from ..feedformatter import Feed +from .twitch import APIClient + + +def parse_iso(iso: str) -> datetime: + return datetime.fromisoformat(iso.replace("Z", "+00:00")) + + +def construct_rss(client: APIClient, login: str) -> str: + """ + Build a RSS stream for a Twitch user. + + :param client: Twitch API client + :param login: user login + :returns: RSS stream + :raises HTTPException: if one of the requests fail + :raises NoSuchUser: if the user does not exist + """ + user_info = client.user(login) + channel_id = user_info["id"] + videos = client.videos(channel_id) + stream = client.stream(channel_id) + + feed = Feed() + user_url = f"https://www.twitch.tv/{user_info['login']}" + + # Set the feed/channel level properties + feed.feed["title"] = user_info["display_name"] + feed.feed["link"] = user_url + feed.feed["author"] = "Twitch RSS Generated" + feed.feed["description"] = user_info["description"] + feed.feed["ttl"] = '10' + + if stream is not None: + item = {} + item["guid"] = stream["id"] + item["title"] = "[⏺️ Live] " + stream.get("title", "Untitled Stream") + item["link"] = user_url + item["description"] = stream.get("game_name", "") + item["pubDate"] = parse_iso(stream["started_at"]).timetuple() + + feed.items.append(item) + + for video in videos: + if video.get("viewable", "public") != "public": + continue + + item = {} + + if video.get("stream_id") is not None: + if stream is not None and stream["id"] == video["stream_id"]: + # Do not add a second item for the active stream + continue + + item["guid"] = video["stream_id"] + else: + item["guid"] = video["id"] + + item["title"] = video.get("title", "Untitled Video") + + link = video.get("url", user_url) + thumbnail = video["thumbnail_url"] \ + .replace("%{width}", "600") \ + .replace("%{height}", "400") + + item["link"] = link + item["description"] = f'' + item["pubDate"] = parse_iso(video["published_at"]).timetuple() + + feed.items.append(item) + + return feed.format_rss2_string() diff --git a/feedleware/twitch/twitch.py b/feedleware/twitch/twitch.py new file mode 100644 index 0000000..d1b460b --- /dev/null +++ b/feedleware/twitch/twitch.py @@ -0,0 +1,218 @@ +import gzip +import http +import json +import logging +import re +import time +import urllib +import urllib.request +from typing import Any, Dict, Optional +from cachetools import cached, TTLCache + + +HTTPRequest = urllib.request.Request +HTTPResponse = http.client.HTTPResponse +HTTPException = http.client.HTTPException +logger = logging.getLogger(__name__) + + +def now(): + """Get current epoch in seconds.""" + return int(time.time()) + + +class NoSuchUser(Exception): + """Raised when an unknown user is queried.""" + + +class APIClient: + """Client for the Twitch API app endpoints.""" + + # Expression that matches allowed logins + CHANNEL_FILTER = re.compile("^[a-zA-Z0-9_]{2,25}$") + + def __init__(self, client_id: str = "", secret: str = "", retries: int = 3): + """ + Create a Twitch API client. + + See for details. + + :param client_id: client ID of the app + :param secret: app secret + :param retries: number of times to retry each request in case of failure + """ + self.client_id: str = client_id + self.secret: str = secret + self.retries: int = 3 + self.oauth_token: str = "" + self.oauth_expire_epoch: int = 0 + + def _send_with_retry(self, request: HTTPRequest) -> HTTPResponse: + """ + Send an HTTP request and retry in case of failure. + + The number of retries is configured by `self.retries`. + + :param request: request to try sending + :returns: response sent by the server + :throws HTTPException: if the number of retries is exceeded + """ + retries = self.retries + last_err = HTTPException() + + while retries: + try: + return urllib.request.urlopen(request, timeout=3) + except HTTPException as err: + logger.warning("HTTP error: %s", err) + retries -= 1 + last_err = err + + raise last_err + + + def authorize(self) -> bool: + """ + Refresh the current OAuth app access token if needed. + + This uses the Twitch OAuth client credentials flow. See: + + + :returns: true if the OAuth app access token was refreshed, + false if the existing one was still valid + :throws HTTPException: if the request fails + """ + if now() < self.oauth_expire_epoch: + return False + + logger.debug("Refreshing the OAuth app access token") + + payload = { + "client_id": self.client_id, + "client_secret": self.secret, + "grant_type": "client_credentials", + } + + request = HTTPRequest( + url="https://id.twitch.tv/oauth2/token", + data=urllib.parse.urlencode(payload).encode("utf-8"), + method="POST", + ) + + http_response = self._send_with_retry(request) + response = json.loads(http_response.read().decode("utf-8")) + + self.oauth_token = response["access_token"] + self.oauth_expire_epoch = now() + int(response["expires_in"]) + return True + + def _query( + self, + url: str, + method: str = "GET", + data: Optional[Dict[str, str]] = None + ) -> Any: + """ + Low-level method to make an authenticated query to the API. + + :param url: URL to query + :param method: HTTP method to use + :param data: payload dictionary to send + :returns: JSON data + :throws HTTPException: if the query fails + """ + self.authorize() + logger.debug("Querying %s %s %s", method, url, data) + + headers = { + "Authorization": f"Bearer {self.oauth_token}", + "Client-Id": self.client_id, + "Accept-Encoding": "gzip", + } + + payload = data if data is not None else {} + + request = HTTPRequest( + url=f"{url}?{urllib.parse.urlencode(payload)}", + headers=headers, + method=method, + ) + + http_response = self._send_with_retry(request) + + if http_response.info().get("Content-Encoding") == "gzip": + return json.loads(gzip.decompress(http_response.read())) + else: + return json.loads(http_response.read()) + + @cached(cache=TTLCache(maxsize=1000, ttl=24 * 60 * 60)) + def user(self, login: str) -> Any: + """ + Get information about a user. + + :param login: user login + :returns: user information + :throws HTTPException: if the query fails + :throws NoSuchUser: if the user doesn’t exist + """ + if not self.CHANNEL_FILTER.match(login): + raise NoSuchUser(f"Login '{login}' is invalid") + + response = self._query( + url="https://api.twitch.tv/helix/users", + method="GET", + data={"login": login}, + ) + assert type(response) == dict + + if "data" not in response or not response["data"]: + raise NoSuchUser(f"User '{login}' does not exist") + + assert type(response["data"]) == list + assert type(response["data"][0]) == dict + return response["data"][0] + + @cached(cache=TTLCache(maxsize=1000, ttl=10 * 60)) + def videos(self, channel_id: str) -> Any: + """ + Get the list of videos from a channel. + + :param channel_id: channel ID + :returns: list of videos + :throws HTTPException: if the query fails + :throws RuntimeError: if the server response is malformed + """ + response = self._query( + url="https://api.twitch.tv/helix/videos", + method="GET", + data={"user_id": channel_id}, + ) + + if "data" not in response: + raise RuntimeError("Unexpected response from Twitch API") + + return response["data"] + + @cached(cache=TTLCache(maxsize=1000, ttl=10 * 60)) + def stream(self, channel_id: str) -> Optional[Any]: + """ + Get the information about the stream currently active on a channel. + + :param channel_id: channel ID + :returns: stream information or None + :throws HTTPException: if the query fails + :throws RuntimeError: if the server response is malformed + """ + response = self._query( + url="https://api.twitch.tv/helix/streams", + method="GET", + data={"user_id": channel_id}, + ) + + if "data" not in response or not response["data"]: + return None + + if response["data"][0]["type"] != "live": + return None + + return response["data"][0]