Code cleanup and make service generic

This commit is contained in:
Mattéo Delabre 2021-09-12 15:42:57 +02:00
parent 031671f94a
commit 9c1aa9d346
Signed by: matteo
GPG Key ID: AE3FBD02DC583ABB
14 changed files with 365 additions and 435 deletions

17
.gitattributes vendored
View File

@ -1,17 +0,0 @@
# Auto detect text files and perform LF normalization
* text=auto
# Custom for Visual Studio
*.cs diff=csharp
# Standard to msysgit
*.doc diff=astextplain
*.DOC diff=astextplain
*.docx diff=astextplain
*.DOCX diff=astextplain
*.dot diff=astextplain
*.DOT diff=astextplain
*.pdf diff=astextplain
*.PDF diff=astextplain
*.rtf diff=astextplain
*.RTF diff=astextplain

107
.gitignore vendored
View File

@ -1,104 +1,3 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
# C extensions
*.so
# Distribution / packaging
.Python
venv/
env/
build/
develop-eggs/
dist/
downloads/
eggs/
# lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.cache
nosetests.xml
coverage.xml
# Translations
*.mo
*.pot
# Django stuff:
*.log
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# =========================
# Operating System Files
# =========================
# OSX
# =========================
.DS_Store
.AppleDouble
.LSOverride
# Thumbnails
._*
# Files that might appear on external disk
.Spotlight-V100
.Trashes
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
# Windows
# =========================
# Windows image file caches
Thumbs.db
ehthumbs.db
# Folder config file
Desktop.ini
# Recycle Bin used on file shares
$RECYCLE.BIN/
# Windows Installer files
*.cab
*.msi
*.msm
*.msp
# Windows shortcuts
*.lnk
*.gcloudignore
.venv
.mypy_cache
config.ini

View File

@ -1,4 +1,6 @@
Copyright 2015 Laszlo Zeke
Copyright 2021 Mattéo Delabre <https://forge.delab.re/matteo/feedleware>
Copyright 2021 Mattia Di Eleuterio <https://github.com/madiele/TwitchToPodcastRSS>
Copyright 2015 Laszlo Zeke <https://github.com/lzeke0/TwitchRSS>
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.

View File

@ -1,25 +1,6 @@
## Twitch RSS Webapp for Google App Engine
This project is a very small web application for serving RSS feed for broadcasts
in Twitch. It fetches data from [Twitch API](https://dev.twitch.tv/docs) and caches in Memcache.
The engine is webapp2.
# feedleware
A running version can be tried out at:
https://twitchrss.appspot.com/vod/twitch
This is a lightweight Python application that generates RSS feeds for services that don't provide usable feeds.
Currently supported services:
There is also a VOD only endpoint if you don't want to see ongoing streams which are known to break some readers:
https://twitchrss.appspot.com/vodonly/twitch
### Caching requests
This service caches responses from Twitch for 10 minutes, meaning that you will only get new answers once every
10 minutes. Please keep this in mind when polling the service.
### Deployment
First you should set your own Twitch API client ID in the app.yaml.
See how to deploy on [Google App Engine](https://cloud.google.com/appengine/docs/standard/python3).
### Other things
The project uses a slightly modified [Feedformatter](https://code.google.com/p/feedformatter/) to support
more tags and time zone in pubDate tag.
### About
The project has been developed by László Zeke.
* Twitch

View File

@ -1,21 +0,0 @@
runtime: python38
entrypoint: gunicorn -b :$PORT -k gthread --threads 2 twitchrss:app
env_variables:
TWITCH_CLIENT_ID: __INSERT_TWITCH_CLIENT_ID_HERE__
handlers:
- url: /favicon\.ico
static_files: favicon.ico
upload: favicon\.ico
- url: /
static_files: index.html
upload: index\.html
- url: /.+
script: auto
automatic_scaling:
max_instances: 1

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.0 KiB

View File

@ -1,17 +0,0 @@
<html>
<head>
<title>Twitch stream RSS generator</title>
</head>
<body>
<p style="font-family: helvetica; font-size:20pt; padding: 20px;">
Twitch stream RSS generator
</p>
<p style="font-family: helvetica; font-size:12pt; padding: 20px;">
You can get RSS of broadcasts by subscribing to https://twitchrss.appspot.com/vod/&lt;channel name&gt;<br/>
For example: <a href="https://twitchrss.appspot.com/vod/riotgames">https://twitchrss.appspot.com/vod/riotgames</a><br/><br/>
You can use the /vodonly handle to get only vods without ongoing streams.
Not endorsed by Twitch.tv, just a fun project.<br/>
<a href="https://github.com/lzeke0/TwitchRSS">Project home</a>
</p>
</body>
</html>

View File

@ -1,31 +0,0 @@
appdirs==1.4.3
CacheControl==0.12.6
cachetools==4.1.1
certifi==2019.11.28
chardet==3.0.4
click==7.1.2
colorama==0.4.3
contextlib2==0.6.0
distlib==0.3.0
distro==1.4.0
Flask==1.1.2
gunicorn==20.0.4
html5lib==1.0.1
idna==2.8
ipaddr==2.2.0
itsdangerous==1.1.0
Jinja2==2.11.2
lockfile==0.12.2
MarkupSafe==1.1.1
msgpack==0.6.2
packaging==20.3
pep517==0.8.2
progress==1.5
pyparsing==2.4.6
pytoml==0.1.21
requests==2.22.0
retrying==1.3.3
six==1.14.0
urllib3==1.25.8
webencodings==0.5.1
Werkzeug==1.0.1

View File

@ -1,219 +0,0 @@
#
# Copyright 2020 Laszlo Zeke
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from cachetools import cached, TTLCache, LRUCache
from feedformatter import Feed
from flask import abort, Flask, request
from io import BytesIO
from os import environ
import datetime
import gzip
import time
import json
import logging
import re
import urllib
# Twitch Helix API endpoints; the %s placeholder is filled in by fetch_json()
VOD_URL_TEMPLATE = 'https://api.twitch.tv/helix/videos?user_id=%s&type=all'
USERID_URL_TEMPLATE = 'https://api.twitch.tv/helix/users?login=%s'
# Cache lifetimes in seconds, used as TTLs by fetch_vods() and fetch_userid()
VODCACHE_LIFETIME = 10 * 60
USERIDCACHE_LIFETIME = 24 * 60 * 60
# Channel names accepted by the route handlers
CHANNEL_FILTER = re.compile("^[a-zA-Z0-9_]{2,25}$")
# Application credentials, read from the environment
TWITCH_CLIENT_ID = environ.get("TWITCH_CLIENT_ID")
TWITCH_SECRET = environ.get("TWITCH_SECRET")
# Current OAuth token and its expiry (epoch seconds); both are updated by authorize()
TWITCH_OAUTH_TOKEN = ""
TWITCH_OAUTH_EXPIRE_EPOCH = 0
# Log at DEBUG level when the DEBUG environment variable is set to a non-empty value
logging.basicConfig(level=logging.DEBUG if environ.get('DEBUG') else logging.INFO)
# Refuse to start when either credential is missing
if not TWITCH_CLIENT_ID:
    raise Exception("Twitch API client id is not set.")
if not TWITCH_SECRET:
    raise Exception("Twitch API secret env variable not set.")
app = Flask(__name__)
def authorize():
    """Refresh the module-level Twitch OAuth token when it has expired.

    Does nothing while ``TWITCH_OAUTH_EXPIRE_EPOCH`` is still in the future.
    Otherwise requests a new token with the client-credentials grant, trying
    up to three times, and stores the token and its expiry epoch in the
    module globals. Aborts the current request with HTTP 503 when every
    attempt fails.
    """
    # The module only does `import urllib`, which does not by itself load
    # these submodules, so import them explicitly here.
    import urllib.parse
    import urllib.request

    global TWITCH_OAUTH_TOKEN
    global TWITCH_OAUTH_EXPIRE_EPOCH

    # return if token has not expired
    if TWITCH_OAUTH_EXPIRE_EPOCH >= round(time.time()):
        return

    logging.debug("requesting a new oauth token")
    data = {
        'client_id': TWITCH_CLIENT_ID,
        'client_secret': TWITCH_SECRET,
        'grant_type': 'client_credentials',
    }
    url = 'https://id.twitch.tv/oauth2/token'
    # Named so that it does not shadow the `request` object imported from flask
    token_request = urllib.request.Request(
        url,
        data=urllib.parse.urlencode(data).encode("utf-8"),
        method='POST',
    )
    for _ in range(3):
        try:
            # The context manager closes the connection even if decoding fails
            with urllib.request.urlopen(token_request, timeout=3) as result:
                r = json.loads(result.read().decode("utf-8"))
            TWITCH_OAUTH_TOKEN = r['access_token']
            TWITCH_OAUTH_EXPIRE_EPOCH = int(r['expires_in']) + round(time.time())
            logging.debug("oauth token aquired")
            return
        except Exception as e:
            # Best effort: log the failure and try again
            logging.warning("Fetch exception caught: %s" % e)
    abort(503)
@app.route('/vod/<string:channel>', methods=['GET', 'HEAD'])
def vod(channel):
    """Serve the RSS feed of *channel*, or 404 when the name fails CHANNEL_FILTER."""
    if not CHANNEL_FILTER.match(channel):
        abort(404)
    return get_inner(channel)
@app.route('/vodonly/<string:channel>', methods=['GET', 'HEAD'])
def vodonly(channel):
    """Serve the feed of *channel* with ``add_live=False``, or 404 when the name fails CHANNEL_FILTER."""
    if not CHANNEL_FILTER.match(channel):
        abort(404)
    return get_inner(channel, add_live=False)
def get_inner(channel, add_live=True):
    """Build the HTTP response carrying the RSS feed of a channel.

    Looks up the channel's user record, fetches its videos and renders them
    with construct_rss(). The body is gzip-compressed when the client
    advertises gzip support in ``Accept-Encoding``.

    :param channel: channel login name (already validated by the caller)
    :param add_live: forwarded unchanged to construct_rss()
    :returns: ``(body, headers)`` tuple understood by Flask
    Aborts with 404 when the user or its videos cannot be found.
    """
    userid_json = fetch_userid(channel)
    if not userid_json:
        abort(404)

    # An unknown login yields a payload without any user entry; answer 404
    # instead of crashing on ['data'][0].
    users = json.loads(userid_json).get('data')
    if not users:
        abort(404)
    (channel_display_name, channel_id) = extract_userid(users[0])

    channel_json = fetch_vods(channel_id)
    if not channel_json:
        abort(404)

    decoded_json = json.loads(channel_json)['data']
    rss_data = construct_rss(channel, decoded_json, channel_display_name, add_live)
    headers = {'Content-Type': 'application/rss+xml'}

    if 'gzip' in request.headers.get("Accept-Encoding", ''):
        headers['Content-Encoding'] = 'gzip'
        rss_data = gzip.compress(rss_data)

    return rss_data, headers
@cached(cache=TTLCache(maxsize=3000, ttl=USERIDCACHE_LIFETIME))
def fetch_userid(channel_name):
    """Return the raw user-lookup payload for *channel_name* (cached for USERIDCACHE_LIFETIME seconds)."""
    user_payload = fetch_json(channel_name, USERID_URL_TEMPLATE)
    return user_payload
@cached(cache=TTLCache(maxsize=500, ttl=VODCACHE_LIFETIME))
def fetch_vods(channel_id):
    """Return the raw video-list payload for *channel_id* (cached for VODCACHE_LIFETIME seconds)."""
    vods_payload = fetch_json(channel_id, VOD_URL_TEMPLATE)
    return vods_payload
def fetch_json(id, url_template):
    """Fetch a raw payload from the Twitch API.

    Refreshes the OAuth token if needed, then requests
    ``url_template % id`` up to three times. Aborts the current request
    with HTTP 503 when every attempt fails.

    :param id: value substituted into the template's ``%s`` placeholder
    :param url_template: URL template containing one ``%s`` placeholder
    :returns: response body as bytes, gunzipped when the server sent gzip
    """
    # The module only does `import urllib`, which does not by itself load
    # the `request` submodule, so import it explicitly here.
    import urllib.request

    #update the oauth token
    authorize()

    url = url_template % id
    headers = {
        'Authorization': 'Bearer '+TWITCH_OAUTH_TOKEN,
        'Client-Id': TWITCH_CLIENT_ID,
        'Accept-Encoding': 'gzip'
    }
    # Named so that it does not shadow the `request` object imported from flask
    api_request = urllib.request.Request(url, headers=headers)
    for _ in range(3):
        try:
            # The context manager closes the connection on every exit path
            with urllib.request.urlopen(api_request, timeout=3) as result:
                logging.debug('Fetch from twitch for %s with code %s' % (id, result.getcode()))
                if result.info().get('Content-Encoding') == 'gzip':
                    logging.debug('Fetched gzip content')
                    return gzip.decompress(result.read())
                return result.read()
        except Exception as e:
            # Best effort: log the failure and try again
            logging.warning("Fetch exception caught: %s" % e)
    abort(503)
def extract_userid(user_info):
    """Return ``(display_name, id)`` from a user record.

    Logs a warning and aborts with 404 when either value is empty.
    """
    userid, username = user_info['id'], user_info['display_name']
    if not (username and userid):
        logging.warning('Userid is not found in %s' % user_info)
        abort(404)
    return username, userid
def construct_rss(channel_name, vods_info, display_name, add_live=True):
    """Render the RSS 2.0 document for a channel's videos.

    :param channel_name: channel login (currently unused, see the note below)
    :param vods_info: iterable of video records, or a falsy value for none
    :param display_name: channel display name used in the feed title
    :param add_live: currently unused, see the note below
    :returns: result of ``Feed.format_rss2_string()``
    Aborts with 404 when a video record lacks an expected key.
    """
    rss = Feed()

    # Channel-level properties
    rss.feed["title"] = "%s's Twitch video RSS" % display_name
    rss.feed["link"] = "https://twitchrss.appspot.com/"
    rss.feed["author"] = "Twitch RSS Generated"
    rss.feed["description"] = "The RSS Feed of %s's videos on Twitch" % display_name
    rss.feed["ttl"] = '10'

    # Notes kept from @madiele: with the new Twitch API the ongoing stream is
    # no longer bundled in the same request, so the "LIVE" entry (driven by
    # channel_name/add_live) is not generated, and the `game` field is gone.
    try:
        for video in vods_info or ():
            video_url = video['url']
            entry = {
                "title": video['title'],
                "category": video['type'],
                "link": video_url,
                "description": "<a href=\"%s\"><img src=\"%s\" /></a>" % (
                    video_url,
                    video['thumbnail_url'].replace("%{width}", "512").replace("%{height}", "288"),
                ),
            }
            extra_text = video.get('description')
            if extra_text:
                entry["description"] += "<br/>" + extra_text
            created = datetime.datetime.strptime(video['created_at'], '%Y-%m-%dT%H:%M:%SZ')
            entry["pubDate"] = created.timetuple()
            entry["guid"] = video['id']
            rss.items.append(entry)
    except KeyError as e:
        logging.warning('Issue with json: %s\nException: %s' % (vods_info, e))
        abort(404)

    return rss.format_rss2_string()
# For debugging: when executed as a script, run Flask's built-in server
# on 127.0.0.1:8080 with debug mode enabled.
if __name__ == "__main__":
    app.run(host='127.0.0.1', port=8080, debug=True)

41
feedleware/__init__.py Normal file
View File

@ -0,0 +1,41 @@
import configparser
import logging
import sys
from os import environ
from flask import Flask
from . import twitch
# Module-level logger named after this package
logger = logging.getLogger(__name__)
# Maps a configuration section name to the module whose create_blueprint()
# builds the matching service (see create_app)
blueprints = {
    "twitch": twitch,
}
def create_app():
    """Read the app configuration and instantiate service blueprints.

    The configuration file path is taken from the ``FEEDLEWARE_CONFIG``
    environment variable. Each section whose name is a key of ``blueprints``
    is turned into a blueprint mounted under ``/<section>``; other sections
    are reported with a warning and skipped.

    Exits the process with status 1 when the variable is unset or the file
    cannot be read.

    :returns: configured Flask application
    """
    debug = environ.get("FLASK_ENV", "production") == "development"
    level = logging.DEBUG if debug else logging.INFO
    logging.basicConfig(level=level)

    config_path = environ.get("FEEDLEWARE_CONFIG")

    if not config_path:
        print(
            "Please set the FEEDLEWARE_CONFIG environment variable",
            file=sys.stderr,
        )
        sys.exit(1)

    config = configparser.ConfigParser()

    # ConfigParser.read() silently skips files it cannot open and returns the
    # list of files it did parse; treat an empty list as a fatal error rather
    # than starting an app without any service.
    if not config.read(config_path):
        print(
            f"Unable to read the configuration file '{config_path}'",
            file=sys.stderr,
        )
        sys.exit(1)

    app = Flask(__name__)

    for section in config.sections():
        if section in blueprints:
            blueprint = blueprints[section].create_blueprint(config[section])
            app.register_blueprint(blueprint, url_prefix="/" + section)
        else:
            logger.warning("Unknown service '%s'", section)

    return app

View File

@ -176,9 +176,9 @@ def _format_datetime(feed_type, time):
time = _convert_datetime(time)
# Then, convert that to the appropriate string
if feed_type is "rss2":
if feed_type == "rss2":
return strftime("%a, %d %b %Y %H:%M:%S UT", time)
elif feed_type is "atom":
elif feed_type == "atom":
return strftime("%Y-%m-%dT%H:%M:%S", time) + _get_tz_offset();
def _atomise_link(link):

View File

@ -0,0 +1,21 @@
from flask import abort, Blueprint
from .twitch import APIClient, NoSuchUser
from .feed import construct_rss
def create_blueprint(config):
    """Build the Flask blueprint that serves Twitch feeds.

    :param config: mapping providing the ``client_id`` and ``secret`` keys
    :returns: blueprint with a single ``/<login>`` route
    """
    api = APIClient(config["client_id"], config["secret"])
    blueprint = Blueprint("twitch", __name__)

    @blueprint.route("/<string:login>", methods=["GET", "HEAD"])
    def vod(login: str):
        # Unknown users are answered with a 404
        try:
            body = construct_rss(api, login)
        except NoSuchUser:
            abort(404)
        return body, {"Content-Type": "application/rss+xml"}

    return blueprint

73
feedleware/twitch/feed.py Normal file
View File

@ -0,0 +1,73 @@
from datetime import datetime
from ..feedformatter import Feed
from .twitch import APIClient
def parse_iso(iso: str) -> datetime:
    """Parse an ISO 8601 timestamp, rewriting "Z" as the "+00:00" offset first."""
    with_offset = iso.replace("Z", "+00:00")
    return datetime.fromisoformat(with_offset)
def construct_rss(client: APIClient, login: str) -> str:
    """
    Build a RSS stream for a Twitch user.

    The currently active stream, if any, is listed first, followed by the
    user's public videos. A video recorded from the active stream is skipped
    so that the stream is not listed twice.

    :param client: Twitch API client
    :param login: user login
    :returns: RSS stream
    :raises HTTPException: if one of the requests fail
    :raises NoSuchUser: if the user does not exist
    """
    # Imported locally so that the module's import block stays untouched
    from html import escape

    user_info = client.user(login)
    channel_id = user_info["id"]

    videos = client.videos(channel_id)
    stream = client.stream(channel_id)

    feed = Feed()
    user_url = f"https://www.twitch.tv/{user_info['login']}"

    # Set the feed/channel level properties
    feed.feed["title"] = user_info["display_name"]
    feed.feed["link"] = user_url
    feed.feed["author"] = "Twitch RSS Generated"
    feed.feed["description"] = user_info["description"]
    feed.feed["ttl"] = '10'

    if stream is not None:
        item = {}
        item["guid"] = stream["id"]
        item["title"] = "[⏺️ Live] " + stream.get("title", "Untitled Stream")
        item["link"] = user_url
        item["description"] = stream.get("game_name", "")
        item["pubDate"] = parse_iso(stream["started_at"]).timetuple()
        feed.items.append(item)

    for video in videos:
        if video.get("viewable", "public") != "public":
            continue

        item = {}

        if video.get("stream_id") is not None:
            if stream is not None and stream["id"] == video["stream_id"]:
                # Do not add a second item for the active stream
                continue

            # Reuse the stream ID as GUID for videos recorded from a stream
            item["guid"] = video["stream_id"]
        else:
            item["guid"] = video["id"]

        item["title"] = video.get("title", "Untitled Video")

        link = video.get("url", user_url)
        thumbnail = video["thumbnail_url"] \
            .replace("%{width}", "600") \
            .replace("%{height}", "400")

        item["link"] = link
        # The URLs come from the API response; escape them before placing
        # them inside HTML attribute values.
        item["description"] = (
            f'<a href="{escape(link)}"><img src="{escape(thumbnail)}" /></a>'
        )
        item["pubDate"] = parse_iso(video["published_at"]).timetuple()
        feed.items.append(item)

    return feed.format_rss2_string()

218
feedleware/twitch/twitch.py Normal file
View File

@ -0,0 +1,218 @@
import gzip
import http
import json
import logging
import re
import time
import urllib
import urllib.request
from typing import Any, Dict, Optional
from cachetools import cached, TTLCache
# Short aliases for the HTTP types used in annotations and error handling below
HTTPRequest = urllib.request.Request
HTTPResponse = http.client.HTTPResponse
HTTPException = http.client.HTTPException
# Module-level logger named after this module
logger = logging.getLogger(__name__)
def now():
    """Return the current Unix time, truncated to whole seconds."""
    timestamp = time.time()
    return int(timestamp)
class NoSuchUser(Exception):
    """Error signalling that the queried user is unknown."""
class APIClient:
    """
    Client for the Twitch API app endpoints.

    The client authenticates with the OAuth client credentials flow and
    refreshes its app access token on demand before each query.
    """

    # Expression that matches allowed logins
    CHANNEL_FILTER = re.compile("^[a-zA-Z0-9_]{2,25}$")

    def __init__(self, client_id: str = "", secret: str = "", retries: int = 3):
        """
        Create a Twitch API client.

        See <https://dev.twitch.tv/docs/authentication> for details.

        :param client_id: client ID of the app
        :param secret: app secret
        :param retries: number of times to retry each request in case of failure
        """
        self.client_id: str = client_id
        self.secret: str = secret
        # Honor the caller's value instead of a hard-coded 3
        self.retries: int = retries
        # No token yet: epoch 0 forces a refresh on the first query
        self.oauth_token: str = ""
        self.oauth_expire_epoch: int = 0

    def _send_with_retry(self, request: HTTPRequest) -> HTTPResponse:
        """
        Send an HTTP request and retry in case of failure.

        The number of attempts is configured by `self.retries`. Responses with
        a 4xx status are not retried since repeating the same request would
        fail the same way.

        :param request: request to try sending
        :returns: response sent by the server; the caller must close it
        :throws HTTPException: on protocol errors, or if no attempt is allowed
        :throws OSError: on network errors (includes `urllib.error.URLError`)
        """
        last_err: Exception = HTTPException()

        # A bounded loop cannot spin forever, even for a negative retry count
        for _ in range(self.retries):
            try:
                return urllib.request.urlopen(request, timeout=3)
            except (HTTPException, OSError) as err:
                # urlopen reports connection failures, timeouts and HTTP error
                # statuses as URLError/OSError rather than HTTPException
                logger.warning("HTTP error: %s", err)
                last_err = err

                if (
                    isinstance(err, urllib.error.HTTPError)
                    and 400 <= err.code < 500
                ):
                    break

        raise last_err

    def authorize(self) -> bool:
        """
        Refresh the current OAuth app access token if needed.

        This uses the Twitch OAuth client credentials flow. See:
        <https://dev.twitch.tv/docs/authentication/getting-tokens-oauth/#oauth-client-credentials-flow>

        :returns: true if the OAuth app access token was refreshed,
            false if the existing one was still valid
        :throws HTTPException: if the request fails
        :throws OSError: if the request fails because of a network error
        """
        if now() < self.oauth_expire_epoch:
            return False

        logger.debug("Refreshing the OAuth app access token")
        payload = {
            "client_id": self.client_id,
            "client_secret": self.secret,
            "grant_type": "client_credentials",
        }
        request = HTTPRequest(
            url="https://id.twitch.tv/oauth2/token",
            data=urllib.parse.urlencode(payload).encode("utf-8"),
            method="POST",
        )

        # Close the connection as soon as the body has been read
        with self._send_with_retry(request) as http_response:
            response = json.loads(http_response.read().decode("utf-8"))

        self.oauth_token = response["access_token"]
        self.oauth_expire_epoch = now() + int(response["expires_in"])
        return True

    def _query(
        self,
        url: str,
        method: str = "GET",
        data: Optional[Dict[str, str]] = None
    ) -> Any:
        """
        Low-level method to make an authenticated query to the API.

        The payload is always sent as URL query parameters, whatever the
        HTTP method.

        :param url: URL to query
        :param method: HTTP method to use
        :param data: payload dictionary to send
        :returns: JSON data
        :throws HTTPException: if the query fails
        :throws OSError: if the query fails because of a network error
        """
        self.authorize()
        logger.debug("Querying %s %s %s", method, url, data)

        headers = {
            "Authorization": f"Bearer {self.oauth_token}",
            "Client-Id": self.client_id,
            "Accept-Encoding": "gzip",
        }

        # Only append a query string when there is something to send
        full_url = f"{url}?{urllib.parse.urlencode(data)}" if data else url
        request = HTTPRequest(
            url=full_url,
            headers=headers,
            method=method,
        )

        # Close the connection as soon as the body has been read
        with self._send_with_retry(request) as http_response:
            body = http_response.read()
            is_gzipped = http_response.info().get("Content-Encoding") == "gzip"

        if is_gzipped:
            body = gzip.decompress(body)

        return json.loads(body)

    # NOTE(review): `cached` on a method builds one cache shared by every
    # instance, keyed on (self, argument), and keeps each instance referenced
    # by its entries. This is harmless with a single long-lived client;
    # consider per-instance caches if clients become short-lived.
    @cached(cache=TTLCache(maxsize=1000, ttl=24 * 60 * 60))
    def user(self, login: str) -> Any:
        """
        Get information about a user.

        :param login: user login
        :returns: user information
        :throws HTTPException: if the query fails
        :throws NoSuchUser: if the user doesn't exist
        :throws RuntimeError: if the server response is malformed
        """
        # fullmatch, unlike match with "$", also rejects a trailing newline
        if not self.CHANNEL_FILTER.fullmatch(login):
            raise NoSuchUser(f"Login '{login}' is invalid")

        response = self._query(
            url="https://api.twitch.tv/helix/users",
            method="GET",
            data={"login": login},
        )

        # Explicit checks instead of asserts, which are stripped under -O
        if not isinstance(response, dict):
            raise RuntimeError("Unexpected response from Twitch API")

        users = response.get("data")

        if not users:
            raise NoSuchUser(f"User '{login}' does not exist")

        if not isinstance(users, list) or not isinstance(users[0], dict):
            raise RuntimeError("Unexpected response from Twitch API")

        return users[0]

    @cached(cache=TTLCache(maxsize=1000, ttl=10 * 60))
    def videos(self, channel_id: str) -> Any:
        """
        Get the list of videos from a channel.

        :param channel_id: channel ID
        :returns: list of videos
        :throws HTTPException: if the query fails
        :throws RuntimeError: if the server response is malformed
        """
        response = self._query(
            url="https://api.twitch.tv/helix/videos",
            method="GET",
            data={"user_id": channel_id},
        )

        if not isinstance(response, dict) or "data" not in response:
            raise RuntimeError("Unexpected response from Twitch API")

        return response["data"]

    @cached(cache=TTLCache(maxsize=1000, ttl=10 * 60))
    def stream(self, channel_id: str) -> Optional[Any]:
        """
        Get the information about the stream currently active on a channel.

        :param channel_id: channel ID
        :returns: stream information, or None if the response holds no
            entry of type "live"
        :throws HTTPException: if the query fails
        """
        response = self._query(
            url="https://api.twitch.tv/helix/streams",
            method="GET",
            data={"user_id": channel_id},
        )

        if "data" not in response or not response["data"]:
            return None

        current = response["data"][0]

        # An entry without a "type" is treated like a non-live one
        if current.get("type") != "live":
            return None

        return current