Migrate application to python3

Notables:
- Python 3 migration
- The client id is now set via environment variable instead of
hardcoding it in a py file
- Using Flask instead of webapp2
- Migrate away from Memcache (Google deprecated it in py3) to
an in-memory data structure
- Moved landing page to static serving
This commit is contained in:
Laszlo Zeke 2020-09-04 19:35:37 +02:00
parent fc67ccba75
commit 5fccffc2f0
10 changed files with 229 additions and 293 deletions

1
.gitignore vendored
View File

@ -7,6 +7,7 @@ __pycache__/
# Distribution / packaging
.Python
venv/
env/
build/
develop-eggs/

View File

@ -1,6 +1,6 @@
## Twitch RSS Webapp for Google App Engine
This project is a very small web application for serving RSS feed for broadcasts
in Twitch. It fetches data from [Twitch API](https://github.com/justintv/twitch-api) and caches in Memcache.
in Twitch. It fetches data from [Twitch API](https://dev.twitch.tv/docs) and caches it in an in-memory data structure.
The engine is Flask.
A running version can be tried out at:
@ -10,6 +10,7 @@ There is also a VOD only endpoint if you don't want to see ongoing streams which
https://twitchrss.appspot.com/vodonly/twitch
### Deployment
First you should set your own Twitch API client ID in the app.yaml.
See how to deploy on [Google App Engine](https://cloud.google.com/appengine/docs/python/gettingstartedpython27/introduction).
### Other things

View File

@ -1,15 +1,21 @@
runtime: python27
threadsafe: true
runtime: python38
entrypoint: gunicorn -b :$PORT twitchrss:app
env_variables:
TWITCH_CLIENT_ID: __INSERT_TWITCH_CLIENT_ID_HERE__
handlers:
- url: /favicon\.ico
static_files: favicon.ico
upload: favicon\.ico
- url: /.*
script: twitchrss.app
- url: /
static_files: index.html
upload: index\.html
- url: /.+
script: auto
automatic_scaling:
max_instances: 1
max_idle_instances: 1

View File

@ -1,17 +0,0 @@
#
# Copyright 2017, 2016 Laszlo Zeke
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
TWITCH_CLIENT_ID = 'Insert_key_here'

View File

@ -1,6 +0,0 @@
"""`appengine_config` gets loaded when starting a new application instance."""
import vendor
# insert `lib` as a site directory so our `main` module can load
# third-party libraries, and override built-ins with newer
# versions.
vendor.add('lib')

View File

@ -1,6 +1,6 @@
# Feedformatter
# Copyright (c) 2008, Luke Maurits <luke@maurits.id.au>
# Copyright (c) 2015, Laszlo Zeke
# Copyright (c) 2020, Laszlo Zeke
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
@ -28,7 +28,7 @@
__version__ = "0.4"
from cStringIO import StringIO
from io import StringIO
# This "staircase" of import attempts is ugly. If there's a nicer way to do
# this, please let me know!
@ -75,7 +75,7 @@ _rss2_channel_mappings = (
(("title",), "title"),
(("link", "url"), "link"),
(("description", "desc", "summary"), "description"),
(("pubDate", "pubdate", "date", "published", "updated"), "pubDate", lambda(x): _format_datetime("rss2",x)),
(("pubDate", "pubdate", "date", "published", "updated"), "pubDate", lambda x: _format_datetime("rss2",x)),
(("category",), "category"),
(("language",), "language"),
(("copyright",), "copyright"),
@ -91,9 +91,9 @@ _rss2_item_mappings = (
(("link", "url"), "link"),
(("description", "desc", "summary"), "description"),
(("guid", "id"), "guid"),
(("pubDate", "pubdate", "date", "published", "updated"), "pubDate", lambda(x): _format_datetime("rss2",x)),
(("pubDate", "pubdate", "date", "published", "updated"), "pubDate", lambda x: _format_datetime("rss2",x)),
(("category",), "category"),
(("author",), "author", lambda(x): _rssify_author(x))
(("author",), "author", lambda x: _rssify_author(x))
)
# Atom 1.0 ----------
@ -102,19 +102,19 @@ _atom_feed_mappings = (
(("title",), "title"),
(("link", "url"), "id"),
(("description", "desc", "summary"), "subtitle"),
(("pubDate", "pubdate", "date", "published", "updated"), "pubDate", lambda(x): _format_datetime("atom",x)),
(("pubDate", "pubdate", "date", "published", "updated"), "pubDate", lambda x: _format_datetime("atom",x)),
(("category",), "category"),
(("author",), "author", lambda(x): _atomise_author(x))
(("author",), "author", lambda x: _atomise_author(x))
)
_atom_item_mappings = (
(("title",), "title"),
(("link", "url"), "id"),
(("link", "url"), "link", lambda(x): _atomise_link(x)),
(("link", "url"), "link", lambda x: _atomise_link(x)),
(("description", "desc", "summary"), "summary"),
(("pubDate", "pubdate", "date", "published", "updated"), "pubDate", lambda(x): _format_datetime("atom",x)),
(("pubDate", "pubdate", "date", "published", "updated"), "pubDate", lambda x: _format_datetime("atom",x)),
(("category",), "category"),
(("author",), "author", lambda(x): _atomise_author(x))
(("author",), "author", lambda x: _atomise_author(x))
)
def _get_tz_offset():
@ -472,11 +472,11 @@ def main():
item["guid"] = "1234567890"
feed.items.append(item)
print("---- RSS 1.0 ----")
print feed.format_rss1_string(pretty=True)
print(feed.format_rss1_string(pretty=True))
print("---- RSS 2.0 ----")
print feed.format_rss2_string(pretty=True)
print(feed.format_rss2_string(pretty=True))
print("---- Atom 1.0 ----")
print feed.format_atom_string(pretty=True)
print(feed.format_atom_string(pretty=True))
if __name__ == "__main__":
main()

17
TwitchRSS/index.html Normal file
View File

@ -0,0 +1,17 @@
<html>
<head>
<title>Twitch stream RSS generator</title>
</head>
<body>
<p style="font-family: helvetica; font-size:20pt; padding: 20px;">
Twitch stream RSS generator
</p>
<p style="font-family: helvetica; font-size:12pt; padding: 20px;">
You can get RSS of broadcasts by subscribing to https://twitchrss.appspot.com/vod/&lt;channel name&gt;<br/>
For example: <a href="https://twitchrss.appspot.com/vod/riotgames">https://twitchrss.appspot.com/vod/riotgames</a><br/><br/>
You can use the /vodonly handle to get only vods without ongoing streams.
Not endorsed by Twitch.tv, just a fun project.<br/>
<a href="https://github.com/lzeke0/TwitchRSS">Project home</a>
</p>
</body>
</html>

View File

@ -0,0 +1,31 @@
appdirs==1.4.3
CacheControl==0.12.6
certifi==2019.11.28
chardet==3.0.4
click==7.1.2
colorama==0.4.3
contextlib2==0.6.0
distlib==0.3.0
distro==1.4.0
expiringdict==1.2.1
Flask==1.1.2
gunicorn==20.0.4
html5lib==1.0.1
idna==2.8
ipaddr==2.2.0
itsdangerous==1.1.0
Jinja2==2.11.2
lockfile==0.12.2
MarkupSafe==1.1.1
msgpack==0.6.2
packaging==20.3
pep517==0.8.2
progress==1.5
pyparsing==2.4.6
pytoml==0.1.21
requests==2.22.0
retrying==1.3.3
six==1.14.0
urllib3==1.25.8
webencodings==0.5.1
Werkzeug==1.0.1

View File

@ -1,5 +1,5 @@
#
# Copyright 2017, 2016 Laszlo Zeke
# Copyright 2020 Laszlo Zeke
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -14,104 +14,82 @@
# limitations under the License.
#
import webapp2
from webapp2 import Route
import urllib2
from flask import abort, Flask
import urllib
import json
import datetime
import logging
import re
from os import environ
from feedformatter import Feed
from google.appengine.api import memcache
from app_id import TWITCH_CLIENT_ID
from StringIO import StringIO
from expiringdict import ExpiringDict
from io import BytesIO
import gzip
VODCACHE_PREFIX = 'vodcache'
USERIDCACHE_PREFIX = 'userid'
VOD_URL_TEMPLATE = 'https://api.twitch.tv/kraken/channels/%s/videos?broadcast_type=archive,highlight,upload&limit=10'
USERID_URL_TEMPLATE = 'https://api.twitch.tv/kraken/users?login=%s'
VODCACHE_LIFETIME = 600
USERIDCACHE_LIFETIME = 0 # No expire
USERIDCACHE_LIFETIME = 24 * 60 * 60
CHANNEL_FILTER = re.compile("^[a-zA-Z0-9_]{2,25}$")
TWITCH_CLIENT_ID = environ.get("TWITCH_CLIENT_ID")
logging.basicConfig(level=logging.DEBUG if environ.get('DEBUG') else logging.INFO)
if not TWITCH_CLIENT_ID:
raise Exception("Twitch API client id is not set.")
class MainPage(webapp2.RequestHandler):
    """Request handler that serves the HTML landing page of the service."""

    def get(self):
        """Write the usage/description page as a text/html response."""
        landing_page = """
<html>
<head>
<title>Twitch stream RSS generator</title>
</head>
<body>
<p style="font-family: helvetica; font-size:20pt; padding: 20px;">
Twitch stream RSS generator
</p>
<p style="font-family: helvetica; font-size:12pt; padding: 20px;">
You can get RSS of broadcasts by subscribing to https://twitchrss.appspot.com/vod/&lt;channel name&gt;<br/>
For example: <a href="https://twitchrss.appspot.com/vod/riotgames">https://twitchrss.appspot.com/vod/riotgames</a><br/><br/>
You can use the /vodonly handle to get only vods without ongoing streams.
Not endorsed by Twitch.tv, just a fun project.<br/>
<a href="https://github.com/lzeke0/TwitchRSS">Project home</a>
</p>
</body>
</html>
"""
        self.response.headers['Content-Type'] = 'text/html'
        self.response.write(landing_page)
app = Flask(__name__)
vodcache = ExpiringDict(max_len=200, max_age_seconds=VODCACHE_LIFETIME)
useridcache = ExpiringDict(max_len=1000, max_age_seconds=USERIDCACHE_LIFETIME)
class RSSVoDServer(webapp2.RequestHandler):
def get(self, channel):
    """Handle GET for a channel by delegating to _get_inner.

    add_live is passed explicitly with the same value as _get_inner's
    default (True).
    """
    self._get_inner(channel, add_live=True)
def _get_inner(self, channel, add_live=True):
    """Look up the channel, fetch its videos and write them out as RSS.

    The user lookup runs first because the video request is keyed by the
    channel id it returns. The result is written to self.response with an
    application/rss+xml content type.
    """
    user_lookup = json.loads(self.fetch_userid(channel))
    (display_name, user_id) = self.extract_userid(user_lookup)
    videos = json.loads(self.fetch_vods(user_id))
    rss_body = self.construct_rss(channel, videos, display_name, add_live)
    self.response.headers['Content-Type'] = 'application/rss+xml'
    self.response.write(rss_body)
def head(self, channel):
    """Handle HEAD by running the same code path as GET."""
    self.get(channel=channel)
def fetch_userid(self, channel_name):
    """Return the raw user-lookup JSON for channel_name, going through the cache."""
    return self.fetch_or_cache_object(
        channel_name,
        key_prefix=USERIDCACHE_PREFIX,
        url_template=USERID_URL_TEMPLATE,
        cache_time=USERIDCACHE_LIFETIME,
    )
def fetch_vods(self, channel_id):
    """Return the raw video-list JSON for channel_id, going through the cache."""
    return self.fetch_or_cache_object(
        channel_id,
        key_prefix=VODCACHE_PREFIX,
        url_template=VOD_URL_TEMPLATE,
        cache_time=VODCACHE_LIFETIME,
    )
def fetch_or_cache_object(self, channel, key_prefix, url_template, cache_time):
json_data = self.lookup_cache(channel, key_prefix)
if not json_data:
json_data = self.fetch_json(channel, url_template)
if not json_data:
self.abort(404)
@app.route('/vod/<string:channel>', methods=['GET', 'HEAD'])
def vod(channel):
    """Serve the RSS feed for ``channel`` using get_inner's default options.

    Args:
        channel: Channel name taken from the URL path.

    Returns:
        Whatever get_inner returns (the RSS body together with its headers).

    Aborts with 404 when the name does not satisfy CHANNEL_FILTER.
    """
    # fullmatch() instead of match(): the pattern ends in `$`, which also
    # matches just before a trailing newline, so match() would accept
    # "name\n" and pass it on into the upstream request URL.
    if not CHANNEL_FILTER.fullmatch(channel):
        abort(404)
    return get_inner(channel)
@app.route('/vodonly/<string:channel>', methods=['GET', 'HEAD'])
def vodonly(channel):
    """Serve the RSS feed for ``channel`` with add_live disabled.

    Args:
        channel: Channel name taken from the URL path.

    Returns:
        Whatever get_inner returns (the RSS body together with its headers).

    Aborts with 404 when the name does not satisfy CHANNEL_FILTER.
    """
    # fullmatch() instead of match(): the pattern ends in `$`, which also
    # matches just before a trailing newline, so match() would accept
    # "name\n" and pass it on into the upstream request URL.
    if not CHANNEL_FILTER.fullmatch(channel):
        abort(404)
    return get_inner(channel, add_live=False)
def get_inner(channel, add_live=True):
    """Build the RSS response for a channel.

    Resolves the channel to its display name and id, fetches the video list
    for that id and renders it with construct_rss.

    Returns:
        A (body, headers) tuple; headers sets the RSS content type.
    """
    user_lookup = json.loads(fetch_userid(channel))
    display_name, user_id = extract_userid(user_lookup)
    videos = json.loads(fetch_vods(user_id))
    feed_xml = construct_rss(channel, videos, display_name, add_live)
    return feed_xml, {'Content-Type': 'application/rss+xml'}
def fetch_userid(channel_name):
    """Return the raw user-lookup JSON for channel_name, using useridcache."""
    return fetch_or_cache_object(
        channel_name,
        cachedict=useridcache,
        url_template=USERID_URL_TEMPLATE,
    )
def fetch_vods(channel_id):
    """Return the raw video-list JSON for channel_id, using vodcache."""
    return fetch_or_cache_object(
        channel_id,
        cachedict=vodcache,
        url_template=VOD_URL_TEMPLATE,
    )
def fetch_or_cache_object(key, cachedict, url_template):
    """Return the payload for ``key``, fetching and caching it on a miss.

    Args:
        key: Cache key; also passed to fetch_json to fill url_template.
        cachedict: Mapping used as the cache (read with .get, written by item
            assignment).
        url_template: Template handed to fetch_json on a cache miss.

    Returns:
        The cached or freshly fetched payload.

    Aborts with 404 when nothing is cached and the fetch yields nothing.
    """
    cached = cachedict.get(key)
    if cached:
        return cached

    fetched = fetch_json(key, url_template)
    if not fetched:
        abort(404)
    # Only successful fetches are stored, so a failure is retried next time.
    cachedict[key] = fetched
    return fetched
@staticmethod
def lookup_cache(channel_name, key_prefix):
    """Return the memcache entry for the channel, or '' when there is none.

    The memcache key is "<key_prefix>:v5:<channel_name>".
    """
    cache_key = '%s:v5:%s' % (key_prefix, channel_name)
    hit = memcache.get(cache_key)
    if hit is None:
        logging.debug('Cache miss for %s' % channel_name)
        return ''
    logging.debug('Cache hit for %s' % channel_name)
    return hit
@staticmethod
def store_cache(channel_name, data, key_prefix, cache_lifetime):
    """Store ``data`` in memcache under "<key_prefix>:v5:<channel_name>".

    Best effort: a failing cache write is logged as a warning and otherwise
    ignored, so the caller can still use the data it already has.

    Args:
        channel_name: Identifier used in the cache key and log messages.
        data: Payload to cache.
        key_prefix: Prefix that namespaces the cache key.
        cache_lifetime: Passed to memcache.set as the expiry argument
            (presumably seconds -- confirm against the memcache API).
    """
    try:
        logging.debug('Cached data for %s' % channel_name)
        memcache.set('%s:v5:%s' % (key_prefix, channel_name), data, cache_lifetime)
    except Exception as e:
        # Exception rather than BaseException: SystemExit and
        # KeyboardInterrupt must not be swallowed by a cache write.
        logging.warning('Memcache exception: %s' % e)
    return
@staticmethod
def fetch_json(id, url_template):
url = url_template % id
headers = {
@ -119,28 +97,29 @@ class RSSVoDServer(webapp2.RequestHandler):
'Client-ID': TWITCH_CLIENT_ID,
'Accept-Encoding': 'gzip'
}
request = urllib2.Request(url, headers=headers)
request = urllib.request.Request(url, headers=headers)
retries = 0
while retries < 3:
try:
result = urllib2.urlopen(request, timeout=3)
result = urllib.request.urlopen(request, timeout=3)
logging.debug('Fetch from twitch for %s with code %s' % (id, result.getcode()))
if result.info().get('Content-Encoding') == 'gzip':
logging.debug('Fetched gzip content')
buf = StringIO(result.read())
buf = BytesIO(result.read())
f = gzip.GzipFile(fileobj=buf)
return f.read()
return result.read()
except BaseException as e:
except Exception as e:
logging.warning("Fetch exception caught: %s" % e)
retries += 1
return ''
return None
def extract_userid(self, user_info):
def extract_userid(user_info):
userlist = user_info.get('users')
if not userlist:
logging.info('No such user found.')
self.abort(404)
abort(404)
# Get the first id in the list
userid = userlist[0].get('_id')
username = userlist[0].get('display_name')
@ -148,15 +127,16 @@ class RSSVoDServer(webapp2.RequestHandler):
return username, userid
else:
logging.warning('Userid is not found in %s' % user_info)
self.abort(404)
abort(404)
def construct_rss(self, channel_name, vods_info, display_name, add_live=True):
def construct_rss(channel_name, vods_info, display_name, add_live=True):
feed = Feed()
# Set the feed/channel level properties
feed.feed["title"] = "%s's Twitch video RSS" % display_name
feed.feed["link"] = "https://twitchrss.appspot.com/"
feed.feed["author"] = "Twitch RSS Gen"
feed.feed["author"] = "Twitch RSS Generated"
feed.feed["description"] = "The RSS Feed of %s's videos on Twitch" % display_name
feed.feed["ttl"] = '10'
@ -189,17 +169,11 @@ class RSSVoDServer(webapp2.RequestHandler):
feed.items.append(item)
except KeyError as e:
logging.warning('Issue with json: %s\nException: %s' % (vods_info, e))
self.abort(404)
abort(404)
return feed.format_rss2_string()
class RSSVoDServerOnlyVoD(RSSVoDServer):
    """RSSVoDServer variant whose GET handler runs _get_inner with add_live=False."""

    def get(self, channel):
        """Handle GET for a channel with add_live switched off."""
        self._get_inner(channel, False)
app = webapp2.WSGIApplication([
Route('/', MainPage),
Route('/vod/<channel:[a-zA-Z0-9_]{2,25}>', RSSVoDServer),
Route('/vodonly/<channel:[a-zA-Z0-9_]{2,25}>', RSSVoDServerOnlyVoD)
], debug=False)
# For debug
if __name__ == "__main__":
app.run(host='127.0.0.1', port=8080, debug=True)

View File

@ -1,71 +0,0 @@
#
# Copyright 2014 Jon Wayne Parrott, [proppy], Michael R. Bernstein
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Notes:
# - Imported from https://github.com/jonparrott/Darth-Vendor/.
# - Added license header.
# - Renamed `darth.vendor` to `vendor.add` to match upcoming SDK interface.
# - Renamed `position` param to `index` to match upcoming SDK interface.
# - Removed funny arworks docstring.
import site
import os.path
import sys
def add(folder, index=1):
    """
    Adds the given folder to the python path. Supports namespaced packages.
    By default, packages in the given folder take precedence over site-packages
    and any previous path manipulations.

    Args:
      folder: Path to the folder containing packages. If it contains a
        virtualenv layout (``lib/pythonX.Y/site-packages``, looked up as
        given, i.e. relative to the current working directory for relative
        paths) that site-packages directory is used; otherwise the path is
        joined onto the directory of this module.
      index: Where in ``sys.path`` to insert the vendor packages. By default
        this is set to 1. It is inadvisable to set it to 0 as it will override
        any modules in the current working directory.
    """
    # Check if the path contains a virtualenv. The directory name is built
    # from sys.version_info because slicing sys.version ("3.10.4"[:3]) yields
    # "3.1" on two-digit minor versions.
    python_dir = 'python%d.%d' % sys.version_info[:2]
    site_dir = os.path.join(folder, 'lib', python_dir, 'site-packages')
    if os.path.exists(site_dir):
        folder = site_dir
    # Otherwise it's just a normal path, make it absolute.
    else:
        folder = os.path.join(os.path.dirname(__file__), folder)

    # Use site.addsitedir() because it appropriately reads .pth
    # files for namespaced packages. Unfortunately, there's not an
    # option to choose where addsitedir() puts its paths in sys.path
    # so we have to do a little bit of magic to make it play along.

    # Split sys.path at `index` into the entries that must stay in front
    # and the rest, e.g. for index=1
    #   ['.', '/site-packages/x', 'site-packages/y']
    # becomes
    #   ['.'] and ['/site-packages/x', 'site-packages/y']
    # so that '.' remains at the top of the list while our vendor files
    # override everything else.
    sys.path, remainder = sys.path[:index], sys.path[index:]

    # Now we call addsitedir which will append our vendor directories
    # to sys.path (which was truncated by the last step.)
    site.addsitedir(folder)

    # Finally, we'll add the paths we removed back.
    # The final product is something like this:
    #   ['.', '/vendor-folder', '/site-packages/x', 'site-packages/y']
    sys.path.extend(remainder)