Migrate application to python3

Notable changes: - Python 3 migration - The client ID is now set via an environment variable instead of being hardcoded in a .py file - Using Flask instead of webapp2 - Migrated away from Memcache (Google deprecated it for Python 3) to an in-memory data structure - Moved the landing page to static serving
2020-09-04 19:35:37 +02:00 · 2020-09-04 19:35:37 +02:00 · 5fccffc2f0
parent fc67ccba75
commit 5fccffc2f0
10 changed files with 229 additions and 293 deletions
--- a/.gitignore
+++ b/.gitignore
@ -7,6 +7,7 @@ __pycache__/

 # Distribution / packaging
 .Python
+venv/
 env/
 build/
 develop-eggs/
--- a/README.md
+++ b/README.md
@ -1,6 +1,6 @@
 ## Twitch RSS Webapp for Google App Engine
 This project is a very small web application for serving RSS feed for broadcasts
-in Twitch. It fetches data from [Twitch API](https://github.com/justintv/twitch-api) and caches in Memcache.
+in Twitch. It fetches data from [Twitch API](https://dev.twitch.tv/docs) and caches in Memcache.
 The engine is webapp2.

 A running version can be tried out at:
@ -10,6 +10,7 @@ There is also a VOD only endpoint if you don't want to see ongoing streams which
 https://twitchrss.appspot.com/vodonly/twitch

 ### Deployment
+First, set your own Twitch API client ID in `app.yaml` (the `TWITCH_CLIENT_ID` entry under `env_variables`).
 See how to deploy on [Google App Engine](https://cloud.google.com/appengine/docs/python/gettingstartedpython27/introduction).

 ### Other things
--- a/TwitchRSS/app.yaml
+++ b/TwitchRSS/app.yaml
@ -1,15 +1,21 @@
-runtime: python27
-threadsafe: true
+runtime: python38
+
+entrypoint: gunicorn -b :$PORT twitchrss:app
+
+env_variables:
+  TWITCH_CLIENT_ID: __INSERT_TWITCH_CLIENT_ID_HERE__

 handlers:
 - url: /favicon\.ico
  static_files: favicon.ico
  upload: favicon\.ico

- url: /.*
-  script: twitchrss.app
+- url: /
+  static_files: index.html
+  upload: index\.html
+
+- url: /.+
+  script: auto

 automatic_scaling:
  max_instances: 1
-  max_idle_instances: 1
-
--- a/TwitchRSS/app_id.py
+++ b/TwitchRSS/app_id.py
@ -1,17 +0,0 @@
-#
-# Copyright 2017, 2016 Laszlo Zeke
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-TWITCH_CLIENT_ID = 'Insert_key_here'
--- a/TwitchRSS/appengine_config.py
+++ b/TwitchRSS/appengine_config.py
@ -1,6 +0,0 @@
-"""`appengine_config` gets loaded when starting a new application instance."""
-import vendor
-# insert `lib` as a site directory so our `main` module can load
-# third-party libraries, and override built-ins with newer
-# versions.
-vendor.add('lib')
--- a/TwitchRSS/lib/feedformatter.py
+++ b/TwitchRSS/lib/feedformatter.py
@ -1,6 +1,6 @@
 # Feedformatter
 # Copyright (c) 2008, Luke Maurits <luke@maurits.id.au>
-# Copyright (c) 2015, Laszlo Zeke
+# Copyright (c) 2020, Laszlo Zeke
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@ -28,7 +28,7 @@

 __version__ = "0.4"

-from cStringIO import StringIO
+from io import StringIO

 # This "staircase" of import attempts is ugly.  If there's a nicer way to do
 # this, please let me know!
@ -75,7 +75,7 @@ _rss2_channel_mappings = (
    (("title",), "title"),
    (("link", "url"), "link"),
    (("description", "desc", "summary"), "description"),
-    (("pubDate", "pubdate", "date", "published", "updated"), "pubDate", lambda(x): _format_datetime("rss2",x)),
+    (("pubDate", "pubdate", "date", "published", "updated"), "pubDate", lambda x: _format_datetime("rss2",x)),
    (("category",), "category"),
    (("language",), "language"),
    (("copyright",), "copyright"),
@ -91,9 +91,9 @@ _rss2_item_mappings = (
    (("link", "url"), "link"),
    (("description", "desc", "summary"), "description"),
    (("guid", "id"), "guid"),
-    (("pubDate", "pubdate", "date", "published", "updated"), "pubDate", lambda(x): _format_datetime("rss2",x)),
+    (("pubDate", "pubdate", "date", "published", "updated"), "pubDate", lambda x: _format_datetime("rss2",x)),
    (("category",), "category"),
-    (("author",), "author", lambda(x): _rssify_author(x))
+    (("author",), "author", lambda x: _rssify_author(x))
 )

 # Atom 1.0 ----------
@ -102,19 +102,19 @@ _atom_feed_mappings = (
    (("title",), "title"),
    (("link", "url"), "id"),
    (("description", "desc", "summary"), "subtitle"),
-    (("pubDate", "pubdate", "date", "published", "updated"), "pubDate", lambda(x): _format_datetime("atom",x)),
+    (("pubDate", "pubdate", "date", "published", "updated"), "pubDate", lambda x: _format_datetime("atom",x)),
    (("category",), "category"),
-    (("author",), "author", lambda(x): _atomise_author(x))
+    (("author",), "author", lambda x: _atomise_author(x))
 )

 _atom_item_mappings = (
    (("title",), "title"),
    (("link", "url"), "id"),
-    (("link", "url"), "link", lambda(x): _atomise_link(x)),
+    (("link", "url"), "link", lambda x: _atomise_link(x)),
    (("description", "desc", "summary"), "summary"),
-    (("pubDate", "pubdate", "date", "published", "updated"), "pubDate", lambda(x): _format_datetime("atom",x)),
+    (("pubDate", "pubdate", "date", "published", "updated"), "pubDate", lambda x: _format_datetime("atom",x)),
    (("category",), "category"),
-    (("author",), "author", lambda(x): _atomise_author(x))
+    (("author",), "author", lambda x: _atomise_author(x))
 )

 def _get_tz_offset():
@ -472,11 +472,11 @@ def main():
    item["guid"] = "1234567890"
    feed.items.append(item)
    print("---- RSS 1.0 ----")
-    print feed.format_rss1_string(pretty=True)
+    print(feed.format_rss1_string(pretty=True))
    print("---- RSS 2.0 ----")
-    print feed.format_rss2_string(pretty=True)
+    print(feed.format_rss2_string(pretty=True))
    print("---- Atom 1.0 ----")
-    print feed.format_atom_string(pretty=True)
+    print(feed.format_atom_string(pretty=True))

 if __name__ == "__main__":
    main()
--- a/TwitchRSS/index.html
+++ b/TwitchRSS/index.html
@ -0,0 +1,17 @@
+<html>
+    <head>
+    <title>Twitch stream RSS generator</title>
+    </head>
+    <body>
+        <p style="font-family: helvetica; font-size:20pt; padding: 20px;">
+            Twitch stream RSS generator
+        </p>
+        <p style="font-family: helvetica; font-size:12pt; padding: 20px;">
+            You can get RSS of broadcasts by subscribing to https://twitchrss.appspot.com/vod/&lt;channel name&gt;<br/>
+            For example: <a href="https://twitchrss.appspot.com/vod/riotgames">https://twitchrss.appspot.com/vod/riotgames</a><br/><br/>
+            You can use the /vodonly handle to get only vods without ongoing streams.
+            Not endorsed by Twitch.tv, just a fun project.<br/>
+            <a href="https://github.com/lzeke0/TwitchRSS">Project home</a>
+        </p>
+    </body>
+</html>
--- a/TwitchRSS/requirements.txt
+++ b/TwitchRSS/requirements.txt
@ -0,0 +1,31 @@
+appdirs==1.4.3
+CacheControl==0.12.6
+certifi==2019.11.28
+chardet==3.0.4
+click==7.1.2
+colorama==0.4.3
+contextlib2==0.6.0
+distlib==0.3.0
+distro==1.4.0
+expiringdict==1.2.1
+Flask==1.1.2
+gunicorn==20.0.4
+html5lib==1.0.1
+idna==2.8
+ipaddr==2.2.0
+itsdangerous==1.1.0
+Jinja2==2.11.2
+lockfile==0.12.2
+MarkupSafe==1.1.1
+msgpack==0.6.2
+packaging==20.3
+pep517==0.8.2
+progress==1.5
+pyparsing==2.4.6
+pytoml==0.1.21
+requests==2.22.0
+retrying==1.3.3
+six==1.14.0
+urllib3==1.25.8
+webencodings==0.5.1
+Werkzeug==1.0.1
--- a/TwitchRSS/twitchrss.py
+++ b/TwitchRSS/twitchrss.py
@ -1,5 +1,5 @@
 #
-# Copyright 2017, 2016 Laszlo Zeke
+# Copyright 2020 Laszlo Zeke
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@ -14,104 +14,82 @@
 # limitations under the License.
 #

-import webapp2
-from webapp2 import Route
-import urllib2
+from flask import abort, Flask
+import urllib
 import json
 import datetime
 import logging
+import re
+from os import environ
 from feedformatter import Feed
-from google.appengine.api import memcache
-from app_id import TWITCH_CLIENT_ID
-from StringIO import StringIO
+from expiringdict import ExpiringDict
+from io import BytesIO
 import gzip


-VODCACHE_PREFIX = 'vodcache'
-USERIDCACHE_PREFIX = 'userid'
 VOD_URL_TEMPLATE = 'https://api.twitch.tv/kraken/channels/%s/videos?broadcast_type=archive,highlight,upload&limit=10'
 USERID_URL_TEMPLATE = 'https://api.twitch.tv/kraken/users?login=%s'
 VODCACHE_LIFETIME = 600
-USERIDCACHE_LIFETIME = 0  # No expire
+USERIDCACHE_LIFETIME = 24 * 60 * 60
+CHANNEL_FILTER = re.compile("^[a-zA-Z0-9_]{2,25}$")
+TWITCH_CLIENT_ID = environ.get("TWITCH_CLIENT_ID")
+logging.basicConfig(level=logging.DEBUG if environ.get('DEBUG') else logging.INFO)
+
+if not TWITCH_CLIENT_ID:
+    raise Exception("Twitch API client id is not set.")


-class MainPage(webapp2.RequestHandler):
-    def get(self):
-        self.response.headers['Content-Type'] = 'text/html'
-        html_resp = """
-        <html>
-            <head>
-            <title>Twitch stream RSS generator</title>
-            </head>
-            <body>
-                <p style="font-family: helvetica; font-size:20pt; padding: 20px;">
-                    Twitch stream RSS generator
-                </p>
-                <p style="font-family: helvetica; font-size:12pt; padding: 20px;">
-                    You can get RSS of broadcasts by subscribing to https://twitchrss.appspot.com/vod/&lt;channel name&gt;<br/>
-                    For example: <a href="https://twitchrss.appspot.com/vod/riotgames">https://twitchrss.appspot.com/vod/riotgames</a><br/><br/>
-                    You can use the /vodonly handle to get only vods without ongoing streams.
-                    Not endorsed by Twitch.tv, just a fun project.<br/>
-                    <a href="https://github.com/lzeke0/TwitchRSS">Project home</a>
-                </p>
-            </body>
-        </html>
-        """
-        self.response.write(html_resp)
+app = Flask(__name__)
+
+vodcache = ExpiringDict(max_len=200, max_age_seconds=VODCACHE_LIFETIME)
+useridcache = ExpiringDict(max_len=1000, max_age_seconds=USERIDCACHE_LIFETIME)


-class RSSVoDServer(webapp2.RequestHandler):
-    def get(self, channel):
-	self._get_inner(channel)
-
-    def _get_inner(self, channel, add_live=True):
-        userid_json = self.fetch_userid(channel)
-        (channel_display_name, channel_id) = self.extract_userid(json.loads(userid_json))
-        channel_json = self.fetch_vods(channel_id)
-        decoded_json = json.loads(channel_json)
-        rss_data = self.construct_rss(channel, decoded_json, channel_display_name, add_live)
-        self.response.headers['Content-Type'] = 'application/rss+xml'
-        self.response.write(rss_data)
-
-    def head(self,channel):
-        self.get(channel)
-
-    def fetch_userid(self, channel_name):
-        return self.fetch_or_cache_object(channel_name, USERIDCACHE_PREFIX, USERID_URL_TEMPLATE, USERIDCACHE_LIFETIME)
-
-    def fetch_vods(self, channel_id):
-        return self.fetch_or_cache_object(channel_id, VODCACHE_PREFIX, VOD_URL_TEMPLATE, VODCACHE_LIFETIME)
-
-    def fetch_or_cache_object(self, channel, key_prefix, url_template, cache_time):
-        json_data = self.lookup_cache(channel, key_prefix)
-        if not json_data:
-            json_data = self.fetch_json(channel, url_template)
-            if not json_data:
-                self.abort(404)
+@app.route('/vod/<string:channel>', methods=['GET', 'HEAD'])
+def vod(channel):
+    if CHANNEL_FILTER.match(channel):
+        return get_inner(channel)
    else:
-                self.store_cache(channel, json_data, key_prefix, cache_time)
+        abort(404)
+
+
+@app.route('/vodonly/<string:channel>', methods=['GET', 'HEAD'])
+def vodonly(channel):
+    if CHANNEL_FILTER.match(channel):
+        return get_inner(channel, add_live=False)
+    else:
+        abort(404)
+
+
+def get_inner(channel, add_live=True):
+    userid_json = fetch_userid(channel)
+    (channel_display_name, channel_id) = extract_userid(json.loads(userid_json))
+    channel_json = fetch_vods(channel_id)
+    decoded_json = json.loads(channel_json)
+    rss_data = construct_rss(channel, decoded_json, channel_display_name, add_live)
+    headers = {'Content-Type': 'application/rss+xml'}
+    return rss_data, headers
+
+
+def fetch_userid(channel_name):
+    return fetch_or_cache_object(channel_name, useridcache, USERID_URL_TEMPLATE)
+
+
+def fetch_vods(channel_id):
+    return fetch_or_cache_object(channel_id, vodcache, VOD_URL_TEMPLATE)
+
+
+def fetch_or_cache_object(key, cachedict, url_template):
+    json_data = cachedict.get(key)
+    if not json_data:
+        json_data = fetch_json(key, url_template)
+        if not json_data:
+            abort(404)
+        else:
+            cachedict[key] = json_data
    return json_data

-    @staticmethod
-    def lookup_cache(channel_name, key_prefix):
-        cached_data = memcache.get('%s:v5:%s' % (key_prefix, channel_name))
-        if cached_data is not None:
-            logging.debug('Cache hit for %s' % channel_name)
-            return cached_data
-        else:
-            logging.debug('Cache miss for %s' % channel_name)
-            return ''

-    @staticmethod
-    def store_cache(channel_name, data, key_prefix, cache_lifetime):
-        try:
-            logging.debug('Cached data for %s' % channel_name)
-            memcache.set('%s:v5:%s' % (key_prefix, channel_name), data, cache_lifetime)
-        except BaseException as e:
-            logging.warning('Memcache exception: %s' % e)
-            return
-
-    @staticmethod
 def fetch_json(id, url_template):
    url = url_template % id
    headers = {
@ -119,28 +97,29 @@ class RSSVoDServer(webapp2.RequestHandler):
        'Client-ID': TWITCH_CLIENT_ID,
        'Accept-Encoding': 'gzip'
    }
-        request = urllib2.Request(url, headers=headers)
+    request = urllib.request.Request(url, headers=headers)
    retries = 0
    while retries < 3:
        try:
-                result = urllib2.urlopen(request, timeout=3)
+            result = urllib.request.urlopen(request, timeout=3)
            logging.debug('Fetch from twitch for %s with code %s' % (id, result.getcode()))
            if result.info().get('Content-Encoding') == 'gzip':
                logging.debug('Fetched gzip content')
-                    buf = StringIO(result.read())
+                buf = BytesIO(result.read())
                f = gzip.GzipFile(fileobj=buf)
                return f.read()
            return result.read()
-            except BaseException as e:
+        except Exception as e:
            logging.warning("Fetch exception caught: %s" % e)
            retries += 1
-        return ''
+    return None

-    def extract_userid(self, user_info):
+
+def extract_userid(user_info):
    userlist = user_info.get('users')
    if not userlist:
        logging.info('No such user found.')
-            self.abort(404)
+        abort(404)
    # Get the first id in the list
    userid = userlist[0].get('_id')
    username = userlist[0].get('display_name')
@ -148,15 +127,16 @@ class RSSVoDServer(webapp2.RequestHandler):
        return username, userid
    else:
        logging.warning('Userid is not found in %s' % user_info)
-            self.abort(404)
+        abort(404)

-    def construct_rss(self, channel_name, vods_info, display_name, add_live=True):
+
+def construct_rss(channel_name, vods_info, display_name, add_live=True):
    feed = Feed()

    # Set the feed/channel level properties
    feed.feed["title"] = "%s's Twitch video RSS" % display_name
    feed.feed["link"] = "https://twitchrss.appspot.com/"
-        feed.feed["author"] = "Twitch RSS Gen"
+    feed.feed["author"] = "Twitch RSS Generated"
    feed.feed["description"] = "The RSS Feed of %s's videos on Twitch" % display_name
    feed.feed["ttl"] = '10'

@ -189,17 +169,11 @@ class RSSVoDServer(webapp2.RequestHandler):
                feed.items.append(item)
    except KeyError as e:
        logging.warning('Issue with json: %s\nException: %s' % (vods_info, e))
-            self.abort(404)
+        abort(404)

    return feed.format_rss2_string()

-class RSSVoDServerOnlyVoD(RSSVoDServer):
-    def get(self, channel):
-	self._get_inner(channel, add_live=False)

-
-app = webapp2.WSGIApplication([
-    Route('/', MainPage),
-    Route('/vod/<channel:[a-zA-Z0-9_]{2,25}>', RSSVoDServer),
-    Route('/vodonly/<channel:[a-zA-Z0-9_]{2,25}>', RSSVoDServerOnlyVoD)
-], debug=False)
+# For debug
+if __name__ == "__main__":
+    app.run(host='127.0.0.1', port=8080, debug=True)
--- a/TwitchRSS/vendor.py
+++ b/TwitchRSS/vendor.py
@ -1,71 +0,0 @@
-#
-# Copyright 2014 Jon Wayne Parrott, [proppy], Michael R. Bernstein
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# Notes:
-# - Imported from https://github.com/jonparrott/Darth-Vendor/.
-# - Added license header.
-# - Renamed `darth.vendor` to `vendor.add` to match upcoming SDK interface.
-# - Renamed `position` param to `index` to match upcoming SDK interface.
-# - Removed funny arworks docstring.
-
-import site
-import os.path
-import sys
-
-
-def add(folder, index=1):
-  """
-  Adds the given folder to the python path. Supports namespaced packages.
-  By default, packages in the given folder take precedence over site-packages
-  and any previous path manipulations.
-
-  Args:
-    folder: Path to the folder containing packages, relative to ``os.getcwd()``
-    position: Where in ``sys.path`` to insert the vendor packages. By default
-      this is set to 1. It is inadvisable to set it to 0 as it will override
-      any modules in the current working directory.
-  """
-
-  # Check if the path contains a virtualenv.
-  site_dir = os.path.join(folder, 'lib', 'python' + sys.version[:3], 'site-packages')
-  if os.path.exists(site_dir):
-    folder = site_dir
-  # Otherwise it's just a normal path, make it absolute.
-  else:
-    folder = os.path.join(os.path.dirname(__file__), folder)
-
-  # Use site.addsitedir() because it appropriately reads .pth
-  # files for namespaced packages. Unfortunately, there's not an
-  # option to choose where addsitedir() puts its paths in sys.path
-  # so we have to do a little bit of magic to make it play along.
-
-  # We're going to grab the current sys.path and split it up into
-  # the first entry and then the rest. Essentially turning
-  #   ['.', '/site-packages/x', 'site-packages/y']
-  # into
-  #   ['.'] and ['/site-packages/x', 'site-packages/y']
-  # The reason for this is we want '.' to remain at the top of the
-  # list but we want our vendor files to override everything else.
-  sys.path, remainder = sys.path[:1], sys.path[1:]
-
-  # Now we call addsitedir which will append our vendor directories
-  # to sys.path (which was truncated by the last step.)
-  site.addsitedir(folder)
-
-  # Finally, we'll add the paths we removed back.
-  # The final product is something like this:
-  #   ['.', '/vendor-folder', /site-packages/x', 'site-packages/y']
-  sys.path.extend(remainder)