Initial commit

This commit is contained in:
Laszlo Zeke 2015-10-10 00:43:29 +02:00
parent 3f88b5dd52
commit 66c83f82d8
8 changed files with 725 additions and 1 deletions

2
.gitignore vendored
View File

@ -13,7 +13,7 @@ develop-eggs/
dist/ dist/
downloads/ downloads/
eggs/ eggs/
lib/ # lib/
lib64/ lib64/
parts/ parts/
sdist/ sdist/

13
LICENSE.md Normal file
View File

@ -0,0 +1,13 @@
Copyright 2015 Laszlo Zeke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

17
README.md Normal file
View File

@ -0,0 +1,17 @@
## Twitch RSS Webapp for Google App Engine
This project is a very small web application that serves RSS feeds of broadcasts
on Twitch. It fetches data from the [Twitch API](https://github.com/justintv/twitch-api) and caches it in Memcache.
The engine is webapp2.
A running version can be tried out at:
https://twitchrss.appspot.com/vod/twitch
### Deployment
See how to deploy on [Google App Engine](https://cloud.google.com/appengine/docs/python/gettingstartedpython27/introduction).
### Other things
The project uses a slightly modified [Feedformatter](https://code.google.com/p/feedformatter/) to support
more tags and a time zone in the pubDate tag.
### About
The project has been developed by László Zeke.

9
TwitchRSS/app.yaml Normal file
View File

@ -0,0 +1,9 @@
application: twitchrss-app-engine
version: 1
runtime: python27
api_version: 1
threadsafe: true
handlers:
- url: /.*
script: twitchrss.app

View File

@ -0,0 +1,6 @@
"""`appengine_config` gets loaded when starting a new application instance."""
import vendor
# Register the `lib` folder as a site directory so the application module
# can load third-party libraries, and override built-ins with newer
# versions.
vendor.add('lib')

View File

@ -0,0 +1,476 @@
# Feedformatter
# Copyright (c) 2008, Luke Maurits <luke@maurits.id.au>
# Copyright (c) 2015, Laszlo Zeke
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
__version__ = "0.4"
from cStringIO import StringIO
# This "staircase" of import attempts is ugly. If there's a nicer way to do
# this, please let me know!
try:
import xml.etree.cElementTree as ET
except ImportError:
try:
import xml.etree.ElementTree as ET
except ImportError:
try:
import cElementTree as ET
except ImportError:
try:
from elementtree import ElementTree as ET
except ImportError:
raise ImportError("Could not import any form of element tree!")
try:
from xml.dom.ext import PrettyPrint
from xml.dom.ext.reader.Sax import FromXml
feedformatterCanPrettyPrint = True
except ImportError:
feedformatterCanPrettyPrint = False
from time import time, strftime, localtime, mktime, struct_time, timezone
# RSS 1.0 Functions ----------
_rss1_channel_mappings = (
(("title",), "title"),
(("link", "url"), "link"),
(("description", "desc", "summary"), "description")
)
_rss1_item_mappings = (
(("title",), "title"),
(("link", "url"), "link"),
(("description", "desc", "summary"), "description")
)
# RSS 2.0 Functions ----------
_rss2_channel_mappings = (
(("title",), "title"),
(("link", "url"), "link"),
(("description", "desc", "summary"), "description"),
(("pubDate", "pubdate", "date", "published", "updated"), "pubDate", lambda(x): _format_datetime("rss2",x)),
(("category",), "category"),
(("language",), "language"),
(("copyright",), "copyright"),
(("webMaster",), "webmaster"),
(("image",), "image"),
(("skipHours",), "skipHours"),
(("skipDays",), "skipDays")
)
_rss2_item_mappings = (
(("title",), "title"),
(("link", "url"), "link"),
(("description", "desc", "summary"), "description"),
(("guid", "id"), "guid"),
(("pubDate", "pubdate", "date", "published", "updated"), "pubDate", lambda(x): _format_datetime("rss2",x)),
(("category",), "category"),
(("author",), "author", lambda(x): _rssify_author(x)),
(("ttl",), "ttl")
)
# Atom 1.0 ----------
_atom_feed_mappings = (
(("title",), "title"),
(("link", "url"), "id"),
(("description", "desc", "summary"), "subtitle"),
(("pubDate", "pubdate", "date", "published", "updated"), "pubDate", lambda(x): _format_datetime("atom",x)),
(("category",), "category"),
(("author",), "author", lambda(x): _atomise_author(x))
)
_atom_item_mappings = (
(("title",), "title"),
(("link", "url"), "id"),
(("link", "url"), "link", lambda(x): _atomise_link(x)),
(("description", "desc", "summary"), "summary"),
(("pubDate", "pubdate", "date", "published", "updated"), "pubDate", lambda(x): _format_datetime("atom",x)),
(("category",), "category"),
(("author",), "author", lambda(x): _atomise_author(x))
)
def _get_tz_offset():
"""
Return the current timezone's offset from GMT as a string
in the format +/-HH:MM, as required by RFC3339.
"""
seconds = -1*timezone # Python gets the offset backward! >:(
minutes = seconds/60
hours = minutes/60
minutes = minutes - hours*60
if seconds < 0:
return "-%02d:%d" % (hours, minutes)
else:
return "+%02d:%d" % (hours, minutes)
def _convert_datetime(time):
"""
Convert time, which may be one of a whole lot of things, into a
standard 9 part time tuple.
"""
if (type(time) is tuple and len(time) ==9) or type(time) is struct_time:
# Already done!
return time
elif type(time) is int or type(time) is float:
# Assume this is a seconds-since-epoch time
return localtime(time)
elif type(time) is str:
if time.isalnum():
# String is alphanumeric - a time stamp?
try:
return strptime(time, "%a, %d %b %Y %H:%M:%S %Z")
except ValueError:
raise Exception("Unrecongised time format!")
else:
# Maybe this is a string of an epoch time?
try:
return localtime(float(time))
except ValueError:
# Guess not.
raise Exception("Unrecongised time format!")
else:
# No idea what this is. Give up!
raise Exception("Unrecongised time format!")
def _format_datetime(feed_type, time):
    """
    Convert some representation of a date and time into a string which can be
    used in a validly formatted feed of type feed_type. Raise an
    Exception if this cannot be done.

    feed_type -- "rss2" or "atom"; any other value yields None.
    time      -- anything accepted by _convert_datetime.
    """
    # First, convert time into a time structure
    time = _convert_datetime(time)
    # Then, convert that to the appropriate string.  The feed type is
    # compared with ==, not "is": identity of equal strings is not guaranteed.
    if feed_type == "rss2":
        # NOTE(review): the "UTC" label is appended literally, with no
        # conversion, so the caller is presumably expected to pass UTC - confirm.
        return strftime("%a, %d %b %Y %H:%M:%S UTC", time)
    elif feed_type == "atom":
        return strftime("%Y-%m-%dT%H:%M:%S", time) + _get_tz_offset()
    return None
def _atomise_link(link):
if type(link) is dict:
return dict
else:
return {"href" : link}
def _atomise_author(author):
"""
Convert author from whatever it is to a dictionary representing an
atom:Person construct.
"""
if type(author) is dict:
return author
else:
if author.startswith("http://") or author.startswith("www"):
# This is clearly a URI
return {"uri" : author}
elif "@" in author and "." in author:
# This is most probably an email address
return {"email" : author}
else:
# Must be a name
return {"name" : author}
def _rssify_author(author):
"""
Convert author from whatever it is to a plain old email string for
use in an RSS 2.0 feed.
"""
if type(author) is dict:
try:
return author["email"]
except KeyError:
return None
else:
if "@" in author and "." in author:
# Probably an email address
return author
else:
return None
def _add_subelems(root_element, mappings, dictionary):
"""
Add one subelement to root_element for each key in dictionary
which is supported by a mapping in mappings
"""
for mapping in mappings:
for key in mapping[0]:
if key in dictionary:
if len(mapping) == 2:
value = dictionary[key]
elif len(mapping) == 3:
value = mapping[2](dictionary[key])
_add_subelem(root_element, mapping[1], value)
break
def _add_subelem(root_element, name, value):
if value is None:
return
if type(value) is dict:
### HORRIBLE HACK!
if name=="link":
ET.SubElement(root_element, name, href=value["href"])
else:
subElem = ET.SubElement(root_element, name)
for key in value:
_add_subelem(subElem, key, value[key])
else:
ET.SubElement(root_element, name).text = value
def _stringify(tree, pretty):
"""
Turn an ElementTree into a string, optionally with line breaks and indentation.
"""
if pretty and feedformatterCanPrettyPrint:
string = StringIO()
doc = FromXml(ET.tostring(tree))
PrettyPrint(doc,string,indent=" ")
return string.getvalue()
else:
return ET.tostring(tree)
class Feed:
    """
    An in-memory feed that can be serialised as RSS 1.0, RSS 2.0 or Atom 1.0.

    feed    -- dictionary of feed/channel level properties.
    items   -- list of dictionaries, one per item.
    entries -- alias of items (the same list object).
    """

    ### INTERNAL METHODS ------------------------------

    def __init__(self, feed=None, items=None):
        """
        Create a feed.

        feed  -- optional dictionary of channel properties.
        items -- optional list of item dictionaries.

        Containers supplied by the caller are used as given, even when they
        are empty; fresh ones are created only when the argument is None.
        """
        self.feed = feed if feed is not None else {}
        self.items = items if items is not None else []
        self.entries = self.items

    @staticmethod
    def _write_string(filename, string):
        """Write string to filename, closing the file even on error."""
        with open(filename, "w") as fp:
            fp.write(string)

    ### RSS 1.0 STUFF ------------------------------

    def validate_rss1(self):
        """Raise an InvalidFeedException if the feed cannot be validly
        formatted as RSS 1.0."""
        # <channel> must contain "title"
        if "title" not in self.feed:
            raise InvalidFeedException("The channel element of an "
                "RSS 1.0 feed must contain a title subelement")
        # <channel> must contain "link"
        if "link" not in self.feed:
            raise InvalidFeedException("The channel element of an "
                " RSS 1.0 feeds must contain a link subelement")
        # <channel> must contain "description"
        if "description" not in self.feed:
            raise InvalidFeedException("The channel element of an "
                "RSS 1.0 feeds must contain a description subelement")
        # Each <item> must contain "title" and "link"
        for item in self.items:
            if "title" not in item:
                raise InvalidFeedException("Each item element in an RSS 1.0 "
                    "feed must contain a title subelement")
            if "link" not in item:
                raise InvalidFeedException("Each item element in an RSS 1.0 "
                    "feed must contain a link subelement")

    def format_rss1_string(self, validate=True, pretty=False):
        """Format the feed as RSS 1.0 and return the result as a string."""
        if validate:
            self.validate_rss1()
        RSS1root = ET.Element('rdf:RDF',
            {"xmlns:rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
             "xmlns": "http://purl.org/rss/1.0/"})
        RSS1channel = ET.SubElement(RSS1root, 'channel',
            {"rdf:about": self.feed["link"]})
        _add_subelems(RSS1channel, _rss1_channel_mappings, self.feed)
        # The channel carries a table of contents listing every item link ...
        RSS1contents = ET.SubElement(RSS1channel, 'items')
        RSS1contents_seq = ET.SubElement(RSS1contents, 'rdf:Seq')
        for item in self.items:
            ET.SubElement(RSS1contents_seq, 'rdf:li', resource=item["link"])
        # ... while the items themselves are siblings of the channel.
        for item in self.items:
            RSS1item = ET.SubElement(RSS1root, 'item',
                {"rdf:about": item["link"]})
            _add_subelems(RSS1item, _rss1_item_mappings, item)
        return _stringify(RSS1root, pretty=pretty)

    def format_rss1_file(self, filename, validate=True, pretty=False):
        """Format the feed as RSS 1.0 and save the result to a file."""
        self._write_string(filename,
                           self.format_rss1_string(validate, pretty))

    ### RSS 2.0 STUFF ------------------------------

    def validate_rss2(self):
        """Raise an InvalidFeedException if the feed cannot be validly
        formatted as RSS 2.0."""
        # <channel> must contain "title"
        if "title" not in self.feed:
            raise InvalidFeedException("The channel element of an "
                "RSS 2.0 feed must contain a title subelement")
        # <channel> must contain "link"
        if "link" not in self.feed:
            raise InvalidFeedException("The channel element of an "
                " RSS 2.0 feeds must contain a link subelement")
        # <channel> must contain "description"
        if "description" not in self.feed:
            raise InvalidFeedException("The channel element of an "
                "RSS 2.0 feeds must contain a description subelement")
        # Each <item> must contain at least "title" OR "description"
        for item in self.items:
            if not ("title" in item or "description" in item):
                raise InvalidFeedException("Each item element in an RSS 2.0 "
                    "feed must contain at least a title or description subelement")

    def format_rss2_string(self, validate=True, pretty=False):
        """Format the feed as RSS 2.0 and return the result as a string."""
        if validate:
            self.validate_rss2()
        RSS2root = ET.Element('rss', {'version': '2.0'})
        RSS2channel = ET.SubElement(RSS2root, 'channel')
        _add_subelems(RSS2channel, _rss2_channel_mappings, self.feed)
        for item in self.items:
            RSS2item = ET.SubElement(RSS2channel, 'item')
            _add_subelems(RSS2item, _rss2_item_mappings, item)
        return _stringify(RSS2root, pretty=pretty)

    def format_rss2_file(self, filename, validate=True, pretty=False):
        """Format the feed as RSS 2.0 and save the result to a file."""
        self._write_string(filename,
                           self.format_rss2_string(validate, pretty))

    ### ATOM STUFF ------------------------------

    def validate_atom(self):
        """Raise an InvalidFeedException if the feed cannot be validly
        formatted as Atom 1.0."""
        # Must have at least one "author" element in "feed" OR at least
        # "author" element in each "entry".
        if "author" not in self.feed:
            for entry in self.entries:
                if "author" not in entry:
                    raise InvalidFeedException("Atom feeds must have either at "
                        "least one author element in the feed element or at least "
                        " one author element in each entry element")

    def format_atom_string(self, validate=True, pretty=False):
        """Format the feed as Atom 1.0 and return the result as a string."""
        if validate:
            self.validate_atom()
        AtomRoot = ET.Element('feed', {"xmlns": "http://www.w3.org/2005/Atom"})
        _add_subelems(AtomRoot, _atom_feed_mappings, self.feed)
        for entry in self.entries:
            AtomItem = ET.SubElement(AtomRoot, 'entry')
            _add_subelems(AtomItem, _atom_item_mappings, entry)
        return _stringify(AtomRoot, pretty=pretty)

    def format_atom_file(self, filename, validate=True, pretty=False):
        """Format the feed as Atom 1.0 and save the result to a file."""
        self._write_string(filename,
                           self.format_atom_string(validate, pretty))
class InvalidFeedException(Exception):
    """Raised by the Feed validators when a required element is missing."""
### FACTORY FUNCTIONS ------------------------------
def fromUFP(ufp):
    """Build a Feed from the "feed" and "items" entries of ufp."""
    channel = ufp["feed"]
    entries = ufp["items"]
    return Feed(channel, entries)
### MAIN ------------------------------
def main():
    """Build a one-item sample feed and print it in all three formats."""
    feed = Feed()
    feed.feed.update({
        "title": "Test Feed",
        "link": "http://code.google.com/p/feedformatter/",
        "author": "Luke Maurits",
        "description": "A simple test feed for the feedformatter project",
    })
    feed.items.append({
        "title": "Test item",
        "link": "http://www.python.org",
        "description": "Python programming language",
        "guid": "1234567890",
    })
    # Single-argument print(...) behaves the same as a print statement.
    print("---- RSS 1.0 ----")
    print(feed.format_rss1_string(pretty=True))
    print("---- RSS 2.0 ----")
    print(feed.format_rss2_string(pretty=True))
    print("---- Atom 1.0 ----")
    print(feed.format_atom_string(pretty=True))


if __name__ == "__main__":
    main()

132
TwitchRSS/twitchrss.py Normal file
View File

@ -0,0 +1,132 @@
#
# Copyright 2015 Laszlo Zeke
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import webapp2
from webapp2 import Route
import urllib2
import json
import datetime
import logging
from feedformatter import Feed
from google.appengine.api import memcache
class MainPage(webapp2.RequestHandler):
    """Handler that answers with the static HTML landing page."""

    def get(self):
        """Write the landing page, typed as text/html, to the response."""
        page = """
<html>
<head>
<title>Twitch stream RSS generator</title>
</head>
<body>
<p style="font-family: helvetica; font-size:20pt; padding: 20px;">
Twitch stream RSS generator
</p>
<p style="font-family: helvetica; font-size:12pt; padding: 20px;">
You can get RSS of broadcasts by subscribing to https://twitchrss.appspot.com/vod/&lt;channel name&gt;<br/>
For example: <a href="https://twitchrss.appspot.com/vod/riotgames">https://twitchrss.appspot.com/vod/riotgames</a><br/><br/>
Not endorsed by Twitch.tv, just a fun project.<br/>
<a href="https://github.com/lzeke0/TwitchRSS">Project home</a>
</p>
</body>
</html>
"""
        response = self.response
        response.headers['Content-Type'] = 'text/html'
        response.write(page)
class RSSVoDServer(webapp2.RequestHandler):
    """Handler that serves an RSS 2.0 feed of a Twitch channel's videos.

    The raw JSON fetched from Twitch is kept in memcache for a short time so
    that repeated requests for the same channel do not hit the Twitch API.
    """

    # How long a fetched video list stays in memcache.
    CACHE_TTL_SECONDS = 120

    def get(self, channel):
        """Write the RSS feed for channel, or abort with 404 if the
        video list cannot be fetched."""
        channel_json = self.lookup_cache(channel)
        if channel_json == '':
            channel_json = self.fetch_json(channel)
            if channel_json == '':
                self.abort(404)
            else:
                self.store_cache(channel, channel_json)

        decoded_json = json.loads(channel_json)
        rss_data = self.construct_rss(channel, decoded_json)
        # NOTE(review): RSS is more commonly served as application/rss+xml;
        # left unchanged so client-visible behaviour stays the same.
        self.response.headers['Content-Type'] = 'application/xhtml+xml'
        self.response.write(rss_data)

    @staticmethod
    def lookup_cache(channel_name):
        """Return the cached JSON string for channel_name, or '' on a miss."""
        cached_data = memcache.get('vodcache:%s' % channel_name)
        if cached_data is None:
            logging.debug('Cache miss for %s', channel_name)
            return ''
        logging.debug('Cache hit for %s', channel_name)
        return cached_data

    @staticmethod
    def store_cache(channel_name, data):
        """Store data in memcache under the channel's key.

        Caching is best effort: a failure is logged and never propagated.
        """
        try:
            stored = memcache.set('vodcache:%s' % channel_name, data,
                                  RSSVoDServer.CACHE_TTL_SECONDS)
        except Exception:
            # Deliberately broad, but no longer a bare except, so
            # interpreter-exit exceptions are not swallowed.
            logging.exception('Caching data for %s failed', channel_name)
            return
        if stored:
            logging.debug('Cached data for %s', channel_name)
        else:
            logging.warning('Could not cache data for %s', channel_name)

    @staticmethod
    def fetch_json(channel):
        """Fetch the channel's video list from the Twitch API.

        Returns the response body, or '' when the request fails with a
        urllib2.URLError.
        """
        url = 'https://api.twitch.tv/kraken/channels/%s/videos?broadcasts=true' % channel
        request = urllib2.Request(
            url, headers={'Accept': 'application/vnd.twitchtv.v3+json'})
        try:
            result = urllib2.urlopen(request)
            try:
                logging.debug('Fetch from twitch for %s with code %s',
                              channel, result.getcode())
                return result.read()
            finally:
                # Release the connection even if reading fails.
                result.close()
        except urllib2.URLError as e:
            logging.warning('Fetch from twitch for %s failed: %s', channel, e)
            return ''

    @staticmethod
    def _build_item(channel_name, vod):
        """Translate one Twitch video dictionary into a feed item dictionary."""
        is_live = vod["status"] == "recording"
        # A broadcast still being recorded links to the live channel page.
        if is_live:
            link = "http://www.twitch.tv/%s" % channel_name
        else:
            link = vod['url']
        recorded = datetime.datetime.strptime(vod['recorded_at'],
                                              '%Y-%m-%dT%H:%M:%SZ')
        guid = vod['_id']
        if is_live:  # To show a different news item when live is over
            guid += "_live"
        return {
            "title": vod['title'],
            "link": link,
            "description": "<a href=\"%s\"><img src=\"%s\" /></a>" % (link, vod['preview']),
            "pubDate": recorded.timetuple(),
            "guid": guid,
            "ttl": '10',
        }

    def construct_rss(self, channel_name, vods_info):
        """Build the RSS 2.0 document for channel_name from the decoded
        Twitch response; abort with 404 if an expected key is missing."""
        feed = Feed()

        # Set the feed/channel level properties
        feed.feed["title"] = "%s's Twitch video RSS" % channel_name
        feed.feed["link"] = "https://twitchrss.appspot.com/"
        feed.feed["author"] = "Twitch RSS Gen"
        feed.feed["description"] = "The RSS Feed of %s's videos on Twitch" % channel_name

        # Create one item per video
        try:
            videos = vods_info['videos']
            if videos is not None:
                for vod in videos:
                    feed.items.append(self._build_item(channel_name, vod))
        except KeyError:
            self.abort(404)

        return feed.format_rss2_string()
# "/" serves the landing page; "/vod/<channel>" serves the feed for channel
# names made of 4 to 25 letters, digits or underscores.
app = webapp2.WSGIApplication(
    routes=[
        Route('/', MainPage),
        Route('/vod/<channel:[a-zA-Z0-9_]{4,25}>', RSSVoDServer),
    ],
    debug=False,
)

71
TwitchRSS/vendor.py Normal file
View File

@ -0,0 +1,71 @@
#
# Copyright 2014 Jon Wayne Parrott, [proppy], Michael R. Bernstein
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Notes:
# - Imported from https://github.com/jonparrott/Darth-Vendor/.
# - Added license header.
# - Renamed `darth.vendor` to `vendor.add` to match upcoming SDK interface.
# - Renamed `position` param to `index` to match upcoming SDK interface.
# - Removed funny artwork docstring.
import site
import os.path
import sys
def add(folder, index=1):
    """
    Adds the given folder to the python path. Supports namespaced packages.
    By default, packages in the given folder take precedence over site-packages
    and any previous path manipulations.

    Args:
      folder: Path to the folder containing packages. A virtualenv root is
        looked up relative to ``os.getcwd()``; any other folder is resolved
        relative to the directory containing this module.
      index: Where in ``sys.path`` to insert the vendor packages. By default
        this is set to 1. It is inadvisable to set it to 0 as it will override
        any modules in the current working directory.
    """
    # Check if the path contains a virtualenv.  The version is taken from
    # sys.version_info: slicing sys.version would give "3.1" for 3.10+.
    site_dir = os.path.join(
        folder, 'lib', 'python%d.%d' % sys.version_info[:2], 'site-packages')
    if os.path.exists(site_dir):
        folder = site_dir
    # Otherwise it's just a normal path, make it absolute.
    else:
        folder = os.path.join(os.path.dirname(__file__), folder)

    # Use site.addsitedir() because it appropriately reads .pth
    # files for namespaced packages. Unfortunately, there's not an
    # option to choose where addsitedir() puts its paths in sys.path
    # so we have to do a little bit of magic to make it play along.

    # Split sys.path at `index`. With the default of 1 this turns
    #   ['.', '/site-packages/x', 'site-packages/y']
    # into
    #   ['.'] and ['/site-packages/x', 'site-packages/y']
    # so that '.' stays at the top of the list while the vendor files
    # override everything else.
    sys.path, remainder = sys.path[:index], sys.path[index:]

    # Now we call addsitedir which will append our vendor directories
    # to sys.path (which was truncated by the last step.)
    site.addsitedir(folder)

    # Finally, we'll add the paths we removed back.
    # The final product is something like this:
    #   ['.', '/vendor-folder', '/site-packages/x', 'site-packages/y']
    sys.path.extend(remainder)