#!/usr/bin/env python # fetch an RSS feed with enclosures, submit any torrents found # to the Transmission bittorrent client. import sys, os, re, socket, httplib, urllib, time, pprint import feedparser class EnclosureFetcher: def __init__(self, dest_dir='', opts={}): self.dest_dir = dest_dir self.db_file = self.path('.latest.db') self.url = None self.log_started = False try: db = open(self.db_file).read() except (IOError, OSError): db = '{}' try: self.db = eval(db, {}, {}) except Exception, e: self.log(repr(e)) sys.exit(1) for opt, default in ( ('verbose', 0), ('force', False), ('dry_run', False) ): setattr(self, opt, opts.get(opt, default)) def __del__(self): self.close() def close(self): open(self.db_file, 'w').write(repr(self.db).replace('), ', '),\n ') + '\n') def log(self, msg, level=0): if level > self.verbose: return if not self.log_started: print time.asctime() if self.url: print '> %s' % self.url self.log_started = True if type(msg) in (str, unicode): print msg else: pprint.pprint(msg) def path(self, *suffixes): return os.path.join(self.dest_dir, *suffixes) def fetch(self, url, patterns=[]): self.latest_updated = self.db.setdefault(url, (0,)) self.url = url feed = feedparser.parse(url) self.log(feed, 4) if not hasattr(feed, 'status'): return self.log('! feed object has no status attribute: %r' % feed) if feed.status == 301: # permanently redirected self.log('* Redirect: %s\n ==> %s' % (url, feed.href)) elif feed.status == 410: # gone self.log('! Gone: %s' % url) if url in self.db: del self.db[url] return elif feed.status != 200: self.log('* Status %d: %s' % (feed.status, url)) self.new_latest = self.latest_updated if patterns: entries = [e for e in feed.entries for p in patterns if re.search(p, e.title)] else: entries = feed.entries for entry in entries: self.get_entry(entry) self.db[url] = self.new_latest self.url = None def get_entry(self, entry): self.log('> Entry %s' % entry.title, 1) self.log(entry, 3) if not self.force and entry.updated_parsed <= self.latest_updated: return self.log('* Old entry, already looked at earlier', 1) if self.new_latest < entry.updated_parsed: self.new_latest = entry.updated_parsed self.log('* Updating latest date to %s' % time.asctime(entry.updated_parsed), 1) for enclosure in [ e for e in entry.get('enclosures', []) if 'torrent' in e.type ]: self.submit_to_transmission(enclosure) def submit_to_transmission(self, enclosure): """Submit the enclosure to the Transmission RPC interface""" conn = httplib.HTTPConnection('localhost:9091') # the href must not be URL-encoded, or Transmission will barf url = '/transmission/rpc?method=torrent-add&filename=' + enclosure.href headers = {'User-Agent': 'fetcher/0.2 (+http://drbeat.li/py/)'} self.log('* Submitting "%s" to Transmission' % enclosure.href) self.log('* Url: %s' % url, 1) if self.dry_run: return while True: try: self.log('* Headers: %r' % headers, 2) conn.request('GET', url, headers=headers) response = conn.getresponse() self.log(response.getheaders(), 3) reply = response.read() self.log(reply, 3) except (httplib.HTTPException, socket.error), e: return self.log('! during submission: %s' % e) if response.status == 200: # ok break elif response.status == 409: # Transmission's CSRF avoidance h = 'X-Transmission-Session-Id' headers[h] = response.getheader(h) else: # error return self.log('! %d %s\n%s\n\n%s' % ( response.status, response.reason, '\n'.join(['%s: %s' % hv for hv in response.getheaders()]), reply )) try: reply = eval(reply, {}, {}) except Exception, e: self.log(reply) reply = {'result': 'exception: %s' % e} if reply['result'] != 'success': self.log(reply) def fetch(dest_dir, *feeds, **opts): f = EnclosureFetcher(dest_dir, opts) for feed in feeds: if type(feed) is tuple: f.fetch(feed[0], feed[1]) # URL, seq of patterns else: f.fetch(feed) # lone URL f.close() if __name__ == '__main__': fetch(os.path.join(os.path.expanduser('~'), 'Downloads', '1torrents'), ('http://ezrss.it/feed/', ( r'(?i)Heroes 4x.+720p', r'(?i)Numb3rs 6x.+2HD', r'(?i)Castle 2009.+2HD', )), verbose=sys.argv.count('-v'), force=('-f' in sys.argv), dry_run=('-d' in sys.argv) )