#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Version: 2.01
# Changelog:
# - Use multiple threads to optimize speed

# Version: 2.0
# Changelog:
# - Fixed ThePirateBay search engine
# - Fixed Meganova search engine
# - Fixed Mininova search engine

# Version: 1.9
# Changelog:
# - Various fixes

# Version: 1.8
# Changelog:
# - Fixed links from isohunt

# Version: 1.7
# Changelog:
# - merged with qbittorrent branch (code cleanup, indentation mistakes)
# - separate standalone and slave mode
# - added btjunkie
# - added meganova
# - added multithreaded mode

# Version: 1.6
# Changelog:
# - size is now always returned in bytes
# - seeders/leechers are now always returned as integers
# - cleaned up code
# - results are now displayed in real time
# - fixed piratebay, torrentreactor

# Author:
# Fabien Devaux
# Contributors:
# Christophe Dumez (qbittorrent integration)
# Thanks to gab #gcu @ irc.freenode.net (multipage support on PirateBay)
# Thanks to Elias (torrentreactor and isohunt search engines)
#
# Licence: BSD
"""Torrent meta-search script.

Each engine class exposes ``search(what)`` and reports every hit by calling
the module-level ``prettyPrinter`` with a dict holding the keys ``link``,
``name``, ``size``, ``seeds``, ``leech`` and ``engine_url``.

When the ``QBITTORRENT`` environment variable is set ("slave" mode) each hit
is written as one ``link|name|size|seeds|leech|engine_url`` line.  Otherwise
("standalone" mode) a terminal-friendly listing is printed and the hits are
collected in ``best_ratios`` for a final summary.

NOTE(review): the copy of this file that was restored here had all of its
line breaks collapsed and every HTML-tag-like ``<...>`` span removed.  The
statement nesting below was rebuilt from the syntax, and the places where
code was irrecoverably lost are marked with NOTE(review) comments; compare
them against the upstream source before relying on the affected engines.
"""

import os
import re
import sys
import threading
import traceback
from xml.dom import minidom  # DOM parser used by the Mininova engine

_PY3 = sys.version_info[0] >= 3

if _PY3:
    from html import escape as _escape
    from html.parser import HTMLParser
    from urllib.request import urlopen

    _INTEGER_TYPES = (int,)

    class _SGMLParserBase(HTMLParser):
        """Minimal stand-in for ``sgmllib.SGMLParser`` (absent on Python 3).

        Only the feature the engines rely on is provided: a start tag
        ``<foo ...>`` is dispatched to ``self.start_foo(attrs)`` if defined.
        """

        def handle_starttag(self, tag, attrs):
            handler = getattr(self, 'start_' + tag, None)
            if handler is not None:
                handler(attrs)
else:
    import sgmllib
    from cgi import escape as _escape
    from urllib import urlopen

    _INTEGER_TYPES = (int, long)  # noqa: F821 -- Python 2 only
    _SGMLParserBase = sgmllib.SGMLParser

# True when run from a terminal, False when driven by qBittorrent.
STANDALONE = 'QBITTORRENT' not in os.environ
# Run every selected engine in its own thread.
THREADED = True

# Hits collected by termPrettyPrinter() for the final summary.
best_ratios = []

# Engines run concurrently; keep each reported hit's lines together.
_output_lock = threading.Lock()

_LOST_MESSAGE = ('%s: part of this engine was lost from this copy of the '
                 'file; restore it from the upstream source before use')


def _toInt(value):
    """Return ``int(value)``, or 0 when the value is not a valid integer."""
    try:
        return int(value)
    except (TypeError, ValueError):
        return 0


def _printTermEntry(entry):
    """Print one hit in the terminal layout, without recording it."""
    with _output_lock:
        try:
            print("%(seeds)5s/%(leech)5s | %(size)10s | %(name)s" % entry)
        except (UnicodeDecodeError, UnicodeEncodeError):
            # The title cannot be written to this terminal: print the rest.
            print("%(seeds)5s/%(leech)5s | %(size)10s | " % entry)
        try:
            print("wget '%s'" % entry['link'].replace("'", "\\'"))
        except (KeyError, AttributeError, TypeError, UnicodeError):
            # Best effort only: a hit without a printable link is still listed.
            pass


def prettyPrinter(dictionnary):
    """Report one hit as a ``link|name|size|seeds|leech|engine_url`` line.

    This is the slave-mode reporter; in standalone mode the name is rebound
    to termPrettyPrinter() below.
    """
    line = ("%(link)s|%(name)s|%(size)s|%(seeds)s|%(leech)s|%(engine_url)s"
            % dictionnary)
    with _output_lock:
        print(line)


def termPrettyPrinter(dictionnary):
    """Report one hit on the terminal and remember it in ``best_ratios``.

    The dict is modified in place: an integer ``size`` is replaced by its
    human-readable form and ``seeds``/``leech`` are converted to integers
    (0 when they are not numeric).
    """
    if isinstance(dictionnary['size'], _INTEGER_TYPES):
        dictionnary['size'] = bytesToHuman(dictionnary['size'])
    _printTermEntry(dictionnary)
    dictionnary['seeds'] = _toInt(dictionnary['seeds'])
    dictionnary['leech'] = _toInt(dictionnary['leech'])
    best_ratios.append(dictionnary)


if STANDALONE:
    prettyPrinter = termPrettyPrinter


def bytesToHuman(filesize):
    """ Convert float (size in bytes) to readable string """
    unit = ''
    for prefix in ('k', 'M', 'G', 'T'):
        if not filesize > 1100.0:
            break
        filesize /= 1024.0
        unit = prefix
    return '%.1f%sB' % (filesize, unit)


_SIZE_RE = re.compile(r'\s*([0-9]*\.?[0-9]+)\s*([A-Za-z]*)')
_UNIT_SHIFTS = {'T': 40, 'G': 30, 'M': 20, 'K': 10}


def anySizeToBytes(size_string):
    """ Convert a string like '1 KB' to 1024 (bytes)

    The number and the unit may or may not be separated by whitespace, and
    only the first letter of the unit is significant (K, M, G or T, any
    case); any other unit, or none, leaves the number unscaled.

    Returns the size as an int, or -1 when no number can be read from
    size_string (including when it is not a string at all).
    """
    try:
        match = _SIZE_RE.match(size_string)
    except TypeError:
        return -1
    if match is None:
        return -1
    number, unit = match.groups()
    size = float(number)
    shift = _UNIT_SHIFTS.get(unit[:1].upper())
    if shift is not None:
        size *= 2 ** shift
    return int(size)


def _fetch(url, decode=True):
    """Download ``url`` and return its body.

    With ``decode`` the body is decoded as UTF-8 (undecodable bytes are
    replaced).  Without it the raw bytes are returned on Python 2; on
    Python 3 the body is decoded anyway because the parsers need text.
    """
    handle = urlopen(url)
    try:
        raw = handle.read()
    finally:
        handle.close()
    if decode or _PY3:
        return raw.decode('utf-8', 'replace')
    return raw


def _crawlPages(parser_class, engine_url, page_url, first_page=0, decode=True):
    """Feed successive result pages to fresh parsers until one yields nothing.

    ``page_url`` maps a page counter (starting at ``first_page``) to the
    address to download.  A page counts as empty when its parser appended
    nothing to the results list it was given.
    """
    page = first_page
    while True:
        results = []
        parser = parser_class(results, engine_url)
        parser.feed(_fetch(page_url(page), decode))
        parser.close()
        if not results:
            break
        page += 1


def _publishRegexResults(engine, dat, name_cleanup_re=None):
    """Report every hit found in ``dat`` by a regex-driven engine.

    ``engine.section_re`` cuts the page into candidate sections and
    ``engine.torrent_re`` must provide the named groups ``link``, ``name``,
    ``size``, ``seeds`` and ``leech`` for each of them.  Links are taken to
    be relative to ``engine.url``.  When ``name_cleanup_re`` is given, every
    match of it is removed from the name.
    """
    for section in engine.section_re.finditer(dat):
        found = engine.torrent_re.search(section.group(0))
        if not found:
            continue
        torrent_infos = found.groupdict()
        if name_cleanup_re is not None:
            torrent_infos['name'] = name_cleanup_re.sub('', torrent_infos['name'])
        torrent_infos['engine_url'] = engine.url
        torrent_infos['size'] = anySizeToBytes(torrent_infos['size'])
        torrent_infos['link'] = engine.url + torrent_infos['link']
        prettyPrinter(torrent_infos)


################################################################################
# Every engine should have a "search" method taking
# a space-free string as parameter (ex. "family+guy")
# it should call prettyPrinter() with a dict as parameter
# see above for dict keys
# As a convention, try to list results by decreasing number of seeds or similar
################################################################################

class _ResultRowParser(_SGMLParserBase):
    """Shared logic of the table-scraping parsers.

    A subclass starts a result by setting ``current_item`` to a dict and
    ``td_counter`` to 0.  Every following ``<td>`` increments the counter and
    text is appended to the field that ``columns`` maps the counter to.  Once
    the counter exceeds 7 the item is normalised and reported, and one marker
    is appended to ``results`` so the caller can tell the page was not empty.
    """

    # td_counter value -> item key receiving the text found in that cell
    columns = {}

    def __init__(self, results, url, *args):
        _SGMLParserBase.__init__(self)
        self.td_counter = None
        self.current_item = None
        self.results = results
        self.url = url

    def handle_data(self, data):
        field = self.columns.get(self.td_counter)
        if field is not None and self.current_item is not None:
            self.current_item[field] = self.current_item.get(field, '') + data.strip()

    def start_td(self, attr):
        if not isinstance(self.td_counter, int):
            return
        self.td_counter += 1
        if self.td_counter <= 7:
            return
        # Past the last column of interest: the row is complete.
        self.td_counter = None
        if self.current_item:
            self.publish(self.current_item)

    def complete_item(self, item):
        """Hook for engine-specific fields, called before an item is reported."""

    def publish(self, item):
        """Normalise ``item`` and report it; rows without a link are dropped."""
        item.setdefault('name', '')
        self.complete_item(item)
        if item.get('link') is None:
            return
        item['engine_url'] = self.url
        item['size'] = anySizeToBytes(item.get('size', ''))
        for key in ('seeds', 'leech'):
            value = item.get(key)
            if value is None or not value.isdigit():
                item[key] = 0
        prettyPrinter(item)
        self.results.append('a')


class PirateBay(object):
    """Search engine for thepiratebay.org."""

    url = 'http://thepiratebay.org'

    def __init__(self):
        self.results = []
        self.parser = self.SimpleSGMLParser(self.results, self.url)

    class SimpleSGMLParser(_ResultRowParser):
        columns = {1: 'name', 5: 'size', 6: 'seeds', 7: 'leech'}

        def __init__(self, results, url, *args):
            _ResultRowParser.__init__(self, results, url, *args)
            self.code = 0

        def start_a(self, attr):
            href = dict(attr).get('href') or ''
            if href.startswith('/browse'):
                # A category link opens a new result row.
                self.current_item = {}
                self.td_counter = 0
            elif href.startswith('/tor'):
                parts = href.split('/')
                if len(parts) > 2:
                    self.code = parts[2]
            elif href.startswith('http://torrents.thepiratebay.org/%s' % self.code):
                if self.current_item is not None and isinstance(self.td_counter, int):
                    self.current_item['link'] = href.strip()
                    self.td_counter = self.td_counter + 1

    def search(self, what):
        # The page body is fed to the parser undecoded on Python 2.
        _crawlPages(self.SimpleSGMLParser, self.url,
                    lambda page: self.url + '/search/%s/%u/0/0' % (what, page),
                    first_page=0, decode=False)


class Mininova(object):
    """Search engine for mininova.org (currently unusable, see search())."""

    url = 'http://www.mininova.org'
    table_items = 'added cat name size seeds leech'.split()

    def _get_link(self, lnk):
        """Return the absolute address of the first ``<a>`` below ``lnk``."""
        lnk = lnk.getElementsByTagName('a').item(0)
        return self.url + lnk.attributes.get('href').value

    @staticmethod
    def _get_text(txt):
        """Return the concatenated text of DOM node ``txt`` and its children."""
        if txt.nodeType == txt.TEXT_NODE:
            return txt.toxml()
        return ''.join([Mininova._get_text(n) for n in txt.childNodes])

    def search(self, what):
        # NOTE(review): only the beginning of this method survived.  What is
        # left shows that it downloaded
        #     self.url + '/search/%s/seeds' % (what,)
        # decoded it as UTF-8 and started rewriting the page with re.sub();
        # everything after that call's opening quote is missing.  The two
        # helpers above were nested functions of this method (get_link and
        # get_text).  Presumably the page was then walked with minidom using
        # table_items -- confirm against the upstream source.
        raise NotImplementedError(_LOST_MESSAGE % 'Mininova')


class BtJunkie(object):
    """Regex-driven search engine (currently unusable, see notes)."""

    # NOTE(review): the header of this class is missing from this copy.  The
    # name comes from the engine list in main(); the surviving code sat
    # between Mininova and MegaNova, so it is assumed to belong here.  The
    # site address, the search address and section_re are lost.
    url = None
    search_path = None
    section_re = None
    # NOTE(review): what is left of torrent_re, with its group names and
    # some literal text stripped, is:
    #   '(?s)href="(?P.*?do=download[^"]+).*?'
    #   'class="BlckUnd">(?P.*?).*?'
    #   '>(?P\d+MB).*?'
    #   '>(?P\d+).*?'
    #   '>(?P\d+)'
    # The code consuming the match needs link, name, size, seeds and leech.
    torrent_re = None
    # NOTE(review): a re.sub(pattern, '', name) cleaned the name up; its
    # pattern is lost as well.
    name_cleanup_re = None

    def search(self, what):
        if None in (self.url, self.search_path, self.section_re, self.torrent_re):
            raise NotImplementedError(_LOST_MESSAGE % 'BtJunkie')
        dat = _fetch(self.url + self.search_path % what)
        _publishRegexResults(self, dat, self.name_cleanup_re)


class MegaNova(object):
    """Search engine for meganova.org (currently unusable, see notes)."""

    url = 'http://www.meganova.org'
    # I know it's not very readable, but the SGML parser feels in pain
    # NOTE(review): section_re is entirely lost and torrent_re lost its
    # beginning, its group names and some literal text.  What is left is:
    #   '(?s).*?/torrent/.*?)".*?'
    #   '(?P.*?).*?'
    #   '>(?P[0-9.]+\s+.B).*?'
    #   '>(?P\d+)<.*?'
    #   '>(?P\d+)<'
    section_re = None
    torrent_re = None

    def search(self, what):
        if self.section_re is None or self.torrent_re is None:
            raise NotImplementedError(_LOST_MESSAGE % 'MegaNova')
        address = self.url + '/find-seeds/%s.html' % what
        dat = _fetch(address)
        # Diagnostic only: keep it out of stdout, which carries the results.
        sys.stderr.write('url is ' + address + '\n')
        _publishRegexResults(self, dat)


class Reactor(object):
    """Search engine for torrentreactor."""

    url = 'http://tr.searching.com'

    class SimpleSGMLParser(_ResultRowParser):
        columns = {0: 'name', 1: 'size', 2: 'seeds', 3: 'leech'}

        def __init__(self, results, url, *args):
            _ResultRowParser.__init__(self, results, url, *args)
            self.id = None
            self.has_results = False

        def start_a(self, attr):
            href = dict(attr).get('href') or ''
            if not href.startswith('view.php'):
                return
            # Extract the torrent id; it is kept on the parser so that the
            # download link can be built once the row is complete.
            found = re.search(r'=\s*(\d+)', href)
            if found is None:
                return
            self.current_item = {}
            self.td_counter = 0
            self.id = str(int(found.group(1)))

        def complete_item(self, item):
            item['link'] = ('http://download.torrentreactor.net/download.php?name=%s&id=%s'
                            % (_escape(item['name'], False), self.id))
            self.has_results = True

    def __init__(self):
        self.results = []
        self.parser = self.SimpleSGMLParser(self.results, self.url)

    def search(self, what):
        # Results are requested 50 at a time through the "skip" parameter.
        _crawlPages(self.SimpleSGMLParser, self.url,
                    lambda page: self.url + '/search.php?search=&words=%s&skip=%s' % (what, page * 50),
                    first_page=0)


class Isohunt(object):
    """Search engine for isohunt.com."""

    url = 'http://isohunt.com'

    class SimpleSGMLParser(_ResultRowParser):
        columns = {3: 'name', 4: 'size', 5: 'seeds', 6: 'leech'}

        def start_tr(self, attr):
            params = dict(attr)
            if 'onclick' not in params:
                return
            Durl = 'http://isohunt.com/download'
            self.current_item = {}
            self.td_counter = 0
            try:
                self.current_item['link'] = '%s/%s' % (
                    Durl, (params['onclick'] or '').split('/')[2])
            except IndexError:
                # No torrent id in the handler: the row will not be reported.
                self.current_item['link'] = None

    def __init__(self):
        self.results = []
        self.parser = self.SimpleSGMLParser(self.results, self.url)

    def search(self, what):
        _crawlPages(self.SimpleSGMLParser, self.url,
                    lambda page: self.url + '/torrents.php?ihq=%s&ihp=%s' % (what, page),
                    first_page=1)


class EngineLauncher(threading.Thread):
    """Thread running ``engine.search(what)``."""

    def __init__(self, engine, what):
        threading.Thread.__init__(self)
        self.engine = engine
        self.what = what

    def run(self):
        # Same policy as the non-threaded path in main(): a failing engine
        # must not disturb the others, and is only reported on a terminal.
        try:
            self.engine.search(self.what)
        except Exception:
            if STANDALONE:
                traceback.print_exc()


def main(argv=None):
    """Run the selected engines and print the summary.

    ``argv[1]`` is ``all`` or a comma-separated list of engine names and the
    remaining arguments are the keywords.  When ``argv[1]`` names no known
    engine, BtJunkie is used and ``argv[1]`` is treated as the first keyword.

    Raises SystemExit with a usage message when no argument is given.
    """
    if argv is None:
        argv = sys.argv
    available_engines_list = BtJunkie, MegaNova, Mininova, PirateBay, Reactor, Isohunt

    if len(argv) < 2:
        raise SystemExit('./nova.py [all|engine1[,engine2]*] <keywords>\navailable engines: %s'
                         % (','.join(e.__name__ for e in available_engines_list)))

    engines_list = [e.lower() for e in argv[1].strip().split(',')]
    if 'all' in engines_list:
        engines_list = [e.__name__.lower() for e in available_engines_list]

    selected_engines = [e for e in available_engines_list
                        if e.__name__.lower() in engines_list]
    if not selected_engines:
        selected_engines = [BtJunkie]
        what = '+'.join(argv[1:])
    else:
        what = '+'.join(argv[2:])

    threads = []
    for engine in selected_engines:
        try:
            if THREADED:
                launcher = EngineLauncher(engine(), what)
                threads.append(launcher)
                launcher.start()
            else:
                engine().search(what)
        except Exception:
            if STANDALONE:
                traceback.print_exc()
    for thread in threads:
        thread.join()

    # Best entries (largest seeds - leech) first.
    best_ratios.sort(key=lambda entry: entry['seeds'] - entry['leech'], reverse=True)

    max_results = 10
    print("########## TOP %d RATIOS ##########" % max_results)
    shown = 0
    for br in best_ratios:
        if br['seeds'] > 1:  # entries with at most one seed are left out
            # Print only: going through prettyPrinter() again would append
            # to best_ratios while it is being iterated.
            _printTermEntry(br)
            shown += 1
            if shown >= max_results:
                break


if __name__ == '__main__':
    main()