#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Version: 2.01
# Changelog:
# - Use multiple threads to optimize speed

# Version: 2.0
# Changelog:
# - Fixed ThePirateBay search engine
# - Fixed Meganova search engine
# - Fixed Mininova search engine

# Version: 1.9
# Changelog:
# - Various fixes

# Version: 1.8
# Changelog:
# - Fixed links from isohunt

# Version: 1.7
# Changelog:
# - merged with qbittorrent branch (code cleanup, indentation mistakes)
# - separate standalone and slave mode
# - added btjunkie
# - added meganova
# - added multithreaded mode

# Version: 1.6
# Changelog:
# - size is now always returned in bytes
# - seeders/leechers are now always returned as integers
# - cleaned up code
# - results are now displayed in real time
# - fixed piratebay, torrentreactor

# Author:
# Fabien Devaux <fab AT gnux DOT info>
# Contributors:
# Christophe Dumez <chris@qbittorrent.org> (qbittorrent integration)
# Thanks to gab #gcu @ irc.freenode.net (multipage support on PirateBay)
# Thanks to Elias <gekko04@users.sourceforge.net> (torrentreactor and isohunt search engines)
#
# Licence: BSD

import sys
import urllib
import sgmllib
from xml.dom import minidom
import re
import os
import cgi
import traceback
import threading

STANDALONE = True
THREADED = True

if os.environ.has_key('QBITTORRENT'):
    STANDALONE = False

best_ratios = []

def prettyPrinter(dictionnary):
    print "%(link)s|%(name)s|%(size)s|%(seeds)s|%(leech)s|%(engine_url)s"%dictionnary

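# For example (illustrative values only, not real results), a result dict
#   {'link': 'http://example.com/a.torrent', 'name': 'example',
#    'size': 1024, 'seeds': 12, 'leech': 3, 'engine_url': 'http://example.com'}
# is printed in slave mode as:
#   http://example.com/a.torrent|example|1024|12|3|http://example.com
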
if STANDALONE:
    def termPrettyPrinter(dictionnary):
        # anySizeToBytes() may return a long for big sizes on 32-bit systems
        if isinstance(dictionnary['size'], (int, long)):
            dictionnary['size'] = bytesToHuman(dictionnary['size'])
        try:
            print "%(seeds)5s/%(leech)5s | %(size)10s | %(name)s"%dictionnary
        except (UnicodeDecodeError, UnicodeEncodeError):
            print "%(seeds)5s/%(leech)5s | %(size)10s | <unprintable title>"%dictionnary
        try:
            print "wget '%s'"%dictionnary['link'].replace("'","\\'")
        except:
            pass
        # zero counts are replaced with a tiny non-zero value
        # for the ratio summary printed at the end
        dictionnary['seeds'] = int(dictionnary['seeds']) or 0.00000001
        dictionnary['leech'] = int(dictionnary['leech']) or 0.00000001
        best_ratios.append(dictionnary)

    globals()['prettyPrinter'] = termPrettyPrinter

def bytesToHuman(filesize):
    """
    Convert float (size in bytes) to readable string
    """
    decimators = ('k','M','G','T')
    unit = ''
    for n in range(len(decimators)):
        if filesize > 1100.0:
            filesize /= 1024.0
            unit = decimators[n]
    return '%.1f%sB'%(filesize, unit)

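# Usage sketch (values follow from the function above):
#   bytesToHuman(123456789) -> '117.7MB'
#   bytesToHuman(512)       -> '512.0B'
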
def anySizeToBytes(size_string):
    """
    Convert a string like '1 KB' to 1024 (bytes)
    """
    # separate the number from the unit
    try:
        size, unit = size_string.split()
    except (ValueError, TypeError):
        try:
            size = size_string.strip()
            unit = ''.join([c for c in size if c.isalpha()])
            if len(unit) > 0:
                # keep only the numeric part ('700MB' -> '700')
                size = size[:-len(unit)]
        except (ValueError, TypeError):
            return -1

    try:
        size = float(size)
    except ValueError:
        return -1
    if len(unit) > 0:
        short_unit = unit.upper()[0]
    else:
        short_unit = ''

    # convert using binary multipliers (1 KB = 2**10 bytes)
    units_dict = { 'T': 40, 'G': 30, 'M': 20, 'K': 10 }
    if units_dict.has_key(short_unit):
        size = size * 2**units_dict[short_unit]
    return int(size)

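# Usage sketch (values follow from the function above):
#   anySizeToBytes('1 KB')   -> 1024
#   anySizeToBytes('700 MB') -> 734003200
#   anySizeToBytes('700MB')  -> 734003200  (no-space form handled too)
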
################################################################################
# Every engine should have a "search" method taking
# a space-free string as parameter (e.g. "family+guy").
# It should call prettyPrinter() with a dict as parameter;
# see prettyPrinter() above for the expected dict keys.
# As a convention, try to list results by decreasing number of seeds or similar.
################################################################################

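# A minimal sketch of that contract. "ExampleEngine" is a hypothetical engine
# added for illustration only: it is not registered in available_engines_list
# below and it emits a single fake result instead of querying a site.
class ExampleEngine(object):
    url = 'http://example.com'

    def search(self, what):
        # 'what' is a space-free string such as "family+guy";
        # call prettyPrinter() once per result, best-seeded first
        prettyPrinter({'link': self.url+'/example.torrent',
                       'name': 'example result for %s'%what,
                       'size': anySizeToBytes('700 MB'),
                       'seeds': 42,
                       'leech': 7,
                       'engine_url': self.url})
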
class PirateBay(object):
    url = 'http://thepiratebay.org'

    def __init__(self):
        self.results = []
        self.parser = self.SimpleSGMLParser(self.results, self.url)

    class SimpleSGMLParser(sgmllib.SGMLParser):
        def __init__(self, results, url, *args):
            sgmllib.SGMLParser.__init__(self)
            self.td_counter = None
            self.current_item = None
            self.results = results
            self.url = url
            self.code = 0

        def start_a(self, attr):
            params = dict(attr)
            if params['href'].startswith('/browse'):
                self.current_item = {}
                self.td_counter = 0
            elif params['href'].startswith('/tor'):
                self.code = params['href'].split('/')[2]
            elif params['href'].startswith('http://torrents.thepiratebay.org/%s'%self.code):
                self.current_item['link'] = params['href'].strip()
                self.td_counter = self.td_counter+1

        def handle_data(self, data):
            if self.td_counter == 1:
                if not self.current_item.has_key('name'):
                    self.current_item['name'] = ''
                self.current_item['name'] += data.strip()
            elif self.td_counter == 5:
                if not self.current_item.has_key('size'):
                    self.current_item['size'] = ''
                self.current_item['size'] += data.strip()
            elif self.td_counter == 6:
                if not self.current_item.has_key('seeds'):
                    self.current_item['seeds'] = ''
                self.current_item['seeds'] += data.strip()
            elif self.td_counter == 7:
                if not self.current_item.has_key('leech'):
                    self.current_item['leech'] = ''
                self.current_item['leech'] += data.strip()

        def start_td(self, attr):
            if isinstance(self.td_counter, int):
                self.td_counter += 1
                if self.td_counter > 7:
                    self.td_counter = None
                    # display the completed item
                    if self.current_item:
                        self.current_item['engine_url'] = self.url
                        self.current_item['size'] = anySizeToBytes(self.current_item['size'])
                        if not self.current_item['seeds'].isdigit():
                            self.current_item['seeds'] = 0
                        if not self.current_item['leech'].isdigit():
                            self.current_item['leech'] = 0
                        prettyPrinter(self.current_item)
                        self.results.append('a')

    def search(self, what):
        i = 0
        while True:
            results = []
            parser = self.SimpleSGMLParser(results, self.url)
            dat = urllib.urlopen(self.url+'/search/%s/%u/0/0' % (what, i)).read()
            parser.feed(dat)
            parser.close()
            if len(results) <= 0:
                break
            i += 1

class Mininova(object):
    url = 'http://www.mininova.org'
    table_items = 'added cat name size seeds leech'.split()

    def search(self, what):
        order = 'seeds' # must be one of self.table_items

        def get_link(lnk):
            lnk = lnk.getElementsByTagName('a').item(0)
            return self.url+lnk.attributes.get('href').value

        def get_text(txt):
            if txt.nodeType == txt.TEXT_NODE:
                return txt.toxml()
            else:
                return ''.join([get_text(n) for n in txt.childNodes])

        dat = urllib.urlopen(self.url+'/search/%s/%s'%(what, order)).read().decode('utf-8', 'replace')
        # neutralize the boardreader link before parsing the page as XML
        dat = re.sub("<a href=\"http://www.boardreader.com/index.php.*\"", "<a href=\"plop\"", dat)
        x = minidom.parseString(dat.encode('utf-8', 'replace'))
        table = x.getElementsByTagName('table').item(0)
        if not table:
            return
        for tr in table.getElementsByTagName('tr'):
            tds = tr.getElementsByTagName('td')
            if tds:
                i = 0
                vals = {}
                for td in tds:
                    if self.table_items[i] == 'name':
                        vals['link'] = get_link(td).strip()
                    vals[self.table_items[i]] = get_text(td).strip()
                    i += 1
                vals['engine_url'] = self.url
                vals['size'] = anySizeToBytes(vals['size'])
                if not vals['seeds'].isdigit():
                    vals['seeds'] = 0
                if not vals['leech'].isdigit():
                    vals['leech'] = 0
                prettyPrinter(vals)

# TODO: add multipage
class BtJunkie(object):
    url = 'http://btjunkie.org'

    def search(self, what):
        dat = urllib.urlopen(self.url+'/search?q=%s'%what).read().decode('utf8', 'replace')
        # Not very readable, but the SGML parser chokes on this page,
        # so results are scraped with regular expressions instead.
        section_re = re.compile('(?s)href="/torrent\?do=download.*?<tr>')
        torrent_re = re.compile('(?s)href="(?P<link>.*?do=download[^"]+).*?'
                                'class="BlckUnd">(?P<name>.*?)</a>.*?'
                                '>(?P<size>\d+MB)</font>.*?'
                                '>(?P<seeds>\d+)</font>.*?'
                                '>(?P<leech>\d+)</font>')
        for match in section_re.finditer(dat):
            txt = match.group(0)
            m = torrent_re.search(txt)
            if m:
                torrent_infos = m.groupdict()
                torrent_infos['name'] = re.sub('</?font.*?>', '', torrent_infos['name'])
                torrent_infos['engine_url'] = self.url
                torrent_infos['size'] = anySizeToBytes(torrent_infos['size'])
                torrent_infos['link'] = self.url+torrent_infos['link']
                prettyPrinter(torrent_infos)

class MegaNova(object):
    url = 'http://www.meganova.org'

    def search(self, what):
        dat = urllib.urlopen(self.url+'/find-seeds/%s.html'%what).read().decode('utf8', 'replace')
        # Not very readable, but the SGML parser chokes on this page,
        # so results are scraped with regular expressions instead.
        section_re = re.compile('(?s)<td width="6%">.*?</tr')
        torrent_re = re.compile('(?s)href="(?P<link>/torrent/.*?)".*?'
                                '<span.*?>(?P<name>.*?)</span>.*?'
                                '>(?P<size>[0-9.]+\s+.B).*?'
                                '>(?P<seeds>\d+)<.*?'
                                '>(?P<leech>\d+)<')

        for match in section_re.finditer(dat):
            txt = match.group(0)
            m = torrent_re.search(txt)
            if m:
                torrent_infos = m.groupdict()
                torrent_infos['engine_url'] = self.url
                torrent_infos['size'] = anySizeToBytes(torrent_infos['size'])
                torrent_infos['link'] = self.url+torrent_infos['link']
                prettyPrinter(torrent_infos)

class Reactor(object):
    url = 'http://tr.searching.com'

    class SimpleSGMLParser(sgmllib.SGMLParser):
        def __init__(self, results, url, *args):
            sgmllib.SGMLParser.__init__(self)
            self.td_counter = None
            self.current_item = None
            self.results = results
            self.id = None
            self.url = url

        def start_a(self, attr):
            params = dict(attr)
            if params['href'].startswith('view.php'):
                self.current_item = {}
                self.td_counter = 0
                # extract the torrent id and keep it so that the
                # download link can be built later
                equal = params['href'].find("=")
                self.id = str(int(params['href'][equal+1:]))

        def handle_data(self, data):
            if self.td_counter == 0:
                if not self.current_item.has_key('name'):
                    self.current_item['name'] = ''
                self.current_item['name'] += data.strip()
            elif self.td_counter == 1:
                if not self.current_item.has_key('size'):
                    self.current_item['size'] = ''
                self.current_item['size'] += data.strip()
            elif self.td_counter == 2:
                if not self.current_item.has_key('seeds'):
                    self.current_item['seeds'] = ''
                self.current_item['seeds'] += data.strip()
            elif self.td_counter == 3:
                if not self.current_item.has_key('leech'):
                    self.current_item['leech'] = ''
                self.current_item['leech'] += data.strip()

        def start_td(self, attr):
            if isinstance(self.td_counter, int):
                self.td_counter += 1
                if self.td_counter > 7:
                    self.td_counter = None
                    # add item to results
                    if self.current_item:
                        self.current_item['link'] = 'http://download.torrentreactor.net/download.php?name=%s&id=%s'%(cgi.escape(self.current_item['name']), self.id)
                        self.current_item['engine_url'] = self.url
                        self.current_item['size'] = anySizeToBytes(self.current_item['size'])
                        if not self.current_item['seeds'].isdigit():
                            self.current_item['seeds'] = 0
                        if not self.current_item['leech'].isdigit():
                            self.current_item['leech'] = 0
                        prettyPrinter(self.current_item)
                        self.results.append('a')

    def __init__(self):
        self.results = []
        self.parser = self.SimpleSGMLParser(self.results, self.url)

    def search(self, what):
        i = 0
        while True:
            results = []
            parser = self.SimpleSGMLParser(results, self.url)
            dat = urllib.urlopen(self.url+'/search.php?search=&words=%s&skip=%s'%(what, (i*50))).read().decode('utf-8', 'replace')
            parser.feed(dat)
            parser.close()
            if len(results) <= 0:
                break
            i += 1

class Isohunt(object):
    url = 'http://isohunt.com'

    class SimpleSGMLParser(sgmllib.SGMLParser):
        def __init__(self, results, url, *args):
            sgmllib.SGMLParser.__init__(self)
            self.td_counter = None
            self.current_item = None
            self.results = results
            self.url = url

        def start_tr(self, attr):
            params = dict(attr)
            if 'onclick' in params:
                Durl = 'http://isohunt.com/download'
                self.current_item = {}
                self.td_counter = 0
                try:
                    self.current_item['link'] = '%s/%s'%(Durl, params['onclick'].split('/')[2])
                except IndexError:
                    self.current_item['link'] = None

        def handle_data(self, data):
            if self.td_counter == 3:
                if not self.current_item.has_key('name'):
                    self.current_item['name'] = ''
                self.current_item['name'] += data.strip()
            elif self.td_counter == 4:
                if not self.current_item.has_key('size'):
                    self.current_item['size'] = ''
                self.current_item['size'] += data.strip()
            elif self.td_counter == 5:
                if not self.current_item.has_key('seeds'):
                    self.current_item['seeds'] = ''
                self.current_item['seeds'] += data.strip()
            elif self.td_counter == 6:
                if not self.current_item.has_key('leech'):
                    self.current_item['leech'] = ''
                self.current_item['leech'] += data.strip()

        def start_td(self, attr):
            if isinstance(self.td_counter, int):
                self.td_counter += 1
                if self.td_counter > 7:
                    self.td_counter = None
                    # add item to results
                    if self.current_item:
                        self.current_item['engine_url'] = self.url
                        self.current_item['size'] = anySizeToBytes(self.current_item['size'])
                        if not self.current_item.has_key('seeds') or not self.current_item['seeds'].isdigit():
                            self.current_item['seeds'] = 0
                        if not self.current_item.has_key('leech') or not self.current_item['leech'].isdigit():
                            self.current_item['leech'] = 0
                        if self.current_item['link'] is not None:
                            prettyPrinter(self.current_item)
                        self.results.append('a')

    def __init__(self):
        self.results = []
        self.parser = self.SimpleSGMLParser(self.results, self.url)

    def search(self, what):
        i = 1
        while True:
            results = []
            parser = self.SimpleSGMLParser(results, self.url)
            dat = urllib.urlopen(self.url+'/torrents.php?ihq=%s&ihp=%s'%(what, i)).read().decode('utf-8', 'replace')
            parser.feed(dat)
            parser.close()
            if len(results) <= 0:
                break
            i += 1

class EngineLauncher(threading.Thread):
    def __init__(self, engine, what):
        threading.Thread.__init__(self)
        self.engine = engine
        self.what = what

    def run(self):
        self.engine.search(self.what)

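# Usage sketch: run one engine's search in a background thread
# (any engine class defined above would do):
#   t = EngineLauncher(Mininova(), 'family+guy')
#   t.start()
#   t.join()
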
if __name__ == '__main__':
    available_engines_list = BtJunkie, MegaNova, Mininova, PirateBay, Reactor, Isohunt

    if len(sys.argv) < 2:
        raise SystemExit('./nova.py [all|engine1[,engine2]*] <keywords>\navailable engines: %s'%
                         (','.join(e.__name__ for e in available_engines_list)))

    engines_list = [e.lower() for e in sys.argv[1].strip().split(',')]

    if 'all' in engines_list:
        engines_list = [e.__name__.lower() for e in available_engines_list]

    selected_engines = set(e for e in available_engines_list if e.__name__.lower() in engines_list)

    if not selected_engines:
        # no engine name recognized: fall back to BtJunkie and treat
        # every argument as a search keyword
        selected_engines = [BtJunkie]
        what = '+'.join(sys.argv[1:])
    else:
        what = '+'.join(sys.argv[2:])

    threads = []
    for engine in selected_engines:
        try:
            if THREADED:
                l = EngineLauncher(engine(), what)
                threads.append(l)
                l.start()
            else:
                engine().search(what)
        except:
            if STANDALONE:
                traceback.print_exc()
    if THREADED:
        for t in threads:
            t.join()

    if STANDALONE:
        # best_ratios is only filled by termPrettyPrinter, so the summary is
        # meaningful in standalone mode only; sort by decreasing seeds-leech
        # difference so the best results come first
        best_ratios.sort(lambda a, b: cmp(b['seeds']-b['leech'], a['seeds']-a['leech']))

        max_results = 10

        print "########## TOP %d RATIOS ##########"%max_results

        for br in best_ratios:
            if br['seeds'] > 1: # skip results with no real seeders
                prettyPrinter(br)
                max_results -= 1
                if not max_results:
                    break