mirror of https://github.com/qbittorrent/qBittorrent.git
- Moved everything to trunk to create a stable branch
432  src/search_engine/nova.py  (new executable file)
@@ -0,0 +1,432 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Version: 1.7
# Changelog:
# - merged with qbittorrent branch (code cleanup, indentation mistakes)
# - separate standalone and slave mode
# - added btjunkie
# - added meganova
# - added multithreaded mode

# Version: 1.6
# Changelog:
# - size is now always returned in bytes
# - seeders/leechers are now always returned as integers
# - cleaned up code
# - results are now displayed in real time
# - fixed piratebay, torrentreactor

# Author:
#  Fabien Devaux <fab AT gnux DOT info>
# Contributors:
#  Christophe Dumez <chris@qbittorrent.org> (qbittorrent integration)
#  Thanks to gab #gcu @ irc.freenode.net (multipage support on PirateBay)
#  Thanks to Elias <gekko04@users.sourceforge.net> (torrentreactor and isohunt search engines)
#
# Licence: BSD
import sys
import urllib
import sgmllib
from xml.dom import minidom
import re
import os
import cgi
import traceback
import threading

STANDALONE = True
THREADED = True

# When launched by qBittorrent (slave mode), keep the machine-readable
# printer and run the engines sequentially.
if os.environ.has_key('QBITTORRENT'):
    STANDALONE = False
    THREADED = False
def prettyPrinter(dictionary):
    # machine-readable output, one result per line, fixed field order
    print "%(link)s|%(name)s|%(size)s|%(seeds)s|%(leech)s|%(engine_url)s"%dictionary

if STANDALONE:
    def termPrettyPrinter(dictionary):
        # human-readable output for terminal use
        dictionary['size'] = bytesToHuman(dictionary['size'])
        print "%(seeds)5s/%(leech)5s | %(size)10s | %(name)s "%dictionary
        print "wget '%s'"%dictionary['link'].replace("'","\\'")

    globals()['prettyPrinter'] = termPrettyPrinter
def bytesToHuman(filesize):
    """
    Convert float (size in bytes) to a readable string
    """
    decimators = ('k','M','G','T')
    unit = ''
    for n in range(len(decimators)):
        if filesize > 1100.0:
            filesize /= 1024.0
            unit = decimators[n]
    return '%.1f%sB'%(filesize, unit)
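# Illustrative conversions (comments only, nothing here is executed; the
# values follow from the 1100.0 threshold above):
#   bytesToHuman(123.0)        -> '123.0B'
#   bytesToHuman(2147483648.0) -> '2.0GB'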
def anySizeToBytes(size_string):
    """
    Convert a string like '1 KB' to its size in bytes (1024)
    """
    # separate the number from the unit
    try:
        size, unit = size_string.split()
    except (ValueError, TypeError):
        try:
            size = size_string.strip()
            unit = ''.join([c for c in size if c.isalpha()])
            if unit:
                size = size[:-len(unit)]
        except (ValueError, TypeError):
            return -1

    try:
        size = float(size)
    except ValueError:
        return -1
    if not unit:
        return int(size)
    short_unit = unit.upper()[0]

    # convert to bytes
    units_dict = { 'T': 40, 'G': 30, 'M': 20, 'K': 10 }
    if units_dict.has_key(short_unit):
        size = size * 2**units_dict[short_unit]
    return int(size)
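# Illustrative conversions (comments only, nothing here is executed):
#   anySizeToBytes('1 KB')  -> 1024
#   anySizeToBytes('703MB') -> 737148928   (703 * 2**20)
#   anySizeToBytes('junk')  -> -1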
################################################################################
# Every engine should have a "search" method taking a space-free string
# as parameter (e.g. "family+guy").
# It should call prettyPrinter() with a dict holding the keys used above:
# link, name, size, seeds, leech, engine_url.
# As a convention, try to list results by decreasing number of seeds or similar.
################################################################################
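# A minimal engine skeleton following this convention (illustration only:
# "ExampleEngine" and its URL are hypothetical, not a real tracker):
#
# class ExampleEngine(object):
#     url = 'http://tracker.example.com'
#
#     def search(self, what):
#         # fetch and parse self.url+'/search?q='+what here, then report
#         # each hit through prettyPrinter():
#         prettyPrinter({'link': self.url+'/get/42.torrent',
#                        'name': 'example result',
#                        'size': 12345678,
#                        'seeds': 10,
#                        'leech': 2,
#                        'engine_url': self.url})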
class PirateBay(object):
    url = 'http://thepiratebay.org'

    def __init__(self):
        self.results = []
        self.parser = self.SimpleSGMLParser(self.results, self.url)

    class SimpleSGMLParser(sgmllib.SGMLParser):
        def __init__(self, results, url, *args):
            sgmllib.SGMLParser.__init__(self)
            self.td_counter = None
            self.current_item = None
            self.results = results
            self.url = url

        def start_a(self, attr):
            params = dict(attr)
            href = params.get('href', '')
            if href.startswith('/browse'):
                # a category link opens a new result row
                self.current_item = {}
                self.td_counter = 0
            elif href.startswith('http://torrents.thepiratebay.org/hashtorrent'):
                self.current_item['link'] = href.strip()
                self.td_counter = self.td_counter + 1

        def handle_data(self, data):
            if self.td_counter == 1:
                if not self.current_item.has_key('name'):
                    self.current_item['name'] = ''
                self.current_item['name'] += data.strip()
            elif self.td_counter == 5:
                if not self.current_item.has_key('size'):
                    self.current_item['size'] = ''
                self.current_item['size'] += data.strip()
            elif self.td_counter == 6:
                if not self.current_item.has_key('seeds'):
                    self.current_item['seeds'] = ''
                self.current_item['seeds'] += data.strip()
            elif self.td_counter == 7:
                if not self.current_item.has_key('leech'):
                    self.current_item['leech'] = ''
                self.current_item['leech'] += data.strip()

        def start_td(self, attr):
            if isinstance(self.td_counter, int):
                self.td_counter += 1
                if self.td_counter > 7:
                    self.td_counter = None
                    # display the completed item
                    if self.current_item:
                        self.current_item['engine_url'] = self.url
                        self.current_item['size'] = anySizeToBytes(self.current_item['size'])
                        if not self.current_item['seeds'].isdigit():
                            self.current_item['seeds'] = 0
                        if not self.current_item['leech'].isdigit():
                            self.current_item['leech'] = 0
                        prettyPrinter(self.current_item)
                        # only the length of self.results matters, so any
                        # placeholder value will do
                        self.results.append('a')

    def search(self, what):
        i = 0
        order = 'se'  # sort by seeds
        while True:
            results = []
            parser = self.SimpleSGMLParser(results, self.url)
            dat = urllib.urlopen(self.url+'/search.php?q=%s&orderby=%s&what=search&page=%u' % (what, order, i)).read()
            parser.feed(dat)
            parser.close()
            if len(results) <= 0:
                break
            i += 1
class Mininova(object):
    url = 'http://www.mininova.org'
    table_items = 'added cat name size seeds leech'.split()

    def search(self, what):
        order = 'seeds'  # must be one of self.table_items

        def get_link(lnk):
            lnk = lnk.getElementsByTagName('a').item(0)
            return self.url+lnk.attributes.get('href').value

        def get_text(txt):
            if txt.nodeType == txt.TEXT_NODE:
                return txt.toxml()
            else:
                return ''.join([get_text(n) for n in txt.childNodes])

        dat = urllib.urlopen(self.url+'/search/%s/%s'%(what,order)).read().decode('utf-8', 'replace')
        x = minidom.parseString(dat.encode('utf-8', 'replace'))
        table = x.getElementsByTagName('table').item(0)
        if not table:
            return
        for tr in table.getElementsByTagName('tr'):
            tds = tr.getElementsByTagName('td')
            if tds:
                i = 0
                vals = {}
                for td in tds:
                    if self.table_items[i] == 'name':
                        vals['link'] = get_link(td).strip()
                    vals[self.table_items[i]] = get_text(td).strip()
                    i += 1
                vals['engine_url'] = self.url
                vals['size'] = anySizeToBytes(vals['size'])
                if not vals['seeds'].isdigit():
                    vals['seeds'] = 0
                if not vals['leech'].isdigit():
                    vals['leech'] = 0
                prettyPrinter(vals)
# TODO: add multipage support
class BtJunkie(object):
    url = 'http://btjunkie.org'

    def search(self, what):
        dat = urllib.urlopen(self.url+'/search?q=%s'%what).read().decode('utf8', 'replace')
        # Not very readable, but the SGML parser chokes on this page,
        # so the results are scraped with regular expressions instead.
        section_re = re.compile(r'(?s)href="/torrent\?do=download.*?<tr>')
        torrent_re = re.compile(r'(?s)href="(?P<link>.*?do=download[^"]+).*?'
                                r'<b>(?P<name>.*?)</b>.*?'
                                r'>(?P<size>\d+MB)</font>.*?'
                                r'>(?P<seeds>\d+)</font>.*?'
                                r'>(?P<leech>\d+)</font>')
        for match in section_re.finditer(dat):
            txt = match.group(0)
            m = torrent_re.search(txt)
            if m:
                torrent_infos = m.groupdict()
                torrent_infos['engine_url'] = self.url
                torrent_infos['size'] = anySizeToBytes(torrent_infos['size'])
                torrent_infos['link'] = self.url+torrent_infos['link']
                prettyPrinter(torrent_infos)
class MegaNova(object):
    url = 'http://www.meganova.org'

    def search(self, what):
        dat = urllib.urlopen(self.url+'/search.php?order=5&search=%s'%what).read().decode('utf8', 'replace')
        # Same approach as BtJunkie: the SGML parser chokes on this page,
        # so scrape it with regular expressions.
        section_re = re.compile(r'(?s)<td class="added".*?</tr')
        torrent_re = re.compile(r'(?s)href="(?P<link>/torrent/.*?)".*?'
                                r'<span.*?>(?P<name>.*?)</span>.*?'
                                r'>(?P<size>[0-9.]+\s+.B).*?'
                                r'>(?P<seeds>\d+)<.*?'
                                r'>(?P<leech>\d+)<')

        for match in section_re.finditer(dat):
            txt = match.group(0)
            m = torrent_re.search(txt)
            if m:
                torrent_infos = m.groupdict()
                torrent_infos['engine_url'] = self.url
                torrent_infos['size'] = anySizeToBytes(torrent_infos['size'])
                torrent_infos['link'] = self.url+torrent_infos['link']
                prettyPrinter(torrent_infos)
class Reactor(object):
    url = 'http://tr.searching.com'

    class SimpleSGMLParser(sgmllib.SGMLParser):
        def __init__(self, results, url, *args):
            sgmllib.SGMLParser.__init__(self)
            self.td_counter = None
            self.current_item = None
            self.results = results
            self.id = None
            self.url = url

        def start_a(self, attr):
            params = dict(attr)
            href = params.get('href', '')
            if href.startswith('view.php'):
                self.current_item = {}
                self.td_counter = 0
                # extract the torrent id and keep it on the instance so the
                # download link can be built later in start_td()
                equal = href.find("=")
                self.id = str(int(href[equal+1:]))

        def handle_data(self, data):
            if self.td_counter == 0:
                if not self.current_item.has_key('name'):
                    self.current_item['name'] = ''
                self.current_item['name'] += data.strip()
            elif self.td_counter == 1:
                if not self.current_item.has_key('size'):
                    self.current_item['size'] = ''
                self.current_item['size'] += data.strip()
            elif self.td_counter == 2:
                if not self.current_item.has_key('seeds'):
                    self.current_item['seeds'] = ''
                self.current_item['seeds'] += data.strip()
            elif self.td_counter == 3:
                if not self.current_item.has_key('leech'):
                    self.current_item['leech'] = ''
                self.current_item['leech'] += data.strip()

        def start_td(self, attr):
            if isinstance(self.td_counter, int):
                self.td_counter += 1
                if self.td_counter > 7:
                    self.td_counter = None
                    # add item to results
                    if self.current_item:
                        self.current_item['link'] = 'http://download.torrentreactor.net/download.php?name=%s&id=%s'%(cgi.escape(self.current_item['name']), self.id)
                        self.current_item['engine_url'] = self.url
                        self.current_item['size'] = anySizeToBytes(self.current_item['size'])
                        if not self.current_item['seeds'].isdigit():
                            self.current_item['seeds'] = 0
                        if not self.current_item['leech'].isdigit():
                            self.current_item['leech'] = 0
                        prettyPrinter(self.current_item)
                        # only the length of self.results matters
                        self.results.append('a')

    def __init__(self):
        self.results = []
        self.parser = self.SimpleSGMLParser(self.results, self.url)

    def search(self, what):
        i = 0
        while True:
            results = []
            parser = self.SimpleSGMLParser(results, self.url)
            dat = urllib.urlopen(self.url+'/search.php?search=&words=%s&skip=%s'%(what, i*50)).read().decode('utf-8', 'replace')
            parser.feed(dat)
            parser.close()
            if len(results) <= 0:
                break
            i += 1
class Isohunt(object):
    url = 'http://isohunt.com'

    class SimpleSGMLParser(sgmllib.SGMLParser):
        def __init__(self, results, url, *args):
            sgmllib.SGMLParser.__init__(self)
            self.td_counter = None
            self.current_item = None
            self.results = results
            self.url = url

        def start_tr(self, attr):
            params = dict(attr)
            if 'onclick' in params:
                download_url = 'http://isohunt.com/dl.php?id='
                self.current_item = {}
                self.td_counter = 0
                # the torrent id is embedded in the row's onclick handler
                begin_id = params['onclick'].find("id=")+3
                end_id = params['onclick'][begin_id:].find("'")
                self.current_item['link'] = '%s%s'%(download_url, params['onclick'][begin_id:begin_id+end_id])

        def handle_data(self, data):
            if self.td_counter == 3:
                if not self.current_item.has_key('name'):
                    self.current_item['name'] = ''
                self.current_item['name'] += data.strip()
            elif self.td_counter == 4:
                if not self.current_item.has_key('size'):
                    self.current_item['size'] = ''
                self.current_item['size'] += data.strip()
            elif self.td_counter == 5:
                if not self.current_item.has_key('seeds'):
                    self.current_item['seeds'] = ''
                self.current_item['seeds'] += data.strip()
            elif self.td_counter == 6:
                if not self.current_item.has_key('leech'):
                    self.current_item['leech'] = ''
                self.current_item['leech'] += data.strip()

        def start_td(self, attr):
            if isinstance(self.td_counter, int):
                self.td_counter += 1
                if self.td_counter > 7:
                    self.td_counter = None
                    # add item to results
                    if self.current_item:
                        self.current_item['engine_url'] = self.url
                        self.current_item['size'] = anySizeToBytes(self.current_item['size'])
                        if not self.current_item.has_key('seeds') or not self.current_item['seeds'].isdigit():
                            self.current_item['seeds'] = 0
                        if not self.current_item.has_key('leech') or not self.current_item['leech'].isdigit():
                            self.current_item['leech'] = 0
                        prettyPrinter(self.current_item)
                        self.results.append('a')

    def __init__(self):
        self.results = []
        self.parser = self.SimpleSGMLParser(self.results, self.url)

    def search(self, what):
        i = 1
        while True:
            results = []
            parser = self.SimpleSGMLParser(results, self.url)
            dat = urllib.urlopen(self.url+'/torrents.php?ihq=%s&ihp=%s'%(what, i)).read().decode('utf-8', 'replace')
            parser.feed(dat)
            parser.close()
            if len(results) <= 0:
                break
            i += 1
class EngineLauncher(threading.Thread):
    def __init__(self, engine, what):
        threading.Thread.__init__(self)
        self.engine = engine
        self.what = what

    def run(self):
        self.engine.search(self.what)
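# In threaded mode each engine runs in its own thread, so one slow tracker
# does not block the others. Illustrative use (comment only):
#   EngineLauncher(Mininova(), 'family+guy').start()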
if __name__ == '__main__':
    available_engines_list = BtJunkie, MegaNova, Mininova, PirateBay, Reactor, Isohunt

    if len(sys.argv) < 3:
        raise SystemExit('./nova.py <all|engine1[,engine2]*> <keywords>\navailable engines: %s'%
                         (','.join(e.__name__ for e in available_engines_list)))

    engines_list = [e.lower() for e in sys.argv[1].strip().split(',')]
    what = '+'.join(sys.argv[2:])

    if 'all' in engines_list:
        engines_list = [e.__name__.lower() for e in available_engines_list]

    selected_engines = set(e for e in available_engines_list if e.__name__.lower() in engines_list)

    for engine in selected_engines:
        try:
            if THREADED:
                EngineLauncher(engine(), what).start()
            else:
                engine().search(what)
        except:
            # a failing engine must never kill the whole search; only report
            # the error when a human is watching
            if STANDALONE:
                traceback.print_exc()
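# Example invocations (standalone mode; search terms are hypothetical):
#   ./nova.py all family guy            -> searches "family+guy" on every engine
#   ./nova.py btjunkie,mininova ubuntu  -> searches "ubuntu" on two engines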