Reputation: 3755
error: (3, 'Illegal characters found in URL')
My url has got special characters like [AVC_(1)_(P1)_0] i cant get this to work, i tried encoding but that would give me "Could not resolve host: https%3A"
Please advice
import sys
import Queue
import threading
import pycurl
import os
import urllib
from StringIO import StringIO
num_conn = 1
# Make a queue with (url, filename) tuples
queue = Queue.Queue()
with open('list.txt') as f:
for line in f:
print line
queue.put((line, 'test.mp4'))
if 'str' in line:
break
# Check args
assert queue.queue, "no URLs given"
num_urls = len(queue.queue)
num_conn = min(num_conn, num_urls)
assert 1 <= num_conn <= 10000, "invalid number of concurrent connections"
print "PycURL %s (compiled against 0x%x)" % (pycurl.version, pycurl.COMPILE_LIBCURL_VERSION_NUM)
print "----- Getting", num_urls, "URLs using", num_conn, "connections -----"
class WorkerThread(threading.Thread):
def __init__(self, queue):
threading.Thread.__init__(self)
self.queue = queue
def run(self):
while 1:
try:
url, filename = self.queue.get_nowait()
except Queue.Empty:
raise SystemExit
#dirname = os.path.dirname(filename)
#fp = open(dirname, "wb")\
#url = urllib.quote(url.encode('utf-8'))
fp = open(os.getcwd()+'/'+filename, "wb")
curl = pycurl.Curl()
curl.setopt(pycurl.URL, url)
curl.setopt(pycurl.FOLLOWLOCATION, 1)
curl.setopt(pycurl.MAXREDIRS, 5)
curl.setopt(pycurl.CONNECTTIMEOUT, 30)
curl.setopt(pycurl.TIMEOUT, 300)
curl.setopt(pycurl.NOSIGNAL, 1)
curl.setopt(pycurl.WRITEDATA, fp)
try:
curl.perform()
except:
import traceback
traceback.print_exc(file=sys.stderr)
sys.stderr.flush()
curl.close()
fp.close()
sys.stdout.write(".")
sys.stdout.flush()
# Start a bunch of threads
threads = []
for dummy in range(num_conn):
t = WorkerThread(queue)
t.start()
threads.append(t)
# Wait for all threads to finish
for thread in threads:
thread.join()
Upvotes: 0
Views: 448
Reputation: 34667
Why not use requests in lieu of pycurl, which would make your run method:
def run(self):
while True:
try:
url, filename = self.queue.get_nowait()
except Queue.Empty:
raise SystemExit
with open(os.getcwd()+'/'+filename, "wb") as fp:
#fp.write(requests.get(url).content)
fp.write(requests.get(url, headers={'user-agent': 'CodeGuru'}).content
I made a few other, stylistic changes.
Upvotes: 2