froest

Reputation: 3

Threads not stopping in Python

The purpose of my program is to download files with threads. I define a unit size and spawn len/unit threads, where len is the length of the file to be downloaded.

With my program the file does get downloaded, but the threads never stop, and I can't figure out why.

This is my code...

#! /usr/bin/python

import urllib2
import threading
from time import ctime

class MyThread(threading.Thread):
    def __init__(self, func, args, name=''):
        threading.Thread.__init__(self)
        self.func = func
        self.args = args
        self.name = name

    def run(self):
        self.func(*self.args)

url = 'http://ubuntuone.com/1SHQeCAQWgIjUP2945hkZF'
request = urllib2.Request(url)
response = urllib2.urlopen(request)
meta = response.info()
response.close()

unit = 1000000
flen = int(meta.getheaders('Content-Length')[0])
print flen

# number of blocks, rounded up when flen is not a multiple of unit
if flen % unit == 0:
    bs = flen / unit
else:
    bs = flen / unit + 1
blocks = range(bs)

# track which blocks are still being downloaded
cnt = {}
for i in blocks:
    cnt[i] = i

def getStr(i):
    try:
        print 'Thread %d start.' % (i,)
        fout = open('a.zip', 'wb')
        fout.seek(i * unit, 0)
        if (i + 1) * unit > flen:
            request.add_header('Range', 'bytes=%d-%d' % (i * unit, flen - 1))
        else:
            request.add_header('Range', 'bytes=%d-%d' % (i * unit, (i + 1) * unit - 1))
        resp = urllib2.urlopen(request)
        buf = resp.read()
        fout.write(buf)
    except BaseException:
        print 'Error'
    finally:
        fout.flush()
        fout.close()
        del cnt[i]
    print 'Thread %d ended.' % (i),
    print cnt

def main():
    print 'download at:', ctime()
    threads = []
    for i in blocks:
        t = MyThread(getStr, (blocks[i],), getStr.__name__)
        threads.append(t)
    for i in blocks:
        threads[i].start()
    for i in blocks:
        threads[i].join()
    print 'download done at:', ctime()

if __name__ == '__main__':
    main()

Could someone please help me understand why the threads aren't stopping?

Upvotes: 0

Views: 386

Answers (1)

jdi

Reputation: 92627

I can't really address your code example because it is quite messy and hard to follow, but one likely reason your threads never end is that a request stalls out and never finishes. urllib2 lets you pass a timeout to urlopen so that a request can't block a thread forever.
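
For example, a minimal sketch of passing a timeout (the URL and the 10-second value here are just placeholders):

import socket
import urllib2

try:
    # timeout is in seconds and applies to the blocking socket operations,
    # so a stalled request raises an exception instead of hanging forever
    resp = urllib2.urlopen('http://example.com/file.zip', timeout=10)
    data = resp.read()
except (urllib2.URLError, socket.timeout) as e:
    print 'request failed or timed out:', e

A timeout during connect surfaces as a URLError, while a timeout during read() raises socket.timeout directly, so it is worth catching both.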

What I would recommend for your own code is to split the work up into a queue, start a fixed number of threads (instead of a variable number), and let the worker threads pick up work until it is done. Give the http requests a timeout; if the timeout expires, try again or put the work back into the queue (a sketch of that retry logic follows the example below).

Here is a generic example of how to use a queue, a fixed number of workers and a sync primitive between them:

import threading
import time
from Queue import Queue

def worker(queue, results, lock):
    local_results = []
    while True:
        val = queue.get()
        if val is None:
            break

        # pretend to do work
        time.sleep(.1)
        local_results.append(val)

    with lock:
        results.extend(local_results)
        print threading.current_thread().name, "Done!"


num_workers = 4

threads = []
queue = Queue()
lock = threading.Lock()
results = []

for i in xrange(100):
    queue.put(i)

for _ in xrange(num_workers):

    # Use None as a sentinel to signal the threads to end
    queue.put(None)

    t = threading.Thread(target=worker, args=(queue,results,lock))
    t.start()
    threads.append(t)

for t in threads:
    t.join()

print sorted(results)

print "All done"

Upvotes: 1
