Reputation: 154
I have been making this download manager app in tkinter and requests and I realized that sometimes if the user is downloading multiple files at the same time it fails to keep up and all the downloads end without any error. I also tried urllib3 and the standard urllib though the only difference that urrlib had was that it just raised and error but still failed. I want to make my program in a way that if the download ends:
I used a while loop and temp counter but the problem is that when requests can't keep up and reaches the while loop, it makes millions of temp files with the size of each of them being 197 bytes and it doesn't work. I also tried just using an if loop hoping that it would be fixed, the difference being that it just didn't create millions of files but still didn't work. Finally I tried writing a separate mock program that just straightly got the rest of the files and merged it the half-downloaded file and it worked but for some reason when I try it in my program it doesn't. Keep in mind that I don't want to create a thread for each tempfile because it can be easily written in the same thread as the one that is downloading the file. How can I do this? My code (Be aware that this function is running in a separate thread):
currently_downloading = np.array([], dtype='S')
current_temp = 0
def download_files():
global files_downloading, times_clicked, currently_downloading, packed, last_temp, current_temp
try:
abort = False
win = None
available_num = 0
downloaded = 0
url = str(url_entry.get())
try:
headers = requests.head(url, headers={'accept-encoding': ''}).headers
except ValueError:
raise InvalidURL()
try:
file_size = float(headers['Content-Length'])
except TypeError:
raise NotDownloadable()
name = ""
formatname = ""
if num.get() == 1:
name = url.split("/")[-1].split(".")[0]
else:
if name_entry.get().strip() != "":
for char in str(name_entry.get()):
if char in banned_chars:
print("Usage of banned characters")
raise BannedCharsUsage()
else:
name = str(name_entry.get())
else:
raise EmptyName()
if var.get() == 1:
formatname = '.' + headers['Content-Type'].split('/')[1]
else:
if str(format_entry.get())[0] == '.' and len(format_entry.get()) >= 3:
formatname = str(format_entry.get())
else:
raise InvalidFormat()
fullname = str(name) + formatname
path = (str(output_entry.get()) + "/").replace(r" \ ".strip(), "/")
if chum.get() == 1:
conn = sqlite3.connect("DEF_PATH.db")
c = conn.cursor()
c.execute("SELECT * FROM DIRECTORY_LIST WHERE SELECTED_DEF = 1")
crnt_default_path = np.array(c.fetchone())
path = str(crnt_default_path[0] + "/").replace(r" \ ".strip(), "/")
conn.commit()
conn.close()
else:
pass
all_files_dir = np.array([], dtype='S')
for file in os.listdir(path):
all_files_dir = np.append(all_files_dir, path + file)
all_files_dir = np.concatenate((all_files_dir, currently_downloading))
while path + fullname in all_files_dir:
for element in currently_downloading:
if element not in all_files_dir:
all_files_dir = np.append(all_files_dir, element)
available_num += 1
if num.get() == 1:
name = url.split("/")[-1].split(".")[0] + f" ({available_num})"
else:
name = str(name_entry.get()) + f" ({available_num})"
fullname = name + formatname
if path + fullname not in all_files_dir:
currently_downloading = np.append(currently_downloading, path + fullname)
available_num = 0
break
else:
currently_downloading = np.append(currently_downloading, path + fullname)
def cancel_dl():
nonlocal abort
abort = True
start_time = time.time()
try:
r = requests.get(url, allow_redirects=False, stream=True)
start = last_print = time.time()
with open(path + fullname, 'wb') as fp:
for chunk in r.iter_content(chunk_size=4096):
if abort:
raise AbortException()
downloaded += fp.write(chunk)
if downloaded > 1000000:
lbl_crnt_size.config(text=f"Downloaded: {round(downloaded / 1000000, 2)} MB")
else:
lbl_crnt_size.config(text=f"Downloaded: {round(downloaded / 1000, 2)} KB")
pct_done = int(downloaded / file_size * 100)
lbl_percent.config(text=f"{round(pct_done, 2)} %")
download_prg["value"] = pct_done
now = time.time()
if now - last_print >= 1:
speed_sec = round(downloaded / (now - start))
if speed_sec > 1000000:
lbl_speed.config(text=f"{round(speed_sec / 1000000, 3)} MB/s")
else:
lbl_speed.config(text=f"{round(speed_sec / 1000, 3)} KB/s")
last_print = time.time()
while os.stat(path + fullname).st_size < file_size:
current_temp += 1
rng = {"Range": f"bytes={os.stat(path + fullname).st_size}-{file_size}"}
r = requests.get(url, allow_redirects=False, stream=True, headers=rng)
start = last_print = time.time()
with open(f"temp/Temp-{current_temp}{formatname}", 'wb') as fp:
for chunk in r.iter_content(chunk_size=4096):
if abort:
raise AbortException()
downloaded += fp.write(chunk)
if downloaded > 1000000:
lbl_crnt_size.config(text=f"Downloaded: {round(downloaded / 1000000, 2)} MB")
else:
lbl_crnt_size.config(text=f"Downloaded: {round(downloaded / 1000, 2)} KB")
pct_done = int(downloaded / file_size * 100)
lbl_percent.config(text=f"{round(pct_done, 2)} %")
download_prg["value"] = pct_done
now = time.time()
if now - last_print >= 1:
speed_sec = round(downloaded / (now - start))
if speed_sec > 1000000:
lbl_speed.config(text=f"{round(speed_sec / 1000000, 3)} MB/s")
else:
lbl_speed.config(text=f"{round(speed_sec / 1000, 3)} KB/s")
last_print = time.time()
with open(f"temp/Temp-{current_temp}{formatname}", 'rb') as fp:
temp_binary = fp.read()
with open(path + fullname, 'rb') as fp:
main_binary = fp.read()
with open(path + fullname, 'wb') as fp:
fp.write(main_binary + temp_binary)
except AbortException:
if os.path.exists(path + fullname):
os.remove(path + fullname)
Upvotes: 2
Views: 3259
Reputation: 1891
There is no inbuilt function to do that so you will have to Manually do that .
First thing you need to do is keep record of how many chunks/buffers you have written to file.
Before download function declare some variable, say x=0. (To count how much data is written to file)
then inside the download function check if x == 0. If true then download normally, Else : resume download using range header
Read Following examples for range header :- source
If the web server supports the range request then you can add the Range header to your request:
Range: bytes=StartPos-StopPos
You will receive the part between StartPos and StopPos. If dont know the StopPos just use:
Range: bytes=StartPos-
So your code would be:
def resume_download(fileurl, resume_byte_position):
resume_header = {'Range': 'bytes=%d-' % resume_byte_position}
return requests.get(fileurl, headers=resume_header, stream=True, verify=False, allow_redirects=True)
Another example :- https://www.oreilly.com/library/view/python-cookbook/0596001673/ch11s06.html
Also update the variable x after writing each chunk (x = x + chunk_size)
And in the end of your download part, add a "if" statement to check if the file size of downloaded file is same as the file size of file on server (you can get that by requests.header.get('Content-Length'). If file size is not same then you call your download function again.
Upvotes: 2