Reputation: 1
I am comparing 2 different methods to download files from a sftp server.
Using Paramiko.Transport(), adjusting the buffer size and downloading the file in chunks
Using Asyncssh, also adjusting the buffer size and downloading the file in chunks
Downloading 2 files took:
Downloading 4 files took:
I would assume that as the number of files increases, the async version would perform much better than downloading the files sequentially. However, the results are the same or even worse.
Why is this happening? And is there a faster way to achieve the same functionality?
Paramiko Code
def create_sftp_client(host, port, username, password):
    """Open an SFTP session over a freshly connected paramiko Transport.

    The caller owns the returned ``SFTPClient`` and is responsible for
    closing it (and its underlying transport) when done.
    """
    transport = paramiko.Transport((host, port))
    transport.connect(username=username, password=password)
    # Widen the flow-control window and push the rekey thresholds far out to
    # cut per-packet overhead on large transfers.
    # NOTE(review): 2**40 - 1 exceeds the SSH protocol's 2**32 - 1 window
    # limit — presumably clamped by paramiko; confirm against paramiko docs.
    transport.default_window_size = 2 ** 40 - 1
    transport.packetizer.REKEY_BYTES = 2 ** 50
    transport.packetizer.REKEY_PACKETS = 2 ** 50
    return paramiko.SFTPClient.from_transport(transport)
def download_file(sftp, remote_path, local_path, chunk_size=1024 * 1024 * 10):
    """Copy one remote file to local disk in fixed-size chunks.

    ``prefetch()`` asks paramiko to pipeline read requests in the
    background so each chunked read below does not pay a full round-trip.
    Progress is printed after every chunk.
    """
    with sftp.file(remote_path, 'r') as remote_file, open(local_path, 'wb') as local_file:
        remote_file.prefetch()
        while data := remote_file.read(chunk_size):
            local_file.write(data)
            print(f"Downloading {remote_path}: {local_file.tell()} bytes transferred")
def download_dir(sftp, remote_dir, local_dir):
    """Recursively mirror ``remote_dir`` into ``local_dir`` over SFTP.

    Directories are recursed into; everything else is fetched with
    ``download_file``. ``local_dir`` is created on demand.
    """
    import posixpath
    import stat as stat_module

    # exist_ok avoids the check-then-create race of the original.
    os.makedirs(local_dir, exist_ok=True)
    for item in sftp.listdir_attr(remote_dir):
        # Remote SFTP paths are always POSIX-style; os.path.join would
        # insert backslashes on Windows.
        remote_path = posixpath.join(remote_dir, item.filename)
        local_path = os.path.join(local_dir, item.filename)
        # S_ISDIR compares the whole file-type field; the original bitmask
        # test (st_mode & 0o40000) also matched sockets (mode 0o140000).
        if stat_module.S_ISDIR(item.st_mode):
            download_dir(sftp, remote_path, local_path)
        else:
            download_file(sftp, remote_path, local_path)
def file_download(host, port, username, password, local_dir, remote_dir):
    """Connect, mirror ``remote_dir`` into ``local_dir``, then clean up.

    A missing remote directory is reported rather than raised. The SFTP
    client is closed in ``finally`` — the original skipped ``close()``
    whenever any exception other than FileNotFoundError escaped, leaking
    the connection.
    """
    sftp = None
    try:
        sftp = create_sftp_client(host, port, username, password)
        download_dir(sftp, remote_dir, local_dir)
    except FileNotFoundError:
        print(f"File not found: {remote_dir}")
    finally:
        if sftp is not None:
            sftp.close()
AsyncSSH code
async def create_sftp_client(host, port, username, password):
    """Establish an SSH connection and return an SFTP client bound to it.

    NOTE(review): ``known_hosts=None`` disables host-key verification —
    fine for a benchmark, unsafe in production. Also note the connection
    object itself is not returned, so callers cannot close the underlying
    SSH connection explicitly.
    """
    connection = await asyncssh.connect(
        host,
        port=port,
        username=username,
        password=password,
        known_hosts=None,  # security: no host-key checking
    )
    sftp = await connection.start_sftp_client()
    return sftp
async def download_file(sftp, remote_path, local_path, chunk_size=1024 * 1024 * 10):
    """Asynchronously copy one remote file to local disk in chunks.

    Errors are reported and swallowed so a single failed file does not
    abort a concurrent batch (see ``download_dir``).
    """
    try:
        async with sftp.open(remote_path, 'rb') as remote_file, \
                aiofiles.open(local_path, 'wb') as local_file:
            transferred = 0
            while data := await remote_file.read(chunk_size):
                await local_file.write(data)
                transferred += len(data)
                # The original interpolated local_file.tell() here; on an
                # aiofiles handle that is a coroutine, so the progress line
                # printed a coroutine object instead of a byte count.
                print(f"Downloading {remote_path}: {transferred} bytes transferred")
    except Exception as e:
        print(f"Error downloading {remote_path}: {e}")
async def download_dir(sftp, remote_dir, local_dir):
    """Recursively download ``remote_dir`` into ``local_dir``, fanning out
    one task per directory entry.

    NOTE(review): every task shares one SSH connection, so concurrency
    only overlaps protocol round-trips — it does not multiply bandwidth.
    That is why this is not dramatically faster than the sequential
    paramiko version; per-file speedups need one connection per transfer.
    """
    import posixpath
    import stat as stat_module

    os.makedirs(local_dir, exist_ok=True)
    tasks = []
    for name in await sftp.listdir(remote_dir):
        # asyncssh's listdir includes '.' and '..'; recursing into them
        # would loop forever.
        if name in ('.', '..'):
            continue
        # Remote SFTP paths are POSIX-style regardless of the local OS.
        remote_path = posixpath.join(remote_dir, name)
        local_path = os.path.join(local_dir, name)
        attrs = await sftp.stat(remote_path)
        # S_ISDIR checks the whole type field; `permissions & 0o40000`
        # also matched sockets (mode 0o140000).
        if stat_module.S_ISDIR(attrs.permissions):
            tasks.append(download_dir(sftp, remote_path, local_path))
        else:
            tasks.append(download_file(sftp, remote_path, local_path))
    await asyncio.gather(*tasks)
async def file_download(host, port, username, password, local_dir, remote_dir):
    """Connect, mirror ``remote_dir`` into ``local_dir``, then clean up.

    The original never closed the SFTP session; ``exit()`` in ``finally``
    at least ends the SFTP channel (the SSH connection itself is not
    reachable from here — see ``create_sftp_client``).
    """
    sftp = None
    try:
        sftp = await create_sftp_client(host, port, username, password)
        await download_dir(sftp, remote_dir, local_dir)
    except FileNotFoundError:
        print(f"File not found: {remote_dir}")
    except Exception as e:
        print(f"Error: {e}")
    finally:
        if sftp is not None:
            sftp.exit()
Upvotes: 0
Views: 10