user2203774
user2203774

Reputation: 619

How to do a large text file transfer in python?

I'm learning socket programming and Python. I need to create a server that accepts a command from a client and sends a large text file back to the client. The client then saves the complete text file to its directory. In my code, the server sends the whole text file line by line, but the client only receives 1024 bytes. I'm not sure what I need to add so that the client can receive the whole text file from the server and save it to its directory. Can someone take a look at my code and point me in the right direction? Your help is greatly appreciated.

server.py

import socket
import sys
import os
from thread import *

#Single-client TCP command server: answers "list" with the working
#directory and "get <filename>" with the raw contents of that file.
#'' as the bind host means "all local interfaces" for bind()
HOST = ''
PORT = 8888

server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
print 'Socket created'

try:
    server_socket.bind((HOST, PORT))    #bind to an address (and port)
except socket.error, msg:
    #msg is indexed as (error code, message text)
    print 'Bind failed. Error Code : ' + str(msg[0]) + ' Message ' + msg[1]
    sys.exit()

print 'Socket bind complete'

#put the socket in listening mode
server_socket.listen(10)     #backlog: up to 10 connections may wait to be accepted
#NOTE(review): the message says 30021 but the socket is bound to PORT (8888)
print 'TCP Server Waiting for client on port 30021'

#wait to accept a connection - blocking call
#accept() is called once, outside the loop, so only one client is ever served
client, addr = server_socket.accept()
#display client information
print 'Connected with ' + addr[0] + ':' + str(addr[1])

try:
    #keep talking with the client
    while 1:
        #Receiving from client
        #NOTE(review): recv() returns '' once the peer disconnects; data.split()
        #is then empty and commandlist[0] raises IndexError
        data = client.recv(1024)

        #command: list
        commandlist = data.split()
        #NOTE(review): ('list') is a plain string, not a tuple, so this is a
        #substring test -- 'li' or 'ist' would also match
        if (commandlist[0].strip() in ('list')):
            reply = 'Directory: ' + os.getcwd()     #current directory
            client.send(reply)
        elif (commandlist[0] == 'get' and len(commandlist) >= 2):
            filename = commandlist[1]
            reply = filename
            #validate filename
            if os.path.exists(filename):
                #length is computed but never sent to the client
                length = os.path.getsize(filename)
                #the file is streamed line by line with no length prefix or end
                #marker, so the receiver cannot tell where the file stops
                with open(filename, 'r') as infile:
                    for line in infile:
                        reply = line
                        client.sendall(reply)
                #earlier attempt, left disabled:
                #f = open(filename, 'r')
                #reply = f.read()
                #client.send(piece)
                #f.close()
            else:
                reply = 'File not found'
                client.send(reply)
        elif (commandlist[0] == 'get' and len(commandlist) < 2):
            #if the command argument is less than 2
                reply = 'Provide a filename please'
                client.send(reply)
        else:
            reply = 'Error: Wrong command'
            client.send(reply)


except KeyboardInterrupt:
    print "Exiting gracefully."
finally:
    #only the listening socket is closed; the accepted `client` socket is not
    server_socket.close()

client.py

#Socket client in python
#Interactive loop: reads a command from the keyboard, sends it to the server
#and prints whatever a single recv() call returns.

import socket   #for sockets
import sys      #for exit

command = ' '
#upper bound on the number of bytes requested from each recv() call
socksize = 1024

#return a socket descriptor which can be used in other socket related functions
#properties: address family: AF_INET (IP v4)
#properties: type: SOCK_STREAM (connection oriented TCP protocol)

try:
    #create an AF_INET, STREAM socket (TCP)
    client_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
except socket.error, msg:               #error handling
    #msg is indexed as (error code, message text)
    print 'Failed to create socket. Error code: ' + str(msg[0]) + ', Error message: ' + msg[1]
    sys.exit();

print 'Socket Created'

#Get the IP address of the remote host/url
#connect to IP on a certain 'port' using the connect
host = ''       #symbolic name meaning the local host
port = 8888     #arbitrary non-privileged port

try:
    remote_ip = socket.gethostbyname(host)
except socket.gaierror:
    #could not resolve
    #NOTE(review): "Existing" in the message below is presumably a typo for "Exiting"
    print 'Hostname could not be resolved. Existing'
    sys.exit()
print 'IP address of ' + host + ' is ' + remote_ip

#Connect to remote server
client_socket.connect((remote_ip, port))
print 'Socket Connected to ' + host + ' on ip ' + remote_ip

#buf is assigned here but not used anywhere in the visible code
buf = ''
#Send some data to remote server
while True:
    print 'Enter a command: list or get <filename>'
    command = raw_input()
    #'quit' is handled locally and is never sent to the server
    if command.strip() == 'quit':
        break
    client_socket.send(command)

    #one recv() per command returns at most socksize bytes, so a longer reply
    #(for example a file requested with "get") is only partially read here
    data = client_socket.recv(socksize)
    print data

#Close the socket
client_socket.close()

Upvotes: 2

Views: 20862

Answers (4)

Yılmaz edis
Yılmaz edis

Reputation: 163

I adapted @freakish's code as follows:

def convert_to_bytes(no):
    """Encode ``no`` as a 4-byte little-endian length header.

    Args:
        no: Non-negative integer smaller than 2**32.

    Returns:
        A ``bytearray`` of exactly 4 bytes, least-significant byte first
        (the layout that ``bytes_to_number`` decodes).

    Raises:
        ValueError: If ``no`` is negative or does not fit in 4 bytes.
            Wrapping such a value silently would produce a header that
            disagrees with the payload length and desynchronise the peer.
    """
    if not 0 <= no <= 0xFFFFFFFF:
        raise ValueError("length %r does not fit in a 4-byte header" % (no,))
    return bytearray((no >> shift) & 0xFF for shift in (0, 8, 16, 24))

def bytes_to_number(b):
    """Decode a 4-byte little-endian length header into an integer.

    Args:
        b: Bytes-like object holding at least 4 bytes; only the first 4
            are used.

    Returns:
        The unsigned integer whose least-significant byte is ``b[0]``.

    Raises:
        ValueError: If fewer than 4 bytes are supplied (for example when a
            ``recv(4)`` call returned a short read).
    """
    # bytearray() yields ints on both Python 2 (str) and Python 3 (bytes),
    # so no manual map(ord, ...) step is required.
    header = bytearray(b[:4])
    if len(header) != 4:
        raise ValueError("need 4 header bytes, got %d" % len(header))
    return sum(byte << (8 * i) for i, byte in enumerate(header))

def myReceive(sock):
    """Read one length-prefixed message from ``sock`` and return its payload.

    The message is a 4-byte little-endian length header (decoded with
    ``bytes_to_number``) followed by that many payload bytes, so a single
    message can carry up to 2**32 - 1 bytes.

    Args:
        sock: A connected stream socket.

    Returns:
        The payload as ``bytes``.

    Raises:
        ConnectionError: If the peer closes the connection before the
            header or the announced number of payload bytes has arrived.
    """
    socksize = 1024

    # recv(4) may legally return fewer than 4 bytes, so collect the header
    # in a loop instead of trusting a single call.
    header = b""
    while len(header) < 4:
        part = sock.recv(4 - len(header))
        if not part:
            raise ConnectionError("connection closed before the length header arrived")
        header += part
    size = bytes_to_number(header)

    chunks = []
    remaining = size
    while remaining > 0:
        # Never request more than what is left of this message: bytes read
        # past the end would belong to the next message and be lost.
        data = sock.recv(min(socksize, remaining))
        if not data:
            raise ConnectionError(
                "connection closed with %d of %d payload bytes missing" % (remaining, size)
            )
        chunks.append(data)
        # you can stream here to disk
        remaining -= len(data)
    # Join once at the end instead of growing a bytes object per chunk.
    return b"".join(chunks)

def mySend(sock, data):
    """Send ``data`` over ``sock`` preceded by its 4-byte length header.

    Args:
        sock: A connected stream socket.
        data: Bytes-like payload; its length must fit in the 4-byte header
            built by ``convert_to_bytes``.
    """
    length = len(data)
    # sendall() keeps writing until every byte is out.  Plain send() may
    # transmit only part of its argument and report that through a return
    # value, which would silently drop the rest if ignored.
    sock.sendall(convert_to_bytes(length)) # has to be 4 bytes
    sock.sendall(data)

Thanks to him: his answer was very good, and I have used these functions in my project.

example tcpClient.py

import socket
import os

def convert_to_bytes(no):
    """Encode ``no`` as a 4-byte little-endian length header (``bytearray``).

    Raises:
        ValueError: If ``no`` is negative or does not fit in 4 bytes.
    """
    if not 0 <= no <= 0xFFFFFFFF:
        raise ValueError("length %r does not fit in a 4-byte header" % (no,))
    return bytearray(no.to_bytes(4, "little"))


def bytes_to_number(b):
    """Decode the first 4 bytes of ``b`` as a little-endian unsigned integer.

    Raises:
        ValueError: If ``b`` holds fewer than 4 bytes.
    """
    if len(b) < 4:
        raise ValueError("need 4 header bytes, got %d" % len(b))
    return int.from_bytes(b[:4], "little")


def mySend(sock, data):
    """Send ``data`` over ``sock`` preceded by its 4-byte length header."""
    length = len(data)
    print("length: ", length)
    # sendall() retries until everything is written; send() may stop early.
    sock.sendall(convert_to_bytes(length)) # has to be 4 bytes
    sock.sendall(data)


host = "127.0.0.1"
port = 8888
filename = "/media/yilmaz/kuran.zip"


def main():
    """Connect to the server and send ``filename`` as one framed message."""
    soc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    # The with-block closes the socket on every exit path.
    with soc:
        try:
            soc.connect((host, port))
        except OSError:
            print("Connection error")
            # SystemExit needs no "import sys", which this script lacks.
            raise SystemExit(1)

        # Stop with a clear message when the file cannot be read, rather
        # than reaching mySend() with ``data`` undefined.
        try:
            with open(filename, 'rb') as infile:
                data = infile.read()
        except OSError as err:
            print("Cannot read " + filename + ": " + str(err))
            raise SystemExit(1)

        mySend(soc, data)

    print("All data send", flush=True)


# Guard the network I/O so the helpers can be imported without side effects.
if __name__ == "__main__":
    main()

example tcpServer.py

import socket

host = ''
port = 8888         # arbitrary non-privileged port


def convert_to_bytes(no):
    """Encode ``no`` as a 4-byte little-endian length header (``bytearray``).

    Raises:
        ValueError: If ``no`` is negative or does not fit in 4 bytes.
    """
    if not 0 <= no <= 0xFFFFFFFF:
        raise ValueError("length %r does not fit in a 4-byte header" % (no,))
    return bytearray(no.to_bytes(4, "little"))


def bytes_to_number(b):
    """Decode the first 4 bytes of ``b`` as a little-endian unsigned integer.

    Raises:
        ValueError: If ``b`` holds fewer than 4 bytes.
    """
    if len(b) < 4:
        raise ValueError("need 4 header bytes, got %d" % len(b))
    return int.from_bytes(b[:4], "little")


def myReceive(sock):
    """Read one length-prefixed message from ``sock`` and return its payload.

    The message is a 4-byte little-endian length header followed by that
    many payload bytes (so up to 2**32 - 1 bytes per message).

    Raises:
        ConnectionError: If the peer closes the connection before the
            header or the announced number of payload bytes has arrived.
    """
    socksize = 1024

    # recv(4) may return fewer than 4 bytes; loop until the header is whole.
    header = b""
    while len(header) < 4:
        part = sock.recv(4 - len(header))
        if not part:
            raise ConnectionError("connection closed before the length header arrived")
        header += part
    size = bytes_to_number(header)

    chunks = []
    remaining = size
    while remaining > 0:
        # Request only what is left of this message so that bytes of a
        # following message are never consumed and discarded.
        data = sock.recv(min(socksize, remaining))
        if not data:
            raise ConnectionError(
                "connection closed with %d of %d payload bytes missing" % (remaining, size)
            )
        chunks.append(data)
        # you can stream here to disk
        remaining -= len(data)

    return b"".join(chunks)


def main():
    """Accept one client, receive one framed message and save it to ``aaa``."""
    soc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    soc.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    print("Socket created",flush=True)

    # The with-blocks close both sockets on every exit path.
    with soc:
        try:
            soc.bind((host, port))
        except OSError as err:
            print("Bind failed. Error : " + str(err), flush=True)
            # SystemExit needs no "import sys", which this script lacks.
            raise SystemExit(1)

        soc.listen(5)       # queue up to 5 requests
        print("Socket now listening", flush=True)

        client_socket, address = soc.accept()
        print("Connected", flush=True)

        with client_socket:
            myBuffer = myReceive(client_socket)

    print("All data received",flush=True)

    with open("aaa", "wb") as f:
        f.write(myBuffer)

    print("All data written",flush=True)


# Guard the network I/O so the helpers can be imported without side effects.
if __name__ == "__main__":
    main()

Upvotes: 0

aust
aust

Reputation: 914

When transporting data over TCP, each call to recv isn't guaranteed to give you the exact amount of bytes you ask for, but it also won't give you more than what you ask for. It could potentially give 2 bytes, 200 bytes, or even 500 bytes if you asked for 1024. It may also fill your buffer with the amount you asked for, but not always. Since file sizes vary, you could send an initial packet (say 4 bytes worth) that specifies the size of the file, that way the receiver knows how much to expect. You can then allocate a buffer of that size (or use a buffer 1024 bytes large) and continually recv and write whatever you receive up to the amount originally specified.

To make things easier, I'd suggest using some type of mechanism that would help you with your transport of data, such as Google Protocol Buffers.

I'd especially look at Beej's guide to socket programming (specifically the recv section). Although it's in C, it definitely translates over to Python.

Upvotes: 2

freakish
freakish

Reputation: 56477

The problem is that the client has to know the size of the file from the beginning; otherwise it won't know how much data it has to read. There are already protocols (like HTTP) that handle this for you, but you can implement it on your own. Something like this:

server.py

# Send the file as a 4-byte length header followed by the raw file bytes.
if os.path.exists(filename):
    length = os.path.getsize(filename)
    # sendall() keeps writing until every byte is out; send() may stop early.
    client.sendall(convert_to_bytes(length)) # has to be 4 bytes
    # Binary mode: the header announces the size in bytes on disk, so the
    # bytes must go out untranslated (text mode may alter newlines or decode).
    with open(filename, 'rb') as infile:
        d = infile.read(1024)
        while d:
            client.sendall(d)
            d = infile.read(1024)

client.py

# sendall() keeps writing until the whole command is out; send() may stop early.
client_socket.sendall(command)

if command.strip().startswith("get"):
    # The reply starts with a 4-byte length header, so sizes up to
    # 2**32 - 1 bytes (just under 4 GiB) can be announced.  recv(4) may
    # return fewer than 4 bytes, so read until the header is complete.
    header = b""
    while len(header) < 4:
        part = client_socket.recv(4 - len(header))
        if not part:
            raise EOFError("connection closed before the length header arrived")
        header += part
    size = bytes_to_number(header)
    current_size = 0
    buffer = b""
    while current_size < size:
        # Ask only for what is left of this file so that bytes belonging to
        # the server's next reply are never consumed and thrown away here.
        data = client_socket.recv(min(socksize, size - current_size))
        if not data:
            # server closed early: current_size < size marks a partial file
            break
        buffer += data
        # you can stream here to disk
        current_size += len(data)
    # you have entire file in memory

Note that this is only an idea and there are many issues involved with it.

Of course you have to implement convert_to_bytes (it always has to return 4 bytes (or other fixed number) otherwise the protocol will be broken, add zeros if necessary) and bytes_to_number functions.

Example of such implementations:

def convert_to_bytes(no):
    """Encode ``no`` as a fixed 4-byte little-endian length header.

    Args:
        no: Non-negative integer smaller than 2**32.

    Returns:
        A ``bytearray`` of exactly 4 bytes, least-significant byte first.

    Raises:
        ValueError: If ``no`` cannot be represented in 4 unsigned bytes.
            Truncating it silently would break the protocol, because the
            header would no longer match the number of bytes that follow.
    """
    import struct  # local import keeps this snippet self-contained

    try:
        # "<I" is a little-endian unsigned 32-bit integer.
        return bytearray(struct.pack("<I", no))
    except struct.error:
        raise ValueError("length %r does not fit in a 4-byte header" % (no,))

def bytes_to_number(b):
    """Decode a 4-byte little-endian length header into an integer.

    Works unchanged on Python 2 (``str``) and Python 3 (``bytes``) input,
    so no manual ``map(ord, b)`` step is needed.

    Args:
        b: Bytes-like object holding at least 4 bytes; only the first 4
            are used.

    Returns:
        The unsigned integer whose least-significant byte is ``b[0]``.

    Raises:
        ValueError: If fewer than 4 bytes are supplied, e.g. after a short
            ``recv(4)``.
    """
    import struct  # local import keeps this snippet self-contained

    try:
        # "<I" is a little-endian unsigned 32-bit integer.
        return struct.unpack("<I", b[:4])[0]
    except struct.error:
        raise ValueError("need 4 header bytes, got %d" % len(b))

Upvotes: 4

LtWorf
LtWorf

Reputation: 7598

Your problem is here:

while True:
    print 'Enter a command: list or get <filename>'
    command = raw_input()
    if command.strip() == 'quit':
        break
    client_socket.send(command)

    #only one recv() per command: at most socksize bytes of the reply are read
    data = client_socket.recv(socksize)
    print data

#Close the socket
client_socket.close()

Your recv call has a maximum size, so a single call cannot return more data than that. Packets are also limited in size, so a file is normally transmitted over several packets.

What you need is to loop on a recv() and keep reading from it and writing to your file.

Then the simple solution is to have the server close the connection when the file is finished, so the client will notice this event and you can open a new connection and repeat the procedure.

If you really really want to re-use the same connection, the best thing to do is to transmit in advance the size of the data, and then start reading, counting the bytes. But you will need to read in a loop again.

If you don't know in advance the size of the data you are about to send, it gets messy because you'll have to send a special value to signify an end of file, but if that value appears inside the file as well you'd be in trouble, so you'd also need to apply some encoding to your file to make sure that the special value never appears in it.

Upvotes: 0

Related Questions