David Culbreth
David Culbreth

Reputation: 2796

socket.makefile not receiving response Python 3.6

I've written a very simple http proxy script in python, but for some reason, it always hangs when a request is made.

If you run this with python 3.6, and try to use it by browsing to localhost:8080/www.google.com or whatever website you want in the browser of your choice, you will find that the script hangs after it gets a cache miss on its local files, and tries to request the file from www.google.com

from socket import *

def main():

    # Create a server socket, bind it to a port and start listening
    tcpSerSock = socket(AF_INET, SOCK_STREAM)
    tcpSerSock.bind(('localhost', 8080))
    tcpSerSock.listen(5)
    while 1:
        # Start receiving data from the client
        print('Ready to serve...')
        tcpCliSock, addr = tcpSerSock.accept()
        print('Received a connection from:', addr)
        message = tcpCliSock.recv(1024)
        print (message)
        # Extract the filename from the given message
        filename = message.split()[1].partition(b"/")[2]
        print ("Full File Name: ", filename)
        fileExist = False
        filetouse = b"/" + filename
        print ("File to use:", filetouse)
        try:
            # Check wether the file exist in the cache
            f = open(filetouse[1:], "r")
            outputdata = f.readlines()
            fileExist = True
            # ProxyServer finds a cache hit and generates a response message
            tcpCliSock.send(b"HTTP/1.0 200 OK\r\n")
            tcpCliSock.send(b"Content-Type:text/html\r\n")
            for line in outputdata:
                tcpCliSock.send(bytes(line, 'utf-8'))
                print("Read From Cache")
        # Error handling for file not found in cache
        except IOError:
            if fileExist is False:
                # Create a socket on the proxyserver
                c = socket(AF_INET, SOCK_DGRAM)
                hostn = filename.split(b'/')[0].replace(b"www.",b"",1)
                print("Host Name: ", hostn)
                try:
                    # Connect to the socket to port 80
                    c.connect((hostn, 80))
                    # Create a temporary file on this socket and ask port 80
                    # for the file requested by the client
                    fileobj = c.makefile(mode='rwb')
                    fileobj.write(b"GET http://" + filename + b"\r\nHost: "+hostn+b"\r\n HTTP/1.1\r\nConnection: keep-alive\r\nCache-Control: max-age=0\r\nUpgrade-Insecure-Requests: 1\r\nUser-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36\r\nAccept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8\r\nDNT: 1\r\nAccept-Encoding: gzip, deflate, br\r\nAccept-Language: en-US,en;q=0.9,es-US;q=0.8,es;q=0.7\r\n\r\n")
                    fileobj.flush()
                    # Read the response into buffer
                    print("Waiting for response...")
                    buffer = fileobj.readlines()
                    # Create a new file in the cache for the requested file.
                    # Also send the response in the buffer to client socket and the corresponding file in the cache
                    tmpFile = open(b"./" + filename,"w+b")
                    for line in buffer:
                        tmpFile.write(line)
                        tcpCliSock.send(bytes(line, "utf-8"))
#                     tmpFile.close()
                except error as err:
                    print(err)
                    print("Illegal request")
            else:
                # HTTP response message for file not found
                tcpCliSock.send(b"HTTP/1.0 404 NOT FOUND\r\n")
                tcpCliSock.send(b"Content-Type:text/html\r\n")
                tcpCliSock.send(b"<html><header><title>404: Page Not Found</title></header><body>ERROR 404: PAGE NOT FOUND</body></html>")
    # Close the client and the server sockets
    tcpCliSock.close()
    tcpSerSock.close()

if __name__ == "__main__":
    main()

Running this script on my machine and navigating to my server in Chrome, I get the following output, indicating my local server is receiving a connection from the browser, but that I'm not receiving a response from Google.com.

Ready to serve...
Received a connection from: ('127.0.0.1', 51909)
b'GET /www.google.com HTTP/1.1\r\nHost: localhost:8080\r\nConnection: keep-alive\r\nCache-Control: max-age=0\r\nUpgrade-Insecure-Requests: 1\r\nUser-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36\r\nAccept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8\r\nDNT: 1\r\nAccept-Encoding: gzip, deflate, br\r\nAccept-Language: en-US,en;q=0.9,es-US;q=0.8,es;q=0.7\r\n\r\n'
Full File Name:  b'www.google.com'
File to use: b'/www.google.com'
Host Name:  b'google.com'
Waiting for response...

I don't know where my error is, or if I'm not meeting some protocol (Google isn't the only that doesn't send a response). Any ideas?

Upvotes: 0

Views: 1553

Answers (1)

Steffen Ullrich
Steffen Ullrich

Reputation: 123561

  fileobj.write(b"GET http://" + filename + b"\r\nHost: "+hostn+b"\r\n HTTP/1.1\r\n...

This does not look like a proper HTTP request. It will result in something like this:

  GET http://www.google.com
  Host: google.com
   HTTP/1.1
  ...

which is broken for multiple reasons: no proper HTTP and attempt to use a full URL instead of the relative path when taking to the server (instead of a proxy). Apart from that:

   buffer = fileobj.readlines()

Here you read all the data you can get and thus implicitly expect the server to close the connection after the request is done. But since you are also using HTTP keep-alive the server might just keep the connection open to wait for more requests. You need to properly parse the response header instead in order to know how large the response will be, i.e. check for Content-length, deal with Transfer-Encoding chunked (in case of HTTP/1.1) etc.

Upvotes: 2

Related Questions