Behzad Pirvali
Behzad Pirvali

Reputation: 784

java nio SocketChannel.read does not return -1 to indicate end-of-stream

I am writing a code, which is using NIO/Selector to do web scraping. It works. I do get OP_CONNECT, then I send the GET request, and get the entire html page back. But, after that, I do not get a -1 to know it is finished. I do see , which means the entire page has been sent, but SocketChannel.read does not return -1 to indicate the end of the stream. Would really appreciate any help!

Here is the entire sample code:

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.MalformedURLException;
import java.net.StandardSocketOptions;
import java.net.URL;
import java.nio.ByteBuffer;
import java.nio.channels.SelectionKey;
import java.nio.channels.Selector;
import java.nio.channels.SocketChannel;
import java.util.Iterator;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class HttpClientTest {
    private static final Logger logger = LoggerFactory.getLogger(HttpClientTest.class);
    private static final String BASE_URL_STR = "https://www.youtube.com/channel";
    private static final String CHANNEL_ID = "UCDm6kPZFCoT7altG4WNGy-A";

    private final ByteArrayOutputStream baHtmlPage = new ByteArrayOutputStream();
    private final ByteBuffer buffer = ByteBuffer.allocate(128 * 1024);

    private String htmlPage = null;

    private void startHttpClient() throws InterruptedException {


        // open Selector and ServerSocketChannel by calling the open() method
        try (Selector selector = Selector.open();
                SocketChannel socketChannel = SocketChannel.open()) {

            // check that both of them were successfully opened
            if ((socketChannel.isOpen()) && (selector.isOpen())) {

                // configure non-blocking mode
                socketChannel.configureBlocking(false);
                socketChannel.setOption(StandardSocketOptions.SO_RCVBUF,
                        128 * 1024);
                socketChannel.setOption(StandardSocketOptions.SO_SNDBUF,
                        128 * 1024);
                socketChannel.setOption(StandardSocketOptions.SO_KEEPALIVE,
                        true);
                //socketChannel.setOption(StandardSocketOptions.TCP_NODELAY,
                //      true);

                //socketChannel.connect(new InetSocketAddress(IP, DEFAULT_PORT));
                socketChannel.connect(createSocketAddress(CHANNEL_ID));

                // register the current channel with the given selector
                socketChannel.register(selector, SelectionKey.OP_CONNECT);


                while (true) {
                    // wait for incomming events
                    int num = selector.selectNow();
                    if (num==0) {
                        //Thread.yield();
                        Thread.sleep(2000);
                        System.out.println("sleep: 2 sec");
                        continue;
                    }


                    // there is something to process on selected keys
                    Iterator<SelectionKey> keys = selector.selectedKeys().iterator();
                    while (keys.hasNext()) {
                        SelectionKey key = (SelectionKey) keys.next();

                        // prevent the same key from coming up again
                        keys.remove();

                        if (!key.isValid()) {
                            continue;
                        }

                        if (key.isConnectable() && socketChannel.finishConnect()) {
                            System.out.println("Key: OP_CONNECT");
                            // reset the byte-array
                            baHtmlPage.reset();

                            // Connected --> Send the HTTP request 
                            key.interestOps(SelectionKey.OP_WRITE);

                        } else if (key.isReadable()) {
                            System.out.println("Key: OP_READ");
                            if (readResponse(key)) {
                                logger.info("finished reading, htmlpage:{}", htmlPage);
                            } else {
                                key.interestOps(SelectionKey.OP_READ);
                            }

                            // Once read is done --> we are done
                            //key.interestOps(SelectionKey.OP_WRITE);

                        } else if (key.isWritable()) {
                            System.out.println("Key: OP_WRITE");
                            if (writeHttpRequest(key)) {                            
                                // HTTP request is sent --> Get the response
                                key.interestOps(SelectionKey.OP_READ);
                            }
                        }
                    }

                }
            } else { // if ((serverSocketChannel.isOpen()) && (selector.isOpen())) {
                System.out
                        .println("The server socket channel or selector cannot be opened!");
            }
        } catch (IOException ex) {
            System.err.println(ex);
        }
    }

    private static InetSocketAddress createSocketAddress(String channelID) throws MalformedURLException {
        //String urlStr = BASE_URL_STR + "/" + CHANNEL_ID;  
        String urlStr = "http://www.google.com";  

        URL url = new URL(urlStr);
        String host = url.getHost();  
        int port = url.getPort();  
        if (port == -1) 
            port = 80;

        return new InetSocketAddress(host, port);
    }

    private boolean readResponse(SelectionKey key) throws IOException {
        boolean done = false;
        SocketChannel socketChannel = (SocketChannel) key.channel();

        int numRead = -1;
        do {
            buffer.clear();
            numRead = socketChannel.read(buffer);

            baHtmlPage.write(buffer.array(), 0, numRead);
            System.out.println("Server sent:" + new String(buffer.array(), 0, numRead, "UTF-8") );
        } while(numRead>0);

        if (numRead == -1) {
            System.out.println("Connection closed by: " + socketChannel.getRemoteAddress());
            key.cancel();
            socketChannel.close();
            htmlPage = baHtmlPage.toString("UTF-8");
            done = true;
        }
        return done;
    }

    private boolean writeHttpRequest(SelectionKey key) throws IOException {
        boolean done = false;

        SocketChannel socketChannel = (SocketChannel) key.channel();
        String request = 
                "GET /channel/UCDm6kPZFCoT7altG4WNGy-A HTTP/1.1\r\n" + 
                "Host: www.youtube.com\r\n" +
                "Cache-Control: no-cache\r\n\r\n"; 

        // ISO-8859-1
        ByteBuffer randomBuffer = ByteBuffer.wrap(request.getBytes("UTF-8"));
        int rem = randomBuffer.remaining();
        int num = socketChannel.write(randomBuffer);

        if (rem==num) {
            done = true;
            System.out.printf("Request written:%s\n", request);
        }
        return done;
    }

//  private void doEchoJob(SelectionKey key, byte[] data) {
//
//      SocketChannel socketChannel = (SocketChannel) key.channel();
//      List<byte[]> channelData = keepDataTrack.get(socketChannel);
//      channelData.add(data);
//
//      key.interestOps(SelectionKey.OP_WRITE);
//  }

    public static void main(String[] args) throws InterruptedException {
        HttpClientTest client = new HttpClientTest();
        client.startHttpClient();
    }
}

Upvotes: 0

Views: 1517

Answers (1)

Steffen Ullrich
Steffen Ullrich

Reputation: 123260

You are doing a HTTP/1.1 requests, which has an implicit keep-alive. That means, that the server will not necessary close the connection once the full response is sent, but instead will keep it open for a while in the hope that it will get more requests and thus can save the overhead of another TCP connection setup.

While this helps with performance in the normal case of a browser, it does not help in your case. I would recommend to use HTTP/1.0 instead of HTTP/1.1, so that you don't have to deal with keep-alive or other HTTP/1.1 features like chunked encoding. Apart from that it is recommended to use existing HTTP libraries which deal with all these problems already.

Upvotes: 2

Related Questions