nick_j_white
nick_j_white

Reputation: 644

Http request redirect and cookie handling in node.js

I am writing an application in node.js which will need to make http requests to external websites, and will need to include the ability to login and manage the cookies from these sites so that the session id is always present in any subsequent request headers.

When performing similar tasks in Java, this was straightforward using the java.net.CookieHandler & java.net.CookieManager, together with java.net.HttpURLConnection to make the requests (I can provide sample code for this if it's useful, but don't want to confuse this thread too much for now since the focus should be on the node.js implementation): every time a request is made, the cookie is correctly updated and maintained based on the Set-Cookie response headers as expected.

For the node.js application I am attempting to use restler ^3.2.2 for http requests, and cookie-manager ^0.0.19. This seems to require manually setting the cookie in the request header when sending each request, and updating the cookie based on the response headers whenever a request is completed. Sample code for a login request:

var _ = require('lodash'),
    restler = require('restler'),
    CM = require('cookie-manager'),
    cm = new CM();

// Build a login POST to host1, sending whatever cookie string cm.prepare returns for that host.
// Note: the `var url` statement ends with ';', so restlerOptions on the following
// line is assigned without a `var` declaration.
var url = 'https://' + host1 + '/page';
    restlerOptions = {
        //Set the cookie for host1 in the request header
        headers : {'Cookie': cm.prepare( host1 )},
        // NOTE(review): restler documents this option as `followRedirects` (camelCase);
        // confirm whether this all-lowercase key is recognised at all.
        followredirects: true,
        timeout: 5000,
        multipart: false,
        //post vars defined elsewhere for the request
        data: postVars
        };

//Various callback functions defined elsewhere for each request
// Send the POST; 'complete' stores response cookies and then hands off to successcallback,
// while 'timeout' and 'error' are forwarded to their own callbacks.
restler.post(url,restlerOptions).on('complete',function(data,res){
    if (res.headers["set-cookie"] != null){
        //Loop through response cookies and add to cookie store for host1
        // Each Set-Cookie header is cut down to the text before its first ';'
        // and the pieces are joined with ';' into one string before being stored.
        // NOTE(review): the "" passed as the third argument to _.map looks unintended — confirm.
        cm.store(
            host1,_.map(res.headers["set-cookie"], function(cookie){
                    return cookie.split(';')[0];
                }, "").join(";")
        );
    }
    successcallback(data,res);
}).on("timeout",function(){
    timeoutcallback();
}).on("error",function(err){
    errorcallback(err);
});

The problem I am facing is with redirects: sometimes the login pages for the 3rd party sites involve redirecting to a new host/sub domain etc. What should happen is follow-up GET requests should be made to the new host, and a new cookie should be managed for the redirect host. The final redirect should return to the original host, and the original cookie should still be in use. Example request headers for this process:

Req1 headers:
POST https://host1/page HTTP/1.1
Host: host1
Cookie: host1-cookie0=val0 //Cookie already present for host

Req1 response:
HTTP/1.1 302 Found
Set-cookie: host1-cookie1=val1
Set-cookie: host1-cookie2=val2
Location: https://host2/page

Req2 headers:
GET https://host2/page HTTP/1.1
Host: host2
<no cookie> //No cookie set yet for new host

Req2 response:
HTTP/1.1 302 Found
Set-cookie: host2-cookie1=val3
Set-cookie: host2-cookie2=val4
Location: https://host1/result

Req3 headers:
GET https://host1/result HTTP/1.1
Host: host1
Cookie: host1-cookie0=val0; host1-cookie1=val1; host1-cookie2=val2; //Cookies from Req1 response appended for host1

Req3 response:
HTTP/1.1 200 OK
Set-cookie: host1-cookie3=val5
Set-cookie: host1-cookie4=val6

Req4 headers:
GET https://host1/newpage HTTP/1.1
Host: host1
Cookie: host1-cookie0=val0; host1-cookie1=val1; host1-cookie2=val2; host1-cookie3=val5; host1-cookie4=val6 //All cookies set as expected for host1

I am seeing 3 issues:

  1. redirects are followed with a POST
  2. the same cookie that is set with the original request header is used in all follow up requests, regardless of follow-up host changes or any cookies set from redirect response headers (seems they only get set once a response with 200 status is received.)
  3. The cookie setting code I am using above is supposed to iterate through all "Set-cookie" headers, and set the first part of each string in the cookie. However, it only seems to be doing this for the first "Set-Cookie" header it encounters.

Example below:

Req1 headers:
POST https://host1/page HTTP/1.1
Host: host1
Cookie: host1-cookie0=val0 //Cookie already present for host

Req1 response:
HTTP/1.1 302 Found
Set-cookie: host1-cookie1=val1
Set-cookie: host1-cookie2=val2
Location: https://host2/page

Req2 headers:
POST https://host2/page HTTP/1.1 //This should be GET not POST!
Host: host2
Cookie: host1-cookie0=val0 //This should not be set!

Req2 response:
HTTP/1.1 302 Found
Set-cookie: host2-cookie1=val3
Set-cookie: host2-cookie2=val4
Location: https://host1/result

Req3 headers:
POST https://host1/result HTTP/1.1 //This should be GET not POST!
Host: host1
Cookie: host1-cookie0=val0 //Req1 response cookies not set!

Req3 response:
HTTP/1.1 200 OK
Set-cookie: host1-cookie3=val5
Set-cookie: host1-cookie4=val6

Req4 headers:
GET https://host1/newpage HTTP/1.1
Host: host1
Cookie: host1-cookie0=val0; host1-cookie3=val5; //Only first cookie from Req3 response is appended

Is this a limitation of restler/cookie-manager libraries being used this way, or does the approach need to be smarter (e.g. not using automatic redirects, and manually sending follow up requests as GET with a new cookie)? Although the application being built is constrained to run in node.js, there is no constraint on the libraries being used, so if it is sensible to switch to other http/cookie management libraries I am open to doing this.

Upvotes: 0

Views: 2343

Answers (1)

nick_j_white
nick_j_white

Reputation: 644

1) To prevent automatic redirect follow-up with POST, I had to add "followRedirects: false" to the initial request options, and then manually resubmit a GET request if the response code was one of [301,302,303].

2) Since redirects are being done manually, I was able to manually set the cookie based on the new domain on each request.

3) There was no need to extract values from each "Set-cookie" header and append them into a single string - cookie-manager does this automatically.

New code which works (combining fixes 1,2,3 above):

var _ = require('lodash'),
    restler = require('restler'),
    CM = require('cookie-manager'),
    cm = new CM();

// Log in to host1 with a POST. Automatic redirects are switched off so that a
// 301/302/303 response can be handed to redirectCallback, which re-requests the
// Location URL with GET.
// Both variables are declared in one `var` statement so restlerOptions does not
// leak as an implicit global.
var url = 'https://' + host1 + '/page',
    restlerOptions = {
        //Set the cookie for host1 in the request header
        headers : {'Cookie': cm.prepare( host1 )},
        //Redirects are followed manually in the 'complete' handler below
        followRedirects: false,
        timeout: 5000,
        multipart: false,
        //post vars defined elsewhere for the request
        data: postVars
    };

//Various callback functions defined elsewhere for each request
restler.post(url,restlerOptions).on('complete',function(data,res){
    //Only touch the cookie store when the response actually set cookies
    if (res.headers["set-cookie"] != null){
        cm.store(host1, res.headers["set-cookie"]);
    }
    if ([301,302,303].indexOf(res.statusCode) > -1){
        //Follow the redirect ourselves instead of letting restler repeat the POST
        redirectCallback(res.headers["location"],successcallback,errorcallback,timeoutcallback);
    } else {
        successcallback(data);
    }
}).on("timeout",function(){
    timeoutcallback();
}).on("error",function(err){
    errorcallback(err);
});

// Follows one redirect hop manually: issues a GET to `url` with the cookies held
// for that URL's host, stores any cookies the response sets for that host, and
// either recurses on a further 301/302/303 or passes the body to successcallback.
//
//   url             - absolute URL taken from the previous response's Location header
//   successcallback - called with the response data once a non-redirect response arrives
//   errorcallback   - called with the error when the request emits "error"
//   timeoutcallback - called with no arguments when the request emits "timeout"
function redirectCallback(url,successcallback,errorcallback,timeoutcallback){
    //getHostFromUrl strips page/queryparams from URL - cookie manager doesn't seem to do this automatically
    var host = getHostFromUrl(url),
        options = {
            //Set the cookie for new host in the request header
            headers : {'Cookie': cm.prepare( host )},
            followRedirects: false,
            timeout: 5000
        };

    //Use the options built for this hop, so the original request's cookie and
    //POST data are not sent to the redirect target
    restler.get(url,options).on('complete',function(data,res){
        //Only touch the cookie store when the response actually set cookies
        if (res.headers["set-cookie"] != null){
            cm.store(host, res.headers["set-cookie"]);
        }
        if ([301,302,303].indexOf(res.statusCode) > -1){
            redirectCallback(res.headers["location"],successcallback,errorcallback,timeoutcallback);
        } else {
            successcallback(data);
        }
    }).on("timeout",function(){
        timeoutcallback();
    }).on("error",function(err){
        errorcallback(err);
    });
}

Upvotes: 0

Related Questions