Reputation: 539
I have the below data coming from a URL. I want to transform it into newline-delimited JSON using whichever method gives me the lowest response time. It is a lot of data, and hence streaming made sense to me.
03 Jan 2000,30.00,30.89,29.31,30.89,12487930,,,,
04 Jan 2000,32.30,32.35,29.40,30.38,7095350,,,,
05 Jan 2000,29.90,30.10,28.00,29.20,5044130,,,,
06 Jan 2000,29.50,30.34,29.20,29.41,3988780,,,,
07 Jan 2000,29.20,29.48,27.93,28.50,6264940,,,,
I started with the idea of using res.write and res.end. I have used both the axios and request packages for experimental purposes.
Attempt #1 Status: Successful, TAT: 1.25s
function streamPrice(req, res) {
  const { url } = req.query;
  res.set({
    "Cache-Control": "no-store, must-revalidate",
    "Connection": "keep-alive",
    "Transfer-Encoding": "chunked"
  });
  axios(url)
    .then(response => {
      const { data } = response;
      const rawdata = data.split("\n");
      const opLen = rawdata.length;
      for (let i = opLen - 1; i >= 0; i--) {
        const cols = rawdata[i].split(",");
        const elem = {
          date: parseDateEOD(cols[0]),
          open: +cols[1],
          high: +cols[2],
          low: +cols[3],
          close: +cols[4],
          volume: +cols[5],
          bonus: cols[6],
          dividend: cols[7],
          rights: cols[8],
          split: cols[9],
          absoluteChange: 0,
          percentChange: 0
        };
        res.write(JSON.stringify(elem) + "\n\n");
        res.flush();
      }
      res.end(null);
      return;
    })
    .catch(e => {
      logger.info(e);
      return;
    });
}
But I am not happy with the response time of 1.25s and want it to be in milliseconds. Merely fetching the data from the URL using request takes 45ms, so I would like to send the transformed data back to the client in under 250-300ms. Someone suggested that piping increases speed, so here is my second attempt, using piping:
Attempt #2 Status: Successful but no transformation, TAT: 250ms
const streamPrice = function (req, res) {
  const { url } = req.query;
  const reqs = request(url);
  reqs.pipe(res);
};
Now I wanted to transform each record into newline-delimited JSON, so I wrote the below:
Attempt #3 Status: Unsuccessful, TAT: 250ms
const streamPrice = function (req, res) {
  const { url } = req.query;
  const reqs = request(url, function (error, response, body) {
    const rawdata = body.split("\n");
    const opLen = rawdata.length;
    let result;
    for (let i = opLen - 1; i >= 0; i--) {
      const cols = rawdata[i].split(",");
      const elem = JSON.stringify({
        date: parseDateEOD(cols[0]),
        open: +cols[1],
        high: +cols[2],
        low: +cols[3],
        close: +cols[4],
        volume: +cols[5],
        bonus: cols[6],
        dividend: cols[7],
        rights: cols[8],
        split: cols[9],
        absoluteChange: 0,
        percentChange: 0
      }) + "\n\n";
      if (i === 0) {
        result = elem;
      } else {
        result += elem;
      }
    }
    return result;
  });
  reqs.pipe(res);
};
But the resultant data is still the untransformed data set.
A few queries:
1. What am I doing wrong?
2. Should I use a transform stream before piping out to res? If yes, can you please help me with a transform stream that would convert the chunks into the below newline-delimited JSON format:
{"date":"2017-05-12T00:00:00.000Z","open":1562,"high":1562.4,"low":1540.1,"close":1548.85,"volume":28485,"bonus":"","dividend":"","rights":"","split":"","absoluteChange":0,"percentChange":0}
{"date":"2017-05-11T00:00:00.000Z","open":1572.8,"high":1580,"low":1555.15,"close":1561.7,"volume":336193,"bonus":"","dividend":"","rights":"","split":"","absoluteChange":0,"percentChange":0}
{"date":"2017-05-10T00:00:00.000Z","open":1530.95,"high":1575,"low":1528.55,"close":1572.8,"volume":74108,"bonus":"","dividend":"","rights":"","split":"","absoluteChange":0,"percentChange":0}
Please let me know in case you need any further information.
Upvotes: 0
Views: 490
Reputation: 21811
Your attempts start with splitting the full response data into lines with data.split('\n'), which means the response has to be read twice: once for line splitting and once for data-point parsing.
You should pipe the response to readline:
const request = require('request');
const readline = require('readline');

const streamPrice = function (req, res) {
  const { url } = req.query;

  function transformLine(rawdata) {
    // parse one CSV line into an object, as in your attempts
    const cols = rawdata.split(",");
    const elem = {
      date: parseDateEOD(cols[0]),
      open: +cols[1],
      high: +cols[2],
      low: +cols[3],
      close: +cols[4],
      volume: +cols[5],
      bonus: cols[6],
      dividend: cols[7],
      rights: cols[8],
      split: cols[9],
      absoluteChange: 0,
      percentChange: 0
    };
    res.write(JSON.stringify(elem) + "\n\n");
  }

  request.get(url)
    // response is an instance of http.IncomingMessage, a readable stream
    .on('response', function (response) {
      readline.createInterface({ input: response })
        .on('line', transformLine)
        // end the response once the input stream has been fully read
        .on('close', () => res.end());
    });
};
Your individual lines are not that long, so JSON.stringify should be no problem.
Since you say identifying datasets may be more involved than reading lines, you could implement your own Transform stream:
const { Transform } = require('stream');

class ResponseToJSON extends Transform {
  constructor (options) {
    super(options);
    this.cache = ''; // holds the yet incomplete last line of a chunk
  }

  _transform (data, encoding, callback) {
    // note: encoding is 'buffer' for Buffer chunks, so decode with the default utf8
    const chunk = data.toString();
    // transform all yet complete lines
    for (const line of this.splitData(chunk)) {
      this.transformLine(line);
    }
    callback();
  }

  splitData (chunk) {
    // prepend the chunk with the cache content
    chunk = this.cache + chunk;
    // apply logic to identify single dataset endings and
    // write the result into an array; here: split on newlines
    const lines = chunk.split('\n');
    // the last line may be yet incomplete
    this.cache = lines.pop();
    return lines;
  }

  transformLine (rawData) {
    const cols = rawData.split(',');
    // build elem from cols as shown in the question's attempts
    const elem = { date: parseDateEOD(cols[0]), open: +cols[1] /* , ...remaining fields */ };
    // push out to the receiving stream
    this.push(JSON.stringify(elem) + '\n\n');
  }

  _flush (callback) {
    // make sure the last cached line is transformed
    if (this.cache.length > 0) {
      this.transformLine(this.cache);
    }
    callback();
  }
}
const streamPrice = function (req, res) {
  const { url } = req.query;
  const transform = new ResponseToJSON();
  transform.pipe(res);
  request.get(url).on('response', function (response) {
    response.pipe(transform);
  });
};
This solution may slow things down again a bit, since the line splitting again works on strings. If you see a way to identify the datasets directly on the buffer, that would certainly be more efficient.
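For illustration, here is a minimal sketch of what buffer-level splitting could look like, assuming each dataset is still terminated by a "\n" (byte 0x0a). BufferToJSON and its transformLine are hypothetical names, and parseDateEOD is the helper from your question:
const { Transform } = require('stream');

// Sketch only: identify datasets directly on the Buffer, so that only
// complete lines are ever decoded to strings.
class BufferToJSON extends Transform {
  constructor (options) {
    super(options);
    this.cache = Buffer.alloc(0); // incomplete trailing dataset
  }

  _transform (data, encoding, callback) {
    // prepend the bytes left over from the previous chunk
    let buf = Buffer.concat([this.cache, data]);
    let nl;
    // Buffer#indexOf scans raw bytes; 0x0a is "\n"
    while ((nl = buf.indexOf(0x0a)) !== -1) {
      this.transformLine(buf.slice(0, nl).toString());
      buf = buf.slice(nl + 1);
    }
    this.cache = buf; // keep the incomplete tail for the next chunk
    callback();
  }

  transformLine (rawData) {
    if (rawData.length === 0) return;
    const cols = rawData.split(',');
    // build elem from cols as in the question's attempts
    const elem = { date: parseDateEOD(cols[0]), open: +cols[1] /* , ...remaining fields */ };
    this.push(JSON.stringify(elem) + '\n\n');
  }

  _flush (callback) {
    // transform whatever is left in the cache
    this.transformLine(this.cache.toString());
    callback();
  }
}
Wiring it up is the same as above: pipe the response into new BufferToJSON() and pipe that into res.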
Upvotes: 1