Reputation: 8234
I have written a service to download files from an external partner site. There are around 1000 files of 1 MB each. My process is going out of memory every time I reach around 800 files.
How should I identify the root cause ?
var request = require('sync-request');
var fs = require('graceful-fs')
function find_starting_url(xyz_category){
feed_url = "<url>"
response = request("GET", feed_url).getBody().toString()
response = JSON.parse(response)
apiListings = response['apiGroups']['affiliate']['apiListings']
starting_url = apiListings[xyz_category]['availableVariants']['v0.1.0']['get']
return starting_url
}
function get_all_files(feed_category, count, next_url, retry_count){
var headers = {
'Id': '<my_header>',
'Token': '<my key>'
}
console.log(Date())
console.log(count)
if(next_url){
products_url = next_url
}
else{
products_url = find_starting_url(feed_category)
}
try{
var products = request("GET", products_url, {"headers": headers}).getBody().toString()
var parsed = JSON.parse(products)
var home = process.env.HOME
var fd = fs.openSync(home + "/data/abc/xyz/" + feed_category + "/" + count + ".json", 'w')
fs.writeSync(fd, products)
fs.closeSync(fd)
next_url = parsed['nextUrl']
count++;
if(next_url){
get_all_files(feed_category, count, next_url)
}
}catch(e){
if(retry_count >= 5){
console.log("TERRIBLE ENDING!!!", e)
}else{
retry_count++;
console.log("some error... retrying ..", e)
get_all_files(feed_category, count, next_url, retry_count)
}
}
}
var feed_category = process.argv[2]
get_all_files(feed_category, 1)
Upvotes: 0
Views: 160
Reputation: 708206
You're calling a synchronous function recursively so every single request you have and all the data from each request is retained in memory in your local variables until all of the requests are done and all the recursive calls can unwind and then finally free all the sets of local variables. This requires monster amounts of memory (as you have discovered).
It would be best to restructure your code so that the current request is processed, written to disk and then nothing from that request is retained when it goes onto the next request. The simplest way to do that would be to use a while loop instead of a recursive call. In pseudo code:
initialize counter
while (more to do) {
process the next item
increment counter
}
I don't understand the details of what your code is trying to do well enough to propose a rewrite, but hopefully you can see how you can replace the recursion with the type of non-recursive structure above.
Upvotes: 4
Reputation: 4703
It's because you are performing a recursive call to the get_all_files
function and it's keeping the body
variable in memory for every single execution, since every child execution needs to be completed before the memory is released.
Upvotes: 1