Mandeep Singh

Reputation: 8234

Node.js process out of memory

I have written a service to download files from an external partner site. There are around 1000 files of 1 MB each. My process runs out of memory every time it reaches around 800 files.

How should I identify the root cause?

var request = require('sync-request');
var fs = require('graceful-fs');

function find_starting_url(xyz_category) {
  feed_url = "<url>";
  response = request("GET", feed_url).getBody().toString();
  response = JSON.parse(response);
  apiListings = response['apiGroups']['affiliate']['apiListings'];
  starting_url = apiListings[xyz_category]['availableVariants']['v0.1.0']['get'];
  return starting_url;
}

function get_all_files(feed_category, count, next_url, retry_count) {
  var headers = {
    'Id': '<my_header>',
    'Token': '<my key>'
  };

  console.log(Date());
  console.log(count);

  if (next_url) {
    products_url = next_url;
  } else {
    products_url = find_starting_url(feed_category);
  }

  try {
    var products = request("GET", products_url, {"headers": headers}).getBody().toString();
    var parsed = JSON.parse(products);
    var home = process.env.HOME;
    var fd = fs.openSync(home + "/data/abc/xyz/" + feed_category + "/" + count + ".json", 'w');
    fs.writeSync(fd, products);
    fs.closeSync(fd);
    next_url = parsed['nextUrl'];
    count++;
    if (next_url) {
      get_all_files(feed_category, count, next_url);
    }
  } catch (e) {
    if (retry_count >= 5) {
      console.log("TERRIBLE ENDING!!!", e);
    } else {
      retry_count++;
      console.log("some error... retrying ..", e);
      get_all_files(feed_category, count, next_url, retry_count);
    }
  }
}

var feed_category = process.argv[2];
get_all_files(feed_category, 1);

Upvotes: 0

Views: 160

Answers (2)

jfriend00

Reputation: 708206

You're calling a synchronous function recursively, so every request and all the data from each request are retained in memory in your local variables until all of the requests are done and the recursive calls can finally unwind and release each set of local variables. That requires a monstrous amount of memory (as you have discovered).

It would be best to restructure your code so that the current request is processed and written to disk, and nothing from that request is retained when it moves on to the next request. The simplest way to do that is to use a while loop instead of a recursive call. In pseudo-code:

initialize counter
while (more to do) {
    process the next item
    increment counter
}

I don't understand the details of what your code is trying to do well enough to propose a rewrite, but hopefully you can see how you can replace the recursion with the type of non-recursive structure above.
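As a minimal sketch of that loop shape (reusing the question's find_starting_url, sync-request and graceful-fs calls, and the nextUrl field; retry handling omitted), it might look roughly like this:

var request = require('sync-request');
var fs = require('graceful-fs');

function get_all_files(feed_category) {
  var headers = { 'Id': '<my_header>', 'Token': '<my key>' };
  var count = 1;
  var next_url = find_starting_url(feed_category);

  // keep looping while the API reports another page
  while (next_url) {
    var body = request("GET", next_url, { "headers": headers }).getBody().toString();
    var parsed = JSON.parse(body);
    fs.writeFileSync(process.env.HOME + "/data/abc/xyz/" + feed_category + "/" + count + ".json", body);
    next_url = parsed['nextUrl'];
    count++;
    // body and parsed are overwritten on the next iteration, so only one
    // response is reachable at a time instead of one per pending call
  }
}

Because each iteration replaces the previous response instead of stacking a new call frame on top of it, the heap stays roughly flat no matter how many pages there are.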

Upvotes: 4

Brian Shamblen

Reputation: 4703

It's because you are making a recursive call to the get_all_files function, and the response body (your products variable) is kept in memory for every single execution, since each child call has to complete before the parent call's memory can be released.
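If you want to watch this happening (not part of this answer, just a common diagnostic), log the heap size at the top of get_all_files and you should see it climb with each recursive call:

// inside get_all_files, before the request is made
console.log(count, Math.round(process.memoryUsage().heapUsed / 1024 / 1024) + " MB heapUsed");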

Upvotes: 1
