AdamPat
AdamPat

Reputation: 101

Node Async ReadStream from SFTP connection

So I'm creating a class and ultimately want to create a method that takes a file on an SFTP server and produces a readstream that can be piped into other streams / functions. I'm most of the way there, except my readStream is acting strangely. Here's the relevant code:

const Client = require('ssh2').Client,
      Readable = require('stream').Readable,
      async = require('async');

/**
 * Class Definition stuff
 * ......
 */

/**
 * Returns a Readable whose _read connects over SSH, opens `get` through SFTP
 * and pushes the file's contents using 512-byte read requests.
 *
 * NOTE(review): Readable invokes _read each time the consumer wants more
 * data, so every call below registers another 'ready' handler, calls
 * connect() again and restarts the read loop with bytesRead = 0.
 *
 * @param {string} get - Remote path passed to sftp.open (presumably a string;
 *     the usage example passes '/path/file.csv').
 * @returns {Readable} The stream; nothing happens until it is first read.
 */
getStream(get) {
    const self = this;
    const rs = new Readable;
    rs._read = function() {
        // `this` is the Readable here; keep a handle for the nested callbacks.
        const read = this;
        self.conn.on('ready', function(){
            self.conn.sftp(function(err,sftp) {
                // Only returns the error to the sftp() caller; the stream is
                // never told about it and never ends.
                if(err) return err;

                // NOTE(review): the `err` from open and fstat is not checked.
                sftp.open(get, 'r', function(err, fd){
                    sftp.fstat(fd, function(err, stats) {

                        // One buffer sized to the whole remote file; each read
                        // request asks for chunkSize bytes.
                        let bufferSize = stats.size,
                            chunkSize = 512,//bytes
                            buffer = new Buffer(bufferSize),
                            bytesRead = 0;

                        async.whilst(
                            // Keep looping until the whole file size is read.
                            function () {
                                return bytesRead < bufferSize;
                            },
                            function (done) {
                                // bytesRead is passed twice: as the 3rd and 5th
                                // arguments (presumably buffer offset and file
                                // position — confirm against the ssh2 SFTP docs).
                                sftp.read(fd, buffer, bytesRead, chunkSize, bytesRead,
                                function (err, bytes, buff) {
                                    if (err) return done(err);
                                    // console.log(buff.toString('utf8'));
                                    read.push(buff);
                                    bytesRead += bytes;
                                    done();
                                });
                            },
                            // Runs once the loop finishes or a read fails: logs
                            // any error, ends the stream, closes the handle.
                            function (err) {
                                if (err) console.log(err);
                                read.push(null);
                                sftp.close(fd);
                            }
                        );

                    });
                });
            });
        }).connect(self.connectionObj);
    }
    return rs;

}

Elsewhere, I would call this method like so:

// Build the client, then chain the remote file's stream through each stage.
const sftp = new SFTP(credentials);

// No semicolon between the .pipe() calls: each one chains off the previous
// call's return value.
sftp.getStream('/path/file.csv')
    .pipe(toStuff)
    .pipe(toOutput);

Long story short: during the sftp.read operation, read.push(buff) keeps pushing the same first part of the file over and over. However, when I console.log(buff), it correctly prints the full file.

So I'm scratching my head wondering what I'm doing wrong with the read stream that it's only pushing the beginning of the file and not continuing on to the next part of the buffer.

Here's the docs on SSH2 SFTP client: https://github.com/mscdex/ssh2-streams/blob/master/SFTPStream.md

I used this SO question as inspiration for what I wrote above: node.js fs.read() example

This is similar/related: Reading file from SFTP server using Node.js and SSH2

Upvotes: 1

Views: 1667

Answers (1)

AdamPat
AdamPat

Reputation: 101

OK, after a lot of trouble, I realized I was making a couple of mistakes. First, the _read function is called every time the stream is ready to read more data, which means the SFTP connection was being started every time _read was called. This also meant the sftp.read() function was starting over each time, resetting the starting point back to the beginning.

I needed a way to first set up the connection, then read and stream the file data, so I chose the library noms. Here's the final code if anyone is interested:

getStream (get) {
    const self = this;

    let connection,
        fileData,
        buffer,
        totalBytes = 0,
        bytesRead = 0;

    return nom(
        // _read function
        function(size, next) {
            const read = this;

            // Check if we're done reading
            if(bytesRead === totalBytes) {
                connection.close(fileData);
                connection.end();
                self.conn.end();
                console.log('done');
                return read.push(null);
            }

            // Make sure we read the last bit of the file
            if ((bytesRead + size) > totalBytes) {
                size = (totalBytes - bytesRead);
            }

            // Read each chunk of the file
            connection.read(fileData, buffer, bytesRead, size, bytesRead,
                function (err, byteCount, buff, pos) {
                    // console.log(buff.toString('utf8'));
                    // console.log('reading');
                    bytesRead += byteCount;
                    read.push(buff);
                    next();
                }
            );
      },
      // Before Function
      function(start) {
          // setup the connection BEFORE we start _read
          self.conn.on('ready', function(){
              self.conn.sftp(function(err,sftp) {
                  if(err) return err;
                  sftp.open(get, 'r', function(err, fd){
                      sftp.fstat(fd, function(err, stats) {
                          connection = sftp;
                          fileData = fd;
                          totalBytes = stats.size;
                          buffer = new Buffer(totalBytes);
                          console.log('made connection');
                          start();
                      });
                  });
              });
          }).connect(self.connectionObj);
      })
}

Always looking for feedback. This doesn't run quite as fast as I'd hope, so let me know if you have ideas on speeding up the stream.

Upvotes: 1

Related Questions