maudulus
maudulus

Reputation: 11035

Promise Resolving before Google Cloud Bucket Upload

I am writing some code that loops over a CSV and creates a JSON file based on the CSV. Included in the JSON is an array named photos, which is to contain the returned urls for the images that are being uploaded to Google Cloud Storage within the function. However, having the promise wait for the uploads to finish has me stumped, since everything is running asynchronously, and finishes off the promise and the JSON compilation prior to finishing the bucket upload and returning the url. How can I make the promise resolve after the urls have been retrieved and added to currentJSON.photos?

const csv=require('csvtojson')
const fs = require('fs');
const {Storage} = require('@google-cloud/storage');
var serviceAccount = require("./my-firebase-storage-spot.json");
const testFolder = './Images/';
var csvFilePath = './Inventory.csv';

var dirArr = ['./Images/Subdirectory-A','./Images/Subdirectory-B','./Images/Subdirectory-C'];
var allData = [];

csv()
.fromFile(csvFilePath)
.subscribe((json)=>{
  return new Promise((resolve,reject)=>{
    for (var i in dirArr ) {
      if (json['Name'] == dirArr[i]) {

        var currentJSON = {
          "photos" : [],
        };         

        fs.readdir(testFolder+json['Name'], (err, files) => {
          files.forEach(file => {
            if (file.match(/.(jpg|jpeg|png|gif)$/i)){
              var imgName = testFolder + json['Name'] + '/' + file;
              bucket.upload(imgName, function (err, file) {
                if (err) throw new Error(err);
                //returned uploaded img address is found at file.metadata.mediaLink
                currentJSON.photos.push(file.metadata.mediaLink);
              });              
            }else {
              //do nothing
            }
          });
        });
        allData.push(currentJSON);
      }
    }

    resolve(); 
  })
},onError,onComplete);

function onError() {
  // console.log(err)
}
function onComplete() {
  console.log('finito');
}

I've tried moving the resolve() around, and also tried placing the uploader section into the onComplete() function (which created new promise-based issues).

Upvotes: 5

Views: 1284

Answers (4)

Rohit Nishad
Rohit Nishad

Reputation: 2755

You are looking for this library ELT.

You can read rows from CSV in parallel and process them in parallel rather than one by one.

I have tried to explain the lines in the code below. Hopefully, it makes sense.

const etl = require("etl");
const fs = require("fs");
const csvFilePath = `${__dirname }/Inventory.csv`;
const testFolder = "./Images/";

const dirArr = [
  "./Images/Subdirectory-A",
  "./Images/Subdirectory-B",
  "./Images/Subdirectory-C"
];

fs.createReadStream(csvFilePath)
  .pipe(etl.csv()) // parse the csv file
  .pipe(etl.collect(10)) // this could be any value depending on how many you want to do in parallel.
  .pipe(etl.map(async items => {
    return Promise.all(items.map(async item => { // Iterate through 10 items
      const finalResult = await Promise.all(dirArr.filter(i => i === item.Name).map(async () => { // filter the matching one and iterate
        const files = await fs.promises.readdir(testFolder + item.Name); // read all files
        const filteredFiles = files.filter(file => file.match(/\.(jpg|jpeg|png|gif)$/i)); // filter out only images
        const result = await Promise.all(filteredFiles).map(async file => {
          const imgName = `${testFolder}${item.Name}/${file}`;
          const bucketUploadResult = await bucket.upload(imgName); // upload image
          return bucketUploadResult.metadata.mediaLink;
        });
        return result; // This contains all the media link for matching files
      }));
      // eslint-disable-next-line no-console
      console.log(finalResult); // Return arrays of media links for files
      return finalResult;
    }));
  }))
  .promise()
  .then(() => console.log("finsihed"))
  .catch(err => console.error(err));

Upvotes: 0

dmitrydwhite
dmitrydwhite

Reputation: 826

Here's a way to do it where we extract some of the functionality into some separate helper methods, and trim down some of the code. I had to infer some of your requirements, but this seems to match up pretty closely with how I understood the intent of your original code:

const csv=require('csvtojson')
const fs = require('fs');
const {Storage} = require('@google-cloud/storage');
var serviceAccount = require("./my-firebase-storage-spot.json");
const testFolder = './Images/';
var csvFilePath = './Inventory.csv';

var dirArr = ['./Images/Subdirectory-A','./Images/Subdirectory-B','./Images/Subdirectory-C'];
var allData = [];

// Using nodejs 'path' module ensures more reliable construction of file paths than string manipulation:
const path = require('path');

// Helper function to convert bucket.upload into a Promise
// From other responses, it looks like if you just omit the callback then it will be a Promise
const bucketUpload_p = fileName => new Promise((resolve, reject) => {
  bucket.upload(fileName, function (err, file) {
    if (err) reject(err);

    resolve(file);
  });
});

// Helper function to convert readdir into a Promise
// Again, there are other APIs out there to do this, but this is a rl simple solution too:
const readdir_p = dirName => new Promise((resolve, reject) => {
  fs.readdir(dirName, function (err, files) {
    if (err) reject(err);

    resolve(files);
  });
});

// Here we're expecting the string that we found in the "Name" property of our JSON from "subscribe".
// It should match one of the strings in `dirArr`, but this function's job ISN'T to check for that,
// we just trust that the code already found the right one.
const getImageFilesFromJson_p = jsonName => new Promise((resolve, reject) => {
  const filePath = path.join(testFolder, jsonName);

  try {
    const files = await readdir_p(filePath);

    resolve(files.filter(fileName => fileName.match(/\.(jpg|jpeg|png|gif)$/i)));
  } catch (err) {
    reject(err);
  }
});

csv()
.fromFile(csvFilePath)
.subscribe(async json => {
  // Here we appear to be validating that the "Name" prop from the received JSON matches one of the paths that
  // we're expecting...?  If that's the case, this is a slightly more semantic way to do it.
  const nameFromJson = dirArr.find(dirName => json['Name'] === dirName);

  // If we don't find that it matches one of our expecteds, we'll reject the promise.
  if (!nameFromJson) {
    // We can do whatever we want though in this case, I think it's maybe not necessarily an error:
    // return Promise.resolve([]);
    return Promise.reject('Did not receive a matching value in the Name property from \'.subscribe\'');
  }

  // We can use `await` here since `getImageFilesFromJson_p` returns a Promise
  const imageFiles = await getImageFilesFromJson_p(nameFromJson);
  // We're getting just the filenames; map them to build the full path
  const fullPathArray = imageFiles.map(fileName => path.join(testFolder, nameFromJson, fileName));

  // Here we Promise.all, using `.map` to convert the array of strings into an array of Promises;
  // if they all resolve, we'll get the array of file objects returned from each invocation of `bucket.upload`
  return Promise.all(fullPathArray.map(filePath => bucketUpload_p(filePath)))
    .then(fileResults => {
      // So, now we've finished our two asynchronous functions; now that that's done let's do all our data
      // manipulation and resolve this promise

      // Here we just extract the metadata property we want
      const fileResultsMediaLinks = fileResults.map(file => file.metadata.mediaLink);

      // Before we return anything, we'll add it to the global array in the format from the original code
      allData.push({ photos: fileResultsMediaLinks });

      // Returning this array, which is the `mediaLink` value from the metadata of each of the uploaded files.
      return fileResultsMediaLinks;
    })
}, onError, onComplete);

Upvotes: 0

Ashish Modi
Ashish Modi

Reputation: 7770

The problem is the your code is not waiting in your forEach. I would highly recommend to look for stream and try to do things in parallel as much as possible. There is one library which is very powerful and does that job for you. The library is etl.

You can read rows from csv in parallel and process them in parallel rather than one by one.

I have tried to explain the lines in the code below. Hopefully it makes sense.

const etl = require("etl");
const fs = require("fs");
const csvFilePath = `${__dirname }/Inventory.csv`;
const testFolder = "./Images/";

const dirArr = [
  "./Images/Subdirectory-A",
  "./Images/Subdirectory-B",
  "./Images/Subdirectory-C"
];

fs.createReadStream(csvFilePath)
  .pipe(etl.csv()) // parse the csv file
  .pipe(etl.collect(10)) // this could be any value depending on how many you want to do in parallel.
  .pipe(etl.map(async items => {
    return Promise.all(items.map(async item => { // Iterate through 10 items
      const finalResult = await Promise.all(dirArr.filter(i => i === item.Name).map(async () => { // filter the matching one and iterate
        const files = await fs.promises.readdir(testFolder + item.Name); // read all files
        const filteredFiles = files.filter(file => file.match(/\.(jpg|jpeg|png|gif)$/i)); // filter out only images
        const result = await Promise.all(filteredFiles).map(async file => {
          const imgName = `${testFolder}${item.Name}/${file}`;
          const bucketUploadResult = await bucket.upload(imgName); // upload image
          return bucketUploadResult.metadata.mediaLink;
        });
        return result; // This contains all the media link for matching files
      }));
      // eslint-disable-next-line no-console
      console.log(finalResult); // Return arrays of media links for files
      return finalResult;
    }));
  }))
  .promise()
  .then(() => console.log("finsihed"))
  .catch(err => console.error(err));

Upvotes: 4

trincot
trincot

Reputation: 350310

Indeed, your code is not awaiting the asynchronous invocation of the readdir callback function, nor of the bucket.upload callback function.

Asynchronous coding becomes easier when you use the promise-version of these functions.

bucket.upload will return a promise when omitting the callback function, so that is easy.

For readdir to return a promise, you need to use the fs Promise API: then you can use the promise-based readdir method and use promises throughout your code.

So use fs = require('fs').promises instead of fs = require('fs')

With that preparation, your code can be transformed into this:

const testFolder = './Images/';
var csvFilePath = './Inventory.csv';
var dirArr = ['./Images/Subdirectory-A','./Images/Subdirectory-B','./Images/Subdirectory-C'];

(async function () {
    let arr = await csv().fromFile(csvFilePath);
    arr = arr.filter(obj => dirArr.includes(obj.Name));
    let allData = await Promise.all(arr.map(async obj => {
        let files = await fs.readdir(testFolder + obj.Name);
        files = files.filter(file => file.match(/\.(jpg|jpeg|png|gif)$/i));
        let photos = await Promise.all(
            files.map(async file => {
                var imgName = testFolder + obj.Name + '/' + file;
                let result = await bucket.upload(imgName);
                return result.metadata.mediaLink;
            })
        );
        return {photos};
    }));
    console.log('finito', allData);
})().catch(err => {  // <-- The above async function runs immediately and returns a promise
    console.log(err);
});

Some remarks:

  • There is a shortcoming in your regular expression. You intended to match a literal dot, but you did not escape it (fixed in above code).

  • allData will contain an array of { photos: [......] } objects, and I wonder why you would not want all photo elements to be part of one single array. However, I kept your logic, so the above will still produce them in these chunks. Possibly, you intended to have other properties (next to photos) as well, which would make it actually useful to have these separate objects.

Upvotes: 4

Related Questions