Gagantous
Gagantous

Reputation: 518

Parse excel file and create JSON format in exceljs ON nodejs

I have this excel file

enter image description here

I need to convert the data excel from file to JSON like below

[
   {
   "no_pemohonan": "PNL-202109200826210023105",
   "sumber_data": "HOSTS",
   "tgl_permohonan": "2021-09-20",
   },
   {
   "no_pemohonan": "PNL-202109200845131363376",
   "sumber_data": "HOSTS",
   "tgl_permohonan": "2021-09-20",
   },
   ...
]

I could make the data with this comment but i have to set the object again like below syntax

            const excel = require('exceljs');
            const workbook = new excel.Workbook();


            await workbook.xlsx.load(objDescExcel.buffer);
            let json = workbook.model;
            let worksheetsArr = json.worksheets.length;
            const arrRow = [];
            const arrIdPembatalan = [];

            // looping per worksheet
            for (let index = 0; index < worksheetsArr; index++) {
                let worksheet = workbook.worksheets[index];
                // console.log("worksheet " + worksheet);
                const rowlast = worksheet.lastRow;
                // looping semua row untuk
                worksheet.eachRow(async (row, rowNumber) => {
                    let new_row = row.values
                    // console.log(new_row);
                        let no_permohonan= new_row[2]
                        let sumber_data = new_row[3]
                        let tgl_permohonan = new_row[4]
                        
                        let header = {
                            no_permohonan: no_permohonan,
                            sumber_data : sumber_data ,
                            tgl_permohonan : tgl_permohonan ,
                        };
                        arrIdPembatalan.push(header)
                    }
                })
        }

I want to set the header automatically without have to set the header again like above syntax. I have seen this solution but it was written in xlsx library, while i am using exceljs.

Upvotes: 1

Views: 10524

Answers (3)

erwinv
erwinv

Reputation: 456

If dealing with large files, I would explore stream processing using the following libraries:

  1. Use exceljs to read .xlsx file as stream and write to .csv as stream:
// read from a stream
const readStream = fs.createReadStream('largeWorkbook.xlsx');
const workbook = new Excel.Workbook();
await workbook.xlsx.read(readStream);

// write to stream
const writeStream = fs.createWriteStream('largeWorksheet.csv');
await workbook.csv.write(writeStream, { sheetName: 'Page name' });
  1. Then use csvtojson to transform CSV to JSON:
import csvToJson from 'csvtojson'

const readStream = fs.createReadStream('largeWorksheet.csv')
const writeStream = fs.createWriteStream('largeWorksheet.json')

readStream
  .pipe(csvToJson())
  .pipe(writeStream)

This will work for large files even on hardware with low memory.

Full code snippet:

import fs from 'fs'
import Excel from 'exceljs'
import csvToJson from 'csvtojson'

const xlsxRead = fs.createReadStream('largeWorkbook.xlsx')
const csvWrite = fs.createWriteStream('largeWorksheet.csv')
const csvRead = () => fs.createReadStream('largeWorksheet.csv')
const jsonWrite = fs.createWriteStream('largeWorksheet.json')

(async function process() {
  const workbook = new Excel.Workbook()
  await workbook.xlsx.read(xlsxRead)
  await workbook.csv.write(csvWrite, { sheetName: 'Worksheet Name' })
  csvRead()
    .pipe(csvToJson())
    .pipe(jsonWrite)
})() // this immediately-invoked wrapper function is just for Node.js runtimes
// that don't support top-level await yet
// if running via `--esm` or from `.mjs` file, it can be ommitted

Upvotes: 3

jaykhengar
jaykhengar

Reputation: 61

var Excel = require('exceljs');

var ReadExcelCSV = function (fileType, fileName, filePath, delimeter, textQualifier) {

var deffered = q.defer();
var workbook = new Excel.Workbook();
var finalFilePath = filePath + fileName;

if (fileType == "excel") {
    console.log("File Type: Excel");
    workbook.xlsx.readFile(finalFilePath).then(function () {
        ParseExcelCSVFile(workbook).then(function (resp) {
            deffered.resolve(resp);
        }, function (error) {
            logger.info("Error in Parsing Excel/CSV");
        });
    }, function (err) {
        logger.info("Error In Read Excel: " + JSON.stringify(err));
    });
} else {
    if (delimeter != undefined && textQualifier != undefined) {
        var options = {};
        options.delimiter = delimeter;
        options.quote = textQualifier;
        options.dateFormats = [];

        workbook.csv.readFile(finalFilePath, options).then(function () {
            ParseExcelCSVFile(workbook).then(function (resp) {
                // fs.unlink(finalFilePath);
                deffered.resolve(resp);
            }, function (error) {
                logger.info("Error in Parsing Excel/CSV");
                deffered.reject(error);
            });
        }, function (error) {
            logger.info("Error In Read CSV: " + JSON.stringify(error));
            deffered.reject(error);
        });
    } else {
        workbook.csv.readFile(finalFilePath).then(function () {
            ParseExcelCSVFile(workbook).then(function (resp) {
                deffered.resolve(resp);
            }, function (error) {
                logger.info("Error in Parsing Excel/CSV");
                deffered.reject(error);
            });
        }, function (error) {
            logger.info("Error In Read CSV: " + JSON.stringify(error));
            deffered.reject(error);
        });
    }
}
return deffered.promise;
};

var ParseExcelCSVFile = function (workbook) {
try {
    var deffered = q.defer();

    var objresult = [];
    var objheaders = [];

    var worksheet = workbook.getWorksheet(1);
    worksheet.eachRow(function (row, rowNumber) {
        var currentobj = {};
        row.eachCell({
            includeEmpty: true
        }, function (cell, colNumber) {
            if (rowNumber == 1) {
                objheaders.push(cell.value);
            } else {
                currentobj[objheaders[colNumber - 1]] = cell.value == null ? '' : cell.value;
            }
        });
        if (rowNumber != 1) {
            objresult.push(currentobj);
        }
    });
    deffered.resolve(objresult);
    return deffered.promise;
} catch (ex) {
    logger.error("Error in ParseExcel: " + ex.stack);
}
};

I wrote this code quite a long time ago so you will see an old module like deffered which you can change easily, but it will help in what you are trying to achieve. It can read and parse excel and csv both.

Upvotes: 2

emptyhua
emptyhua

Reputation: 6692

Here is a nodejs implement.

(async function() {
    const excel = require('exceljs');
    const workbook = new excel.Workbook();
    // use readFile for testing purpose
    // await workbook.xlsx.load(objDescExcel.buffer);
    await workbook.xlsx.readFile(process.argv[2]);
    let jsonData = [];
    workbook.worksheets.forEach(function(sheet) {
        // read first row as data keys
        let firstRow = sheet.getRow(1);
        if (!firstRow.cellCount) return;
        let keys = firstRow.values;
        sheet.eachRow((row, rowNumber) => {
            if (rowNumber == 1) return;
            let values = row.values
            let obj = {};
            for (let i = 1; i < keys.length; i ++) {
                obj[keys[i]] = values[i];
            }
            jsonData.push(obj);
        })

    });
    console.log(jsonData);
})();

test result

$ node ./test.js ~/Documents/Book1.xlsx
[
  {
    no_pemohonan: 'PNL-202109200826210023105',
    sumber_data: 'HOSTS',
    tgl_permohonan: 2021-09-20T00:00:00.000Z
  },
  {
    no_pemohonan: 'PNL-202109200845131363376',
    sumber_data: 'HOSTS',
    tgl_permohonan: 2021-09-20T00:00:00.000Z
  }
]

Upvotes: 6

Related Questions