sogg
sogg

Reputation: 85

Parse XLSX and create json with Node

I'm using the js-xlsx package (https://github.com/SheetJS/js-xlsx).
Question: How can I parse an XLSX file that contains merged cells and output JSON? Here is what the Excel sheet looks like (the screenshot is not reproduced here). In the end, the JSON should look like this:

    [
   {
   "Nuber": 1,
   "Department": "part1",
   "Unit": ["check","second","cable"],
   "BeginTime": "1/7:00",
   "EndTime": "2/20:00",
   "Worker": ["Lucy","Jussa","Peter"]
   },
   {
   "Nuber": 2,
   "Department": "part2",
   "Unit": "check",
   "BeginTime": "1/7:00",
   "EndTime": "1/20:00",
   "Worker": "Yu"
   }
]  

test.js:

// Skeleton that opens test.xlsx and visits every cell key of every sheet.
// `XLSX` and the loop key `z` are declared explicitly so they do not leak as
// implicit globals (which would throw in strict mode / ES modules).
var XLSX = require('xlsx');
var workbook = XLSX.readFile('test.xlsx');
var sheet_name_list = workbook.SheetNames;
sheet_name_list.forEach(function(y) { /* iterate through sheets */
  var worksheet = workbook.Sheets[y];
  for (var z in worksheet) {
    /* all keys that do not begin with "!" correspond to cell addresses */
    /* (special keys such as "!merges", which per the SheetJS docs lists the merged ranges, are skipped here) */
    if(z[0] === '!') continue;

    // worksheet[z] is the entry stored under cell address z; per-cell handling goes here.
  }

});

Upvotes: 3

Views: 7279

Answers (1)

Stéphane Laurent
Stéphane Laurent

Reputation: 84529

// Read one sheet of iris_small.xlsx and print it as an array of row objects.
var XLSX = require('xlsx');
var workbook = XLSX.readFile('iris_small.xlsx');
var sheetNames = workbook.SheetNames;

// 1-based position of the sheet to convert (1 = first sheet).
var sheetIndex = 1;

// Resolve the sheet name first, then the sheet itself, then convert.
var targetSheetName = sheetNames[sheetIndex - 1];
var targetSheet = workbook.Sheets[targetSheetName];
var df = XLSX.utils.sheet_to_json(targetSheet);

console.log(df);

gives:

[ { Id: '1',
    'Petal.Length': '1.4',
    'Petal.Width': '0.2',
    Species: 'setosa' },
  { Id: '2',
    'Petal.Length': '1.4',
    'Petal.Width': '0.2',
    Species: 'setosa' },
  { Id: '3',
    'Petal.Length': '1.3',
    'Petal.Width': '0.2',
    Species: 'setosa' },
  { Id: '4',
    'Petal.Length': '3.9',
    'Petal.Width': '1.4',
    Species: 'versicolor' },
  { Id: '5',
    'Petal.Length': '3.5',
    'Petal.Width': '1',
    Species: 'versicolor' },
  { Id: '6',
    'Petal.Length': '4.2',
    'Petal.Width': '1.5',
    Species: 'versicolor' },
  { Id: '7',
    'Petal.Length': '5.4',
    'Petal.Width': '2.3',
    Species: 'virginica' },
  { Id: '8',
    'Petal.Length': '5.1',
    'Petal.Width': '1.8',
    Species: 'virginica' } ]

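As you can see, the numbers are converted to strings. I don't know whether there is an option to preserve the numbers as numbers. A workaround is to export the sheet to CSV and parse that with Papa Parse (used here through its Node.js port, babyparse) with dynamic typing enabled: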

// Workaround: round-trip the sheet through CSV so the CSV parser can turn
// numeric-looking fields back into numbers.
var Papa = require('babyparse');

// Same sheet selection as before, serialised to CSV text.
var selectedSheet = workbook.Sheets[sheetNames[sheetIndex-1]];
var csv = XLSX.utils.sheet_to_csv(selectedSheet);

var parseOptions = {
    header: true,          // key each row object by the header row
    skipEmptyLines: true,
    dynamicTyping: true    // convert numeric fields to numbers
};

var json = Papa.parse(csv, parseOptions);

// The parsed rows live under `data` on the parse result.
console.log(json.data);

Now the result is:

[ { Id: 1,
    'Petal.Length': 1.4,
    'Petal.Width': 0.2,
    Species: 'setosa' },
  { Id: 2,
    'Petal.Length': 1.4,
    'Petal.Width': 0.2,
    Species: 'setosa' },
  { Id: 3,
    'Petal.Length': 1.3,
    'Petal.Width': 0.2,
    Species: 'setosa' },
  { Id: 4,
    'Petal.Length': 3.9,
    'Petal.Width': 1.4,
    Species: 'versicolor' },
  { Id: 5,
    'Petal.Length': 3.5,
    'Petal.Width': 1,
    Species: 'versicolor' },
  { Id: 6,
    'Petal.Length': 4.2,
    'Petal.Width': 1.5,
    Species: 'versicolor' },
  { Id: 7,
    'Petal.Length': 5.4,
    'Petal.Width': 2.3,
    Species: 'virginica' },
  { Id: 8,
    'Petal.Length': 5.1,
    'Petal.Width': 1.8,
    Species: 'virginica' } ]

Upvotes: 7

Related Questions