Pierre-Gilles Levallois
Pierre-Gilles Levallois

Reputation: 4371

How to parse CSV data?

Where could I find some JavaScript code to parse CSV data?

Upvotes: 292

Views: 537818

Answers (15)

Spencer May
Spencer May

Reputation: 4505

Edited to accept newlines as \r \n and \r\n as either a single or double byte character code ... couldn't find a definitive answer on that so I rolled with it.

function parseCSV(str) {
    let csv = [], pointer = 0, c = null, quote = 0, col = '', row = [];
    while (c = str.charAt(pointer)) {
        pointer++;
        if (c == '"') {
            quote++;
            // first and even numbered quotes are parsed, not used
            if (quote == 1 || quote%2 == 0) {
                continue;
            }
        } else if ([',','\r\n','\n','\r'].includes(c)) {
            // even quotes means a completed col and potential row
            if (quote%2 == 0) {
                row.push(col);
                col = '';
                quote = 0;
                // if delimiter is not a comma, also a completed row
                if (c != ',') {
                    // handle edge case of \r\n being two separate characters
                    if (c == '\r' && str.charAt(pointer) == '\n') {
                        pointer++;
                    }
                    csv.push(row);
                    row = [];
                }
                continue;
            }
        }
        col += c;
    }
    return csv;
}

Upvotes: 0

Zero14
Zero14

Reputation: 31

Edit1(2024)

Now deno std is hosted on https://jsr.io/, its possible to use it from esm.sh

import { parse } from "https://esm.sh/jsr/@std/csv";

Original Answer

Personally I like to use deno std library since most modules are officially compatible with the browser

The problem is that the std is in typescript but official solution might happen in the future https://github.com/denoland/deno_std/issues/641 https://github.com/denoland/dotland/issues/1728

For now there is an actively maintained on the fly transpiler https://bundle.deno.dev/

so you can use it simply like this

<script type="module">
import { parse } from "https://bundle.deno.dev/https://deno.land/[email protected]/encoding/csv.ts"
console.log(await parse("a,b,c\n1,2,3"))
</script>

and you can also just vendor it

curl https://bundle.deno.dev/https://deno.land/[email protected]/encoding/csv.ts --output src/csv.js

Upvotes: 1

Kirtan
Kirtan

Reputation: 21695

You can use the CSVToArray() function mentioned in this blog entry.

      console.log(CSVToArray(`"foo, the column",bar
2,3
"4, the value",5`));

      // ref: http://stackoverflow.com/a/1293163/2343
        // This will parse a delimited string into an array of
        // arrays. The default delimiter is the comma, but this
        // can be overriden in the second argument.
        function CSVToArray( strData, strDelimiter ){
            // Check to see if the delimiter is defined. If not,
            // then default to comma.
            strDelimiter = (strDelimiter || ",");
     
            // Create a regular expression to parse the CSV values.
            var objPattern = new RegExp(
                (
                    // Delimiters.
                    "(\\" + strDelimiter + "|\\r?\\n|\\r|^)" +
     
                    // Quoted fields.
                    "(?:\"([^\"]*(?:\"\"[^\"]*)*)\"|" +
     
                    // Standard fields.
                    "([^\"\\" + strDelimiter + "\\r\\n]*))"
                ),
                "gi"
                );
     
     
            // Create an array to hold our data. Give the array
            // a default empty first row.
            var arrData = [[]];
     
            // Create an array to hold our individual pattern
            // matching groups.
            var arrMatches = null;
     
     
            // Keep looping over the regular expression matches
            // until we can no longer find a match.
            while (arrMatches = objPattern.exec( strData )){
     
                // Get the delimiter that was found.
                var strMatchedDelimiter = arrMatches[ 1 ];
     
                // Check to see if the given delimiter has a length
                // (is not the start of string) and if it matches
                // field delimiter. If id does not, then we know
                // that this delimiter is a row delimiter.
                if (
                    strMatchedDelimiter.length &&
                    strMatchedDelimiter !== strDelimiter
                    ){
     
                    // Since we have reached a new row of data,
                    // add an empty row to our data array.
                    arrData.push( [] );
     
                }
     
                var strMatchedValue;
     
                // Now that we have our delimiter out of the way,
                // let's check to see which kind of value we
                // captured (quoted or unquoted).
                if (arrMatches[ 2 ]){
     
                    // We found a quoted value. When we capture
                    // this value, unescape any double quotes.
                    strMatchedValue = arrMatches[ 2 ].replace(
                        new RegExp( "\"\"", "g" ),
                        "\""
                        );
     
                } else {
     
                    // We found a non-quoted value.
                    strMatchedValue = arrMatches[ 3 ];
     
                }
     
     
                // Now that we have our value string, let's add
                // it to the data array.
                arrData[ arrData.length - 1 ].push( strMatchedValue );
            }
     
            // Return the parsed data.
            return( arrData );
        }

Upvotes: 317

Trevor Dixon
Trevor Dixon

Reputation: 24342

Here's an extremely simple CSV parser that handles quoted fields with commas, new lines, and escaped double quotation marks. There's no splitting or regular expression. It scans the input string 1-2 characters at a time and builds an array.

Test it at http://jsfiddle.net/vHKYH/.

function parseCSV(str) {
    const arr = [];
    let quote = false;  // 'true' means we're inside a quoted field

    // Iterate over each character, keep track of current row and column (of the returned array)
    for (let row = 0, col = 0, c = 0; c < str.length; c++) {
        let cc = str[c], nc = str[c+1];        // Current character, next character
        arr[row] = arr[row] || [];             // Create a new row if necessary
        arr[row][col] = arr[row][col] || '';   // Create a new column (start with empty string) if necessary

        // If the current character is a quotation mark, and we're inside a
        // quoted field, and the next character is also a quotation mark,
        // add a quotation mark to the current column and skip the next character
        if (cc == '"' && quote && nc == '"') { arr[row][col] += cc; ++c; continue; }

        // If it's just one quotation mark, begin/end quoted field
        if (cc == '"') { quote = !quote; continue; }

        // If it's a comma and we're not in a quoted field, move on to the next column
        if (cc == ',' && !quote) { ++col; continue; }

        // If it's a newline (CRLF) and we're not in a quoted field, skip the next character
        // and move on to the next row and move to column 0 of that new row
        if (cc == '\r' && nc == '\n' && !quote) { ++row; col = 0; ++c; continue; }

        // If it's a newline (LF or CR) and we're not in a quoted field,
        // move on to the next row and move to column 0 of that new row
        if (cc == '\n' && !quote) { ++row; col = 0; continue; }
        if (cc == '\r' && !quote) { ++row; col = 0; continue; }

        // Otherwise, append the current character to the current column
        arr[row][col] += cc;
    }
    return arr;
}

Upvotes: 73

Stephen Quan
Stephen Quan

Reputation: 25871

Here's another solution. This uses:

  • a coarse global regular expression for splitting the CSV string (which includes surrounding quotes and trailing commas)
  • fine-grained regular expression for cleaning up the surrounding quotes and trailing commas
  • also, has type correction differentiating strings, numbers, boolean values and null values

For the following input string:

"This is\, a value",Hello,4,-123,3.1415,'This is also\, possible',true,

The code outputs:

[
  "This is, a value",
  "Hello",
  4,
  -123,
  3.1415,
  "This is also, possible",
  true,
  null
]

Here's my implementation of parseCSVLine() in a runnable code snippet:

function parseCSVLine(text) {
  return text.match( /\s*(\"[^"]*\"|'[^']*'|[^,]*)\s*(,|$)/g ).map( function (text) {
    let m;
    if (m = text.match(/^\s*,?$/)) return null; // null value
    if (m = text.match(/^\s*\"([^"]*)\"\s*,?$/)) return m[1]; // Double Quoted Text
    if (m = text.match(/^\s*'([^']*)'\s*,?$/)) return m[1]; // Single Quoted Text
    if (m = text.match(/^\s*(true|false)\s*,?$/)) return m[1] === "true"; // Boolean
    if (m = text.match(/^\s*((?:\+|\-)?\d+)\s*,?$/)) return parseInt(m[1]); // Integer Number
    if (m = text.match(/^\s*((?:\+|\-)?\d*\.\d*)\s*,?$/)) return parseFloat(m[1]); // Floating Number
    if (m = text.match(/^\s*(.*?)\s*,?$/)) return m[1]; // Unquoted Text
    return text;
  } );
}

let data = `"This is\, a value",Hello,4,-123,3.1415,'This is also\, possible',true,`;
let obj = parseCSVLine(data);
console.log( JSON.stringify( obj, undefined, 2 ) );

Upvotes: 8

bosscube
bosscube

Reputation: 189

Just throwing this out there.. I recently ran into the need to parse CSV columns with Javascript, and I opted for my own simple solution. It works for my needs, and may help someone else.

const csvString = '"Some text, some text",,"",true,false,"more text","more,text, more, text ",true';

const parseCSV = text => {
  const lines = text.split('\n');
  const output = [];

  lines.forEach(line => {
      line = line.trim();

      if (line.length === 0) return;

      const skipIndexes = {};
      const columns = line.split(',');

      output.push(columns.reduce((result, item, index) => {
          if (skipIndexes[index]) return result;

          if (item.startsWith('"') && !item.endsWith('"')) {
              while (!columns[index + 1].endsWith('"')) {
                  index++;
                  item += `,${columns[index]}`;
                  skipIndexes[index] = true;
              }

              index++;
              skipIndexes[index] = true;
              item += `,${columns[index]}`;
          }

          result.push(item);
          return result;
      }, []));
  });

  return output;
};

console.log(parseCSV(csvString));

Upvotes: 1

noam sondak
noam sondak

Reputation: 137

Here's my simple vanilla JavaScript code:

let a = 'one,two,"three, but with a comma",four,"five, with ""quotes"" in it.."'
console.log(splitQuotes(a))

function splitQuotes(line) {
  if(line.indexOf('"') < 0) 
    return line.split(',')

  let result = [], cell = '', quote = false;
  for(let i = 0; i < line.length; i++) {
    char = line[i]
    if(char == '"' && line[i+1] == '"') {
      cell += char
      i++
    } else if(char == '"') {
      quote = !quote;
    } else if(!quote && char == ',') {
      result.push(cell)
      cell = ''
    } else {
      cell += char
    }
    if ( i == line.length-1 && cell) {
      result.push(cell)
    }
  }
  return result
}

Upvotes: 5

Gabriel Chung
Gabriel Chung

Reputation: 1577

I have constructed this JavaScript script to parse a CSV in string to array object. I find it better to break down the whole CSV into lines, fields and process them accordingly. I think that it will make it easy for you to change the code to suit your need.

    //
    //
    // CSV to object
    //
    //

    const new_line_char = '\n';
    const field_separator_char = ',';

    function parse_csv(csv_str) {

        var result = [];

        let line_end_index_moved = false;
        let line_start_index = 0;
        let line_end_index = 0;
        let csr_index = 0;
        let cursor_val = csv_str[csr_index];
        let found_new_line_char = get_new_line_char(csv_str);
        let in_quote = false;

        // Handle \r\n
        if (found_new_line_char == '\r\n') {
            csv_str = csv_str.split(found_new_line_char).join(new_line_char);
        }
        // Handle the last character is not \n
        if (csv_str[csv_str.length - 1] !== new_line_char) {
            csv_str += new_line_char;
        }

        while (csr_index < csv_str.length) {
            if (cursor_val === '"') {
                in_quote = !in_quote;
            } else if (cursor_val === new_line_char) {
                if (in_quote === false) {
                    if (line_end_index_moved && (line_start_index <= line_end_index)) {
                        result.push(parse_csv_line(csv_str.substring(line_start_index, line_end_index)));
                        line_start_index = csr_index + 1;
                    } // Else: just ignore line_end_index has not moved or line has not been sliced for parsing the line
                } // Else: just ignore because we are in a quote
            }
            csr_index++;
            cursor_val = csv_str[csr_index];
            line_end_index = csr_index;
            line_end_index_moved = true;
        }

        // Handle \r\n
        if (found_new_line_char == '\r\n') {
            let new_result = [];
            let curr_row;
            for (var i = 0; i < result.length; i++) {
                curr_row = [];
                for (var j = 0; j < result[i].length; j++) {
                    curr_row.push(result[i][j].split(new_line_char).join('\r\n'));
                }
                new_result.push(curr_row);
            }
            result = new_result;
        }
        return result;
    }

    function parse_csv_line(csv_line_str) {

        var result = [];

        //let field_end_index_moved = false;
        let field_start_index = 0;
        let field_end_index = 0;
        let csr_index = 0;
        let cursor_val = csv_line_str[csr_index];
        let in_quote = false;

        // Pretend that the last char is the separator_char to complete the loop
        csv_line_str += field_separator_char;

        while (csr_index < csv_line_str.length) {
            if (cursor_val === '"') {
                in_quote = !in_quote;
            } else if (cursor_val === field_separator_char) {
                if (in_quote === false) {
                    if (field_start_index <= field_end_index) {
                        result.push(parse_csv_field(csv_line_str.substring(field_start_index, field_end_index)));
                        field_start_index = csr_index + 1;
                    } // Else: just ignore field_end_index has not moved or field has not been sliced for parsing the field
                } // Else: just ignore because we are in quote
            }
            csr_index++;
            cursor_val = csv_line_str[csr_index];
            field_end_index = csr_index;
            field_end_index_moved = true;
        }
        return result;
    }

    function parse_csv_field(csv_field_str) {
        with_quote = (csv_field_str[0] === '"');

        if (with_quote) {
            csv_field_str = csv_field_str.substring(1, csv_field_str.length - 1); // remove the start and end quotes
            csv_field_str = csv_field_str.split('""').join('"'); // handle double quotes
        }
        return csv_field_str;
    }

    // Initial method: check the first newline character only
    function get_new_line_char(csv_str) {
        if (csv_str.indexOf('\r\n') > -1) {
            return '\r\n';
        } else {
            return '\n'
        }
    }

Upvotes: 0

dt192
dt192

Reputation: 1013

csvToArray v1.3

A compact (645 bytes), but compliant function to convert a CSV string into a 2D array, conforming to the RFC4180 standard.

https://code.google.com/archive/p/csv-to-array/downloads

Common Usage: jQuery

 $.ajax({
        url: "test.csv",
        dataType: 'text',
        cache: false
 }).done(function(csvAsString){
        csvAsArray=csvAsString.csvToArray();
 });

Common usage: JavaScript

csvAsArray = csvAsString.csvToArray();

Override field separator

csvAsArray = csvAsString.csvToArray("|");

Override record separator

csvAsArray = csvAsString.csvToArray("", "#");

Override Skip Header

csvAsArray = csvAsString.csvToArray("", "", 1);

Override all

csvAsArray = csvAsString.csvToArray("|", "#", 1);

Upvotes: 16

Andy VanWagoner
Andy VanWagoner

Reputation: 561

I have an implementation as part of a spreadsheet project.

This code is not yet tested thoroughly, but anyone is welcome to use it.

As some of the answers noted though, your implementation can be much simpler if you actually have DSV or TSV file, as they disallow the use of the record and field separators in the values. CSV, on the other hand, can actually have commas and newlines inside a field, which breaks most regular expression and split-based approaches.

var CSV = {
    parse: function(csv, reviver) {
        reviver = reviver || function(r, c, v) { return v; };
        var chars = csv.split(''), c = 0, cc = chars.length, start, end, table = [], row;
        while (c < cc) {
            table.push(row = []);
            while (c < cc && '\r' !== chars[c] && '\n' !== chars[c]) {
                start = end = c;
                if ('"' === chars[c]){
                    start = end = ++c;
                    while (c < cc) {
                        if ('"' === chars[c]) {
                            if ('"' !== chars[c+1]) {
                                break;
                            }
                            else {
                                chars[++c] = ''; // unescape ""
                            }
                        }
                        end = ++c;
                    }
                    if ('"' === chars[c]) {
                        ++c;
                    }
                    while (c < cc && '\r' !== chars[c] && '\n' !== chars[c] && ',' !== chars[c]) {
                        ++c;
                    }
                } else {
                    while (c < cc && '\r' !== chars[c] && '\n' !== chars[c] && ',' !== chars[c]) {
                        end = ++c;
                    }
                }
                row.push(reviver(table.length-1, row.length, chars.slice(start, end).join('')));
                if (',' === chars[c]) {
                    ++c;
                }
            }
            if ('\r' === chars[c]) {
                ++c;
            }
            if ('\n' === chars[c]) {
                ++c;
            }
        }
        return table;
    },

    stringify: function(table, replacer) {
        replacer = replacer || function(r, c, v) { return v; };
        var csv = '', c, cc, r, rr = table.length, cell;
        for (r = 0; r < rr; ++r) {
            if (r) {
                csv += '\r\n';
            }
            for (c = 0, cc = table[r].length; c < cc; ++c) {
                if (c) {
                    csv += ',';
                }
                cell = replacer(r, c, table[r][c]);
                if (/[,\r\n"]/.test(cell)) {
                    cell = '"' + cell.replace(/"/g, '""') + '"';
                }
                csv += (cell || 0 === cell) ? cell : '';
            }
        }
        return csv;
    }
};

Upvotes: 44

Micah
Micah

Reputation: 313

Just use .split(','):

var str = "How are you doing today?";
var n = str.split(" ");

Upvotes: -9

Evan Plaice
Evan Plaice

Reputation: 14140

jQuery-CSV

It's a jQuery plugin designed to work as an end-to-end solution for parsing CSV into JavaScript data. It handles every single edge case presented in RFC 4180, as well as some that pop up for Excel/Google spreadsheet exports (i.e., mostly involving null values) that the specification is missing.

Example:

track,artist,album,year

Dangerous,'Busta Rhymes','When Disaster Strikes',1997

// Calling this
music = $.csv.toArrays(csv)

// Outputs...
[
  ["track", "artist", "album", "year"],
  ["Dangerous", "Busta Rhymes", "When Disaster Strikes", "1997"]
]

console.log(music[1][2]) // Outputs: 'When Disaster Strikes'

Update:

Oh yeah, I should also probably mention that it's completely configurable.

music = $.csv.toArrays(csv, {
  delimiter: "'", // Sets a custom value delimiter character
  separator: ';', // Sets a custom field separator character
});

Update 2:

It now works with jQuery on Node.js too. So you have the option of doing either client-side or server-side parsing with the same library.

Update 3:

Since the Google Code shutdown, jquery-csv has been migrated to GitHub.

Disclaimer: I am also the author of jQuery-CSV.

Upvotes: 158

Shanimal
Shanimal

Reputation: 11718

I'm not sure why I couldn't get Kirtan's example to work for me. It seemed to be failing on empty fields or maybe fields with trailing commas...

This one seems to handle both.

I did not write the parser code, just a wrapper around the parser function to make this work for a file. See attribution.

    var Strings = {
        /**
         * Wrapped CSV line parser
         * @param s      String delimited CSV string
         * @param sep    Separator override
         * @attribution: http://www.greywyvern.com/?post=258 (comments closed on blog :( )
         */
        parseCSV : function(s,sep) {
            // http://stackoverflow.com/questions/1155678/javascript-string-newline-character
            var universalNewline = /\r\n|\r|\n/g;
            var a = s.split(universalNewline);
            for(var i in a){
                for (var f = a[i].split(sep = sep || ","), x = f.length - 1, tl; x >= 0; x--) {
                    if (f[x].replace(/"\s+$/, '"').charAt(f[x].length - 1) == '"') {
                        if ((tl = f[x].replace(/^\s+"/, '"')).length > 1 && tl.charAt(0) == '"') {
                            f[x] = f[x].replace(/^\s*"|"\s*$/g, '').replace(/""/g, '"');
                          } else if (x) {
                        f.splice(x - 1, 2, [f[x - 1], f[x]].join(sep));
                      } else f = f.shift().split(sep).concat(f);
                    } else f[x].replace(/""/g, '"');
                  } a[i] = f;
        }
        return a;
        }
    }

Upvotes: 3

Peter Thoeny
Peter Thoeny

Reputation: 7616

Regular expressions to the rescue! These few lines of code handle properly quoted fields with embedded commas, quotes, and newlines based on the RFC 4180 standard.

function parseCsv(data, fieldSep, newLine) {
    fieldSep = fieldSep || ',';
    newLine = newLine || '\n';
    var nSep = '\x1D';
    var qSep = '\x1E';
    var cSep = '\x1F';
    var nSepRe = new RegExp(nSep, 'g');
    var qSepRe = new RegExp(qSep, 'g');
    var cSepRe = new RegExp(cSep, 'g');
    var fieldRe = new RegExp('(?<=(^|[' + fieldSep + '\\n]))"(|[\\s\\S]+?(?<![^"]"))"(?=($|[' + fieldSep + '\\n]))', 'g');
    var grid = [];
    data.replace(/\r/g, '').replace(/\n+$/, '').replace(fieldRe, function(match, p1, p2) {
        return p2.replace(/\n/g, nSep).replace(/""/g, qSep).replace(/,/g, cSep);
    }).split(/\n/).forEach(function(line) {
        var row = line.split(fieldSep).map(function(cell) {
            return cell.replace(nSepRe, newLine).replace(qSepRe, '"').replace(cSepRe, ',');
        });
        grid.push(row);
    });
    return grid;
}

const csv = 'A1,B1,C1\n"A ""2""","B, 2","C\n2"';
const separator = ',';      // field separator, default: ','
const newline = ' <br /> '; // newline representation in case a field contains newlines, default: '\n' 
var grid = parseCsv(csv, separator, newline);
// expected: [ [ 'A1', 'B1', 'C1' ], [ 'A "2"', 'B, 2', 'C <br /> 2' ] ]

You don't need a parser-generator such as lex/yacc. The regular expression handles RFC 4180 properly thanks to positive lookbehind, negative lookbehind, and positive lookahead.

Clone/download code at https://github.com/peterthoeny/parse-csv-js

Upvotes: 2

Trevor Dixon
Trevor Dixon

Reputation: 24342

Here's my PEG(.js) grammar that seems to do ok at RFC 4180 (i.e. it handles the examples at http://en.wikipedia.org/wiki/Comma-separated_values):

start
  = [\n\r]* first:line rest:([\n\r]+ data:line { return data; })* [\n\r]* { rest.unshift(first); return rest; }

line
  = first:field rest:("," text:field { return text; })*
    & { return !!first || rest.length; } // ignore blank lines
    { rest.unshift(first); return rest; }

field
  = '"' text:char* '"' { return text.join(''); }
  / text:[^\n\r,]* { return text.join(''); }

char
  = '"' '"' { return '"'; }
  / [^"]

Try it out at http://jsfiddle.net/knvzk/10 or http://pegjs.majda.cz/online. Download the generated parser at https://gist.github.com/3362830.

Upvotes: 14

Related Questions