Reputation: 3225
I found this function to parse a csv string into a multi-dim array in javascript, here, however I need to update it to also allow single quotes (') to be used as the enclosure in addition to double quotes (") which it already does. i.e. I need it to be able to parse:
CSVToArray('"a, comma","2","3"');
// => [['a, comma', '2', '3']]
As well as this
CSVToArray("'a, comma','2','3'");
// => [['a, comma', '2', '3']]
The function:
// This will parse a delimited string into an array of
// arrays. The default delimiter is the comma, but this
// can be overriden in the second argument.
function CSVToArray( strData, strDelimiter ){
// Check to see if the delimiter is defined. If not,
// then default to comma.
strDelimiter = (strDelimiter || ",");
// Create a regular expression to parse the CSV values.
var objPattern = new RegExp(
(
// Delimiters.
"(\\" + strDelimiter + "|\\r?\\n|\\r|^)" +
// Quoted fields.
"(?:\"([^\"]*(?:\"\"[^\"]*)*)\"|" +
// Standard fields.
"([^\"\\" + strDelimiter + "\\r\\n]*))"
),
"gi"
);
// Create an array to hold our data. Give the array
// a default empty first row.
var arrData = [[]];
// Create an array to hold our individual pattern
// matching groups.
var arrMatches = null;
// Keep looping over the regular expression matches
// until we can no longer find a match.
while (arrMatches = objPattern.exec( strData )){
// Get the delimiter that was found.
var strMatchedDelimiter = arrMatches[ 1 ];
// Check to see if the given delimiter has a length
// (is not the start of string) and if it matches
// field delimiter. If id does not, then we know
// that this delimiter is a row delimiter.
if (
strMatchedDelimiter.length &&
(strMatchedDelimiter != strDelimiter)
){
// Since we have reached a new row of data,
// add an empty row to our data array.
arrData.push( [] );
}
// Now that we have our delimiter out of the way,
// let's check to see which kind of value we
// captured (quoted or unquoted).
if (arrMatches[ 2 ]){
// We found a quoted value. When we capture
// this value, unescape any double quotes.
var strMatchedValue = arrMatches[ 2 ].replace(
new RegExp( "\"\"", "g" ),
"\""
);
} else {
// We found a non-quoted value.
var strMatchedValue = arrMatches[ 3 ];
}
// Now that we have our value string, let's add
// it to the data array.
arrData[ arrData.length - 1 ].push( strMatchedValue );
}
// Return the parsed data.
return( arrData );
}
Upvotes: 1
Views: 1640
Reputation: 349042
Your base pattern contains some flaws, and not easily extensible: Delimiters are always prefixed by a \\
, even when they shouldn't.
I've rewritten the code, to make it mor ereliable.Multiple quotation types are supported, and delimiters are properly escaped.
Fiddle: http://jsfiddle.net/qz53J/
function CSVToArray( strData, strDelimiter ){
// Properly escape the delimiter, if existent.
// If no delimiter is given, use a comma
strDelimiter = (strDelimiter || ",").replace(/([[^$.|?*+(){}])/g, '\\$1');
//What are the quotation characters? "'
var quotes = "\"'";
// Create a regular expression to parse the CSV values.
// match[1] = Contains the delimiter if the RegExp is not at the begin
// match[2] = quote, if any
// match[3] = string inside quotes, if match[2] exists
// match[4] = non-quoted strings
var objPattern = new RegExp(
// Delimiter or marker of new row
"(?:(" + strDelimiter + ")|[\\n\\r]|^)" +
// Quoted fields
"(?:([" + quotes + "])((?:[^" + quotes + "]+|(?!\\2).|\\2\\2)*)\\2" +
// Standard fields
"|([^" + quotes + strDelimiter + "\\n\\r]*))"
, "gi");
// Create a matrix (2d array) to hold data, which will be returned.
var arrData = [];
// Execute the RegExp until no match is found
var arrMatches;
while ( arrMatches = objPattern.exec( strData ) ){
// If the first group of the RegExp does is empty, no delimiter is
// matched. This only occurs at the beginning of a new row
if ( !arrMatches[ 1 ] ){
// Add an empty row to our data array.
arrData.push( [] );
}
var quote = arrMatches[ 2 ]
if ( quote ){
// We found a quoted value. When we capture
// this value, unescape any double quotes.
var strMatchedValue = arrMatches[ 3 ].replace(
new RegExp( quote + quote, "g" ),
quote
);
} else {
// We found a non-quoted value.
var strMatchedValue = arrMatches[ 4 ];
}
// Add the found value to the array
arrData[ arrData.length - 1 ].push( strMatchedValue );
}
// Return the parsed data.
return arrData;
}
Upvotes: 3