Reputation:
In JavaScript, splitting a string by commas while ignoring commas within double quotes can be done with the regexp /(".*?"|[^",\s]+)(?=\s*,|\s*$)/.
How can I use it with Erlang's re:split()? The regexp doesn't work in Erlang as written.
1> S = "20140419,\"Blah blah, foo foo\",1,0,0,0,1,2,0,0".
2> re:split(S, "(\".*?\"|[^\",\s]+,)(?=\s*,|\s*$)", [{return,list}]).
["20140421,","\"Blah blah, foo foo\"",",1,0,0,0,1,2,0,0"]
The result I'm looking for is the list
["20140419","\"Blah blah, foo foo\"","1","0","0","0","1","2","0","0"]
Thanks.
Upvotes: 0
Views: 1180
Reputation: 20004
Just translate the JavaScript regular expression to Erlang:
Erlang R16B03-1 (erts-5.10.4) [source] [64-bit] [smp:8:8] [async-threads:10] [kernel-poll:false]
Eshell V5.10.4 (abort with ^G)
1> S = "20140419,\"Blah blah, foo foo\",1,0,0,0,1,2,0,0".
"20140419,\"Blah blah, foo foo\",1,0,0,0,1,2,0,0"
2> {ok,R} = re:compile("(\".*?\"|[^\",\\s]+)(?=\\s*,|\\s*$)").
{ok,{re_pattern,1,0,
<<69,82,67,80,122,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,
...>>}}
3> {match,Matches} = re:run(S, R, [{capture,[1],list},global]).
{match,[["20140419"],
["\"Blah blah, foo foo\""],
["1"],
["0"],
["0"],
["0"],
["1"],
["2"],
["0"],
["0"]]}
4> [M || [M] <- Matches].
["20140419","\"Blah blah, foo foo\"","1","0","0","0","1",
"2","0","0"]
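In shell command 2, note the use of double backslashes in the pattern. Inside an Erlang string literal, \s is itself an escape sequence for the space character, so the regexp whitespace class must be written as \\s for the backslash to reach the regexp engine.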
Upvotes: 2
Reputation: 2496
How about this:
1> string:tokens(S, ",").
["20140419","\"Blah blah"," foo foo\"","1","0","0","0","1","2","0","0"]
Or even:
2> re:split(S, ",", [{return,list}]).
["20140419","\"Blah blah"," foo foo\"","1","0","0","0","1","2","0","0"]
(@kadaj, that is seriously a lot of code to parse CSVs)
Edit: to properly answer the question, one needs to reassemble the "\"…", "…\"" pairs that splitting on the comma broke apart.
To do so, a trivial recursive function will do:
%% Reassembles quoted CSV fields that a plain split on "," broke apart.
%%
%% Tokens - strings obtained by splitting one line on commas
%%          (e.g. with string:tokens/2).
%% Acc    - reversed list of the fields emitted so far; pass [] initially.
%% Returns the fields in their original order.
%%
%% A token starting with a double quote opens a quoted field. If that token
%% does not also end the field, the following tokens are glued back on,
%% together with the "," the split removed, until a token ending in a double
%% quote closes it. An unterminated quoted field is emitted as-is.
finish([[$" | Rest] = Open | T], Acc) ->
    case closes_quote(Rest) of
        true ->
            %% Quoted field without an embedded comma: nothing to rejoin.
            finish(T, [Open | Acc]);
        false ->
            {Field, Remaining} = join_quoted(T, Open),
            finish(Remaining, [Field | Acc])
    end;
finish([H | T], Acc) ->
    finish(T, [H | Acc]);
finish([], Acc) ->
    lists:reverse(Acc).

%% True when Chars is non-empty and its last character is a double quote.
closes_quote([]) -> false;
closes_quote(Chars) -> lists:last(Chars) =:= $".

%% Appends tokens to the open quoted Field, restoring the separating commas,
%% until a token ending in a double quote is consumed or the input runs out.
%% Returns {CompletedField, RemainingTokens}.
join_quoted([], Field) ->
    {Field, []};
join_quoted([Next | T], Field) ->
    Joined = Field ++ "," ++ Next,
    case closes_quote(Next) of
        true -> {Joined, T};
        false -> join_quoted(T, Joined)
    end.
Upvotes: 1
Reputation:
Came up with a parser using pattern matching. Adding it here in case anyone finds it useful.
%% Splits one CSV line into its fields.
%%
%% Commas inside double quotes are treated as data, not as separators.
%% Quotes are kept verbatim in the returned fields, so
%% parse_csv("foo,\"foo, bar\",baz") returns ["foo","\"foo, bar\"","baz"].
%% An empty string yields [""], and a trailing comma yields a trailing "".
parse_csv(String) ->
    parse_csv(String, [], [], [], false).

%% Worker for parse_csv/1.
%%
%% String   - the input still to be consumed.
%% S        - completed fields, newest first, each stored reversed.
%% Acc      - reversed characters of the current field seen before its
%%            first double quote.
%% L        - reversed characters of the current field from its first
%%            double quote onwards ([] while no quote has been seen).
%% IsSubStr - true while the scanner is inside a quoted section.
%%
%% The reversed text of the current field is always L ++ Acc, because L
%% holds the more recent characters.
parse_csv([], S, Acc, L, _IsSubStr) ->
    %% End of input: emit whatever is pending, even when both an unquoted
    %% prefix and a quoted section are present.
    lists:reverse([lists:reverse(Field) || Field <- [L ++ Acc | S]]);
parse_csv([$" | T], S, Acc, L, IsSubStr) ->
    %% A quote opens or closes a quoted section. It is pushed onto L rather
    %% than replacing it, so doubled quotes ("a""b") keep all their text.
    parse_csv(T, S, Acc, [$" | L], not IsSubStr);
parse_csv([$, | T], S, Acc, L, true) ->
    %% Comma inside quotes is ordinary data.
    parse_csv(T, S, Acc, [$, | L], true);
parse_csv([$, | T], S, Acc, L, false) ->
    %% Comma outside quotes terminates the current field.
    parse_csv(T, [L ++ Acc | S], [], [], false);
parse_csv([H | T], S, Acc, [], false) ->
    %% Plain character before any quote in this field.
    parse_csv(T, S, [H | Acc], [], false);
parse_csv([H | T], S, Acc, L, IsSubStr) ->
    %% Character inside quotes, or after a closing quote: append to L so
    %% the characters stay in order and the quoted section is not dropped.
    parse_csv(T, S, Acc, [H | L], IsSubStr).
Example:
2> ql:parse_csv("foo,\"bar aa\",blah,\"dooo\",phew").
["foo","\"bar aa\"","blah","\"dooo\"","phew"]
3> ql:parse_csv("foo,bar,baz").
["foo","bar","baz"]
4> ql:parse_csv("foo,\"foo, bar\",baz").
["foo","\"foo, bar\"","baz"]
Upvotes: 1