Pages

Wednesday, 7 May 2014

A Simple CSV Parser in Erlang

A simple CSV parser in Erlang. Splitting a line on commas usually suffices, unless a value is itself a quoted string that contains commas. This parser handles such cases as well.
%% @doc Parse one CSV line, given as a string, into a list of field strings.
%% Starts the parse_csv/5 worker with no completed fields, empty
%% accumulators, and the "inside a quoted substring" flag set to false.
parse_csv(Line) ->
    parse_csv(Line, [], [], [], false).

%% @doc Worker for parse_csv/1: consumes String one character at a time.
%%
%% Arguments:
%%   String   - the remaining input characters.
%%   S        - fields completed so far, most recent first; each field is
%%              stored as a reversed character list.
%%   Acc      - reversed characters of the current field that precede the
%%              quoted substring currently held in L.
%%   L        - reversed characters of the current quoted substring,
%%              including its quote characters.
%%   IsSubStr - true while scanning between an opening and a closing quote.
%%
%% Returns the fields in input order. Quote characters are kept verbatim in
%% the returned fields; commas inside a quoted substring do not split.
%%
%% The pending field is always L ++ Acc (both reversed, L being the more
%% recent part). Folding L into Acc wherever a quoted substring is followed
%% by more text means no characters are dropped, and end of input is handled
%% for every combination of Acc and L.

%% End of input: emit the pending field together with the finished ones.
%% lists:flatten/1 is kept so that deep lists handed in through S are still
%% accepted.
parse_csv([], S, Acc, L, _IsSubStr) ->
    Fields = [L ++ Acc | S],
    lists:reverse([lists:reverse(lists:flatten(F)) || F <- Fields]);
%% Closing quote: the substring ends, the quote itself is kept.
parse_csv([$" | T], S, Acc, L, true) ->
    parse_csv(T, S, Acc, [$" | L], false);
%% Any other character inside a substring, commas included, is literal.
parse_csv([H | T], S, Acc, L, true) ->
    parse_csv(T, S, Acc, [H | L], true);
%% Opening quote: keep whatever was collected for this field so far
%% (including an earlier quoted substring) and start a new substring.
parse_csv([$" | T], S, Acc, L, false) ->
    parse_csv(T, S, L ++ Acc, [$"], true);
%% Comma outside a substring: the current field is complete.
parse_csv([$, | T], S, Acc, L, false) ->
    parse_csv(T, [L ++ Acc | S], [], [], false);
%% Ordinary character outside a substring; a preceding quoted substring
%% stays part of the same field.
parse_csv([H | T], S, Acc, L, false) ->
    parse_csv(T, S, [H | L ++ Acc], [], false).
Example usage
1> c(ql).
{ok,ql}
2> ql:parse_csv("1,\"Abc, cdf\",3.0").
["1","\"Abc, cdf\"","3.0"]
3> ql:parse_csv("1,xyz,3.0").         
["1","xyz","3.0"]
Download from GitHub.