Reputation: 906
I want to parse XML strings into an Erlang list and then convert that list to JSON.
Example input:
<?xml version="1.0" encoding="UTF-8"?>
<!--some message here-->
<start>
<data>
<number id="333">test message</number>
<data>current date</data>
</data>
<mass>
<client>35</client>
<address>lattitude</address>
<code>3454343</code>
<foo tipo="casa">Some text message 2</foo>
<product>TEST</product>
</mass>
</start>
Output should be:
{
"start": {
"data": {
"number": {
"@id": "333",
"#text": "test message"
},
"data": "current date"
},
"mass": {
"client": "35",
"address": "lattitude",
"code": "3454343",
"foo": {
"@tipo": "casa",
"#text": "Some text message 2"
},
"product": "TEST"
}
}
}
I am trying to use erlsom:simple_form(Xml).
and getting :
{ok,{"start",[],
[{"data",[],
[{"number",[{"id","333"}],["test message"]},
{"data",[],["current date"]}]},
{"mass",[],
[{"client",[],["35"]},
{"address",[],["lattitude"]},
{"code",[],["3454343"]},
{"foo",[{"tipo","casa"}],["Some text message 2"]},
{"product",[],["TEST"]}]}]},
[]}
Now I want to delete these empty attribute lists. Is there any simple way to do this? Thanks in advance.
UPDATE: I made it work with the solution from "Erlang, converting xml to tuples and lists",
but now I am getting:
{"start",
[{"data",
[{"number","test message"},{"data","current date"}]},
{"mass",
[{"client","35"},
{"address","lattitude"},
{"code","3454343"},
{"foo","Some text message 2"},
{"product","TEST"}]}]}
The attribute lists are missing: there is no [{"id","333"}]
and no [{"tipo","casa"}]
list in the result.
Upvotes: 1
Views: 1004
Reputation: 1626
I suggest using jiffy for JSON and exml for XML. Both jiffy and exml include native code, which makes them very fast.
Clone and compile them.
Before compiling them, you should install g++ and libexpat-dev.
Example:
-module(test).
-export([convert/1]).
-include("exml/include/exml.hrl"). %% In my test
%% @doc Converts an XML document, given as a binary, into JSON.
%% The binary is parsed with exml:parse/1 (anything other than {ok, _}
%% crashes on the match), the root element is turned into a pair by
%% convert2/1, and that pair is wrapped as {[Pair]} and handed to
%% jiffy:encode/1.
convert(XML) when is_binary(XML) ->
    {ok, Root} = exml:parse(XML),
    RootPair = convert2(Root),
    jiffy:encode({[RootPair]}).
%% @doc Converts one #xmlel{} record into a {Name, Value} pair.
%% An element with no attributes and exactly one character-data child maps
%% directly to {Name, Data}. Every other element maps to {Name, {Members}},
%% where Members is the converted attributes followed by the converted
%% children.
convert2(#xmlel{name = Tag, attrs = [], children = [{xmlcdata, Text}]}) ->
    {Tag, Text};
convert2(#xmlel{name = Tag, attrs = AttrList, children = Kids}) ->
    Members = convert_attrs(AttrList) ++ convert_children(Kids),
    {Tag, {Members}}.
%% @doc Applies convert_attr/1 to every attribute, preserving their order.
convert_attrs(Attrs) ->
    lists:map(fun convert_attr/1, Attrs).
%% @doc Prefixes the attribute name (a binary) with "@" and keeps the value
%% unchanged, giving the {<<"@name">>, Value} pair.
convert_attr({Key, Val}) ->
    PrefixedKey = <<"@", Key/binary>>,
    {PrefixedKey, Val}.
%% @doc Applies convert_child/1 to every child node, preserving their order.
convert_children(Children) ->
    lists:map(fun convert_child/1, Children).
%% @doc Converts a single child node.
%% Character data becomes a {<<"#text">>, Data} pair; a nested #xmlel{}
%% record is delegated to convert2/1.
convert_child({xmlcdata, Text}) ->
    {<<"#text">>, Text};
convert_child(Element = #xmlel{}) ->
    convert2(Element).
In the shell:
p@jahanbakhsh ~/Projects/test $ ls
exml jiffy test.erl
p@jahanbakhsh ~/Projects/test $ erl -pa jiffy/ebin exml/ebin
Erlang/OTP 19 [erts-8.2.2] [source-1ca84a4] [64-bit] [smp:4:4] [async-threads:10] [hipe] [kernel-poll:false]
Eshell V8.2.2 (abort with ^G)
1> c(test).
{ok,test}
2> XML = <<"<start><data><number id=\"333\">test message</number><data>current date</data></data><mass><client>35</client><address>lattitude</address><code>3454343</code><foo tipo=\"casa\">Some text message 2</foo><product>TEST</product></mass></start>">>.
<<"<start><data><number id=\"333\">test message</number><data>current date</data></data><mass><client>35</client><address"...>>
3> test:convert(XML).
<<"{\"start\":{\"data\":{\"number\":{\"@id\":\"333\",\"#text\":\"test message\"},\"data\":\"current date\"},\"mass\":{\"client\":\"35\",\"addres"...>>
4> io:format("~s~n", [test:convert(XML)]).
{"start":{"data":{"number":{"@id":"333","#text":"test message"},"data":"current date"},"mass":{"client":"35","address":"lattitude","code":"3454343","foo":{"@tipo":"casa","#text":"Some text message 2"},"product":"TEST"}}}
ok
5>
Upvotes: 2
Reputation: 2345
The output of your simple parsing is in a set format, {Node, Attributes, Children},
so you can write a simple parser that turns the structure you have into a nested proplist. With that, you can use either mochijson or jsx to turn that proplist into a JSON string.
-module(transform).
-export([test/0]).
%% @doc Runs parse/1 on the sample tree returned by data/0.
test() ->
    SampleTree = data(),
    parse(SampleTree).
%% @doc Turns a {Node, Attributes, Children} tuple into a one-element
%% proplist [{Node, Value}].
%% An element with no attributes whose only child is a list (a text
%% string) maps straight to that text. Any other element maps to its
%% converted attributes followed by its converted children; with an empty
%% attribute list this is just the converted children.
parse({Name, [], [Text]}) when is_list(Text) ->
    [{Name, Text}];
parse({Name, Attrs, Kids}) ->
    Members = attributes_to_struct(Attrs, []) ++ children_to_struct(Kids, []),
    [{Name, Members}].
%% @doc Converts a list of child nodes and appends the result to Acc.
%%
%% Each child is either a text node (a list) or a nested
%% {Node, Attributes, Children} tuple. Text becomes {"#text", Text};
%% tuples are converted with parse/1. Children keep their document order.
%%
%% Unlike a clause set that only accepts text as the sole remaining child,
%% this accepts text at any position, so mixed content such as
%% ["before", {"b", [], ["x"]}, "after"] no longer fails with
%% function_clause. Several text nodes under one parent each yield their
%% own {"#text", _} entry.
%%
%% The children are traversed once and appended to Acc in a single step,
%% instead of re-copying the accumulator with ++ on every child.
children_to_struct(Children, Acc) ->
    Acc ++ lists:flatmap(fun child_to_struct/1, Children).

%% Converts one child node into a list of proplist entries.
child_to_struct(Text) when is_list(Text) ->
    [{"#text", Text}];
child_to_struct(Element) when is_tuple(Element) ->
    parse(Element).
%% @doc Converts a list of {Key, Value} attribute pairs into
%% {"@" ++ Key, Value} pairs and appends them, in order, to Acc.
%%
%% The attribute list is mapped once and appended to Acc in a single
%% step, rather than rebuilding the accumulator with ++ for every
%% attribute. An element that is not a two-tuple still fails with
%% function_clause.
attributes_to_struct(Attributes, Acc) ->
    Acc ++ lists:map(fun({Key, Value}) -> {"@" ++ Key, Value} end, Attributes).
%% @doc Returns the sample {Node, Attributes, Children} tree used by
%% test/0: a "start" root holding a "data" element and a "mass" element.
data() ->
    NumberEl = {"number", [{"id", "333"}], ["test message"]},
    DateEl = {"data", [], ["current date"]},
    DataEl = {"data", [], [NumberEl, DateEl]},
    MassChildren = [
        {"client", [], ["35"]},
        {"address", [], ["lattitude"]},
        {"code", [], ["3454343"]},
        {"foo", [{"tipo", "casa"}], ["Some text message 2"]},
        {"product", [], ["TEST"]}
    ],
    MassEl = {"mass", [], MassChildren},
    {"start", [], [DataEl, MassEl]}.
Running it in the shell with mochijson2:
Eshell V7.3 (abort with ^G)
1> c(transform).
{ok,transform}
2> T = transform:test().
[{"start",
[{"data",
[{"number",[{"@id","333"},{"#text","test message"}]},
{"data","current date"}]},
{"mass",
[{"client","35"},
{"address","lattitude"},
{"code","3454343"},
{"foo",[{"@tipo","casa"},{"#text","Some text message 2"}]},
{"product","TEST"}]}]}]
3>
4> iolist_to_binary(mochijson2:encode(T)).
<<"{\"start\":{\"data\":{\"number\":{\"@id\":[51,51,51],\"#text\":[116,101,115,116,32,109,101,115,115,97,103,101]},\"data\":{\"#text"...>>
Upvotes: 2