Eugen Dubrovin
Eugen Dubrovin

Reputation: 906

How to convert XML strings to JSON in Erlang?

I want to parse XML strings into Erlang lists and then convert them to JSON.

Example input:

<?xml version="1.0" encoding="UTF-8"?>
<!--some message here-->
<start>
   <data>
      <number id="333">test message</number>
      <data>current date</data>
   </data>
   <mass>
      <client>35</client>
      <address>lattitude</address>
      <code>3454343</code>
      <foo tipo="casa">Some text message 2</foo>
      <product>TEST</product>
   </mass>
</start>

Output should be:

{
  "start": {
    "data": {
      "number": {
        "@id": "333",
        "#text": "test message"
      },
      "data": "current date"
    },
    "mass": {
      "client": "35",
      "address": "lattitude",
      "code": "3454343",
      "foo": {
        "@tipo": "casa",
        "#text": "Some text message 2"
      },
      "product": "TEST"
    }
  }
}

I am trying to use erlsom:simple_form(Xml).

and getting :

{ok,{"start",[],
     [{"data",[],
       [{"number",[{"id","333"}],["test message"]},
        {"data",[],["current date"]}]},
      {"mass",[],
       [{"client",[],["35"]},
        {"address",[],["lattitude"]},
        {"code",[],["3454343"]},
        {"foo",[{"tipo","casa"}],["Some text message 2"]},
        {"product",[],["TEST"]}]}]},
    []}

Now I want to remove the empty attribute lists. Is there a simple way to do this? Thanks in advance.

UPDATE: I made it work with the solution from "Erlang, converting xml to tuples and lists".

But now I am getting:

{"start",
 [{"data",
   [{"number","test message"},{"data","current date"}]},
  {"mass",
   [{"client","35"},
    {"address","lattitude"},
    {"code","3454343"},
    {"foo","Some text message 2"},
    {"product","TEST"}]}]}

The attribute lists [{"id","333"}] and [{"tipo","casa"}] are missing from this result.

Upvotes: 1

Views: 1004

Answers (2)

Pouriya
Pouriya

Reputation: 1626

I suggest using jiffy for JSON and exml for XML. Both jiffy and exml are implemented with native code, which makes them very fast.

Clone and compile them.
Before compiling them, you should install g++ and libexpat-dev.

Example:

-module(test).
-export([convert/1]).
-include("exml/include/exml.hrl"). %% In my test





%% Parse an XML document supplied as a binary and return it encoded as JSON.
%% Crashes with a badmatch if exml:parse/1 does not return {ok, Element}.
convert(XML) when is_binary(XML) ->
    {ok, Root} = exml:parse(XML),
    RootPair = convert2(Root),
    %% The single {Name, Value} pair is wrapped in the `{[...]}` form that
    %% is handed to jiffy:encode/1 as the top-level term.
    jiffy:encode({[RootPair]}).





%% Turn an #xmlel{} record into a {Name, Value} pair.
%% An element with no attributes and exactly one character-data child maps
%% directly to that text. Every other element maps to a `{Pairs}` term whose
%% list holds the attribute pairs followed by the child pairs.
convert2(#xmlel{name = Tag, attrs = [], children = [{xmlcdata, Text}]}) ->
    {Tag, Text};
convert2(#xmlel{name = Tag, attrs = AttrList, children = Kids}) ->
    Members = convert_attrs(AttrList) ++ convert_children(Kids),
    {Tag, {Members}}.





%% Convert every attribute with convert_attr/1, keeping the input order.
convert_attrs(Attrs) ->
    [convert_attr(Attr) || Attr <- Attrs].





%% Prefix the attribute name (a binary) with "@"; the value is passed
%% through unchanged.
convert_attr({Name, Value}) ->
    Key = <<"@", Name/binary>>,
    {Key, Value}.





%% Convert every child node with convert_child/1, keeping the input order.
convert_children(Children) ->
    lists:map(fun convert_child/1, Children).





%% Convert a single child node into a {Key, Value} pair:
%% a nested element is delegated to convert2/1, and character data is
%% stored under the <<"#text">> key.
convert_child(#xmlel{} = Element) ->
    convert2(Element);
convert_child({xmlcdata, Text}) ->
    {<<"#text">>, Text}.

In the shell:

p@jahanbakhsh ~/Projects/test $ ls
exml                      jiffy                   test.erl

p@jahanbakhsh ~/Projects/test $ erl -pa jiffy/ebin exml/ebin
Erlang/OTP 19 [erts-8.2.2] [source-1ca84a4] [64-bit] [smp:4:4] [async-threads:10] [hipe] [kernel-poll:false]

Eshell V8.2.2  (abort with ^G)
1> c(test).
{ok,test}
2> XML = <<"<start><data><number id=\"333\">test message</number><data>current date</data></data><mass><client>35</client><address>lattitude</address><code>3454343</code><foo tipo=\"casa\">Some text message 2</foo><product>TEST</product></mass></start>">>.
<<"<start><data><number id=\"333\">test message</number><data>current date</data></data><mass><client>35</client><address"...>>

3> test:convert(XML).
<<"{\"start\":{\"data\":{\"number\":{\"@id\":\"333\",\"#text\":\"test message\"},\"data\":\"current date\"},\"mass\":{\"client\":\"35\",\"addres"...>>

4> io:format("~s~n", [test:convert(XML)]).
{"start":{"data":{"number":{"@id":"333","#text":"test message"},"data":"current date"},"mass":{"client":"35","address":"lattitude","code":"3454343","foo":{"@tipo":"casa","#text":"Some text message 2"},"product":"TEST"}}}
ok
5>

Upvotes: 2

Máté
Máté

Reputation: 2345

The output of your simple parsing is in a set format: {Node, Attributes, Children}, so you can write a simple parser that turns that structure you have into a nested proplist. With that, you can either use mochijson or jsx to turn that proplist into a JSON string.

-module(transform).

-export([test/0]).

%% Run parse/1 on the built-in sample returned by data/0.
test() ->
    Sample = data(),
    parse(Sample).

%% Convert one {Node, Attributes, Children} element into a one-element
%% proplist [{Node, Value}].
%%
%% An element with no attributes whose only child is a text string maps
%% straight to that string. Otherwise Value is a proplist holding the
%% "@"-prefixed attributes followed by the converted children.
%% Names and text are passed through as given (plain strings in the sample
%% data); nothing is converted to binaries here.
parse({Node, [], [Text]}) when is_list(Text) ->
    [{Node, Text}];
parse({Node, Attributes, Children}) ->
    %% attributes_to_struct([], []) is [], so an attribute-less element
    %% needs no clause of its own.
    [{Node, attributes_to_struct(Attributes, []) ++ children_to_struct(Children, [])}].

%% Append the converted Children to Acc, preserving document order.
%%
%% A text child (a list) becomes {"#text", Text}; an element child (a tuple)
%% is converted with parse/1. Text may appear at any position, so mixed
%% content such as ["x", {"b", [], ["y"]}] is converted instead of failing
%% with function_clause. Several text children yield several "#text" pairs.
%% Any other kind of child still raises function_clause.
children_to_struct(Children, Acc) ->
    Acc ++ lists:flatmap(
             fun(Text) when is_list(Text) -> [{"#text", Text}];
                (Element) when is_tuple(Element) -> parse(Element)
             end,
             Children).

%% Append one {"@" ++ Key, Value} pair per attribute to Acc, preserving order.
%% An entry that is not a {Key, Value} tuple raises function_clause.
attributes_to_struct(Attributes, Acc) ->
    Acc ++ lists:map(fun({K, V}) -> {"@" ++ K, V} end, Attributes).

%% Sample element tree in {Node, Attributes, Children} form, used by test/0.
data() ->
    DataNode =
        {"data", [],
         [{"number", [{"id", "333"}], ["test message"]},
          {"data", [], ["current date"]}]},
    MassNode =
        {"mass", [],
         [{"client", [], ["35"]},
          {"address", [], ["lattitude"]},
          {"code", [], ["3454343"]},
          {"foo", [{"tipo", "casa"}], ["Some text message 2"]},
          {"product", [], ["TEST"]}]},
    {"start", [], [DataNode, MassNode]}.

Running it in the shell with mochijson:

Eshell V7.3  (abort with ^G)
1> c(transform).
{ok,transform}
2> T = transform:test().
[{"start",
  [{"data",
    [{"number",[{"@id","333"},{"#text","test message"}]},
     {"data","current date"}]},
   {"mass",
    [{"client","35"},
     {"address","lattitude"},
     {"code","3454343"},
     {"foo",[{"@tipo","casa"},{"#text","Some text message 2"}]},
     {"product","TEST"}]}]}]
3> 
4> iolist_to_binary(mochijson2:encode(T)).
<<"{\"start\":{\"data\":{\"number\":{\"@id\":[51,51,51],\"#text\":[116,101,115,116,32,109,101,115,115,97,103,101]},\"data\":{\"#text"...>>

Upvotes: 2

Related Questions