richwol
richwol

Reputation: 1165

Parsing a string into an object (custom format)

I have the following example queries which represent objects of varying complexity:

abc,def,ghi

abc,\def,ghi

abc,\def{ghi,jkl},mno

abc,\def{ghi,\jkl},mno

abc,\def{ghi,\jkl{mno,pqr,\stu},vwx

In the above examples I've just used 3 letters of the alphabet to represent a word. This word could be of any length.

There's two different types of words. If a word starts with a back slash it is known as an edge. If it doesn't it is known as a field.

Fields and edges can belong to edges. These are defined within edge{ }. This is infinitely recursive, so words can belong to edges which belong to edges which belong to edges etc..

I want to parse this string to convert it into a JavaScript object.

It's hard to explain, so an example is probably better..

For example 1 this should convert to:

[
    {
        type: "field",
        val: "abc"
    },
    {
        type: "field",
        val: "def"
    },
    {
        type: "field",
        val: "ghi"
    },
]

For example 2 this should convert to:

[
    {
        type: "field",
        val: "abc"
    },
    {
        type: "edge",
        val: "def",
        children: []
    },
    {
        type: "field",
        val: "ghi"
    }
]

For example 5 this should convert to:

[
    {
        type: "field",
        val: "abc"
    },
    {
        type: "edge",
        val: "def",
        children: [
            {
                type: "field",
                val: "ghi"
            },
            {
                type: "edge",
                val: "jkl",
                children: [
                    {
                        type: "field",
                        val: "mno"
                    },
                    {
                        type: "field",
                        val: "pqr"
                    },
                    {
                        type: "edge",
                        val: "stu",
                        children: []
                    }
                ]
            }
        ]
    },
    {
        type: "field",
        val: "vwx"
    }
]

To do this I presume it'll have to crawl through the string character by character, working out what it's currently looking at and building/traversing the object as it goes. Maybe there's some sort of recursive regular expression I could use?

Any thoughts on how this would best be done? Any ideas, concepts etc would be hugely appreciated.

Upvotes: 2

Views: 103

Answers (3)

1983
1983

Reputation: 5963

I'd suggest writing a grammar and implementing a recursive descent parser. They're pretty straightforward.

var parse = function(s){

    var i = 0;

    var peek = function(c){
        return s[i] === c;
    };

    var next = function(c){
        var found = s[i];
        if(c && s[i] !== c){
            throw 'expected ' + c + ' at char ' + i + ': ' + s.slice(i,20);
        }
        i += 1;
        return found;
    };

    var word = function(){
        var w = '';
        if(!/[a-z]/.test(s[i])){
            throw 'expected a word at char ' + i + ': ' + s.slice(i,20);
        }
        while(s[i] && /[a-z]/.test(s[i])){
            w += s[i];
            i += 1;
        }
        return w;
    };

    var edge = function(){
        next('\\');
        var val = word();
        var e = {
            type: 'edge',
            val: val
        };
        if(peek('{')){
            next('{');
            e.children = fields_or_edges();
            next('}');
        }
        return e;
    };

    var field = function(){
        return {
            type: 'field',
            val: word()
        };
    };

    var fields_or_edges = function(){
        var stuff = [];
        var thing;
        do {
            if(peek('\\')){
                thing = edge();
            }else{
                thing = field();
            }
            stuff.push(thing);
        }while(peek(',') && next());
        return stuff;
    };

    return fields_or_edges();
};

Let's test it.

[
    'abc,def,ghi',
    'abc,\\def,ghi',
    'abc,\\def{ghi,jkl},mno',
    'abc,\\def{ghi,\\jkl},mno',
    'abc,\\def{ghi,\\jkl{mno,pqr,\\stu},vwx' // uncaught exception: expected } at char 35: 
].forEach(function(s){
    console.log(JSON.stringify(parse(s), null, 4));
});

Note that if you run this, the parser picks up a an error at char 35 in the final test string: no closing '}'.

Upvotes: 1

XAMPPRocky
XAMPPRocky

Reputation: 3599

Here's a single recursive function. The only real problem is that this will not work for high code point characters.

function parse (string) {
    var array = []
      , children = ''
      , recursive = false
      , level = 0
      , object = { type: 'field', val: ''}
    for (var i = 0; i < string.length; i++) {
        var ch = string.charAt(i)
        if (recursive) {
            if (ch === '}' && level === 0) {
                object.children = parse(children)
                array.push(object)
                object = {type: 'field', val: ''}
                recursive = false
                continue
            }else if (ch === '}') {
                level--
                children += ch 
            } else if (ch === '{') {
                level++
                children += ch 
            } else {
                children += ch
                continue
            }
        }

        if (ch === ',' && object.val.length > 0) {
            if (object.type == 'edge') {
                object.children = []
                array.push(object)
                object = {type: 'field', val: ''}
            } else {
                array.push(object)
                object = {type: 'field', val: ''}
            }
        } else if (ch === '{') {
            recursive = true
            continue
        } else if (ch === '\u005C') {
            object.type = 'edge'
        } else {
            object.val += ch
        }
    }

    if (object.val.length > 0) {
        array.push(object)
    }
    return array
}

Upvotes: 1

AntoineWDG
AntoineWDG

Reputation: 549

Here is how I would do it without regexp:

function parseArray(s) {
    var w = "", i = 0, c, array = [];
    for (i = 0; i < s.length; i++) {
        c = s.atChar(i);
        if (c == ",") {
            array.push(parseObject(w));
        } else {
            w += c;
        }

    }
}

function parseObject(s) {
    if (s.length === 0) {
        return null;
    }

    if (s.atChar(0) !== "\\") {
        return {
            type: "field",
            value: w
        };
    }
    var v, a, c, status = 0;
    for (var i = 1; i < s.length; i++) {
        c = s.atChar(i);
        if (status === 0) {
            if (c === "{") {
                status = 1;
            } else {
                v += c;
            }
        } else {
            if (c === "}") {
                break;
            }
            a += c;
        }
    }

    return {
        type: "edge",
        value: v,
        children: parseArray(a)
    };

}

There might be some javascript mistakes, there a still invalid queries to be dealt with, but at thinks it's a good start. I've got no idea about recursive regexp, but it is not that long to write such a function.

Upvotes: 0

Related Questions