Rob
Rob

Reputation: 11487

Javascript Parse Nested String Functions with parameters

I am trying to parse this string into an organized set of functions:

var str = "a(b, c(e, f(h,i,j), g(k,l,m(o,p,q)) ), d(r,s,t))"

Ideally I would like to turn it into an object like this:

var obj = {
    func:'a',
    params:[
        {p:'b'},
        {p: {
            func:'c',
            params:[
                {
                    p:'e',
                    p:{
                        func:'f',
                        params:[
                            {p:'h'},
                            {p:'i'},
                            {p:'j'}
                        ]
                    },
                    p:'g',
                    params:[
                        {p:'k'},
                        {p:'l'},
                        {p:{
                            func:'m',
                            params:[
                                {p:'o'},
                                {p:'p'},
                                {p:'q'}
                            ]
                        }}
                    ]
                }
            ]
        }},
        {
            p:'d',
            params:[
                {p:'r'},
                {p:'s'},
                {p:'t'}
            ]
        }
    ]
}

I have spent about 8 hours trying various combinations of str.replace(), str.substring(), and str.indexOf() without any luck.

Any help on how to go about achieving my goal would be appreciated.

Note: the functions can take any number of parameters; the count is not fixed at 3.

UPDATE -- I stopped trying to do string manipulation and approached it character by character. To create desired output:

// Character-by-character attempt: every character of the call expression is
// translated into a fragment of JSON text, and the fragments are then patched
// up with a few string replacements so that JSON.parse accepts the result.
var str = "a(b, c(e, f(h,i,j), g(k,l,m(o,p,q)) ), d(r,s,t))";
// Strip spaces with a real regex literal and a separate replacement argument.
// (Passing the single string '/ /g,""' searches for that literal text and
// therefore never removes anything.)
str = str.replace(/ /g, "");
var strArr = str.split('');
var firstPass = "";
var final;

/**
 * Builds the JSON text for `strArr`, stores it in `final`, and logs both the
 * text and the parsed object. Only single-character names are recognised,
 * because each character is inspected on its own.
 *
 * Throws whatever JSON.parse throws if the patched-up text is not valid JSON.
 */
var buildObj = function(){
    // Start from scratch so that calling buildObj() again does not append to
    // the text produced by an earlier call.
    firstPass = "";
    var letters = /^[0-9a-zA-Z]+$/;

    for(var i = 0; i < strArr.length; i++){
        if(strArr[i].match(letters)){
            if(strArr[i + 1] == '('){
                // A name directly followed by "(" is a function name.
                firstPass += '},{"func":' + '"' + strArr[i] + '"';
            } else {
                // Any other name is a plain parameter value.
                firstPass += '"' + strArr[i] + '"';
            }
        }
        if(strArr[i] == '('){
            firstPass += ',"params":[{"p":';
        }
        if(strArr[i] == ')'){
            firstPass += '}],';
        }
        if(strArr[i] == ','){
            firstPass += '},{"p":';
        }
    }

    var secondPass = firstPass;
    secondPass += '}';
    // Drop the empty {"p":} wrappers that were opened just before a function name.
    secondPass = secondPass.replace(/,{"p":}/g,'');
    // Remove the stray "}," emitted in front of the outermost function (first match only).
    secondPass = secondPass.replace('},','');
    // Remove the trailing comma left behind after each closing "]".
    secondPass = secondPass.replace(/],}/g,']}');
    final = secondPass;
    console.log(final);
    console.log(JSON.parse(final));

};

Upvotes: 2

Views: 1324

Answers (3)

inf3rno
inf3rno

Reputation: 26137

You cannot use the same property name for the 3 values, so you cannot do this.

        func:'c',
        params:[
            {
                p:'e',
                p:{
                    func:'f',
                    params:[
                        {p:'h'},
                        {p:'i'},
                        {p:'j'}
                    ]
                },
                p:'g',

If we remove this part and fix other inconsistent parts of your example (at least try to write an example which is not a failure on its own), it's relatively easy to transform your code into JavaScript with eval:

parser:

var parse = function (str) {

    // Builder for a function call: wraps {func, params} in a {p: ...} holder.
    // Every argument after the name becomes one entry of `params`.
    var q = function (name) {
        var params = [].slice.call(arguments, 1);
        return {p: {func: name, params: params}};
    };

    // Builder for a plain parameter.
    var p = function (name) {
        return {p: name};
    };

    // Rewrite the input into JavaScript source made of q(...) and p(...) calls:
    // a word followed by "(" opens a q("word", call, any other word becomes
    // p("word") followed by the character that came after it. The second
    // replace removes the dangling comma that an empty argument list leaves.
    var compiled = str
        .replace(/(\w+)\s*(\W)/g, function (match, name, token) {
            return token === "("
                ? 'q("' + name + '",'
                : 'p("' + name + '")' + token;
        })
        .replace(/,\s*\)/g, ")");

    // NOTE(review): eval runs text derived from `str`, so this must only be fed
    // trusted input. It has to stay a direct eval call, because the generated
    // source resolves `q` and `p` from this function's scope.
    var f = eval("(function (){return " + compiled + ";})");

    // The outermost q(...) result is a {p: ...} holder; hand back its content.
    return f().p;
};

test:

describe("x", function () {

    it("y", function () {

        // Input with a plain parameter, a one-argument call and a three-argument call.
        var input = "a(b, c(e), d(r,s,t))";

        // Structure parse() is expected to return for the input above.
        var expected = {
            func: 'a',
            params: [
                {p: "b"},
                {p: {func: 'c', params: [{p: 'e'}]}},
                {p: {func: 'd', params: [{p: 'r'}, {p: 's'}, {p: 't'}]}}
            ]
        };

        var actual = parse(input);
        expect(actual).toEqual(expected);

    });

});

note:

I agree with Ira Baxter, you have to read more about how to do this in general.

Upvotes: 0

Ira Baxter
Ira Baxter

Reputation: 95392

Regular expressions and string hacking aren't going to work; regexes cannot (directly) handle any text with nested structures (people keep learning this...). Switching to single characters doesn't improve anything.

Classically what you want is a lexer that produces tokens (language elements) and a parser (that checks the elements are organized properly).

As a practical matter, you can combine these into one coherent structure for simple languages like the one that interests OP. Check out this SO answer on how to build a recursive descent parser easily; follow that answer to one that tells how to build a tree (in essence, how to build the result structure you want).

Upvotes: 3

irysius
irysius

Reputation: 600

I saw the problem, and thought it might be interesting to try. I'll walk through my thought process and hope that helps you.

The object I produce does not map exactly onto yours, but it could easily be converted. It was easier to arrive at the object I produced without getting distracted by "extraneous" details like putting things in an array.

1.) I'm assuming whitespace is useless. The first step was to replace all whitespace with nothing.

/**
 * Removes all whitespace from a statement so that later steps only have to
 * deal with identifiers, parentheses and commas.
 *
 * Tabs and line breaks are removed as well as plain spaces, so a statement
 * that was wrapped over several lines is normalised the same way.
 *
 * @param {string} statement - call expression such as "a(b, c(d))"
 * @returns {string} the statement without any whitespace
 */
function processStatement(statement) {
    return statement.replace(/\s+/g, '');
}
// Output: a(b,c(e,f(h,i,j),g(k,l,m(o,p,q))),d(r,s,t))

2.) I proceeded with the goal of creating a tree-like object in which parameters that do not lead into further functions are dead ends (leaves). I needed a way to parse "roots"; the code should explain more:

/**
 * Turns a whitespace-free statement of the form identifier(parameters) into a
 * nested object: { identifier: { parameter: null | nested node, ... } }.
 *
 * Plain parameters become keys mapped to null; parameters that are themselves
 * calls are parsed recursively and merged in under their own identifier.
 * Relies on splitStatementByExternalCommas to split the parameter list.
 *
 * @param {string} statement - e.g. "a(b,c(e,f(h,i,j)),d(r,s,t))"
 * @returns {Object} an object with a single key, the statement's identifier
 * @throws {TypeError} if the statement does not look like identifier(...)
 */
function produceNodeFromStatement(statement) {
    // Matches the widest pattern identifier(stuff-inside): the greedy group
    // makes the final ")" of the statement close the outermost call.
    // For "a(b,c(e,f(h,i,j),g(k,l,m(o,p,q))),d(r,s,t))" the groups are:
    // identifier: a
    // stuff-inside: b,c(e,f(h,i,j),g(k,l,m(o,p,q))),d(r,s,t)
    // Identifiers may be longer than one character, and stuff-inside may be
    // empty so that a call without arguments, e.g. f(), is accepted.
    var results = /([a-zA-Z_$][\w$]*)\((.*)\)/.exec(statement);
    if (results === null) {
        throw new TypeError('Expected a statement of the form identifier(parameters), got: ' + statement);
    }
    var identifier = results[1];
    var inside = results[2];

    // Split stuff-inside by the commas that are not enclosed in parentheses:
    // [ 'b', 'c(e,f(h,i,j),g(k,l,m(o,p,q)))', 'd(r,s,t)' ]
    // An empty argument list yields no parameters at all instead of a single
    // empty-string parameter.
    var parameters = inside === '' ? [] : splitStatementByExternalCommas(inside);

    var node = {};
    parameters.forEach(function (parameter) {
        if (parameter.indexOf('(') === -1) {
            // Dead end: a parameter that is not a call.
            node[parameter] = null;
        } else {
            // Recursion. The call returns a wrapper object around the nested
            // node, so its single entry is copied into the current node.
            var wrappedNode = produceNodeFromStatement(parameter);
            Object.keys(wrappedNode).forEach(function (key) {
                node[key] = wrappedNode[key];
            });
        }
    });

    // Assign node to the node's identifier
    var root = {};
    root[identifier] = node;
    return root;
}

3.) The only missing piece to the formula is the function that splits a string by only external commas - since I can't figure out a regex, here's splitStatementByExternalCommas.

/**
 * Splits a parameter list on the commas that sit outside every pair of
 * parentheses, e.g. "b,c(e,f),d" becomes [ 'b', 'c(e,f)', 'd' ].
 *
 * @param {string} statement - comma-separated parameters, possibly nested
 * @returns {string[]} one entry per top-level parameter
 */
function splitStatementByExternalCommas(statement) {
    // A trailing comma acts as a sentinel so the last parameter is flushed
    // by the same branch as all the others.
    var text = statement + ',';
    var pieces = [];
    var depth = 0;     // how many "(" are currently open
    var current = '';  // parameter being accumulated

    for (var pos = 0; pos < text.length; pos++) {
        var ch = text.charAt(pos);

        // A comma outside all parentheses ends the current parameter.
        if (ch === ',' && depth === 0) {
            pieces.push(current);
            current = '';
            continue;
        }

        // Everything else, including commas nested inside parentheses, belongs
        // to the current parameter; parentheses also adjust the nesting depth.
        if (ch === '(') {
            depth += 1;
        } else if (ch === ')') {
            depth -= 1;
        }
        current += ch;
    }

    return pieces;
}

4.) Putting everything together, to turn your string statement into an intermediate object:

// Normalise the raw statement first, then build the intermediate tree from it.
var str = processStatement("a(b, c(e, f(h,i,j), g(k,l,m(o,p,q)) ), d(r,s,t))");
var root = produceNodeFromStatement(str);
// Output is something like:

{ 
  a: { 
       b: null,
       c: {
          e: null,
          f: { h: null, i: null, j: null },
          g: {
              k: null, l: null,
              m: { o: null, p: null, q: null }
          }
       },
       d: { r: null, s: null, t: null }
  }
}

5.) I'm going to assume mapping this intermediate object to your intended target is straightforward from here on out?

Upvotes: 0

Related Questions