Reputation: 2298
I have an AST generated by a parsing expression grammar from a target language that will compile to a source language by traversing its nodes. A simple source
like (10 + 20) * 2
should generate the following representation, as a native ECMAScript object:
var ast = {
"type": "Stmt",
"body": [
{
"type": "Expr",
"expression": {
"type": "BinaryExpr",
"operator": "*",
"left": {
"type": "BinaryExpr",
"operator": "+",
"left": {
"type": "Literal",
"value": 10
},
"right": {
"type": "Literal",
"value": 20
}
},
"right": {
"type": "Literal",
"value": 2
}
}
}
]
};
The generated object clearly defines the precedence of the operators, and evaluating this source is pretty easy, however, generating code back from it is a pretty complex task when you have to deal with the parenthesis solving.
When generating code by traversing nodes, the precedence is completely lost. I have a function called visitor
, which is the entry point of the program:
function visitor(node) {
switch (node.type) {
case "Stmt":
return parseStmt(node.body);
}
}
This simple grammar can accept multiple statements...
function parseStmt(body) {
var stmtList = Array(body.length);
for (var i = 0, len = body.length; i < len; i++) {
stmtList[i] = (function(stmt) {
switch (stmt.type) {
case "Expr":
return parseExpr(stmt.expression);
}
})(body[i]);
}
return stmtList.join(";\n");
}
... and two types of expressions:
function parseExpr(expr) {
switch (expr.type) {
case "BinaryExpr":
return parseBinaryExpr(expr);
case "Literal":
return parseLiteral(expr);
}
}
Where Literal
just deals with string transformation...
function parseLiteral(expr) {
return expr.value.toString();
}
... and BinaryExpr
is ambiguous when solving precedence:
function parseBinaryExpr(expr) {
var op = {
left: parseExpr(expr.left),
right: parseExpr(expr.right)
};
switch (expr.operator) {
case "+":
return Codegen.OP_ADD(op.left, op.right);
case "*":
return Codegen.OP_MUL(op.left, op.right);
}
}
Only two math operations are here supported, and the code generation happens here:
var Codegen = {
OP_ADD: function(left, right) {
return left + " + " + right;
},
OP_MUL: function(left, right) {
return left + " * " + right;
}
};
When calling visitor(ast)
back, I get 10 + 20 * 2
, which would eval to 10 + (20 * 2)
instead of (10 + 20) * 2
, and inserting parenthesis in each side of the binary expression would be a ridiculous workaround: (10 + 20) * 2
where:
function parseBinaryExpr(expr) {
var op = {
left: "(" + parseExpr(expr.left) + ")",
right: "(" + parseExpr(expr.right) + ")"
};
...
How could this ambiguity be solved in a good way?
Upvotes: 1
Views: 1526
Reputation: 1189
Wouldn't a simple precedence table and looking up at the parent expression solve it?
Also, there were a little bug in the switch.
var ast = {
"type": "Stmt",
"body": [
{
"type": "Expr",
"expression": {
"type": "BinaryExpr",
"operator": "*",
"left": {
"type": "BinaryExpr",
"operator": "+",
"left": {
"type": "Literal",
"value": 10
},
"right": {
"type": "Literal",
"value": 20
}
},
"right": {
"type": "Literal",
"value": 2
}
}
}
]
};
var precedence = { "*": 0, "+": 1 };
var Codegen = {
OP_ADD: function(left, right) {
return left + " + " + right;
},
OP_MUL: function(left, right) {
return left + " * " + right;
}
};
function visitor(node) {
switch (node.type) {
case "Stmt":
return parseStmt(node.body);
}
}
function parseStmt(body) {
var stmtList = Array(body.length);
for (var i = 0, len = body.length; i < len; i++) {
stmtList[i] = (function(stmt) {
switch (stmt.type) {
case "Expr":
return parseExpr(stmt.expression, null);
}
})(body[i]);
}
return stmtList.join(";\n");
}
function parseExpr(expr, parent) {
switch (expr.type) {
case "BinaryExpr":
return parseBinaryExpr(expr, parent);
case "Literal":
return parseLiteral(expr);
}
}
function parseLiteral(expr) {
return expr.value.toString();
}
function parseBinaryExpr(expr, parent) {
var op = {
left: parseExpr(expr.left, expr),
right: parseExpr(expr.right, expr)
};
var ret = "";
switch (expr.operator) {
case "+":
ret = Codegen.OP_ADD(op.left, op.right);
break;
case "*":
ret = Codegen.OP_MUL(op.left, op.right);
break;
}
if (parent && precedence[expr.operator] > precedence[parent.operator]) {
ret = "(" + ret + ")";
}
return ret;
}
visitor(ast);
Or you can always put a parenthesis if there's a nested binary expression inside another, that would do the trick too.
if (parent) {
ret = "(" + ret + ")";
}
Just check parent because we only pass the parent if we already are inside a binary expression.
Upvotes: 2
Reputation: 241731
I would add the parentheses in CodeGen
rather than in ParseBinaryExpr
:
var Codegen = {
OP_ADD: function(left, right) {
return "(" + left + " + " + right + ")";
},
OP_MUL: function(left, right) {
return "(" + left + " * " + right + ")";
}
};
That will result in fewer redundant parentheses, although you will still end up with a lot of parentheses. On the positive side, there is no doubt that the resulting expression corresponds to the AST. (You'd need to add parentheses in the code gen for unary operators as well, by the way.)
It is possible to avoid all redundant parentheses by checking operator precedence in ParseBinaryExpr
-- that is, you surround an argument with parentheses only if its precedence is less than the precedence of the operator of the binary expression -- but that's easy to get wrong and leads to subtle bugs.
Upvotes: 0