Reputation: 287
I found a web site [Python: Writing a Compiler and Interpreter in 160 lines of code] which implement a simple interpreter, i'm very impressed by the code and want to improve it, below is the modified version of the source:
#****************** Grammer ************************
# stmtlist := (statement)*
#
# statement := 'if' condition ('{')? stmtlist ['else' stmtlist] '}'
# | 'while' condition ('{')? stmtlist '}'
# | variable '=' expression
# | variable '+=' expression
# | variable '-=' expression
# | variable '*=' expression
# | variable '/=' expression
# | 'print' expression
#
# condition := expression ('=='|'!='|'>'|'<'|'>='|'<=') expression
#
# expression := term ('+'|'-' term)*
#
# term := factor ('*'|'/' factor)*
#
# factor := variable|digit
#****************** Lexer ************************
tokenlist = []
currtoken = ("", "", 0) # token, identifier name, value
keywords = set(["while", "if", "else", "print",
"=", "==", "!=", ">", ">=", "<", "<=",
"+", "-", "*", "/", "+=", "-=", "*=", "/=",
"{", "}"]) # '}' end 'while' and 'if'
symboltable = dict()
def nextToken():
global currtoken, symboltable
if(len(tokenlist) > 0):
s = tokenlist.pop(0)
if s in keywords:
currtoken = (s, "", 0)
elif s.isdigit():
#currtoken = ("digit", "", int(s))
currtoken = ("digit", "", float(s))
elif s.isalnum():
symboltable[s] = 0
currtoken = ("variable", s, 0)
else:
print("syntax error: " + s)
else:
currtoken = ("", "", 0)
def consume(expected):
if currtoken[0] == expected:
nextToken()
else:
print("expected " + expected + " not found")
#****************** Parser ************************
def parseFile(filename):
inputfile = open(filename, "r")
inputstring = inputfile.read()
global tokenlist
tokenlist = inputstring.split()
nextToken()
pgm = doStatementList()
return execStatementList(pgm)
def doStatementList():
stmts = []
newstmt = []
while currtoken[0] in ["while", "if", "print", "variable"]:
if currtoken[0] == "while":
# ["while", [condition], ['{'] [statementlist], '}']
consume("while")
newstmt = ["while"]
newstmt.append(doCondition())
if currtoken[0] == "{":
consume("{")
newstmt.append(doStatementList())
#consume("endwhile")
consume("}")
elif currtoken[0] == "if":
# ['if' [condition] [statementlist] ['{'] [['else' statementlist]] '}']
consume("if")
newstmt = ["if"]
newstmt.append(doCondition())
if currtoken[0] == "{":
consume("{")
newstmt.append(doStatementList())
if currtoken[0] == "else":
consume("else")
newstmt.append(doStatementList())
#consume("endif")
consume("}")
elif currtoken[0] == "print":
# ["print", [expression]]
consume("print")
newstmt = ["print"]
newstmt.append(doExpression())
elif currtoken[0] == "variable":
# ["=", [variable], [expression]]
variable = [currtoken[1]]
nextToken()
if currtoken[0] == "=":
consume("=")
newstmt = ["="]
elif currtoken[0] == "+=":
consume("+=")
newstmt = ["+="]
elif currtoken[0] == "-=":
consume("-=")
newstmt = ["-="]
elif currtoken[0] == "*=":
consume("*=")
newstmt = ["*="]
elif currtoken[0] == "/=":
consume("/=")
newstmt = ["/="]
newstmt.append(variable)
newstmt.append(doExpression())
else:
print("invalid statement: " + currtoken[0])
stmts.append(newstmt)
return stmts
def doCondition():
exp = doExpression()
# ["==|!=|>|<|>=|<=", [left side], [right side]]
if currtoken[0] in ["==", "!=", ">", ">=", "<", "<="]:
retval = [currtoken[0]]
retval.append(exp)
nextToken()
retval.append(doExpression())
else:
print("expected == or != not found")
return retval
def doExpression():
term = doTerm()
# carry the term in case there's no +|-
exp = term
# ["+|-", [left side], [right side]]
while currtoken[0] in ["+", "-"]:
exp = [currtoken[0]]
nextToken()
exp.append(doExpression())
exp.append(term)
print("exp=", exp)
return exp
def doTerm():
factor = doFactor()
term = factor
# ["*|/", [left side], [right side]]
while currtoken[0] in ["*", "/"]:
term = [currtoken[0]]
nextToken()
term.append(doTerm())
term.append(factor)
print("term=", term)
return term
def doFactor():
if currtoken[0] == "variable":
retval = currtoken[1]
nextToken()
elif currtoken[0] == "digit":
retval = currtoken[2]
nextToken()
return [retval]
#****************** Interpreter ************************
stack = []
def execStatementList(pgm):
for stmt in pgm:
execStatement(stmt)
def execStatement(stmt):
if stmt[0] == "while":
# ["while", [condition], [statementlist]]
execCondition(stmt[1])
while stack.pop():
execStatementList(stmt[2])
execCondition(stmt[1])
elif stmt[0] == "if":
# ['if' [condition] [statementlist] [['else' statementlist]] 'endif']
execCondition(stmt[1])
if stack.pop():
execStatementList(stmt[2])
elif len(stmt) == 4:
execStatementList(stmt[3])
elif stmt[0] == "=":
execExpression(stmt[2])
symboltable[stmt[1][0]] = stack.pop()
elif stmt[0] == "+=":
execExpression(stmt[2])
symboltable[stmt[1][0]] = symboltable[stmt[1][0]] + stack.pop()
elif stmt[0] == "-=":
execExpression(stmt[2])
symboltable[stmt[1][0]] = symboltable[stmt[1][0]] - stack.pop()
elif stmt[0] == "*=":
execExpression(stmt[2])
symboltable[stmt[1][0]] = symboltable[stmt[1][0]] * stack.pop()
elif stmt[0] == "/=":
execExpression(stmt[2])
symboltable[stmt[1][0]] = symboltable[stmt[1][0]] / stack.pop()
elif stmt[0] == "print":
execExpression(stmt[1])
print("output:" + str(stack.pop()))
else:
print("invalid statement")
def execCondition(cond):
# ["==|!=|>|<|>=|<=", [left side], [right side]]
execExpression(cond[1])
execExpression(cond[2])
a = stack.pop()
b = stack.pop()
if cond[0] == "==":
stack.append(a == b)
elif cond[0] == "!=":
stack.append(a != b)
elif cond[0] == ">":
stack.append(b > a)
elif cond[0] == ">=":
stack.append(b >= a)
elif cond[0] == "<":
stack.append(b < a)
elif cond[0] == "<=":
stack.append(b <= a)
def execExpression(exp):
if len(exp) == 3:
execExpression(exp[1])
execExpression(exp[2])
if exp[0] == "+":
a = stack.pop()
b = stack.pop()
stack.append(a + b)
elif exp[0] == "-":
a = stack.pop()
b = stack.pop()
stack.append(a - b)
if exp[0] == "*":
a = stack.pop()
b = stack.pop()
stack.append(a * b)
elif exp[0] == "/":
a = stack.pop()
b = stack.pop()
stack.append(a / b)
else:
#if type(exp[0]) == int:
if type(exp[0]) == float:
stack.append(exp[0])
else:
stack.append(symboltable[exp[0]])
if __name__ == "__main__":
parseFile("./simpleScript.txt")
And below is the simpleScript.txt
file:
d = 2 + 5 * 3 - 2 * 4 + 5
print d
The expected result should be 14.0
, but i got 4.0
. I know the problem is in function doExpression
and doTerm
, but i cannot figure it out how to correct it, anyone please help me.
Upvotes: 0
Views: 68
Reputation: 1686
The only idea, I got right now consists of doing of special case for -
. What I mean is that you can consider it as a +
, if you multiple by -1
the expression inside of it (you have to do it in the next doExpression
, so i'm passing a boolean).
Change your doExpression()
for (I'm sorry it's a bit ugly, maybe someone will propose an other solution):
def doExpression(_b = True):
term = doTerm()
if _b == False:
tp = ['*']
tp.append([-1.0])
tp.append(term)
term = tp
# carry the term in case there's no +|-
exp = term
# ["+|-", [left side], [right side]]
while currtoken[0] in ["+", "-"]:
exp = ['+']
if currtoken[0] == '+':
nextToken()
exp.append(doExpression())
else: # '-'
nextToken()
exp.append(doExpression(_b = False))
exp.append(term)
print("exp=", exp)
return exp
Upvotes: 1