Reputation: 1
I'm working on a simple Pascal-like interpreter using PLY, but I'm getting `Syntax error at ','` during parsing. The error occurs when I parse a source file that contains commas. Below are my PLY code and a sample of the source file that triggers the error.
Pascal.py
import ply.lex as lex
import ply.yacc as yacc
# Lexical Analysis
# Names of all token types used by the lexer rules and the grammar.
tokens = (
    'PROGRAM',
    'VAR',
    'BEGIN',
    'END',
    'INTEGER',
    'REAL',
    'ASSIGN',
    'SEMICOLON',
    'COLON',
    'COMMA',
    'PLUS',
    'DIVIDE',
    'LPAREN',
    'RPAREN',
    'WRITELN',
    'STRING',
    'ID',
    'NUMBER',
    'DOT',
)
# Reserved words
# Maps a keyword's exact source spelling to its token type.  Lookups are
# case-sensitive: the block keywords are upper-case, the type names lower-case.
reserved = {
    **{keyword: keyword for keyword in ('PROGRAM', 'VAR', 'BEGIN', 'END')},
    'integer': 'INTEGER',
    'real': 'REAL',
    'WRITELN': 'WRITELN',
}
# Token definitions
# Simple tokens given as regex strings; each name is `t_` + an entry of `tokens`.
# NOTE(review): ':=' and ':' share a prefix; this presumably relies on PLY
# trying the longer string pattern first - confirm against the PLY docs.
t_ASSIGN = r':='
t_SEMICOLON = r';'
t_COLON = r':'
t_COMMA = r','
t_PLUS = r'\+'
t_DIVIDE = r'/'
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_DOT = r'\.'
# Newlines are not listed here; they are handled by t_newline.
t_ignore = ' \t' # Ignore spaces and tabs
def t_STRING(t):
    r"'[^']*'"
    # The match always includes both single quotes; keep only the text
    # between them as the token value.
    quoted = t.value
    t.value = quoted[1:-1]
    return t
def t_ID(t):
    r'[a-zA-Z_][a-zA-Z_0-9]*'
    # Keywords match the identifier pattern too, so reclassify the token
    # when its exact spelling is a key of `reserved`.
    lexeme = t.value
    t.type = reserved[lexeme] if lexeme in reserved else 'ID'
    return t
def t_NUMBER(t):
    r'\d+(\.\d+)?'
    # A literal with a fractional part becomes a float, anything else an int.
    text = t.value
    if '.' in text:
        t.value = float(text)
    else:
        t.value = int(text)
    return t
def t_newline(t):
    r'\n+'
    # Advance the line counter by the number of newlines matched.
    # Nothing is returned, so no token is produced for them.
    newline_count = len(t.value)
    t.lexer.lineno = t.lexer.lineno + newline_count
def t_error(t):
    # Report the offending character, then step over exactly one character
    # so that lexing can continue.
    active_lexer = t.lexer
    print(f"Illegal character '{t.value[0]}'")
    active_lexer.skip(1)
# Instantiate the PLY lexer.
lexer = lex.lex()
# Syntax Analysis
# PLUS and DIVIDE share one left-associative precedence level.
precedence = (
('left', 'PLUS', 'DIVIDE'),
)
def p_program(p):
    'program : PROGRAM ID SEMICOLON declarations BEGIN statements END DOT'
    # Right-hand-side symbols are numbered from 1:
    #   p[1]=PROGRAM  p[2]=ID  p[3]=SEMICOLON  p[4]=declarations
    #   p[5]=BEGIN    p[6]=statements  p[7]=END  p[8]=DOT
    # The statement list is therefore p[6]; p[5] is only the BEGIN keyword,
    # and passing it to interpret() would execute nothing.
    interpret(p[6])
# Optional declaration section: either VAR followed by var_declaration, or
# nothing at all.  The rule has no action, so p[0] is left unset.
def p_declarations(p):
'''declarations : VAR var_declaration
| empty'''
# One or more declarations of the form `ID COLON type SEMICOLON`, chained by
# right recursion.  Each declaration names exactly one ID; a comma-separated
# identifier list is not part of this rule.  No action, so p[0] is left unset.
def p_var_declaration(p):
'''var_declaration : ID COLON type SEMICOLON var_declaration
| ID COLON type SEMICOLON'''
# A type is one of the INTEGER or REAL keyword tokens; nothing is recorded
# for it (no action, p[0] is left unset).
def p_type(p):
'''type : INTEGER
| REAL'''
def p_statements(p):
    '''statements : statements statement SEMICOLON
                  | statement SEMICOLON'''
    # Result is a list of statement nodes in source order.
    if len(p) != 4:
        # Base case: a single statement starts the list.
        p[0] = [p[1]]
        return
    # Recursive case: append the new statement to the list built so far.
    earlier, latest = p[1], p[2]
    p[0] = earlier + [latest]
def p_statement(p):
    '''statement : ID ASSIGN expression
                 | WRITELN LPAREN writeln_args RPAREN'''
    # The first symbol tells the alternatives apart; in both of them the
    # payload (expression or argument list) sits at position 3.
    head, payload = p[1], p[3]
    p[0] = ('WRITELN', payload) if head == 'WRITELN' else ('ASSIGN', head, payload)
def p_writeln_args(p):
    '''writeln_args : writeln_args COMMA writeln_arg
                    | writeln_arg'''
    # Result is the list of arguments in the order they were written.
    if len(p) == 4:
        collected = p[1]
        p[0] = collected + [p[3]]
    else:
        p[0] = [p[1]]
def p_writeln_arg(p):
    '''writeln_arg : STRING
                   | expression'''
    # Either alternative is passed through unchanged.
    argument = p[1]
    p[0] = argument
def p_expression(p):
    '''expression : expression PLUS term
                  | expression DIVIDE term
                  | term'''
    if len(p) != 4:
        # A lone term is its own expression node.
        p[0] = p[1]
        return
    # Binary node layout: (operator text, left operand, right operand).
    left, op, right = p[1], p[2], p[3]
    p[0] = (op, left, right)
def p_term(p):
    '''term : ID
            | NUMBER'''
    # The token's value is the node: a name string for ID, a number for NUMBER.
    leaf = p[1]
    p[0] = leaf
def p_empty(p):
    'empty :'
    # The empty production yields a fresh empty list.
    p[0] = list()
def p_error(p):
    # A falsy `p` means there is no offending token to name, so report
    # end of input instead.
    if p:
        message = f"Syntax error at '{p.value}'"
    else:
        message = "Syntax error at EOF"
    print(message)
# Instantiate the PLY parser.
parser = yacc.yacc()
# Interpreter
# Global variable store (name -> value): written by interpret() on ASSIGN,
# read by evaluate() when it meets a name.
variables = {}
def evaluate(tree):
    """Compute the value of an expression node.

    A node is a number (returned unchanged), a variable name (looked up in
    ``variables``, 0 if it was never assigned), or a tuple
    ``(op, left, right)`` where ``op`` is ``'+'`` or ``'/'``.  Any other
    operator evaluates to 0.
    """
    if isinstance(tree, str):
        return variables.get(tree, 0)
    if isinstance(tree, (int, float)):
        return tree

    op = tree[0]
    if op == '+':
        return evaluate(tree[1]) + evaluate(tree[2])
    if op == '/':
        return evaluate(tree[1]) / evaluate(tree[2])
    return 0
def interpret(statements):
    """Execute parsed statements in order.

    Each statement is a tuple whose first element selects the action:

    * ``('ASSIGN', name, expr)`` stores ``evaluate(expr)`` in ``variables``.
    * ``('WRITELN', args)`` prints the arguments on one line.  String
      arguments are emitted verbatim; anything else is evaluated first.

    The pieces of a WRITELN are concatenated with no separator, so
    ``WRITELN('Num1 is ', 10)`` prints ``Num1 is 10`` rather than inserting
    a second space after the literal's own trailing space.
    """
    for stmt in statements:
        kind = stmt[0]
        if kind == 'ASSIGN':
            variables[stmt[1]] = evaluate(stmt[2])
        elif kind == 'WRITELN':
            # NOTE(review): a bare identifier argument also arrives here as a
            # plain str (p_term passes the ID text through), so it cannot be
            # told apart from a string literal and is printed as its name.
            pieces = [
                arg if isinstance(arg, str) else str(evaluate(arg))
                for arg in stmt[1]
            ]
            print("".join(pieces))
# Run the Program
if __name__ == '__main__':
    import sys

    # The program to run is given as the first command-line argument.
    if len(sys.argv) < 2:
        print("Usage: python Pascal.py <file>")
        sys.exit(1)

    source_path = sys.argv[1]
    with open(source_path, 'r') as file:
        source = file.read()

    # Parsing drives execution: the `program` rule's action calls interpret().
    lexer.input(source)
    parser.parse(source)
Sample Source Code (SumAndAverage.pas)
PROGRAM SUMANDAVERAGE;
VAR num1,num2,num3: integer;
sum:integer;
avg:real;
BEGIN
num1:=10;
num2:=20;
num3:=30;
sum:=num1+num2+num3;
avg:=sum/3;
WRITELN('Num1 is ',num1);
WRITELN('Num2 is ',num2);
WRITELN('Num3 is ',num3);
WRITELN('Sum 3 numbers is ',sum);
WRITELN('Average is ',avg)
END.
Input and the expected Output
python Pascal.py SumAndAverage.pas
Num1 is 10
Num2 is 20
Num3 is 30
Sum 3 numbers is 60
Average is 2.0000000000000000E+001
I've reviewed my token definitions and grammar, but I can't figure out why the parser is having trouble with the comma in the WRITELN statement. Could someone help me figure out what's going wrong?
Upvotes: -1
Views: 66
Reputation: 170278
As mentioned in the comments, your `p_var_declaration` rule only accepts a single `ID` before the colon:
# Original rule: exactly one ID is allowed before COLON.
def p_var_declaration(p):
'''var_declaration : ID COLON type SEMICOLON var_declaration
| ID COLON type SEMICOLON'''
Change that into:
# Revised rule: the single ID is replaced by the `vars` nonterminal.
def p_var_declaration(p):
'''var_declaration : vars COLON type SEMICOLON var_declaration
| vars COLON type SEMICOLON'''
# One ID, or a left-recursive comma-separated list of IDs (e.g. num1,num2,num3).
def p_vars(p):
'''vars : ID
| vars COMMA ID'''
Also, your last statement, `WRITELN('Average is ',avg)`, is missing a semicolon at the end; your `statements` rule requires a `SEMICOLON` after every statement.
Upvotes: 0