Juju
Juju

Reputation: 1

Syntax error at ',' in PLY Pascal-like interpreter

I'm working on a simple Pascal-like interpreter using PLY, but I'm encountering a Syntax error at ',' during parsing. The issue arises when trying to parse a source file that includes commas. Below is the code for the PLY code, as well as a sample of the source code I'm using that's causing the error.

Pascal.py

import ply.lex as lex
import ply.yacc as yacc

# Lexical Analysis
tokens = (
    'PROGRAM', 'VAR', 'BEGIN', 'END', 'INTEGER', 'REAL', 'ASSIGN',
    'SEMICOLON', 'COLON', 'COMMA', 'PLUS', 'DIVIDE', 'LPAREN', 'RPAREN',
    'WRITELN', 'STRING', 'ID', 'NUMBER', 'DOT'
)

# Reserved words
reserved = {
    'PROGRAM': 'PROGRAM',
    'VAR': 'VAR',
    'BEGIN': 'BEGIN',
    'END': 'END',
    'integer': 'INTEGER',
    'real': 'REAL',
    'WRITELN': 'WRITELN'
}

# Token definitions
t_ASSIGN = r':='
t_SEMICOLON = r';'
t_COLON = r':'
t_COMMA = r','
t_PLUS = r'\+'
t_DIVIDE = r'/'
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_DOT = r'\.'
t_ignore = ' \t'  # Ignore spaces and tabs

def t_STRING(t):
    r"'[^']*'"
    t.value = t.value[1:-1]  # Remove surrounding quotes
    return t

def t_ID(t):
    r'[a-zA-Z_][a-zA-Z_0-9]*'
    t.type = reserved.get(t.value, 'ID')  # Check if it's a reserved word
    return t

def t_NUMBER(t):
    r'\d+(\.\d+)?'
    t.value = float(t.value) if '.' in t.value else int(t.value)
    return t

def t_newline(t):
    r'\n+'
    t.lexer.lineno += len(t.value)

def t_error(t):
    print(f"Illegal character '{t.value[0]}'")
    t.lexer.skip(1)

lexer = lex.lex()

# Syntax Analysis
precedence = (
    ('left', 'PLUS', 'DIVIDE'),
)

def p_program(p):
    'program : PROGRAM ID SEMICOLON declarations BEGIN statements END DOT'
    interpret(p[5])  # Pass the parsed statements to the interpreter

def p_declarations(p):
    '''declarations : VAR var_declaration
                    | empty'''

def p_var_declaration(p):
    '''var_declaration : ID COLON type SEMICOLON var_declaration
                       | ID COLON type SEMICOLON'''

def p_type(p):
    '''type : INTEGER
            | REAL'''

def p_statements(p):
    '''statements : statements statement SEMICOLON
                  | statement SEMICOLON'''
    if len(p) == 4:
        p[0] = p[1] + [p[2]]
    else:
        p[0] = [p[1]]

def p_statement(p):
    '''statement : ID ASSIGN expression
                 | WRITELN LPAREN writeln_args RPAREN'''
    if p[1] == 'WRITELN':
        p[0] = ('WRITELN', p[3])
    else:
        p[0] = ('ASSIGN', p[1], p[3])

def p_writeln_args(p):
    '''writeln_args : writeln_args COMMA writeln_arg
                    | writeln_arg'''
    if len(p) == 4:
        p[0] = p[1] + [p[3]]
    else:
        p[0] = [p[1]]

def p_writeln_arg(p):
    '''writeln_arg : STRING
                   | expression'''
    p[0] = p[1]

def p_expression(p):
    '''expression : expression PLUS term
                  | expression DIVIDE term
                  | term'''
    if len(p) == 4:
        p[0] = (p[2], p[1], p[3])
    else:
        p[0] = p[1]

def p_term(p):
    '''term : ID
            | NUMBER'''
    p[0] = p[1]

def p_empty(p):
    'empty :'
    p[0] = []

def p_error(p):
    print(f"Syntax error at '{p.value}'" if p else "Syntax error at EOF")

parser = yacc.yacc()

# Interpreter
variables = {}

def evaluate(tree):
    if isinstance(tree, (int, float)):
        return tree
    if isinstance(tree, str):
        return variables.get(tree, 0)
    if tree[0] == '+':
        return evaluate(tree[1]) + evaluate(tree[2])
    if tree[0] == '/':
        return evaluate(tree[1]) / evaluate(tree[2])
    return 0

def interpret(statements):
    for stmt in statements:
        if stmt[0] == 'ASSIGN':
            variables[stmt[1]] = evaluate(stmt[2])
        elif stmt[0] == 'WRITELN':
            print(" ".join(str(evaluate(arg)) if not isinstance(arg, str) else arg for arg in stmt[1]))

# Run the Program
if __name__ == '__main__':
    import sys
    if len(sys.argv) < 2:
        print("Usage: python Pascal.py <file>")
        sys.exit(1)
    with open(sys.argv[1], 'r') as file:
        source = file.read()
    lexer.input(source)
    parser.parse(source)

Sample Source Code (SumAndAverage.pas)

PROGRAM SUMANDAVERAGE;
VAR num1,num2,num3: integer;
    sum:integer;
    avg:real;
BEGIN
    num1:=10;
    num2:=20;
    num3:=30;
    sum:=num1+num2+num3;
    avg:=sum/3;
    WRITELN('Num1 is ',num1);
    WRITELN('Num2 is ',num2);
    WRITELN('Num3 is ',num3);
    WRITELN('Sum 3 numbers is ',sum);
    WRITELN('Average is ',avg)
END.

Input and the expected Output

python Pascal.py SumAndAverage.pas
Num1 is 10
Num2 is 20
Num3 is 30
Sum 3 numbers is 60
Average is 2.0000000000000000E+001

I've reviewed my token definitions and grammar, but I can't figure out why the parser is having trouble with the comma in the WRITELN statement. Could someone help me figure out what's going wrong?

Upvotes: -1

Views: 66

Answers (1)

Bart Kiers
Bart Kiers

Reputation: 170278

As mentioned in the comments: your p_var_declaration only accepts one ID:

def p_var_declaration(p):
    '''var_declaration : ID COLON type SEMICOLON var_declaration
                       | ID COLON type SEMICOLON'''

change that into:

def p_var_declaration(p):
    '''var_declaration : vars COLON type SEMICOLON var_declaration
                       | vars COLON type SEMICOLON'''

def p_vars(p):
    '''vars : ID
            | vars COMMA ID'''

Also, your last WRITELN('Average is ',avg) is missing a semi-colon at the end.

Upvotes: 0

Related Questions