quartaela
quartaela

Reputation: 2757

can't find a simple error when parsing with YACC

i'm trying to make a very simple YACC parser on Pascal language which just includes integer declarations, some basic expressions and if-else statements. however, i cant find the error for hours and i'm going to be crazy soon. terminal says Error at line:0 but it is impossible!. i use flex and byacc for parser.i will be very glad if you can help me. this is my lex file as you can see;

%{
#include <stdio.h>
#include <string.h>
#include "y.tab.h"
extern int yylval;
int linenum=0;
%}

digit   [0-9]
letter  [A-Za-z]

%%
if              return IF;
then                return THEN;
else                return ELSE;
for             return FOR;
while               return WHILE;
PROGRAM             return PROGRAM_SYM;
BEGIN               return BEGIN_SYM;
VAR             return VAR_SYM;
END             return END_SYM;
INTEGER             return INTEGER_SYM;
{letter}({letter}|{digit})* return identifier;
[0-9]+              return NUMBER;
[\<][\=]            return CON_LE;
[\>][\=]            return CON_GE;
[\=]                return CON_EQ;          
[\:][\=]            return ASSIGNOP;
;               return semiColon;
,               return comma;
\n              {linenum++;}
.               return (int) yytext[0];
%%

and this is my Yacc file

%{
#include <stdio.h>
#include <string.h>
#include "y.tab.h"
extern FILE *yyin;
extern int linenum;
%}

%token PROGRAM_SYM VAR_SYM BEGIN_SYM END_SYM INTEGER_SYM NUMBER
%token identifier INTEGER ASSIGNOP semiColon comma THEN
%token IF ELSE FOR WHILE
%token CON_EQ CON_LE CON_GE GE LE

%left '*' '/'
%left '+' '-'

%start program

%%

program: PROGRAM_SYM identifier semiColon VAR_SYM dec_block BEGIN_SYM statement_list END_SYM '.'
     ;

dec_block:
        dec_list semiColon;

dec_list:
        dec_list dec
        |
        dec
        ;

dec: 
        int_dec_list
        ;

int_dec_list:   
        int_dec_list int_dec ':' type
        |
        int_dec ':' type
        ;

int_dec:
        int_dec comma identifier
        |
        identifier
        ;

type:
    INTEGER_SYM
    ;

statement_list:
        statement_list statement
        |
        statement
        ;

statement:
        assignment_list
        |
        expression_list
        |
        selection_list
        ;

assignment_list:
        assignment_list assignment
        |
        assignment      
        ;

assignment:

        identifier ASSIGNOP expression_list
        ;

expression_list:
        expression_list expression semiColon
        |
        expression semiColon
        ;


expression:
        '(' expression ')'
        |
        expression '*' expression
        |
        expression '/' expression
        |
        expression '+' expression
        |
        expression '-' expression
        |
        factor
        ;

factor:     
        identifier
        |
        NUMBER
        ;


selection_list:
        selection_list selection
        |
        selection
        ;

selection:
        IF '(' logical_expression ')' THEN statement_list ELSE statement_list
        ;


logical_expression:
        logical_expression '=' expression
        |
        logical_expression '>' expression
        |
        logical_expression '<' expression
        ;


%%
void yyerror(char *s){
    fprintf(stderr,"Error at line: %d\n",linenum);
}
int yywrap(){
    return 1;
}
int main(int argc, char *argv[])
{
    /* Call the lexer, then quit. */
    yyin=fopen(argv[1],"r");
    yyparse();
    fclose(yyin);
    return 0;
}

and finally i take an error at the first line when i give the input;

PROGRAM myprogram;

VAR

i:INTEGER;

i3:INTEGER;

j:INTEGER;

BEGIN

i := 3;

j := 5;

i3 := i+j*2;

i := j*20;

if(i>j)

then i3 := i+50+(45*i+(40*j));

else i3 := i+50+(45*i+(40*j))+i+50+(45*i+(30*j));

END.

Upvotes: 1

Views: 1470

Answers (2)

Chris Dodd
Chris Dodd

Reputation: 126488

For debugging grammars, YYDEBUG is your friend. Either stick #define YYDEBUG 1 in the %{..%} in the top of your .y file, or compile with -DYYDEBUG, and stick a yydebug = 1; in main before calling yyparse and you'll get a slew of info about what tokens the parser is seeing and what it is doing with them....

Upvotes: 1

Jonathan Leffler
Jonathan Leffler

Reputation: 754920

Your lexical analyzer returns blanks and tabs as tokens, but the grammar doesn't recognize them.

Add a parser rule:

[ \t\r]    { }

This gets you to line 6 instead of line 0 before you run into an error. You get that error because you don't allow semicolons between declarations:

dec_block:
        dec_list semiColon;

dec_list:
        dec_list dec
        |
        dec
        ;

dec:
        int_dec_list
        ;

That should probably be:

dec_block:
        dec_block dec
        |
        dec
        ;

dec:
        int_dec_list semiColon
        ;

Doing this gets you to line 14 in the input.

Incidentally, one of the first things I did was to ensure that the lexical analyzer tells me what it is doing, by modifying the rules like this:

if              { printf("IF\n"); return IF; }

In long term code, I'd make that diagnostic output selectable at run-time.


You have a general problem with where you expect semicolons. It is also not clear that you should allow an expression_list in the rule for statement (or, maybe, 'not yet' — that might be appropriate when you have function calls, but allowing 3 + 2 / 4 as a 'statement' is not very helpful).


This grammar gets to the end of the input:

%{
#include <stdio.h>
#include <string.h>
#include "y.tab.h"
extern FILE *yyin;
extern int linenum;
%}

%token PROGRAM_SYM VAR_SYM BEGIN_SYM END_SYM INTEGER_SYM NUMBER
%token identifier INTEGER ASSIGNOP semiColon comma THEN
%token IF ELSE FOR WHILE
%token CON_EQ CON_LE CON_GE GE LE

%left '*' '/'
%left '+' '-'

%start program

%%

program: PROGRAM_SYM identifier semiColon VAR_SYM dec_block BEGIN_SYM statement_list END_SYM '.'
     ;

dec_block:
        dec_block dec
        |
        dec
        ;

dec:
        int_dec_list semiColon
        ;

int_dec_list:
        int_dec_list int_dec ':' type
        |
        int_dec ':' type
        ;

int_dec:
        int_dec comma identifier
        |
        identifier
        ;

type:
    INTEGER_SYM
    ;

statement_list:
        statement_list statement
        |
        statement
        ;

statement:
        assignment
        |
        selection
        ;

assignment:
        identifier ASSIGNOP expression semiColon
        ;

expression:
        '(' expression ')'
        |
        expression '*' expression
        |
        expression '/' expression
        |
        expression '+' expression
        |
        expression '-' expression
        |
        factor
        ;

factor:
        identifier
        |
        NUMBER
        ;

selection:
        IF '(' logical_expression ')' THEN statement_list ELSE statement_list
        ;

logical_expression:
        expression '=' expression
        |
        expression '>' expression
        |
        expression '<' expression
        ;

%%
void yyerror(char *s){
    fprintf(stderr,"Error at line: %d\n",linenum);
}
int yywrap(){
    return 1;
}
int main(int argc, char *argv[])
{
    /* Call the lexer, then quit. */
    yyin=fopen(argv[1],"r");
    yyparse();
    fclose(yyin);
    return 0;
}

Key changes include removing assignment_list and expression_list, and modifying logical_expression so that the two sides of the expansion are expression, rather than the LHS being logical_expression (which then never had a primitive definition, leading to the problems with warnings).

There are still issues to resolve; the expression_list in the selection should be more restrictive to accurately reflect the grammar of Pascal. (You need a block where that could be a single statement or a BEGIN, statement list, END.)

Upvotes: 0

Related Questions