ReminiscientYeti
ReminiscientYeti

Reputation: 271

How to write yacc grammar rules to identify function definitions vs function calls?

I have started learning about YACC, and I have executed a few examples of simple toy programs. But I have never seen a practical example that demonstrates how to build a compiler that identifies and implements function definitions and function calls, array implementation and so on, nor has it been easy to find an example using Google search. Can someone please provide one example of how to generate the tree using YACC? C or C++ is fine.

Thanks in advance!

Upvotes: 2

Views: 5948

Answers (1)

Niklas Rosencrantz
Niklas Rosencrantz

Reputation: 26652

Let's parse this code with yacc.

file test contains valid C code that we want to parse.

int main (int c, int b) {
    int a;
    while ( 1 ) {

    int d;
}
}

A lex file c.l

alpha [a-zA-Z]
digit [0-9]

%%
[ \t]       ;
[ \n]   { yylineno = yylineno + 1;}
int return INT;
float return FLOAT;
char return CHAR;
void return VOID;
double return DOUBLE;
for     return FOR;
while   return WHILE;
if  return IF;
else    return ELSE;
printf  return PRINTF;
struct  return STRUCT;
^"#include ".+ ;
{digit}+       return NUM;
{alpha}({alpha}|{digit})* return ID;
"<="    return LE;
">="    return GE;
"=="    return EQ;
"!="    return NE;
">" return GT;
"<" return LT;
"."     return DOT;
\/\/.* ;
\/\*(.*\n)*.*\*\/ ;
.       return yytext[0];
%%

file c.y for input to YACC:

%{
#include <stdio.h>
#include <stdlib.h>

extern FILE *fp;

%}

%token INT FLOAT CHAR DOUBLE VOID
%token FOR WHILE
%token IF ELSE PRINTF
%token STRUCT
%token NUM ID
%token INCLUDE
%token DOT

%right '='
%left AND OR
%left '<' '>' LE GE EQ NE LT GT
%%

start:  Function
    | Declaration
    ;

/* Declaration block */
Declaration: Type Assignment ';'
    | Assignment ';'
    | FunctionCall ';'
    | ArrayUsage ';'
    | Type ArrayUsage ';'
    | StructStmt ';'
    | error
    ;

/* Assignment block */
Assignment: ID '=' Assignment
    | ID '=' FunctionCall
    | ID '=' ArrayUsage
    | ArrayUsage '=' Assignment
    | ID ',' Assignment
    | NUM ',' Assignment
    | ID '+' Assignment
    | ID '-' Assignment
    | ID '*' Assignment
    | ID '/' Assignment
    | NUM '+' Assignment
    | NUM '-' Assignment
    | NUM '*' Assignment
    | NUM '/' Assignment
    | '\'' Assignment '\''
    | '(' Assignment ')'
    | '-' '(' Assignment ')'
    | '-' NUM
    | '-' ID
    |   NUM
    |   ID
    ;

/* Function Call Block */
FunctionCall : ID'('')'
    | ID'('Assignment')'
    ;

/* Array Usage */
ArrayUsage : ID'['Assignment']'
    ;

/* Function block */
Function: Type ID '(' ArgListOpt ')' CompoundStmt
    ;
ArgListOpt: ArgList
    |
    ;
ArgList:  ArgList ',' Arg
    | Arg
    ;
Arg:    Type ID
    ;
CompoundStmt:   '{' StmtList '}'
    ;
StmtList:   StmtList Stmt
    |
    ;
Stmt:   WhileStmt
    | Declaration
    | ForStmt
    | IfStmt
    | PrintFunc
    | ';'
    ;

/* Type Identifier block */
Type:   INT
    | FLOAT
    | CHAR
    | DOUBLE
    | VOID
    ;

/* Loop Blocks */
WhileStmt: WHILE '(' Expr ')' Stmt
    | WHILE '(' Expr ')' CompoundStmt
    ;

/* For Block */
ForStmt: FOR '(' Expr ';' Expr ';' Expr ')' Stmt
       | FOR '(' Expr ';' Expr ';' Expr ')' CompoundStmt
       | FOR '(' Expr ')' Stmt
       | FOR '(' Expr ')' CompoundStmt
    ;

/* IfStmt Block */
IfStmt : IF '(' Expr ')'
        Stmt
    ;

/* Struct Statement */
StructStmt : STRUCT ID '{' Type Assignment '}'
    ;

/* Print Function */
PrintFunc : PRINTF '(' Expr ')' ';'
    ;

/*Expression Block*/
Expr:
    | Expr LE Expr
    | Expr GE Expr
    | Expr NE Expr
    | Expr EQ Expr
    | Expr GT Expr
    | Expr LT Expr
    | Assignment
    | ArrayUsage
    ;
%%
#include"lex.yy.c"
#include<ctype.h>
int count=0;

int main(int argc, char *argv[])
{
    yyin = fopen(argv[1], "r");

   if(!yyparse())
        printf("\nParsing complete\n");
    else
        printf("\nParsing failed\n");

    fclose(yyin);
    return 0;
}

yyerror(char *s) {
    printf("%d : %s %s\n", yylineno, s, yytext );
}

A Makefile to put it together. I use and but the example will also work with and .

miniC:  c.l c.y
    bison c.y
    flex c.l
    gcc c.tab.c -ll -ly

Compile and parse the test code:

$ make
bison c.y
flex c.l
gcc c.tab.c -ll -ly
c.tab.c: In function ‘yyparse’:
c.tab.c:1273:16: warning: implicit declaration of function ‘yylex’ [-Wimplicit-function-declaration]
       yychar = yylex ();
                ^
c.tab.c:1402:7: warning: implicit declaration of function ‘yyerror’ [-Wimplicit-function-declaration]
       yyerror (YY_("syntax error"));
       ^
c.y: At top level:
c.y:155:1: warning: return type defaults to ‘int’ [-Wimplicit-int]
 yyerror(char *s) {
 ^
$ ls
a.out  c.l  CMakeLists.txt  c.tab.c  c.y  lex.yy.c  Makefile  README.md  test
$ ./a.out test

Parsing complete

For reading resources I can recommend the books Modern Compiler Implementation in C by Andrew Appel and the flex/bison book by John Levine.

Upvotes: 4

Related Questions