(f)lex in c and bison/yacc in C++

Question

I am trying to build a mini programming language like Ruby with flex/bison. Flex and bison work fine together if everything is written in C. The problem starts when I need C++ to build classes for my nonterminals (expr, statements, etc.).

mRuby.l:

%option yylineno

%{
#include "absyn.h"
#include "mRuby.tab.h"

/* Only referenced from commented-out code in this file; diagnostics take the
   line number from yylineno (enabled by %option yylineno above). */
int line_nr = 1;
/* 1-based column of the next character to be scanned. The illegal-character
   diagnostic prints col_nr-1, i.e. the column of the character just read. */
int col_nr = 1; 

/* flex executes YY_USER_ACTION before the action of every matched rule.
   Nothing else in this scanner advances col_nr, so without this hook the
   reported column never changes. Newlines inside a match (the comment
   pattern ends in one) restart the column at 1. */
#define YY_USER_ACTION \
  { int i_; \
    for (i_ = 0; i_ < (int) yyleng; ++i_) \
      col_nr = (yytext[i_] == '\n') ? 1 : col_nr + 1; }

%}

/* Named patterns used by the rules below. */
/* [A-z] would also accept the ASCII characters between 'Z' and 'a'
   ([ \ ] ^ _ and backtick), so the letter ranges are spelled out and '_' is
   kept explicitly. */
identifier  [A-Za-z_]([A-Za-z_]|[0-9])*
/* Optional sign, digits, with single underscores allowed between digits. */
integer  -?([0-9])+(_?[0-9])*
/* A comment runs from '#' through the end of the line, newline included. */
comment  ("#".*"\n")
whitespace (" "|"\t")+
boolean (true|false)
/* NOTE(review): reconstructed from a garbled listing as CRLF / LF / CR runs;
   confirm. No rule in this file references {CR}. */
CR (\r\n)*|(\n)*|(\r)*


%%
  /* Keywords, punctuation and operators. flex prefers the longest match and,
     between equally long matches, the rule listed first, so these literal
     rules take precedence over {identifier} for exact keyword spellings. */
";"                     { return SEMICOLON; }
"undef"                 { return UNDEF; }
"def"                   { return DEF; }
"("                     { return LPAREN; }
")"                     { return RPAREN; }
"end"                   { return END;}
"return"                { return RETURN;}
"if"                    { return IF; }
"then"                  { return THEN; }
"elsif"                 { return ELSIF;}
"else"                  { return ELSE; }
"unless"                { return UNLESS; }
"while"                 { return WHILE; }
"do"                    { return DO; }
"until"                 { return UNTIL; }
"case"                  { return CASE; }
"when"                  { return WHEN; }
","                     { return COMMA; }
"="                     { return ASSIGN; }
"+="                    { return PLUSASSIGN; }
"-="                    { return MINUSASSIGN; }
"*="                    { return MULASSIGN; }
"/="                    { return DIVASSIGN; }
"&&="                   { return ANDASSIGN; }
"||="                   { return ORASSIGN; }
"+"                     { return PLUS; }
"-"                     { return MINUS; }
"*"                     { return MUL; }
"/"                     { return DIV; }
">"                     { return GT; }
">="                    { return GE; }
"<"                     { return LT; }
"<="                    { return LE; }
"=="                    { return EQ; }
"!="                    { return NE; }
"&&"                    { return AND; }
"||"                    { return OR; }
"!"                     { return NOT; }
"\n"                    { /* a line break ends a statement, exactly like ';' */
                          col_nr = 1; return SEMICOLON; }
{boolean}               { return BOOLEAN; }
{comment}|{whitespace}  { /* nothing to do: comments and blanks produce no token */ }
{integer}               {
                        return INTEGER;
                        }
{identifier}            {
                        /* yytext is overwritten by the next match, so the
                           parser gets its own copy of the spelling. The copy
                           is published through yylval.id (the char* member of
                           the parser's %union); previously it was allocated
                           and then dropped, leaking on every identifier. */
                        char* s = (char*) malloc(yyleng+1);
                        if (s == NULL) {
                          YY_FATAL_ERROR("out of memory while copying identifier");
                        }
                        strcpy(s, yytext);
                        yylval.id = s;

                        return IDENTIFIER;
                        }

.      {
  /* Catch-all: any single character no earlier rule accepted is reported on
     stderr together with its position; scanning then simply continues.
     The byte is inspected as unsigned char so that values >= 0x80 are not
     negative (and thus mistaken for control characters) where char is signed. */
  unsigned char c = (unsigned char) yytext[0];

  if (c < ' ') {
    /* control character: caret notation, e.g. ^A for 0x01 */
    fprintf(stderr, "illegal character: ^%c", c + '@');
  }
  else if (c > '~') {
    /* non-printable byte: backslash + zero-padded octal, e.g. \012 */
    fprintf(stderr, "illegal character: \\%03o", (unsigned int) c);
  }
  else {
    /* printable character: show it as-is (yyerror could be used here instead) */
    fprintf(stderr, "illegal character: %s", yytext);
  }
  /* lex read exactly one char, the illegal one, hence col_nr-1 */
  fprintf(stderr, " at line %d column %d\n", yylineno, (col_nr-1));
}
%%

/* End-of-input hook invoked by (f)lex. A non-zero result tells the scanner
   there is no further input and it should stop; zero would make it continue.
   This scanner always stops. */
int yywrap(){
  const int no_more_input = 1;
  return no_more_input;
}

My bison file:

%{
#include <iostream>

#include "lexer.h"
#include "absyn.h"


void yyerror(const char* str);
// main() below calls the parser from inside the prologue, so it needs a
// declaration in scope here; repeating the declaration is harmless if a
// header already provides it.
int yyparse();

/// Runs the parser on standard input.
/// @param argc unused
/// @param argv unused
/// @return the value returned by yyparse()
int main(int argc, char* argv[]){
  std::cout << "Hello world! \n";

  // Keep the parser's status instead of discarding it, so callers of the
  // executable can tell a failed parse from a successful one.
  const int status = yyparse();

  std::cout << "TEST \n";
  return status;
}

%}

/* Semantic value type shared by the scanner (yylval) and the grammar actions.
   Each member name is what a <tag> in a %token / %type declaration refers to.
   g, id and b are plain C types; the remaining member types are not declared
   in this file (presumably they come from absyn.h -- confirm). */
%union {
  int g;
  char* id;
  char* b;
  Stmts stmts;
  Stmt stmt;
  CaseStmt casestmt;
  WhenStmt whenstmt;
  IfStmt ifstmt; 
  ElifStmt elifstmt;
  Expr expr;
  Exprs exprs;
  ArgList arglist;
  ArgLists arglists;
  Ids ids;
  T t;
  Assignop assignop;
  Binop binop;
}
// fill in the token declarations

// Tokens without a semantic value.
%token
 SEMICOLON UNDEF DEF LPAREN RPAREN END RETURN INTEGER
 IF THEN ELSIF ELSE UNLESS WHILE DO UNTIL CASE WHEN COMMA
 ASSIGN PLUSASSIGN MINUSASSIGN MULASSIGN DIVASSIGN ANDASSIGN ORASSIGN
 PLUS MINUS MUL DIV GT GE LT LE EQ NE AND OR NOT

// Tokens carrying text. The <tag>s were lost from the listing and are
// restored here from the char* members of %union.
// NOTE(review): confirm <id> / <b> against the original file.
%token <id> IDENTIFIER
%token <b> BOOLEAN

// Nonterminal value types: each tag is the %union member of the same name.
%type <stmts> stmts
%type <stmt> stmt
%type <casestmt> casestmt
%type <whenstmt> whenstmt
%type <ifstmt> ifstmt
%type <elifstmt> elifstmt
%type <expr> expr
%type <exprs> exprs
%type <arglist> arglist
%type <arglists> arglists
%type <ids> ids
%type <t> t
%type <assignop> assignop


%type <binop> binop
// The PLUS action in `binop` assigns $1 to a Binop, so PLUS needs that type.
%type <binop> PLUS

 // precedence declarations, lowest precedence first
%nonassoc operation
%nonassoc expression

%nonassoc OR NE EQ LT LE GT GE AND

%left PLUS MINUS
// Was `TIMES DIVIDES`, which only introduced two extra tokens the scanner
// never returns; the multiplicative tokens are MUL and DIV.
%left MUL DIV
%right ASSOP
%right UNOT
%right UMINUS
// NOTE(review): a rule takes its precedence from its last terminal, and
// `expr binop expr` contains none because the operator is reached through
// the `binop` nonterminal. These declarations therefore cannot resolve the
// binary-operator shift/reduce conflicts by themselves.
//%defines

%%

// productions
// Every action in this part of the grammar only writes a trace label to
// std::cout; no semantic values are built.

// Start symbol: a program is one compound statement.
program   : compstmt            { std::cout << "program 0"; }
;

// A statement list, optionally followed by one trailing terminator.
compstmt  : stmts              { std::cout << " compstmt 1"; }
          | stmts t            { std::cout << " compstmt 2"; }
;

// One or more statements separated by the terminator `t` (left-recursive).
stmts     : stmt               { std::cout << " stmts 1"; }
          | stmts t stmt       { std::cout << " stmts 2 "; }
;

// A single statement. `error` is Bison's built-in error-recovery token.
stmt      : undefstmt     { std::cout << " stmt 1"; }
          | expr          { std::cout << " stmt 2"; }
          | defstmt       { std::cout << " stmt 3"; }
          | returnstmt    { std::cout << "stmt 4"; }
          | ifstmt        { std::cout << "stmt 5"; }
          | whilestmt     { std::cout << " stmt 6"; }
          | untilstmt     { std::cout << " stmt 7"; }
          | unlessstmt    { std::cout << " stmt 8"; }
          | casestmt      { std::cout << " stmt 9"; }
          | error         { std::cout << " error"; }
;

// Individual statement forms. Each action only prints a trace label.

// undef NAME
undefstmt : UNDEF IDENTIFIER                                      { std::cout << " undefstmt"; }
;

// def NAME ( arglists ) body end -- at least one argument is required,
// because arglists cannot be empty.
defstmt   : DEF IDENTIFIER LPAREN arglists RPAREN compstmt END    { std::cout << " defstmt"; }
;

// return EXPR
returnstmt : RETURN expr                                          { std::cout << " returnstmt"; }
;

// while EXPR do body end
whilestmt : WHILE expr DO compstmt END                            { std::cout << " whilestmt"; }
;

// until EXPR do body end
untilstmt : UNTIL expr DO compstmt END                            { std::cout << " untilstmt"; }
;

// unless EXPR then body [else body] end
unlessstmt  : UNLESS expr THEN compstmt END                         { std::cout << " unless 1"; }
            | UNLESS expr THEN compstmt ELSE compstmt END           { std::cout << " unless 2"; }
;

// case EXPR when EXPR then body {when ...} [else body] end
// The first `when` arm is spelled out here; further arms come from whenstmt.
casestmt  : CASE expr WHEN expr THEN compstmt END                         { std::cout << " casestmt "; }
          | CASE expr WHEN expr THEN compstmt ELSE compstmt END           { std::cout << " casestmt "; }
          | CASE expr WHEN expr THEN compstmt whenstmt END                { std::cout << " casestmt "; }
          | CASE expr WHEN expr THEN compstmt whenstmt ELSE compstmt END  { std::cout << " casestmt "; }
;
// One or more additional `when` arms (left-recursive).
whenstmt  : WHEN expr THEN compstmt                           { std::cout << " whenstmt "; }
          | whenstmt WHEN expr THEN compstmt                  { std::cout << " whenstmt "; }
;
// if EXPR then body {elsif ...} [else body] end
ifstmt    : IF expr THEN compstmt END                         { std::cout << "ifstmt"; }
          | IF expr THEN compstmt ELSE compstmt END           { std::cout << "ifstmt"; }
          | IF expr THEN compstmt elifstmt END                { std::cout << "ifstmt"; }
          | IF expr THEN compstmt elifstmt ELSE compstmt END  { std::cout << "ifstmt"; }
;

// One or more `elsif` arms (left-recursive).
elifstmt  : ELSIF expr THEN compstmt                { std::cout << " elifstmt "; }
          | elifstmt ELSIF expr THEN compstmt       { std::cout << " elifstmt "; }
;

// Expressions: variable reference, assignment, logical not, boolean literal,
// unary minus, call without and with arguments, and binary operation.
// %prec gives the assignment and the two unary forms the precedence of the
// placeholder tokens ASSOP, UNOT and UMINUS.
// NOTE(review): the scanner returns INTEGER, but no alternative here accepts
// it -- confirm whether integer literals are meant to be expressions.
expr      : IDENTIFIER                              { std::cout << " expr 1"; }
          | IDENTIFIER assignop expr %prec ASSOP    { std::cout << " expr 2"; }
          | NOT expr %prec UNOT                     { std::cout << " expr 3"; }
          | BOOLEAN                                 { std::cout << " expr 4"; }
          | MINUS expr %prec UMINUS                 { std::cout << " expr 5"; }
          | IDENTIFIER LPAREN RPAREN                { std::cout << " expr 6"; }
          | IDENTIFIER LPAREN exprs RPAREN          { std::cout << " expr 7"; }
          | expr binop expr                         { std::cout << " expr 8"; }
;

// Comma-separated, non-empty list of call arguments (left-recursive).
exprs     : expr                      { std::cout << " exprs "; }
          | exprs COMMA expr          { std::cout << " exprs "; }
;

// Formal parameters of a definition: one or more arglist items, written
// next to each other with no separator token between them.
arglists  : arglist           { std::cout << " arglists "; }
          | arglists arglist  { std::cout << " arglists "; }
;

// A single name, optionally followed by further SEMICOLON-prefixed names.
arglist   : IDENTIFIER        { std::cout << " arglist "; }
          | IDENTIFIER ids    { std::cout << " arglist "; }
;

// One or more `SEMICOLON IDENTIFIER` pairs (right-recursive).
ids       : SEMICOLON IDENTIFIER       { std::cout << " ids "; }
          | SEMICOLON IDENTIFIER ids   { std::cout << " ids "; }
;

// Statement terminator. The scanner returns SEMICOLON for ';' and for a
// line break alike.
t         : SEMICOLON     { std::cout << " t "; }
;

// Assignment operators. The last two alternatives have empty actions and
// therefore print no trace label.
assignop  : ASSIGN        { std::cout << "assop" ; }
          | PLUSASSIGN    { std::cout << "assop" ; }
          | MINUSASSIGN   { std::cout << "assop" ; }
          | MULASSIGN     { std::cout << "assop" ; }
          | DIVASSIGN     { std::cout << "assop" ; }
          | ANDASSIGN     { }
          | ORASSIGN      { }
;

// Binary operators. Only the PLUS alternative produces a semantic value (it
// forwards the token's value); the others just print a trace line. The
// string literals had been broken across lines in the listing and are
// restored with explicit "\n" escapes.
binop     : PLUS  { Binop op = $1; $$ = op;  }
          | MINUS { std::cout << "expr MINUS expr\n"; }
          | MUL   { std::cout << "expr MUL expr\n"; }
          | DIV   { std::cout << "expr DIV expr\n"; }
          | LE    { std::cout << "expr LE expr\n"; }
          | LT    { std::cout << "expr LT expr\n"; }
          | GE    { std::cout << "expr GE expr\n"; }
          | GT    { std::cout << "expr GT expr\n"; }
          | EQ    { std::cout << "expr EQ expr\n"; }
          | NE    { std::cout << "expr NE expr\n"; }
          | AND   { std::cout << "expr AND expr\n"; }
          | OR    { std::cout << "expr OR expr\n"; }
;

%%

/// Error callback invoked by the generated parser.
/// The body used to be empty, so syntax errors disappeared without a trace;
/// the message is now written to standard error.
/// @param s message text supplied by the parser
void yyerror (const char *s)
{
  std::cerr << "parse error: " << (s ? s : "(no message)") << '\n';
}

I have tried multiple ways of ordering my includes and my compilation steps. My latest attempt at compiling everything looks like this:

bison mRuby.yy
cp -R mRuby.yy mRuby.y
bison -d mRuby.y
flex mRuby.l
gcc -c lex.yy.c mRuby.tab.c -ll -ly
g++ lex.yy.o -c
g++ mRuby.tab.cc -o parser

My goal is to replace the c++ print statements with class construction for building a parse tree and interpreter in c++.

Yuval Reshef · Accepted Answer

Bison generates files with suffixes according to the original file's suffix as written in the Bison manual, sec.9.
If you include the header as #include "mRuby.tab.h" then your bison file should be named mRuby.y (if you use c++ in bison though then I recommend using a c++ suffix like .ypp which will produce .cpp and .hpp files).

producing the files with:

flex mRuby.l
bison -d mRuby.y
g++ mRuby.tab.c lex.yy.c -o parser

seems to be working just fine, though it is hard for me to check without the header files with the proper type definitions. Note that both the flex and bison files are compiled as c++ this way which is fine for both flex and bison.

Bison output for this example shows 18 shift/reduce conflicts.

(f)lex in c and bison/yacc in C++

Answers (1)

Related Questions