Reputation: 27
I just added a set of new tokens to my parser, and each of the new ones is reported as undeclared. The first line of tokens was included in the last working build.
%token <token> NUMCONST STRINGCONST IDENT CHARCONST BOOLCONST
%token <token> BEGIN END IF THEN ELSE WHILE DO FOR TO BY RETURN BREAK OR AND NOT STATIC BOOL CHAR INT
%token <token> DPLUS DMINUS LASSIGN PLUSEQ MINUSEQ TIMEEQ DIVEQ NOTEQ
The error messages I get after running my makefile indicate that none of the new tokens are properly declared, although all of the old ones are still functioning.
cScan.l:44:9: error: ‘STATIC’ undeclared (first use in this function)
static {return STATIC;}
^
cScan.l:44:9: note: each undeclared identifier is reported only once for each function it appears in
cScan.l:45:9: error: ‘BOOL’ undeclared (first use in this function)
bool {return BOOL;}
^
cScan.l:46:9: error: ‘CHAR’ undeclared (first use in this function)
char {return CHAR;}
^
cScan.l:47:10: error: ‘INT’ undeclared (first use in this function)
int { return INT; }
^
cScan.l:48:15: error: expected expression before ‘;’ token
begin { return BEGIN;}
^
cScan.l:49:9: error: ‘END’ undeclared (first use in this function)
end {return END;}
^
cScan.l:50:9: error: ‘IF’ undeclared (first use in this function)
if {return IF;}
^
cScan.l:51:9: error: ‘THEN’ undeclared (first use in this function)
then {return THEN;}
^
cScan.l:52:9: error: ‘ELSE’ undeclared (first use in this function)
else {return ELSE;}
^
cScan.l:53:9: error: ‘WHILE’ undeclared (first use in this function)
while {return WHILE;}
^
cScan.l:54:9: error: ‘DO’ undeclared (first use in this function)
do {return DO;}
^
cScan.l:55:9: error: ‘FOR’ undeclared (first use in this function)
for {return FOR;}
^
cScan.l:56:9: error: ‘TO’ undeclared (first use in this function)
to {return TO;}
^
cScan.l:57:9: error: ‘BY’ undeclared (first use in this function)
by {return BY;}
^
cScan.l:58:9: error: ‘RETURN’ undeclared (first use in this function)
return {return RETURN;}
^
cScan.l:59:9: error: ‘BREAK’ undeclared (first use in this function)
break {return BREAK;}
^
cScan.l:60:9: error: ‘OR’ undeclared (first use in this function)
or {return OR;}
^
cScan.l:61:9: error: ‘AND’ undeclared (first use in this function)
and {return AND;}
^
cScan.l:62:10: error: ‘NOT’ undeclared (first use in this function)
not { return NOT;}
^
cScan.l:64:10: error: ‘DPLUS’ undeclared (first use in this function)
"++" { return DPLUS; }
^
cScan.l:65:10: error: ‘DMINUS’ undeclared (first use in this function)
"--" { return DMINUS; }
^
cScan.l:66:10: error: ‘LASSIGN’ undeclared (first use in this function)
"<-" { return LASSIGN; }
^
cScan.l:67:10: error: ‘PLUSEQ’ undeclared (first use in this function)
"+=" { return PLUSEQ; }
^
cScan.l:68:10: error: ‘MINUSEQ’ undeclared (first use in this function)
"-=" { return MINUSEQ; }
^
cScan.l:69:10: error: ‘TIMEEQ’ undeclared (first use in this function)
"*=" { return TIMEEQ; }
^
cScan.l:70:10: error: ‘DIVEQ’ undeclared (first use in this function)
"/=" { return DIVEQ; }
^
cScan.l:71:10: error: ‘NOTEQ’ undeclared (first use in this function)
"!=" { return NOTEQ; }
Here's the flex file where I return each of the tokens:
%{
/*
* cScan.l
*
* Flex scanner specification. Keyword and multi-character operator rules
* return the matching token code from cScan.tab.h. Single-character
* operators and punctuation are returned as the character itself
* (yytext[0]). Rules for constants and identifiers additionally fill in a
* struct TokenData and publish its address through yylval.token.
*
* Blanks, tabs, carriage returns and newlines are discarded, as is any
* text from "##" to the end of the line. Any other character is reported
* on standard output together with its line number and then ignored.
*
* NOTE(review): every rule that fills in a struct TokenData declares that
* struct inside its own action, stores its address in yylval.token and
* then returns; tokenstr/svalue are set to yytext itself rather than a
* copy. Confirm that nothing reads through these pointers after the
* action has returned.
*/
#include "scanType.h"
#include "cScan.tab.h"
%}
%option yylineno
/* Named patterns referenced as {NAME} in the rules section. */
LETTER [A-Za-z]
ID {LETTER}[_A-Za-z0-9]*
NUMCONST [0-9]+
STRINGCONST \"([^\\\"]|\\.)*\"
CHARCONST '\\?.'
BOOLCONST true|false
%%
{BOOLCONST} {
/* Boolean literal: token class 5; nvalue is 1 for "true", 0 for "false". */
struct TokenData boolToken;
yylval.token = &boolToken;
yylval.token->tokenclass = 5;
yylval.token->linenum = yylineno;
yylval.token->tokenstr = yytext;
if(yytext[0] == 't') {
yylval.token->nvalue = 1;
} else {
yylval.token->nvalue = 0;
}
return BOOLCONST;
}
static { return STATIC; }
bool { return BOOL; }
char { return CHAR; }
int { return INT; }
begin { return BEGIN;}
end { return END;}
if { return IF;}
then { return THEN;}
else { return ELSE;}
while { return WHILE;}
do { return DO;}
for { return FOR;}
to { return TO;}
by { return BY;}
return { return RETURN;}
break { return BREAK;}
or { return OR; }
and { return AND; }
not { return NOT;}
"++" { return DPLUS; }
"--" { return DMINUS; }
"<-" { return LASSIGN; }
"+=" { return PLUSEQ; }
"-=" { return MINUSEQ; }
"*=" { return TIMEEQ; }
"/=" { return DIVEQ; }
"!=" { return NOTEQ; }
{ID} {
/* Identifier: token class 1; tokenstr and svalue both refer to yytext. */
struct TokenData idToken;
yylval.token = &idToken;
yylval.token->tokenclass = 1;
yylval.token->linenum = yylineno;
yylval.token->tokenstr = yytext;
yylval.token->svalue = yytext;
return IDENT;
}
{NUMCONST} {
/* Decimal integer literal: token class 2; nvalue is atoi() of the text. */
struct TokenData numToken;
yylval.token = &numToken;
yylval.token->tokenclass = 2;
yylval.token->linenum = yylineno;
yylval.token->nvalue = atoi(yytext);
yylval.token->tokenstr = yytext;
return NUMCONST;
}
{STRINGCONST} {
/* String literal: token class 3; nvalue is the match length minus the
   two enclosing double-quote characters. svalue still includes them. */
struct TokenData stringToken;
yylval.token = &stringToken;
yylval.token->tokenclass = 3;
yylval.token->linenum = yylineno;
yylval.token->tokenstr = yytext;
yylval.token->svalue = yytext;
yylval.token->nvalue = yyleng-2;
return STRINGCONST;
}
{CHARCONST} {
/* Character literal: token class 4; svalue is the raw text, quotes included. */
struct TokenData charToken;
yylval.token = &charToken;
yylval.token->tokenclass = 4;
yylval.token->linenum = yylineno;
yylval.token->tokenstr = yytext;
yylval.token->svalue = yytext;
return CHARCONST;
}
"="|"<"|">"|"+"|"-"|"*"|"/"|"%"|"["|"]"|"*"|"-"|"?"|"("|")"|";"|","|":" { return yytext[0]; }
[ \t\r] ;
##.*\n ;
\n { ; /*option to add stuff*/ }
. { printf("ERROR(%d): Invalid or misplaced input character: '%c'. Character Ignored.\n", yylineno, yytext[0]); }
%%
/*
 * Called by the scanner when it reaches the end of its current input.
 * Always returns 1, which tells the scanner that no further input is
 * available, so scanning stops here. Declared with an explicit (void)
 * parameter list so that it is a full prototype.
 */
int yywrap(void)
{
    return 1;
}
The tokens are all listed in the cScan.tab.h file, which is included in cScan.l. Here's their definition.
/* Token type. */
/* Integer codes for the named tokens, as listed in cScan.tab.h. The first
   enumerator is 258 and each later one is one greater, ending with
   NOTEQ = 289. The whole definition is skipped when YYTOKENTYPE has
   already been defined. */
#ifndef YYTOKENTYPE
# define YYTOKENTYPE
enum yytokentype
{
NUMCONST = 258,
STRINGCONST = 259,
IDENT = 260,
CHARCONST = 261,
BOOLCONST = 262,
BEGIN = 263,
END = 264,
IF = 265,
THEN = 266,
ELSE = 267,
WHILE = 268,
DO = 269,
FOR = 270,
TO = 271,
BY = 272,
RETURN = 273,
BREAK = 274,
OR = 275,
AND = 276,
NOT = 277,
STATIC = 278,
BOOL = 279,
CHAR = 280,
INT = 281,
DPLUS = 282,
DMINUS = 283,
LASSIGN = 284,
PLUSEQ = 285,
MINUSEQ = 286,
TIMEEQ = 287,
DIVEQ = 288,
NOTEQ = 289
};
#endif
This is the makefile I'm running. I've deleted each of the generated files and rebuilt from scratch, but stale build output doesn't seem to be the issue.
# Builds the cScan executable from the flex source cScan.l and the bison
# grammar cScan.y.
cc = gcc
# ccopts is currently empty: the -ly flag is commented out.
ccopts = #-ly
lex = flex
lexopts =
lexgens = lex.yy.c
yacc = bison
# -d makes bison write the header cScan.tab.h next to cScan.tab.c.
yaccopts = -d
yaccgens = cScan.tab.c cScan.tab.h
prj = cScan

# clean names an action, not a file; declaring it phony keeps it working
# even if a file called "clean" ever appears in this directory.
.PHONY: clean

# Compile and link the generated scanner and parser.
$(prj): $(lexgens) $(yaccgens)
	$(cc) $(lexgens) $(yaccgens) $(ccopts) -o $(prj)

# Remove everything the build generates. -f keeps this from failing when
# some of the files have already been deleted.
clean:
	rm -f $(lexgens) $(yaccgens) $(prj)

# Generate the parser source and its token header from the grammar.
$(yaccgens): $(prj).y
	$(yacc) $(yaccopts) $(prj).y

# Generate the scanner. It includes cScan.tab.h, so the parser outputs
# are listed as prerequisites.
$(lexgens): $(prj).l $(yaccgens)
	$(lex) $(lexopts) $(prj).l
Here's the whole bison file, for completeness.
%{
/*
 * cScan.y
 *
 * Bison grammar. This prologue pulls in the token and tree record types
 * and declares the scanner entry point, the error callback and the input
 * stream that main() points at the file to parse.
 */
#include "scanType.h"
#include "treeType.h"
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
void yyerror(char*);
int yylex(void);
extern FILE *yyin;
%}
/* Ask for descriptive syntax-error messages instead of a bare "syntax error". */
%define parse.error verbose
/* Semantic value type shared by the scanner (yylval) and the parser. */
%union {
struct TokenData *token;//for terminals, from yylex
struct TreeNode *tree;//for nonterminals, to build the tree
char op; /* not referenced by any declaration in this file section */
}
/* All named tokens carry the <token> member of the union.
   First line: constants and identifiers. Second line: keywords.
   Third line: multi-character operators. */
%token <token> NUMCONST STRINGCONST IDENT CHARCONST BOOLCONST
%token <token> BEGIN END IF THEN ELSE WHILE DO FOR TO BY RETURN BREAK OR AND NOT STATIC BOOL CHAR INT
%token <token> DPLUS DMINUS LASSIGN PLUSEQ MINUSEQ TIMEEQ DIVEQ NOTEQ
%%
/* ---- Declarations -------------------------------------------------- */
/* Start symbol: a program is a non-empty sequence of declarations. */
program :
declList
;
declList
: declList decl
| decl
;
/* A top-level declaration is either a variable or a function. */
decl
: varDecl
| funDecl
;
/* Variable declaration: a type, a comma-separated declarator list, ';'. */
varDecl
: typeSpec varDeclList ';'
;
/* Same shape as varDecl but may be prefixed with STATIC; used by
   localDecls inside a compound statement. */
scopedVarDecl
: STATIC typeSpec varDeclList ';'
| typeSpec varDeclList ';'
;
varDeclList
: varDeclList ',' varDeclInit
| varDeclInit
;
/* A declarator, optionally followed by ':' and an initialising expression. */
varDeclInit
: varDeclId
| varDeclId ':' simpleExp
;
/* A plain name, or a name with a numeric constant in square brackets. */
varDeclId
: IDENT
| IDENT '[' NUMCONST ']'
;
typeSpec
: BOOL
| CHAR
| INT
;
/* Function declaration; the leading type is optional. The body is always
   a compound statement. */
funDecl
: typeSpec IDENT '(' parms ')' compoundStmt
| IDENT '(' parms ')' compoundStmt
;
/* Parameter list, possibly empty. */
parms
: parmList
| {/*Epsilon*/}
;
/* Parameter groups are separated by ';'. Each group is one type followed
   by a comma-separated list of names. */
parmList
: parmList ';' parmTypeList
| parmTypeList
;
parmTypeList
: typeSpec parmIdList
;
parmIdList
: parmIdList ',' parmId
| parmId
;
/* A parameter name, optionally followed by empty square brackets. */
parmId
: IDENT
| IDENT '['']'
;
/* ---- Statements ---------------------------------------------------- */
/* Statements are split into matched and unmatched forms. In the rules
   below an ELSE can only follow a matchStmt, and an IF without ELSE is
   always an unmatchStmt. */
stmt
: matchStmt
| unmatchStmt
;
matchStmt
: selectStmt_M
| iterStmt_M
| otherStmt
;
unmatchStmt
: selectStmt_U
| iterStmt_U
;
/* IF with both branches matched. */
selectStmt_M
: IF simpleExp THEN matchStmt ELSE matchStmt
;
/* IF without ELSE, or IF whose ELSE branch is itself unmatched. */
selectStmt_U
: IF simpleExp THEN stmt
| IF simpleExp THEN matchStmt ELSE unmatchStmt
;
/* Loops take the matched/unmatched status of their body. */
iterStmt_U
: WHILE simpleExp DO unmatchStmt
| FOR IDENT LASSIGN iterRange DO unmatchStmt
;
iterStmt_M
: WHILE simpleExp DO matchStmt
| FOR IDENT LASSIGN iterRange DO matchStmt
;
/* FOR range: start TO end, with an optional BY step. */
iterRange
: simpleExp TO simpleExp iterRangeStmtPr
;
iterRangeStmtPr
: BY simpleExp
| {/*Addition to stop ambiguity*/}
;
/* Statements that contain no trailing sub-statement of their own. */
otherStmt
: expStmt
| returnStmt
| breakStmt
| compoundStmt
;
/* Block: BEGIN, local declarations, statements, END. Both lists may be
   empty. */
compoundStmt
: BEGIN localDecls stmtList END
;
localDecls
: localDecls scopedVarDecl
| {/*Epsilon*/}
;
stmtList
: stmtList stmt
| {/*Epsilon*/}
;
/* Expression statement; a lone ';' is the empty statement. */
expStmt
: exp ';'
| ';'
;
/* RETURN with or without a value. */
returnStmt
: RETURN ';'
| RETURN exp ';'
;
breakStmt
: BREAK ';'
;
/* ---- Expressions --------------------------------------------------- */
/* An expression is either a mutation (assignment, ++, --) or a simple
   expression. */
exp
: mutExp
| simpleExp
;
/* The right-hand side of an assignment is a full exp, so assignments
   nest to the right. */
mutExp
: mutable assignop exp
| mutable DPLUS
| mutable DMINUS
;
assignop
: LASSIGN | PLUSEQ | MINUSEQ | TIMEEQ | DIVEQ
;
/* Operator tiers, loosest binding first: OR, AND, NOT, relational,
   additive, multiplicative, unary. The binary tiers are left-recursive. */
simpleExp
: simpleExp OR andExp
| andExp
;
andExp
: andExp AND unaryRelExp
| unaryRelExp
;
unaryRelExp
: NOT unaryRelExp
| relExp
;
/* At most one relational operator between two sums. */
relExp
: sumExp relop sumExp
| sumExp
;
/* '<' '=' and '>' '=' are each written as two separate character tokens;
   only NOTEQ is a single named token here. */
relop
: '<' | '<' '=' | '>' | '>' '=' | '=' | NOTEQ
;
sumExp
: sumExp sumop mulExp
| mulExp
;
sumop
: '+' | '-'
;
mulExp
: mulExp mulop unaryExp
| unaryExp
;
mulop
: '*' | '/' | '%'
;
/* Prefix operators may be stacked. */
unaryExp
: unaryop unaryExp
| factor
;
unaryop
: '-' | '*' | '?'
;
factor
: mutable
| immutable
;
/* Something that can appear on the left of an assignment: a name or an
   indexed name. */
mutable
: IDENT
| IDENT '[' exp ']'
;
immutable
: '(' exp ')'
| call
| constant
;
/* Function call with a possibly empty, comma-separated argument list. */
call
: IDENT '(' args ')'
;
args
: argList
| {/*Epsilon*/}
;
argList
: argList ',' exp
| exp
;
constant
: NUMCONST | STRINGCONST | CHARCONST | BOOLCONST
;
%%
/*
 * Program entry point.
 *
 * Parses the file named by the first command-line argument, or standard
 * input when no argument is given.
 *
 * Returns 0 once yyparse() has run, or EXIT_FAILURE when the named file
 * cannot be opened.
 */
int main(int argc, char *argv[])
{
    FILE *fp = NULL;

    if (argc > 1) {
        fp = fopen(argv[1], "r");
        if (fp == NULL) {
            /* Without this check yyin would be left NULL and the
               requested file would never be read. */
            perror(argv[1]);
            return EXIT_FAILURE;
        }
        yyin = fp;
    } else {
        yyin = stdin;
    }

    yyparse();

    /* Only close a stream this function opened itself. */
    if (fp != NULL) {
        fclose(fp);
    }
    return 0;
}
/*
 * Error callback declared in the prologue. Writes the message s to
 * standard output, wrapped in double quotes and prefixed with
 * "yyerror: ", followed by a newline.
 */
void yyerror(char* s)
{
    fprintf(stdout, "yyerror: \"%s\"\n", s);
}
Edit: ScanType.h
#ifndef TOKNDATA_H
/* The include-guard macro is also given a value: the compilation date and
   time as two string literals separated by a " " literal. */
#define TOKNDATA_H __DATE__" "__TIME__
/*
 * Record describing one scanned token. The scanner rules fill it in and
 * hand its address to the parser through yylval.token.
 *
 * tokenclass values set by the scanner rules: 1 identifier, 2 numeric
 * constant, 3 string constant, 4 character constant, 5 boolean constant.
 */
struct TokenData {
int tokenclass; // token class (see the list above)
int linenum; // line where found (taken from yylineno)
char *tokenstr; // what string was actually read (the scanner stores yytext here)
char cvalue; // any character value
int nvalue; // any numeric value or Boolean value; string length for string constants
char *svalue; // any string value e.g. an id
// NOTE(review): the declarator below also declares a file-scope pointer
// named useToken in every file that includes this header (both the
// scanner and the grammar include it). Confirm this is intended and that
// the toolchain accepts the repeated declaration at link time.
} * useToken;
#endif /*TOKNDATA_H*/
Edit 2:
Swapping the position of the tokens in the bison file meant that the old tokens also were undeclared.
After changing the order like so
%token <token> BEGIN END IF THEN ELSE WHILE DO FOR TO BY RETURN BREAK OR AND NOT STATIC BOOL CHAR INT
%token <token> DPLUS DMINUS LASSIGN PLUSEQ MINUSEQ TIMEEQ DIVEQ NOTEQ
%token <token> NUMCONST STRINGCONST IDENT CHARCONST BOOLCONST
I got the following error log.
cScan.l:44:10: error: ‘STATIC’ undeclared (first use in this function)
static { return STATIC; }
^
cScan.l:45:10: error: ‘BOOL’ undeclared (first use in this function)
bool { return BOOL; }
^
cScan.l:46:10: error: ‘CHAR’ undeclared (first use in this function)
char { return CHAR; }
^
cScan.l:47:10: error: ‘INT’ undeclared (first use in this function)
int { return INT; }
^
cScan.l:48:15: error: expected expression before ‘;’ token
begin { return BEGIN;}
^
cScan.l:49:10: error: ‘END’ undeclared (first use in this function)
end { return END;}
^
cScan.l:50:10: error: ‘IF’ undeclared (first use in this function)
if { return IF;}
^
cScan.l:51:10: error: ‘THEN’ undeclared (first use in this function)
then { return THEN;}
^
cScan.l:52:10: error: ‘ELSE’ undeclared (first use in this function)
else { return ELSE;}
^
cScan.l:53:10: error: ‘WHILE’ undeclared (first use in this function)
while { return WHILE;}
^
cScan.l:54:10: error: ‘DO’ undeclared (first use in this function)
do { return DO;}
^
cScan.l:55:10: error: ‘FOR’ undeclared (first use in this function)
for { return FOR;}
^
cScan.l:56:10: error: ‘TO’ undeclared (first use in this function)
to { return TO;}
^
cScan.l:57:10: error: ‘BY’ undeclared (first use in this function)
by { return BY;}
^
cScan.l:58:10: error: ‘RETURN’ undeclared (first use in this function)
return { return RETURN;}
^
cScan.l:59:10: error: ‘BREAK’ undeclared (first use in this function)
break { return BREAK;}
^
cScan.l:60:10: error: ‘OR’ undeclared (first use in this function)
or { return OR; }
^
cScan.l:61:10: error: ‘AND’ undeclared (first use in this function)
and { return AND; }
^
cScan.l:62:10: error: ‘NOT’ undeclared (first use in this function)
not { return NOT;}
^
cScan.l:64:10: error: ‘DPLUS’ undeclared (first use in this function)
"++" { return DPLUS; }
^
cScan.l:65:10: error: ‘DMINUS’ undeclared (first use in this function)
"--" { return DMINUS; }
^
cScan.l:66:10: error: ‘LASSIGN’ undeclared (first use in this function)
"<-" { return LASSIGN; }
^
cScan.l:67:10: error: ‘PLUSEQ’ undeclared (first use in this function)
"+=" { return PLUSEQ; }
^
cScan.l:68:10: error: ‘MINUSEQ’ undeclared (first use in this function)
"-=" { return MINUSEQ; }
^
cScan.l:69:10: error: ‘TIMEEQ’ undeclared (first use in this function)
"*=" { return TIMEEQ; }
^
cScan.l:70:10: error: ‘DIVEQ’ undeclared (first use in this function)
"/=" { return DIVEQ; }
^
cScan.l:71:10: error: ‘NOTEQ’ undeclared (first use in this function)
"!=" { return NOTEQ; }
^
cScan.l:80:12: error: ‘IDENT’ undeclared (first use in this function)
return IDENT;
^
cScan.l:90:12: error: ‘NUMCONST’ undeclared (first use in this function)
return NUMCONST;
^
cScan.l:101:12: error: ‘STRINGCONST’ undeclared (first use in this function)
return STRINGCONST;
^
cScan.l:112:12: error: ‘CHARCONST’ undeclared (first use in this function)
return CHARCONST;
Undoing this change returned the old tokens to functionality.
Upvotes: 1
Views: 748
Reputation: 241671
You can't use BEGIN
as a token name, because token names are used as C values, and BEGIN
is a macro defined by flex (you use it to switch start states).
That causes a syntax error in the enum
declaration which you quote in your question, with the result that all the enum members after BEGIN
are undeclared. But the most important error message was the one referring to the syntax error in the enum declaration itself:
lex.yy.c:117:15: error: expected identifier before ‘(’ token
#define BEGIN (yy_start) = 1 + 2 *
^
cScan.tab.h:62:5: note: in expansion of macro ‘BEGIN’
BEGIN = 263, /* BEGIN */
^~~~~
which for some reason you omitted from your question.
The same would be true for any macro, including ones in system library headers, if you use any of those. I generally prefer to prefix my token names with something like T_
, and then use bison aliases to make the grammar look prettier:
%token T_BEGIN "begin"
T_END "end"
// ...
%%
// ...
compoundStmt
: "begin" localDecls stmtList "end"
By the way, your struct TokenData
will lead to undefined behaviour if you ever actually use the data (which really should not be necessary for anything. Bison has lots of debugging mechanisms which don't require much effort on your part.)
As an example, consider
{BOOLCONST} {
struct TokenData boolToken;
yylval.token = &boolToken;
yylval.token->tokenclass = 5;
yylval.token->linenum = yylineno;
yylval.token->tokenstr = yytext;
if(yytext[0] == 't') {
yylval.token->nvalue = 1;
} else {
yylval.token->nvalue = 0;
}
return BOOLCONST;
}
boolToken
is an automatic ("local") variable, so its lifetime ends when the return BOOLCONST
executes. The address stored in yylval
(yylval.token = &boolToken;
) is a dangling pointer, and the contents of whatever yylval.token
points to are completely unpredictable as soon as yylex
returns. Moreover, if the contents of that memory region happen to still be intact, one of the other pointers you store:
yylval.token->tokenstr = yytext;
is a pointer into Flex's internal input buffer, whose contents are modified by yylex
the next time it is called (which almost certainly happens before the semantic value of the BOOLCONST
can be used, since the bison-generated parser usually reads one token ahead.)
Upvotes: 4