Reputation: 23
I have the following code below and I am verifying matching types. For example relational operations are only being performed using Boolean types. I have gotten most of that. I understand that $$ is the result of the operation and $n would be right hand. But I am confused when I apply it to the function header, I need to make sure that the specified return type is being returned. Based on my grammar below the return type would $5 but the actual value being returned should be $$. My question am I applying it to the right part of the grammar and is my logic correct on how to validate that the function is returning the right type?
%{
#include <string>
#include <vector>
#include <map>
using namespace std;
#include "types.h"
#include "listing.h"
#include "symbols.h"
int yylex();
void yyerror(const char* message);
Symbols<Types> symbols;
vector<Types> case_statements;
%}
%define parse.error verbose
%union
{
CharPtr iden;
Types type;
}
%token <iden> IDENTIFIER
%token <type>INT_LITERAL REAL_LITERAL BOOL_LITERAL NOTOP CASE CASES TRUE FALSE ELSE ENDIF IF
%token ADDOP MULOP RELOP ANDOP EXPOP OROP REMOP
%token BEGIN_ BOOLEAN END ENDREDUCE INTEGER IS FUNCTION REDUCE RETURNS ENDCASE OTHERS REAL THEN WHEN ARROW
%type <type> type function_header statement statement_ reductions expression binary exponent unary relation term factor primary case cases
%%
function:
function_header optional_variable body;
function_header:
FUNCTION IDENTIFIER parameters RETURNS type ';' {checkAssignment($$,$5, "Narrowing Function Returns");}|
FUNCTION IDENTIFIER RETURNS type ';' {checkAssignment($$,$4, "Narrowing Function Returns");}|
error ';' ;
optional_variable:
optional_variable variable |
error ';' |
;
variable:
IDENTIFIER ':' type IS statement_
{checkAssignment($3, $5, "Variable Initialization");
{if (symbols.find($1, $3)) appendError(DUPLICATE_IDENTIFIER, $1);
symbols.insert($1, $3);} };
parameters:
parameter optional_parameter;
optional_parameter:
optional_parameter ',' parameter|
;
parameter:
IDENTIFIER ':' type {symbols.insert($1, $3);}
type:
INTEGER {$$ = INT_TYPE;} |
REAL {$$ = REAL_TYPE;} |
BOOLEAN {$$ = BOOL_TYPE;} ;
body:
BEGIN_ statement_ END ';' ;
statement_:
statement ';' |
error ';' {$$ = MISMATCH;} ;
statement:
expression |
REDUCE operator reductions ENDREDUCE {$$ = $3;} ; |
IF expression THEN statement_ ELSE statement_ ENDIF {$$ = checkIfThen($$, $4, $6);} |
CASE expression IS cases OTHERS ARROW statement_ ENDCASE {$$ =checkExpression($2); case_statements.push_back($7);
int size = case_statements.size();
if (!case_statements.empty()) {
int org;
for (int i = 0; i < size; i++){
for (int j = 0; j < case_statements.size(); j++) {
if (org == j) {
$$ = checkCases(case_statements[i], case_statements[j]);
}
}
org = i;
}
case_statements.clear();
}
} ;
operator:
ADDOP |
RELOP |
EXPOP |
MULOP ;
cases:
| cases case {$$ = $2;} ;
case:
WHEN INT_LITERAL ARROW statement_ { case_statements.push_back($4);} ;
reductions:
reductions statement_ {$$ = checkArithmetic($1, $2);} |
{$$ = INT_TYPE;} ;
expression:
expression OROP binary {$$ = checkLogical($1, $3);} |
binary ;
binary:
binary ANDOP relation {$$ = checkLogical($1, $3);} |
relation
relation:
relation RELOP term {$$ = checkRelational($1, $3);}|
term ;
term:
term ADDOP factor {$$ = checkArithmetic($1, $3);} |
factor ;
factor:
factor MULOP primary {$$ = checkArithmetic($1, $3);} |
factor REMOP exponent {$$ = checkInteger($1, $3);} |
exponent ;
exponent:
unary |
unary EXPOP exponent {$$ = checkArithmetic($1,$3);};
unary:
NOTOP primary {$$ = $2;}|
primary;
primary:
'(' expression ')' {$$ = $2;} |
INT_LITERAL |
REAL_LITERAL |
IDENTIFIER {if (!symbols.find($1, $$)) appendError(UNDECLARED, $1);} ;
%%
void yyerror(const char* message)
{
appendError(SYNTAX, message);
}
int main(int argc, char *argv[])
{
firstLine();
yyparse();
lastLine();
return 0;
}
Edit: Updated the code based on the information below
function:
function_header optional_variable body {checkAssignment($1,$3, "Narrowing Function Return");};;
function_header:
FUNCTION IDENTIFIER parameters RETURNS type ';' {$$ = $5;} ;|
FUNCTION IDENTIFIER RETURNS type ';' {$$ = $4;} ;|
error ';' {$$ = MISMATCH;};
optional_variable:
optional_variable variable |
error ';' |
;
variable:
IDENTIFIER ':' type IS statement_
{checkAssignment($3, $5, "Variable Initialization");
{if (symbols.find($1, $3)) appendError(DUPLICATE_IDENTIFIER, $1);
symbols.insert($1, $3);} }
parameters:
parameter optional_parameter;
optional_parameter:
optional_parameter ',' parameter|
;
parameter:
IDENTIFIER ':' type {symbols.insert($1, $3);}
type:
INTEGER {$$ = INT_TYPE;} |
REAL {$$ = REAL_TYPE;} |
BOOLEAN {$$ = BOOL_TYPE;} ;
body:
BEGIN_ statement_ END ';' {$$ = $2;}
statement_:
statement ';' {$$ = $1;}|
error ';' {$$ = MISMATCH;} ;
statement:
expression {$$ = $1;}|
REDUCE operator reductions ENDREDUCE {$$ = $3;} ; |
IF expression THEN statement_ ELSE statement_ ENDIF {$$ = checkIfThen($1, $4, $6);} |
CASE expression IS cases OTHERS ARROW statement_ ENDCASE {$$ =checkCaseExpression($2);case_statements.push_back($7);
int size = case_statements.size();
if (!case_statements.empty()) {
int org;
for (int i = 0; i < size; i++){
for (int j = 0; j < case_statements.size(); j++) {
if (org == j) {
$$ = checkCases(case_statements[i], case_statements[j]);
}
}
org = i;
}
case_statements.clear();
};} ;
operator:
ADDOP |
RELOP |
EXPOP |
MULOP ;
cases:
%empty {$$ = EMPTY; }|
cases case ;
case:
WHEN INT_LITERAL ARROW statement_ { case_statements.push_back($4);}|
%empty {$$ = EMPTY;} ;
Checker function
void checkAssignment(Types lValue, Types rValue, string message)
{
if (lValue != MISMATCH && rValue != MISMATCH && lValue != rValue)
appendError(GENERAL_SEMANTIC, "Type Mismatch on " + message);
}
Upvotes: 0
Views: 705
Reputation: 241771
Every grammar symbol (token or non-terminal) has an associated "semantic" value. The word "semantic" is used to indicate that these values are not syntactic; that is, they are not part of the parse. The bison/yacc parser does not compute these values for you, since it is only concerned with parsing the input. However, it does arrange for two things:
It's up to you to write semantic actions which compute the semantic values for those non-terminals whose semantic values are needed in the productions which use them. You do this by *assigning to" the special symbol $$
, which the parser will use as the semantic value of the non-terminal whose production has been recognised
In a semantic action, you can refer to the semantic values of the grammar symbols on the right-hand side of the production, using the special symbols $1
, $2
, etc. The number is the index of the symbol in the right-hand side. If that symbol is a token, its value must have been set by the lexical scanner, by being placed into yylval
. If the referenced symbol is a non-terminal, the value must be set by every production for that non-terminal (whose actions must assign some value to $$
.
There's a chapter in the Bison manual about Language semantics which goes into a lot more details, with examples. (It might help to read the introductory material in that manual first, if you have not already done so.)
For what it's worth, I would suggest not trying to do type-checking during the parse. You'll probably find it easier to separate concerns, by constructing a semantic tree ("AST") during the parse, and then doing separate passes over that tree to complete your analysis.
Upvotes: 1