Tasha O
Tasha O

Reputation: 23

Bison parsing semantic values

I have the following code below and I am verifying matching types. For example relational operations are only being performed using Boolean types. I have gotten most of that. I understand that $$ is the result of the operation and $n would be right hand. But I am confused when I apply it to the function header, I need to make sure that the specified return type is being returned. Based on my grammar below the return type would $5 but the actual value being returned should be $$. My question am I applying it to the right part of the grammar and is my logic correct on how to validate that the function is returning the right type?

%{

#include <string>
#include <vector>
#include <map>

using namespace std;

#include "types.h"
#include "listing.h"
#include "symbols.h"

int yylex();
void yyerror(const char* message);

Symbols<Types> symbols;
vector<Types> case_statements;
%}

%define parse.error verbose

%union
{
    CharPtr iden;
    Types type;
}

%token <iden> IDENTIFIER
%token <type>INT_LITERAL REAL_LITERAL BOOL_LITERAL NOTOP CASE CASES TRUE FALSE ELSE ENDIF IF

%token ADDOP MULOP RELOP ANDOP EXPOP OROP REMOP
%token BEGIN_ BOOLEAN END ENDREDUCE INTEGER IS FUNCTION REDUCE RETURNS ENDCASE OTHERS REAL THEN WHEN ARROW


%type <type> type function_header statement statement_ reductions expression binary exponent unary relation term factor primary case cases

%%

function:   
    function_header optional_variable body;
    
function_header:    
    FUNCTION IDENTIFIER parameters RETURNS type ';' {checkAssignment($$,$5, "Narrowing Function Returns");}|
    FUNCTION IDENTIFIER RETURNS type ';' {checkAssignment($$,$4, "Narrowing Function Returns");}|
    error ';' ;
    

optional_variable:
    optional_variable variable |
    error ';' |
    ;

variable:   
    IDENTIFIER ':' type IS statement_ 
        {checkAssignment($3, $5, "Variable Initialization");
        {if (symbols.find($1, $3)) appendError(DUPLICATE_IDENTIFIER, $1);
        symbols.insert($1, $3);} };

parameters:
    parameter optional_parameter;
    
optional_parameter:
    optional_parameter ',' parameter|
    ;
    
parameter:
    IDENTIFIER ':' type {symbols.insert($1, $3);}

type:
    INTEGER {$$ = INT_TYPE;} |
    REAL {$$ = REAL_TYPE;} |
    BOOLEAN {$$ = BOOL_TYPE;} ;

body:
    BEGIN_ statement_ END ';' ;
    
statement_:
    statement ';' |
    error ';' {$$ = MISMATCH;} ;
    
statement:
    expression |
    REDUCE operator reductions ENDREDUCE {$$ = $3;} ; |
    IF expression THEN statement_ ELSE statement_ ENDIF {$$ = checkIfThen($$, $4, $6);} |
    CASE expression IS cases OTHERS ARROW statement_ ENDCASE {$$ =checkExpression($2); case_statements.push_back($7);
    int size = case_statements.size();
    if (!case_statements.empty()) {
        int org;
        for (int i = 0; i < size; i++){
            for (int j = 0; j < case_statements.size(); j++) {
                if (org == j) {
                    $$ = checkCases(case_statements[i], case_statements[j]);
                }
            }
            org = i;
        }
    case_statements.clear();
    }
    } ;

operator:
    ADDOP |
    RELOP |
    EXPOP |
    MULOP ;

cases:
    | cases case {$$ = $2;} ;

case:
    WHEN INT_LITERAL ARROW statement_ { case_statements.push_back($4);} ;
    

reductions:
    reductions statement_ {$$ = checkArithmetic($1, $2);} |
    {$$ = INT_TYPE;} ;

expression:
    expression OROP binary {$$ = checkLogical($1, $3);} |
    binary ;

binary:
    binary ANDOP relation {$$ = checkLogical($1, $3);} |
    relation

relation:
    relation RELOP term {$$ = checkRelational($1, $3);}|
    term ;

term:
    term ADDOP factor {$$ = checkArithmetic($1, $3);} |
    factor ;
      
factor:
    factor MULOP primary  {$$ = checkArithmetic($1, $3);} |
    factor REMOP exponent {$$ = checkInteger($1, $3);} |
    exponent ;

exponent:
    unary |
    unary EXPOP exponent {$$ = checkArithmetic($1,$3);};

unary:
    NOTOP primary {$$ = $2;}|
    primary;

primary:
    '(' expression ')' {$$ = $2;} |
    INT_LITERAL |
    REAL_LITERAL |
    IDENTIFIER {if (!symbols.find($1, $$)) appendError(UNDECLARED, $1);} ;
    
%%

void yyerror(const char* message)
{
    appendError(SYNTAX, message);
}

int main(int argc, char *argv[])    
{
    firstLine();
    yyparse();
    lastLine();
    return 0;
} 

Edit: Updated the code based on the information below

function:   
    function_header optional_variable body {checkAssignment($1,$3, "Narrowing Function Return");};;
    
function_header:    
    FUNCTION IDENTIFIER parameters RETURNS type ';' {$$ = $5;} ;|
    FUNCTION IDENTIFIER RETURNS type ';' {$$ = $4;} ;|
    error ';' {$$ = MISMATCH;};
    

optional_variable:
    optional_variable variable |
    error ';' |
    ;

variable:   
    IDENTIFIER ':' type IS statement_ 
        {checkAssignment($3, $5, "Variable Initialization");
        {if (symbols.find($1, $3)) appendError(DUPLICATE_IDENTIFIER, $1);
        symbols.insert($1, $3);} }

parameters:
    parameter optional_parameter;
    
optional_parameter:
    optional_parameter ',' parameter|
    ;
    
parameter:
    IDENTIFIER ':' type {symbols.insert($1, $3);}

type:
    INTEGER {$$ = INT_TYPE;} |
    REAL {$$ = REAL_TYPE;} |
    BOOLEAN {$$ = BOOL_TYPE;} ;

body:
    BEGIN_ statement_ END ';' {$$ = $2;}
    
statement_:
    statement ';' {$$ = $1;}|
    error ';' {$$ = MISMATCH;} ;
    
statement:
    expression {$$ = $1;}|
    REDUCE operator reductions ENDREDUCE {$$ = $3;} ; |
    IF expression THEN statement_ ELSE statement_ ENDIF {$$ = checkIfThen($1, $4, $6);} |
    CASE expression IS cases OTHERS ARROW statement_ ENDCASE {$$ =checkCaseExpression($2);case_statements.push_back($7);
    int size = case_statements.size();
    if (!case_statements.empty()) {
        int org;
        for (int i = 0; i < size; i++){
            for (int j = 0; j < case_statements.size(); j++) {
                if (org == j) {
                    $$ = checkCases(case_statements[i], case_statements[j]);
                }
            }
            org = i;
        }
    case_statements.clear();
    };} ;

operator:
    ADDOP |
    RELOP |
    EXPOP |
    MULOP ;

cases:
    %empty {$$ = EMPTY; }|
    cases case ;

case:
    WHEN INT_LITERAL ARROW statement_ { case_statements.push_back($4);}|
    %empty {$$ = EMPTY;} ;

Checker function

void checkAssignment(Types lValue, Types rValue, string message)
{
    if (lValue != MISMATCH && rValue != MISMATCH && lValue != rValue)
        appendError(GENERAL_SEMANTIC, "Type Mismatch on " + message);
}

Upvotes: 0

Views: 705

Answers (1)

rici
rici

Reputation: 241771

Every grammar symbol (token or non-terminal) has an associated "semantic" value. The word "semantic" is used to indicate that these values are not syntactic; that is, they are not part of the parse. The bison/yacc parser does not compute these values for you, since it is only concerned with parsing the input. However, it does arrange for two things:

  • Semantic values set by the lexical analyser are associated with the corresponding token;
  • Whatever code you associate with a production (the "semantic action") is executed when that production is recognised.

It's up to you to write semantic actions which compute the semantic values for those non-terminals whose semantic values are needed in the productions which use them. You do this by *assigning to" the special symbol $$, which the parser will use as the semantic value of the non-terminal whose production has been recognised

In a semantic action, you can refer to the semantic values of the grammar symbols on the right-hand side of the production, using the special symbols $1, $2, etc. The number is the index of the symbol in the right-hand side. If that symbol is a token, its value must have been set by the lexical scanner, by being placed into yylval. If the referenced symbol is a non-terminal, the value must be set by every production for that non-terminal (whose actions must assign some value to $$.

There's a chapter in the Bison manual about Language semantics which goes into a lot more details, with examples. (It might help to read the introductory material in that manual first, if you have not already done so.)

For what it's worth, I would suggest not trying to do type-checking during the parse. You'll probably find it easier to separate concerns, by constructing a semantic tree ("AST") during the parse, and then doing separate passes over that tree to complete your analysis.

Upvotes: 1

Related Questions