sol
sol

Reputation: 323

ANTLR3: ANTLRWorks reports the error, but the generated Java code does not throw an exception

I want to parse SQL expressions that contain the AND and OR keywords. The problem is that ANTLRWorks reports an error for input that does not match the grammar, but the generated Java code does not throw an exception. I also found that the generated Java code silently builds only part of the AST and gives no error information: the execution path never reaches the point where an exception would be thrown.

This is my .g grammar file:

 grammar ContainsExpr;

    options {
      language = Java;
      output=AST;
      ASTLabelType=CommonTree;
    }

    tokens {
        DIVIDE = '/' ;
        PLUS = '+' ;
        MINUS = '-' ;
        STAR = '*' ;
        MOD = '%' ;

        AMPERSAND = '&' ;
        TILDE = '~' ;
        BITWISEOR = '|' ;
        COMMA = ',';
        DOT = '.';
        LPAREN = '(' ;
        RPAREN = ')' ;

        EQUAL = '=';
        NOTEQUAL = '!=';
        LESSTHANOREQUALTO = '<=';
        LESSTHAN = '<';
        GREATERTHANOREQUALTO = '>=';
        GREATERTHAN = '>';

        AND = 'AND';
        OR = 'OR' ;
        TRUE = 'TRUE';
        FALSE = 'FALSE';
        KW_NEAR = 'NEAR';
        DOUBLE_QUOTE = '\"';
        SINGLE_QUOTE = '\'';

        TOK_NEAR;
        TOK_ITEMS;
        TOK_PARAMETER;
        TOK_WILDCARDS;
    }

    @header {
    package test1;
    }

    @members {

       // Overrides the recognizer's reportError: every recognition error is
       // forwarded to displayRecognitionError together with the token names.
      public void reportError(RecognitionException e) {
        displayRecognitionError(this.getTokenNames(), e);
      }

     // Turns an emitted error message into an unchecked exception instead of
     // printing it.
     // NOTE(review): presumably displayRecognitionError ends up calling this
     // method in the ANTLR 3 runtime - confirm against BaseRecognizer.
     @Override
      public void emitErrorMessage(String message) {
        throw new RuntimeException(message);
      }
    }

    @lexer::header {
    package test1;
    }

    @lexer::members {

       //override method
      public void reportError(RecognitionException e) {
        displayRecognitionError(this.getTokenNames(), e);
      }
    }

    @rulecatch {
        catch (RecognitionException e) {
          reportError(e);
          throw e;
        }
    }

    // LITERALS
    fragment
    Letter
        : 'a'..'z' | 'A'..'Z'
        ;

    fragment
    Digit
        :
        '0'..'9'
        ;

    fragment
    Exponent
        :
        ('e' | 'E') ( PLUS|MINUS )? (Digit)+
        ;

    Number
        :
        (Digit)+ ( DOT (Digit)* (Exponent)? | Exponent)?
        ;

    fragment
    UnquotedString
        :  
          ( ~(SINGLE_QUOTE|DOUBLE_QUOTE|' '|'\t'|'\n'|LPAREN|RPAREN|COMMA))+
        ;

    fragment
    QuotedLiteral
        :
        DOUBLE_QUOTE ( ~(DOUBLE_QUOTE|'\\') | ('\\' .) )* DOUBLE_QUOTE 
        ;

    Parameter
        :
        UnquotedString | QuotedLiteral
        ;

    // Whitespace is sent to the hidden channel.
    // NOTE(review): the '*' closure allows this rule to match the empty
    // string; '+' is the usual form for a whitespace rule - confirm.
    WS  :  (' '|'\r'|'\t'|'\n'|'\u000C')* {$channel=HIDDEN;}
        ;

    // Entry rule: a single searchCondition.
    // NOTE(review): no EOF is required here, so the parser may stop after the
    // first complete searchCondition and leave trailing tokens unread; this
    // looks like the cause of the silent partial parse described below - confirm.
    eval
        :
        searchCondition 
        ;

    //AND has precedence over OR    
    searchCondition
        :
        andExpr (precedenceOrOperator^ andExpr)*
        ;

    andExpr
        :
        subCondition (precedenceAndOperator^ subCondition)*
        ;

    precedenceAndOperator
        :
        AND | AMPERSAND
        ;

    precedenceOrOperator
        :
        OR | BITWISEOR
        ;

    subCondition 
        : 
          atom
        | LPAREN searchCondition RPAREN
        ;

    atom 
        :
        subEressixpon      
        ;

    subEressixpon  
        :
          Parameter -> ^(TOK_PARAMETER Parameter) 
        ;

When the input is the incorrect expression expr1 epxr2 (the AND is missing), the Java code produces just "expr1" as its result.

The Java code generated for the searchCondition rule is incorrect:

try {
            //  ( andExpr ( precedenceOrOperator ^ andExpr )* )
            //  andExpr ( precedenceOrOperator ^ andExpr )*
            {
            root_0 = (CommonTree)adaptor.nil();


            pushFollow(FOLLOW_andExpr_in_searchCondition714);
            andExpr2=andExpr();
            state._fsp--;

            adaptor.addChild(root_0, andExpr2.getTree());

            // ( precedenceOrOperator ^ andExpr )*
            loop1:
            while (true) {
                int alt1=2;
                int LA1_0 = input.LA(1);
                if ( (LA1_0==BITWISEOR||LA1_0==OR) ) {
                    alt1=1;
                }

                switch (alt1) {
                case 1 :
                    // precedenceOrOperator ^ andExpr
                    {
                    pushFollow(FOLLOW_precedenceOrOperator_in_searchCondition717);
                    precedenceOrOperator3=precedenceOrOperator();
                    state._fsp--;

                    root_0 = (CommonTree)adaptor.becomeRoot(precedenceOrOperator3.getTree(), root_0);
                    pushFollow(FOLLOW_andExpr_in_searchCondition720);
                    andExpr4=andExpr();
                    state._fsp--;

                    adaptor.addChild(root_0, andExpr4.getTree());

                    }
                    break;

                default :
                    break loop1;
                }
            }

            }

            retval.stop = input.LT(-1);

            retval.tree = (CommonTree)adaptor.rulePostProcessing(root_0);
            adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop);

        }

            catch (RecognitionException e) {
              reportError(e);
              throw e;
            }

        finally {
            // do for sure before leaving
        }

In the while loop, execution only reaches the code that can throw an exception when LA1_0 is an OR keyword (OR or |). For any other token the loop simply exits, so no exception is thrown.

Upvotes: 2

Views: 250

Answers (2)

Rainer
Rainer

Reputation: 793

OK, I could reproduce your problem at first, but after a little tweaking it worked.

I changed 3 things:

  1. Put the lexer rules AFTER the parser rules
  2. Added the EOF symbol to your top-level parser rule
  3. Overrode only the two methods that are needed: "reportError" in the parser and in the lexer

Have fun and keep asking^^

grammar ContainsExpr;

    options {
      language = Java;
      output=AST;
      ASTLabelType=CommonTree;
    }

    tokens {
        DIVIDE = '/' ;
        PLUS = '+' ;
        MINUS = '-' ;
        STAR = '*' ;
        MOD = '%' ;

        AMPERSAND = '&' ;
        TILDE = '~' ;
        BITWISEOR = '|' ;
        COMMA = ',';
        DOT = '.';
        LPAREN = '(' ;
        RPAREN = ')' ;

        EQUAL = '=';
        NOTEQUAL = '!=';
        LESSTHANOREQUALTO = '<=';
        LESSTHAN = '<';
        GREATERTHANOREQUALTO = '>=';
        GREATERTHAN = '>';

        AND = 'AND';
        OR = 'OR' ;
        TRUE = 'TRUE';
        FALSE = 'FALSE';
        KW_NEAR = 'NEAR';
        DOUBLE_QUOTE = '\"';
        SINGLE_QUOTE = '\'';

        TOK_NEAR;
        TOK_ITEMS;
        TOK_PARAMETER;
        TOK_WILDCARDS;
    }

    @header {
    package test1;
    }

    @lexer::header {
    package test1;
    }

    @parser::members {
      /**
       * Aborts parsing on the first syntax error by converting it into an
       * unchecked exception.
       *
       * The RecognitionException is attached as the cause so that the caller
       * can still inspect it; the message text alone is not enough because
       * e.getMessage() is null in the sample run shown with this grammar.
       * NOTE(review): getErrorHeader(e) / getErrorMessage(e, getTokenNames())
       * presumably give a readable description if one is wanted - confirm.
       *
       * @param e the recognition error raised by the parser
       * @throws RuntimeException always, wrapping {@code e}
       */
      @Override
      public void reportError(RecognitionException e) {
        throw new RuntimeException("I quit!\n" + e.getMessage(), e);
      }
    }

    @lexer::members {
      /**
       * Aborts lexing on the first error by converting it into an unchecked
       * exception.
       *
       * The RecognitionException is attached as the cause so that the caller
       * can still inspect it instead of receiving only the message text,
       * which may be null.
       *
       * @param e the recognition error raised by the lexer
       * @throws RuntimeException always, wrapping {@code e}
       */
      @Override
      public void reportError(RecognitionException e) {
        throw new RuntimeException("I quit!\n" + e.getMessage(), e);
      }
    }

    // Entry rule: one searchCondition followed by end of input. Requiring EOF
    // means leftover tokens (e.g. a second operand with no AND/OR before it)
    // are a syntax error rather than being left unread.
    eval
        :
        searchCondition EOF
        ;

    //AND has precedence over OR    
    searchCondition
        :
        andExpr (precedenceOrOperator^ andExpr)*
        ;

    andExpr
        :
        subCondition (precedenceAndOperator^ subCondition)*
        ;

    precedenceAndOperator
        :
        AND | AMPERSAND
        ;

    precedenceOrOperator
        :
        OR | BITWISEOR
        ;

    subCondition 
        : 
          atom
        | LPAREN searchCondition RPAREN
        ;

    atom 
        :
        subEressixpon      
        ;

    subEressixpon  
        :
          Parameter -> ^(TOK_PARAMETER Parameter) 
        ;

    // LITERALS
    fragment
    Letter
        : 'a'..'z' | 'A'..'Z'
        ;

    fragment
    Digit
        :
        '0'..'9'
        ;

    fragment
    Exponent
        :
        ('e' | 'E') ( PLUS|MINUS )? (Digit)+
        ;

    Number
        :
        (Digit)+ ( DOT (Digit)* (Exponent)? | Exponent)?
        ;

    fragment
    UnquotedString
        :  
          ( ~(SINGLE_QUOTE|DOUBLE_QUOTE|' '|'\t'|'\n'|LPAREN|RPAREN|COMMA))+
        ;

    fragment
    QuotedLiteral
        :
        DOUBLE_QUOTE ( ~(DOUBLE_QUOTE|'\\') | ('\\' .) )* DOUBLE_QUOTE 
        ;

    Parameter
        :
        UnquotedString | QuotedLiteral
        ;

    // Whitespace is sent to the hidden channel so the parser never sees it.
    // The closure is '+' rather than '*': a non-fragment lexer rule must not
    // be able to match the empty string, otherwise the lexer can produce
    // zero-length tokens without consuming any input.
    WS  :  (' '|'\r'|'\t'|'\n'|'\u000C')+ {$channel=HIDDEN;}
        ;

Here is my little test case

package test1;

import junit.framework.TestCase;

import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.CommonTokenStream;
import org.antlr.runtime.TokenStream;
import org.junit.Assert;

/**
 * Checks that the parser rejects an expression in which the operator between
 * two operands is missing.
 */
public class Test extends TestCase {

    /** Parsing {@code "expr1 epxr2"} has to end in an exception. */
    public void test() {
        final String expression = "expr1 epxr2"; // no AND between the operands

        ContainsExprLexer lexer = new ContainsExprLexer(new ANTLRStringStream(expression));
        ContainsExprParser parser = new ContainsExprParser(new CommonTokenStream(lexer));

        try {
            parser.eval();
            Assert.fail("Should throw Exception");
        } catch (Exception expected) {
            // Landing here is the passing outcome; show what was thrown.
            System.out.println(expected);
        }
    }
}

With the result

java.lang.RuntimeException: I quit!
null

UPDATE

I found part of my answer myself in this question: "ANTLR not throwing errors on invalid input"

Upvotes: 1

Rainer
Rainer

Reputation: 793

It has been a while since I worked with antlr but you may have a look here.

How Get error messages of antlr parsing?

You can get the error messages from the parser.

Hope this helps. Cheers

Upvotes: 2

Related Questions