Reputation: 65
I am trying to use the ANTLR Maven plugin (version 4.7.2) to parse some Java source code on Ubuntu 22.04. I used the g4 files from Here and successfully generated the Parser and Lexer. My main function is as follows:
/**
 * Lexes the sample Java file, parses it starting at the {@code expression}
 * rule, and prints the returned rule context to standard output.
 *
 * @param args command-line arguments (not used)
 * @throws IOException if the sample file cannot be opened or read
 */
public static void main(String[] args) throws IOException {
    // Open the sample source file from its hard-coded location.
    InputStream sourceStream = Files.newInputStream(
            Paths.get("/xxx/java-antler-parser/src/main/java/Test.java"));

    // Wire up the pipeline: characters -> lexer -> token stream -> parser.
    Java8Lexer javaLexer = new Java8Lexer(CharStreams.fromStream(sourceStream));
    CommonTokenStream tokens = new CommonTokenStream(javaLexer);
    Java8Parser javaParser = new Java8Parser(tokens);

    // Parsing begins at the `expression` rule; the resulting context is printed.
    System.out.println(javaParser.expression());
}
The test file is just a Java file with regular imports and basic logic. Part of the file is shown below:
import org.antlr.v4.runtime.Lexer;
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.atn.PredictionMode;
import java.io.File;
import java.lang.System;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.BrokenBarrierException;
import java.util.concurrent.CyclicBarrier;
class Test {
// public static long lexerTime = 0;
public static boolean profile = false;
public static boolean notree = false;
public static boolean gui = false;
public static boolean printTree = false;
...
However, there was no output except one error message:
line 1:0 extraneous input 'import' expecting {'boolean', 'byte', 'char', 'double', 'float', 'int', 'long', 'new', 'short', 'super', 'this', 'void', IntegerLiteral, FloatingPointLiteral, BooleanLiteral, CharacterLiteral, StringLiteral, 'null', '(', '!', '~', '++', '--', '+', '-', Identifier, '@'}
[]
The "expecting" list is far shorter than what the g4 file defines. Meanwhile, when I searched for the word "import" in the generated Parser, I could see plenty of definitions that include "import".
What I want is a parser that could provide me with the tokens' type and index such as
[@0,0:5='import',<IMPORT>,1:0], [@1,6:6=' ',<WHITESPACE>,1:6], [@2,7:9='org',<Identifier>,1:7]...
What should I do?
Upvotes: 1
Views: 291
Reputation: 170207
... System.out.println(parser.expression());
If you're trying to parse the input using the `expression` production, then it won't work (because the input isn't an expression). Use the `compilationUnit` production instead:
// Sample Java input, inlined as a string so the example is self-contained.
String source = "import org.antlr.v4.runtime.Lexer;\n" +
        "import org.antlr.v4.runtime.ParserRuleContext;\n" +
        "import org.antlr.v4.runtime.atn.PredictionMode;\n" +
        "\n" +
        "import java.io.File;\n" +
        "import java.lang.System;\n" +
        "import java.util.ArrayList;\n" +
        "import java.util.List;\n" +
        "import java.util.concurrent.BrokenBarrierException;\n" +
        "import java.util.concurrent.CyclicBarrier;\n" +
        "\n" +
        "class Test {\n" +
        " // public static long lexerTime = 0;\n" +
        " public static boolean profile = false;\n" +
        " public static boolean notree = false;\n" +
        " public static boolean gui = false;\n" +
        " public static boolean printTree = false;\n" +
        "}";

// Lex the input and parse it starting at the `compilationUnit` rule,
// then print the whole parse tree as parenthesized text.
Java8Lexer javaLexer = new Java8Lexer(CharStreams.fromString(source));
CommonTokenStream parserTokens = new CommonTokenStream(javaLexer);
Java8Parser javaParser = new Java8Parser(parserTokens);
ParseTree tree = javaParser.compilationUnit();
System.out.println(tree.toStringTree(javaParser));

// If `compilationUnit` reported no errors, the input is syntactically
// correct. Rewind the lexer and tokenize again from the start so that
// every token can be dumped to STDOUT.
javaLexer.reset();
CommonTokenStream dumpTokens = new CommonTokenStream(javaLexer);
dumpTokens.fill();
for (Token token : dumpTokens.getTokens()) {
    // Look up the symbolic name of the token type in the lexer's vocabulary.
    String typeName = Java8Lexer.VOCABULARY.getSymbolicName(token.getType());
    System.out.printf("type=%-25s text='%s'%n", typeName, token.getText());
}
which produces:
(compilationUnit (importDeclaration (singleTypeImportDeclaration import (typeName (packageOrTypeName (packageOrTypeName (packageOrTypeName (packageOrTypeName org) . antlr) . v4) . runtime) . Lexer) ;)) (importDeclaration (singleTypeImportDeclaration import (typeName (packageOrTypeName (packageOrTypeName (packageOrTypeName (packageOrTypeName org) . antlr) . v4) . runtime) . ParserRuleContext) ;)) (importDeclaration (singleTypeImportDeclaration import (typeName (packageOrTypeName (packageOrTypeName (packageOrTypeName (packageOrTypeName (packageOrTypeName org) . antlr) . v4) . runtime) . atn) . PredictionMode) ;)) (importDeclaration (singleTypeImportDeclaration import (typeName (packageOrTypeName (packageOrTypeName java) . io) . File) ;)) (importDeclaration (singleTypeImportDeclaration import (typeName (packageOrTypeName (packageOrTypeName java) . lang) . System) ;)) (importDeclaration (singleTypeImportDeclaration import (typeName (packageOrTypeName (packageOrTypeName java) . util) . ArrayList) ;)) (importDeclaration (singleTypeImportDeclaration import (typeName (packageOrTypeName (packageOrTypeName java) . util) . List) ;)) (importDeclaration (singleTypeImportDeclaration import (typeName (packageOrTypeName (packageOrTypeName (packageOrTypeName java) . util) . concurrent) . BrokenBarrierException) ;)) (importDeclaration (singleTypeImportDeclaration import (typeName (packageOrTypeName (packageOrTypeName (packageOrTypeName java) . util) . concurrent) . 
CyclicBarrier) ;)) (typeDeclaration (classDeclaration (normalClassDeclaration class Test (classBody { (classBodyDeclaration (classMemberDeclaration (fieldDeclaration (fieldModifier public) (fieldModifier static) (unannType (unannPrimitiveType boolean)) (variableDeclaratorList (variableDeclarator (variableDeclaratorId profile) = (variableInitializer (expression (assignmentExpression (conditionalExpression (conditionalOrExpression (conditionalAndExpression (inclusiveOrExpression (exclusiveOrExpression (andExpression (equalityExpression (relationalExpression (shiftExpression (additiveExpression (multiplicativeExpression (unaryExpression (unaryExpressionNotPlusMinus (postfixExpression (primary (primaryNoNewArray_lfno_primary (literal false)))))))))))))))))))))) ;))) (classBodyDeclaration (classMemberDeclaration (fieldDeclaration (fieldModifier public) (fieldModifier static) (unannType (unannPrimitiveType boolean)) (variableDeclaratorList (variableDeclarator (variableDeclaratorId notree) = (variableInitializer (expression (assignmentExpression (conditionalExpression (conditionalOrExpression (conditionalAndExpression (inclusiveOrExpression (exclusiveOrExpression (andExpression (equalityExpression (relationalExpression (shiftExpression (additiveExpression (multiplicativeExpression (unaryExpression (unaryExpressionNotPlusMinus (postfixExpression (primary (primaryNoNewArray_lfno_primary (literal false)))))))))))))))))))))) ;))) (classBodyDeclaration (classMemberDeclaration (fieldDeclaration (fieldModifier public) (fieldModifier static) (unannType (unannPrimitiveType boolean)) (variableDeclaratorList (variableDeclarator (variableDeclaratorId gui) = (variableInitializer (expression (assignmentExpression (conditionalExpression (conditionalOrExpression (conditionalAndExpression (inclusiveOrExpression (exclusiveOrExpression (andExpression (equalityExpression (relationalExpression (shiftExpression (additiveExpression (multiplicativeExpression (unaryExpression 
(unaryExpressionNotPlusMinus (postfixExpression (primary (primaryNoNewArray_lfno_primary (literal false)))))))))))))))))))))) ;))) (classBodyDeclaration (classMemberDeclaration (fieldDeclaration (fieldModifier public) (fieldModifier static) (unannType (unannPrimitiveType boolean)) (variableDeclaratorList (variableDeclarator (variableDeclaratorId printTree) = (variableInitializer (expression (assignmentExpression (conditionalExpression (conditionalOrExpression (conditionalAndExpression (inclusiveOrExpression (exclusiveOrExpression (andExpression (equalityExpression (relationalExpression (shiftExpression (additiveExpression (multiplicativeExpression (unaryExpression (unaryExpressionNotPlusMinus (postfixExpression (primary (primaryNoNewArray_lfno_primary (literal false)))))))))))))))))))))) ;))) })))) <EOF>)
type=IMPORT text='import'
type=Identifier text='org'
type=DOT text='.'
type=Identifier text='antlr'
type=DOT text='.'
type=Identifier text='v4'
type=DOT text='.'
type=Identifier text='runtime'
type=DOT text='.'
type=Identifier text='Lexer'
type=SEMI text=';'
type=IMPORT text='import'
type=Identifier text='org'
type=DOT text='.'
type=Identifier text='antlr'
type=DOT text='.'
type=Identifier text='v4'
type=DOT text='.'
type=Identifier text='runtime'
type=DOT text='.'
type=Identifier text='ParserRuleContext'
type=SEMI text=';'
type=IMPORT text='import'
type=Identifier text='org'
type=DOT text='.'
type=Identifier text='antlr'
type=DOT text='.'
type=Identifier text='v4'
type=DOT text='.'
type=Identifier text='runtime'
type=DOT text='.'
type=Identifier text='atn'
type=DOT text='.'
type=Identifier text='PredictionMode'
type=SEMI text=';'
type=IMPORT text='import'
type=Identifier text='java'
type=DOT text='.'
type=Identifier text='io'
type=DOT text='.'
type=Identifier text='File'
type=SEMI text=';'
type=IMPORT text='import'
type=Identifier text='java'
type=DOT text='.'
type=Identifier text='lang'
type=DOT text='.'
type=Identifier text='System'
type=SEMI text=';'
type=IMPORT text='import'
type=Identifier text='java'
type=DOT text='.'
type=Identifier text='util'
type=DOT text='.'
type=Identifier text='ArrayList'
type=SEMI text=';'
type=IMPORT text='import'
type=Identifier text='java'
type=DOT text='.'
type=Identifier text='util'
type=DOT text='.'
type=Identifier text='List'
type=SEMI text=';'
type=IMPORT text='import'
type=Identifier text='java'
type=DOT text='.'
type=Identifier text='util'
type=DOT text='.'
type=Identifier text='concurrent'
type=DOT text='.'
type=Identifier text='BrokenBarrierException'
type=SEMI text=';'
type=IMPORT text='import'
type=Identifier text='java'
type=DOT text='.'
type=Identifier text='util'
type=DOT text='.'
type=Identifier text='concurrent'
type=DOT text='.'
type=Identifier text='CyclicBarrier'
type=SEMI text=';'
type=CLASS text='class'
type=Identifier text='Test'
type=LBRACE text='{'
type=PUBLIC text='public'
type=STATIC text='static'
type=BOOLEAN text='boolean'
type=Identifier text='profile'
type=ASSIGN text='='
type=BooleanLiteral text='false'
type=SEMI text=';'
type=PUBLIC text='public'
type=STATIC text='static'
type=BOOLEAN text='boolean'
type=Identifier text='notree'
type=ASSIGN text='='
type=BooleanLiteral text='false'
type=SEMI text=';'
type=PUBLIC text='public'
type=STATIC text='static'
type=BOOLEAN text='boolean'
type=Identifier text='gui'
type=ASSIGN text='='
type=BooleanLiteral text='false'
type=SEMI text=';'
type=PUBLIC text='public'
type=STATIC text='static'
type=BOOLEAN text='boolean'
type=Identifier text='printTree'
type=ASSIGN text='='
type=BooleanLiteral text='false'
type=SEMI text=';'
type=RBRACE text='}'
type=EOF text='<EOF>'
Upvotes: 2