Reputation: 411
We have to convert a segmentSetOperations String into an abstract syntax tree (AST). The following are expected in a segmentSetOperations String:
Example usage:
SegmentSetOperationsToken.java
public interface SegmentSetOperationsToken {
record IdentifierToken(Long value) implements SegmentSetOperationsToken {
}
record UnionToken() implements SegmentSetOperationsToken {
public static final UnionToken INSTANCE = new UnionToken();
}
record IntersectToken() implements SegmentSetOperationsToken {
public static final IntersectToken INSTANCE = new IntersectToken();
}
record LeftParenToken() implements SegmentSetOperationsToken {
public static final LeftParenToken INSTANCE = new LeftParenToken();
}
record RightParenToken() implements SegmentSetOperationsToken {
public static final RightParenToken INSTANCE = new RightParenToken();
}
}
SegmentSetOperationsAST.java
public interface SegmentSetOperationsAST {
record UnionOfTwoSegments(List<Long> values) implements SegmentSetOperationsAST {
}
record IntersectOfTwoSegments(List<Long> values) implements SegmentSetOperationsAST {
}
record IntersectionsOfUnions(List<SegmentSetOperationsAST> left, SegmentSetOperationsAST right) implements SegmentSetOperationsAST {
}
record UnionsOfIntersections(List<SegmentSetOperationsAST> left, SegmentSetOperationsAST right) implements SegmentSetOperationsAST {
}
}
SegmentSetOperationsLexer.java
public class SegmentSetOperationsLexer {
private static final Pattern WHITESPACE_PATTERN = Pattern.compile("[ \\t\\r\\f\\n]+");
private static final Pattern IDENTIFIER_PATTERN = Pattern.compile("\\d+");
private static final String UNION = "u";
private static final String INTERSECT = "n";
private static final String LEFT_PAREN = "(";
private static final String RIGHT_PAREN = ")";
public static Either<SegmentSetOperationsLexerError, List<SegmentSetOperationsToken>> tokenize(String expression) {
try {
expression = WHITESPACE_PATTERN.matcher(expression).replaceAll("");
return Either.right(generateTokens(expression).stream().map(SegmentSetOperationsLexer::mapToToken).toList());
} catch (IllegalArgumentException e) {
return Either.left(new SegmentSetOperationsLexerError(e.getMessage()));
}
}
private static List<String> generateTokens(String expression) {
List<String> tokens = new ArrayList<>();
Matcher matcher = Pattern.compile("(\\d+|[un()])").matcher(expression);
while (matcher.find()) {
tokens.add(matcher.group());
}
return tokens;
}
private static SegmentSetOperationsToken mapToToken(String token) {
if (token.matches(IDENTIFIER_PATTERN.pattern())) {
return new IdentifierToken(Long.parseLong(token));
} else {
return switch (token) {
case UNION -> UnionToken.INSTANCE;
case INTERSECT -> IntersectToken.INSTANCE;
case LEFT_PAREN -> LeftParenToken.INSTANCE;
case RIGHT_PAREN -> RightParenToken.INSTANCE;
default -> throw new IllegalArgumentException("Unknown token: " + token);
};
}
}
}
We have a SegmentSetOperationsConverter class which uses the above SegmentSetOperationsLexer class and a parser to try and convert the segmentSetOperations string to the corresponding AST.
SegmentSetOperationsConverter.java
@NoArgsConstructor(access = AccessLevel.PRIVATE)
public class SegmentSetOperationsConverter {
public static Either<SegmentSetOperationsConverterError, SegmentSetOperationsAST> convertToAst(String expression) {
Either<SegmentSetOperationsLexerError, List<SegmentSetOperationsToken>> tokens = SegmentSetOperationsLexer.tokenize(expression);
if (tokens.isLeft()) {
return Either.left(tokens.getLeft());
} else {
SegmentSetOperationsParser parser = Parboiled.createParser(SegmentSetOperationsParser.class);
return parser.parseToAst(tokens.get());
}
}
}
I can get the List using SegmentSetOperationsLexer. Now I need to write a parser that takes in these tokens and generates the final AST of the given format i.e. Right(IntersectionsOfUnions(List(UnionOfTwoSegments(List(542))), UnionOfTwoSegments(List(15, 6519, 884)))). I tried using the Parboild library but it is not working as expected. Any suggestions on how can I write such a parser?
The parser should identify:
My implementation of the parser:
@BuildParseTree
public class SegmentSetOperationsParser extends BaseParser<SegmentSetOperationsAST> {
Rule astParser() {
return FirstOf(
unionOfTwoSegments(),
intersectionOfTwoSegments(),
intersectionsOfUnions(),
unionsOfIntersections()
);
}
Rule unionOfTwoSegments() {
Var<List<Long>> ids = new Var<>(new ArrayList<>());
return Sequence(
LeftParenToken.class,
unionSequence(),
RightParenToken.class,
push(new UnionOfTwoSegments(ids.get()))
);
}
Rule intersectionOfTwoSegments() {
Var<List<Long>> ids = new Var<>(new ArrayList<>());
return Sequence(
LeftParenToken.class,
intersectionSequence(),
RightParenToken.class,
push(new IntersectOfTwoSegments(ids.get()))
);
}
Rule intersectionsOfUnions() {
Var<List<SegmentSetOperationsAST>> left = new Var<>(new ArrayList<>());
return Sequence(
unionOfTwoSegments(),
IntersectToken.class,
unionOfTwoSegments(),
push(new IntersectionsOfUnions(left.get(), pop()))
);
}
Rule unionsOfIntersections() {
Var<List<SegmentSetOperationsAST>> left = new Var<>(new ArrayList<>());
return Sequence(
intersectionOfTwoSegments(),
UnionToken.class,
intersectionOfTwoSegments(),
push(new UnionsOfIntersections(left.get(), pop()))
);
}
Rule unionSequence() {
return Sequence(
IdentifierToken.class,
ZeroOrMore(
UnionToken.class,
IdentifierToken.class
)
);
}
Rule intersectionSequence() {
return Sequence(
IdentifierToken.class,
ZeroOrMore(
IntersectToken.class,
IdentifierToken.class
)
);
}
public Either<SegmentSetOperationsConverterError, SegmentSetOperationsAST> parseToAst(List<SegmentSetOperationsToken> tokens) {
ParsingResult<?> result = new ReportingParseRunner<>(astParser()).run(tokens.toString());
if (!result.hasErrors()) {
return Either.right((SegmentSetOperationsAST) result.resultValue);
} else {
return Either.left(new SegmentSetOperationsParserError(result.parseErrors.getFirst().getErrorMessage()));
}
}
}
But it gives the below error:
org.parboiled.errors.GrammarException: 'class com.paytm.adtech.map.ruleengine.http.form.setoperations.SegmentSetOperationsToken$UnionToken' cannot be automatically converted to a parser Rule
Upvotes: 0
Views: 57