Gurucharan Sharma
Gurucharan Sharma

Reputation: 411

Converts a segmentSetOperations String into an abstract syntax tree (AST)

We have to convert a segmentSetOperations String into an abstract syntax tree (AST). The following are expected in a segmentSetOperations String:

Example usage:

SegmentSetOperationsToken.java

public interface SegmentSetOperationsToken {

  record IdentifierToken(Long value) implements SegmentSetOperationsToken {

  }

  record UnionToken() implements SegmentSetOperationsToken {

    public static final UnionToken INSTANCE = new UnionToken();
  }

  record IntersectToken() implements SegmentSetOperationsToken {

    public static final IntersectToken INSTANCE = new IntersectToken();
  }

  record LeftParenToken() implements SegmentSetOperationsToken {

    public static final LeftParenToken INSTANCE = new LeftParenToken();
  }

  record RightParenToken() implements SegmentSetOperationsToken {

    public static final RightParenToken INSTANCE = new RightParenToken();
  }
}

SegmentSetOperationsAST.java

public interface SegmentSetOperationsAST {

  record UnionOfTwoSegments(List<Long> values) implements SegmentSetOperationsAST {

  }

  record IntersectOfTwoSegments(List<Long> values) implements SegmentSetOperationsAST {

  }

  record IntersectionsOfUnions(List<SegmentSetOperationsAST> left, SegmentSetOperationsAST right) implements SegmentSetOperationsAST {

  }

  record UnionsOfIntersections(List<SegmentSetOperationsAST> left, SegmentSetOperationsAST right) implements SegmentSetOperationsAST {

  }
}

SegmentSetOperationsLexer.java

public class SegmentSetOperationsLexer {

  private static final Pattern WHITESPACE_PATTERN = Pattern.compile("[ \\t\\r\\f\\n]+");
  private static final Pattern IDENTIFIER_PATTERN = Pattern.compile("\\d+");

  private static final String UNION = "u";
  private static final String INTERSECT = "n";
  private static final String LEFT_PAREN = "(";
  private static final String RIGHT_PAREN = ")";

  public static Either<SegmentSetOperationsLexerError, List<SegmentSetOperationsToken>> tokenize(String expression) {
    try {
      expression = WHITESPACE_PATTERN.matcher(expression).replaceAll("");
      return Either.right(generateTokens(expression).stream().map(SegmentSetOperationsLexer::mapToToken).toList());
    } catch (IllegalArgumentException e) {
      return Either.left(new SegmentSetOperationsLexerError(e.getMessage()));
    }
  }

  private static List<String> generateTokens(String expression) {
    List<String> tokens = new ArrayList<>();
    Matcher matcher = Pattern.compile("(\\d+|[un()])").matcher(expression);
    while (matcher.find()) {
      tokens.add(matcher.group());
    }

    return tokens;
  }

  private static SegmentSetOperationsToken mapToToken(String token) {
    if (token.matches(IDENTIFIER_PATTERN.pattern())) {
      return new IdentifierToken(Long.parseLong(token));
    } else {
      return switch (token) {
        case UNION -> UnionToken.INSTANCE;
        case INTERSECT -> IntersectToken.INSTANCE;
        case LEFT_PAREN -> LeftParenToken.INSTANCE;
        case RIGHT_PAREN -> RightParenToken.INSTANCE;
        default -> throw new IllegalArgumentException("Unknown token: " + token);
      };
    }
  }
}

We have a SegmentSetOperationsConverter class which uses the above SegmentSetOperationsLexer class and a parser to try and convert the segmentSetOperations string to the corresponding AST.

SegmentSetOperationsConverter.java

@NoArgsConstructor(access = AccessLevel.PRIVATE)
public class SegmentSetOperationsConverter {

  public static Either<SegmentSetOperationsConverterError, SegmentSetOperationsAST> convertToAst(String expression) {
    Either<SegmentSetOperationsLexerError, List<SegmentSetOperationsToken>> tokens = SegmentSetOperationsLexer.tokenize(expression);
    if (tokens.isLeft()) {
      return Either.left(tokens.getLeft());
    } else {
      SegmentSetOperationsParser parser = Parboiled.createParser(SegmentSetOperationsParser.class);
      return parser.parseToAst(tokens.get());
    }
  }
}

I can get the List using SegmentSetOperationsLexer. Now I need to write a parser that takes in these tokens and generates the final AST of the given format i.e. Right(IntersectionsOfUnions(List(UnionOfTwoSegments(List(542))), UnionOfTwoSegments(List(15, 6519, 884)))). I tried using the Parboild library but it is not working as expected. Any suggestions on how can I write such a parser?

The parser should identify:

My implementation of the parser:

@BuildParseTree
public class SegmentSetOperationsParser extends BaseParser<SegmentSetOperationsAST> {

  Rule astParser() {
    return FirstOf(
        unionOfTwoSegments(),
        intersectionOfTwoSegments(),
        intersectionsOfUnions(),
        unionsOfIntersections()
    );
  }

  Rule unionOfTwoSegments() {
    Var<List<Long>> ids = new Var<>(new ArrayList<>());

    return Sequence(
        LeftParenToken.class,
        unionSequence(),
        RightParenToken.class,
        push(new UnionOfTwoSegments(ids.get()))
    );
  }

  Rule intersectionOfTwoSegments() {
    Var<List<Long>> ids = new Var<>(new ArrayList<>());

    return Sequence(
        LeftParenToken.class,
        intersectionSequence(),
        RightParenToken.class,
        push(new IntersectOfTwoSegments(ids.get()))
    );
  }

  Rule intersectionsOfUnions() {
    Var<List<SegmentSetOperationsAST>> left = new Var<>(new ArrayList<>());

    return Sequence(
        unionOfTwoSegments(),
        IntersectToken.class,
        unionOfTwoSegments(),
        push(new IntersectionsOfUnions(left.get(), pop()))
    );
  }

  Rule unionsOfIntersections() {
    Var<List<SegmentSetOperationsAST>> left = new Var<>(new ArrayList<>());

    return Sequence(
        intersectionOfTwoSegments(),
        UnionToken.class,
        intersectionOfTwoSegments(),
        push(new UnionsOfIntersections(left.get(), pop()))
    );
  }

  Rule unionSequence() {
    return Sequence(
        IdentifierToken.class,
        ZeroOrMore(
            UnionToken.class,
            IdentifierToken.class
        )
    );
  }

  Rule intersectionSequence() {
    return Sequence(
        IdentifierToken.class,
        ZeroOrMore(
            IntersectToken.class,
            IdentifierToken.class
        )
    );
  }

  public Either<SegmentSetOperationsConverterError, SegmentSetOperationsAST> parseToAst(List<SegmentSetOperationsToken> tokens) {
    ParsingResult<?> result = new ReportingParseRunner<>(astParser()).run(tokens.toString());
    if (!result.hasErrors()) {
      return Either.right((SegmentSetOperationsAST) result.resultValue);
    } else {
      return Either.left(new SegmentSetOperationsParserError(result.parseErrors.getFirst().getErrorMessage()));
    }
  }
}

But it gives the below error:

org.parboiled.errors.GrammarException: 'class com.paytm.adtech.map.ruleengine.http.form.setoperations.SegmentSetOperationsToken$UnionToken' cannot be automatically converted to a parser Rule

Upvotes: 0

Views: 57

Answers (0)

Related Questions