Scy
Scy

Reputation: 245

Recursive Descent Parser in Erlang

I'm trying to construct a RDP in Erlang. So far I have read and processed a file of tokens, which I will pass to the function as eg [2,6,3,7,3,2,4,6,3,2,4,4,99] (sample input that should work) and I need to ensure that every character(or set of) can be derived to by transforming from the default rule [bexp0] into some matching list of terminals.

get_terminal_list() ->
    [1,2,3,4,5,6,7,8,9,10,11,12,99].

get_prod_list() ->
[bexp0,bexp,bexp1,bterm].

get_sym_list(Prod) ->
    case Prod of
        bexp0 -> [[bexp,99]];
        bexp  -> [[bterm,bexp1]];
        bexp1 -> [[5,bexp,bexp1],[6,bexp,bexp1]];
        bterm -> [[3,bexp,4],[2],[8],[9],[2,10,1],[2,12,1],[2,11,1],[7,bterm]]
    end.

get_sym_list shows the grammar in use - where each int stands for a terminal character and each sublist is a set, i.e. bterm-> [[7,bterm]] means that bterm can turn into terminal '7' followed by non-terminal 'bterm'.

Right now I'm working on somehow realizing this:

Check if first set of rule has some terminal
 if so, check which side, reduce list of tokens from same side until first occurrence of this token,  
     parse this new list (w/o token) with rest of the set of this rule (also without the matched terminal). 
       return {success|failure, list_of_tokens, list_of_rules}  
   if success -> check with new list_of_tokens, default_rule
   if failure, check with old list_of_tokens, and new list_of_rules.

I assume the end states will be reached if the list of rules is empty - hence we have exhausted every possible production, hence not valid, or
list of tokens is empty, hence we have matched every token/set of tokens to some rule

Upvotes: 0

Views: 694

Answers (1)

Greg
Greg

Reputation: 8340

Probably this will do what you want:

-module(parse).
-export([parse1/0, parse1/1, parse2/0, parse2/1]).

parse1() ->
    parse([bexp], [2,6,3,7,3,2,4,6,3,2,4,4,99], fun get_sym_list1/1).

parse1(Input) ->
    parse([bexp], Input, fun get_sym_list1/1).


parse2() ->
    parse([bexp0], [2,6,3,7,3,2,4,6,3,2,4,4,99], fun get_sym_list2/1).

parse2(Input) ->
    parse([bexp0], Input, fun get_sym_list2/1).


parse([], [], _) ->
    true;
parse([], _, _) ->
    false;
parse([X | TX], [X | TY], Fun) ->
    io:format("+ Current:~w\tTokens:~w Input:~w~n", [X, TX, TY]),
    parse(TX, TY, Fun);
parse([X | TX], Input, Fun) ->
    io:format("  Current:~w\tTokens:~w Input:~w~n", [X, TX, Input]),
    case lists:member(X, get_terminal_list()) of
        true -> false;
        false -> lists:any(fun(T) -> parse(T ++ TX, Input, Fun) end, Fun(X))
    end.

get_terminal_list() ->
    [1,2,3,4,5,6,7,8,9,10,11,12,99].

get_sym_list1(Prod) ->
    case Prod of
        bexp    -> [[bexp1],[bterm],[bterm,bexp2]];
        bexp1   -> [[99]];
        bexp2   -> [[5,bterm,bexp2],[6,bterm,bexp2]];
        bterm   -> [[bfactor],[7,bterm]];
        bfactor -> [[3,bexp,4],[bconst],[2,10,1],[2,12,1],[2,11,1],[2]];
        bconst  -> [[8],[9]]
    end.

get_sym_list2(Prod) ->
    case Prod of
        bexp0 -> [[bterm,bexp1]];
        bexp  -> [[bterm,bexp1]];
        bexp1 -> [[5,u1],[6,bexp,bexp1],[99]];
        bterm -> [[u1,4],[2],[8],[9],[2,10,1],[2,12,1],[2,11,1],[7,bterm]];
        u1    -> [[3,bexp]]
    end.

However, it looks like either the grammar or the input list is incorrect because as far as I can see neither the old nor the new grammar parses the input. And it seems to be working fine because it will parse an input like this one:

41> parse:parse2([2,6,8,5,3,bterm,5,3,9,99,99]).
  Current:bexp0 Tokens:[] Input:[2,6,8,5,3,bterm,5,3,9,99,99]
  Current:bterm Tokens:[bexp1] Input:[2,6,8,5,3,bterm,5,3,9,99,99]
  Current:u1    Tokens:[4,bexp1] Input:[2,6,8,5,3,bterm,5,3,9,99,99]
  Current:3     Tokens:[bexp,4,bexp1] Input:[2,6,8,5,3,bterm,5,3,9,99,99]
+ Current:2     Tokens:[bexp1] Input:[6,8,5,3,bterm,5,3,9,99,99]
  Current:bexp1 Tokens:[] Input:[6,8,5,3,bterm,5,3,9,99,99]
  Current:5     Tokens:[u1] Input:[6,8,5,3,bterm,5,3,9,99,99]
+ Current:6     Tokens:[bexp,bexp1] Input:[8,5,3,bterm,5,3,9,99,99]
  Current:bexp  Tokens:[bexp1] Input:[8,5,3,bterm,5,3,9,99,99]
  Current:bterm Tokens:[bexp1,bexp1] Input:[8,5,3,bterm,5,3,9,99,99]
  Current:u1    Tokens:[4,bexp1,bexp1] Input:[8,5,3,bterm,5,3,9,99,99]
  Current:3     Tokens:[bexp,4,bexp1,bexp1] Input:[8,5,3,bterm,5,3,9,99,99]
  Current:2     Tokens:[bexp1,bexp1] Input:[8,5,3,bterm,5,3,9,99,99]
+ Current:8     Tokens:[bexp1,bexp1] Input:[5,3,bterm,5,3,9,99,99]
  Current:bexp1 Tokens:[bexp1] Input:[5,3,bterm,5,3,9,99,99]
+ Current:5     Tokens:[u1,bexp1] Input:[3,bterm,5,3,9,99,99]
  Current:u1    Tokens:[bexp1] Input:[3,bterm,5,3,9,99,99]
+ Current:3     Tokens:[bexp,bexp1] Input:[bterm,5,3,9,99,99]
  Current:bexp  Tokens:[bexp1] Input:[bterm,5,3,9,99,99]
+ Current:bterm Tokens:[bexp1,bexp1] Input:[5,3,9,99,99]
  Current:bexp1 Tokens:[bexp1] Input:[5,3,9,99,99]
+ Current:5     Tokens:[u1,bexp1] Input:[3,9,99,99]
  Current:u1    Tokens:[bexp1] Input:[3,9,99,99]
+ Current:3     Tokens:[bexp,bexp1] Input:[9,99,99]
  Current:bexp  Tokens:[bexp1] Input:[9,99,99]
  Current:bterm Tokens:[bexp1,bexp1] Input:[9,99,99]
  Current:u1    Tokens:[4,bexp1,bexp1] Input:[9,99,99]
  Current:3     Tokens:[bexp,4,bexp1,bexp1] Input:[9,99,99]
  Current:2     Tokens:[bexp1,bexp1] Input:[9,99,99]
  Current:8     Tokens:[bexp1,bexp1] Input:[9,99,99]
+ Current:9     Tokens:[bexp1,bexp1] Input:[99,99]
  Current:bexp1 Tokens:[bexp1] Input:[99,99]
  Current:5     Tokens:[u1,bexp1] Input:[99,99]
  Current:6     Tokens:[bexp,bexp1,bexp1] Input:[99,99]
+ Current:99    Tokens:[bexp1] Input:[99]
  Current:bexp1 Tokens:[] Input:[99]
  Current:5     Tokens:[u1] Input:[99]
  Current:6     Tokens:[bexp,bexp1] Input:[99]
+ Current:99    Tokens:[] Input:[]
true

BTW true means the input has been parsed and false that it has not.

Upvotes: 2

Related Questions