Wert
Wert

Reputation: 133

Jison Google-Like Parser

I have written a parser using Jison that is able to handle google-like search queries with operators and boolean operation support. Currently, I am having difficulty figuring out how to accept whitespace in between the AND OR and NOT operators. Any help would be greatly appreciated and I have attached some examples of desired input/output below.

Input:

  1. true && false || true
  2. ( true ) && ( false || true )
  3. true&&false||true

Result:

1-3. ([true]&&([false]||[true]))

Code:

%lex
%%

/* Lexical Grammar */

"AND"|"&&"          { return "AND" }
"OR"|"||"           { return "OR" }
"NOT"|"!"           { return "NOT" }
"("                 { return "OPEN" }
")"                 { return "CLOSE" }
":"                 { return "QUAL" }
"-"                 { return "DASH" }
"\""|"'"            { return "QUOTE" }
\s+                 { return "SPACE" }
\w+                 { return "WORD" }
"."                 { return "DOT" }
<<EOF>>             { return "EOF" }
.                   { return "INVALID" }
/lex

/* Operators */

%right AND OR
%right NOT
%right QUAL DASH DOT

%start START

%%

/* Language Grammar */

START
    : EXP EOF
        { return $1; }
    ;

EXP
    : EXP AND EXP
        { $$ = "(" + $1 + "&&" + $3 + ")"; }
    | EXP OR EXP
        { $$ = "(" + $1 + "||" + $3 + ")"; }
    | NOT EXP
        { $$ = "(!" + $2 + ")"; }
    | OPEN EXP CLOSE
        { $$ = $2; }
    | ARGS
        { $$ = "[" + $1 + "]"; }
    ;

ARGS
    : ARG SPACE ARGS
        { $$ = [ $1 ].concat($3); }
    | OP SPACE ARGS
        { $$ = [ $1 ].concat($3); }
    | ARG
        { $$ = [ $1 ]; }
    | OP
        { $$ = [ $1 ]; }
    ;

OP
    : DASH OP
        { $$ = "-" + $2; }
    | ARG QUAL ARG
        { $$ = $1 + ":" + $3; }
    ;

ARG
    : DASH ARG
        { $$ = "-" + $2; }
    | QUOTE TERMS QUOTE
        { $$ = $2.join(" "); }
    | TERM
        { $$ = $1; }
    ;

TERMS
    : TERM SPACE TERMS
        { $$ = [ $1 ].concat($3); }
    | TERM
        { $$ = [ $1 ]; }
    ;

TERM
    : TERM DASH TERM
        { $$ = $1 + $2 + $3; }
    | TERM DOT TERM
        { $$ = $1 + $2 + $3; }
    | WORD
        { $$ = $1; }
    ;

Upvotes: 3

Views: 334

Answers (1)

Wert
Wert

Reputation: 133

Figured it out. I started ignoring the whitespace, changed a few rules, and resolved the conflicts. The parser returns a function that is used to determine if some object matches the query. Here is the final result:

/* Google-Like Parser */

/* Lexical Grammar */

%lex
%%

\s+                   { /* ignore whitespace */ }
"AND"|"&&"            { return "AND" }
"OR"|"||"             { return "OR" }
"NOT"|"!"             { return "NOT" }
"("                   { return "OPEN" }
")"                   { return "CLOSE" }
":"                   { return "QUAL" }
"-"                   { return "NEG" }
"\""|"'"              { return "QUOTE" }
\w+                   { return "WORD" }
"."                   { return "DOT" }
<<EOF>>               { return "EOF" }
.                     { return "INVALID" }

/lex

/* Operators */

%right AND OR
%right NOT
%right QUAL NEG DOT

%start START

%%

/* Language Grammar */

START
    : EXP EOF
        { return $1; }
    ;

EXP
    : EXP AND EXP
        { $$ = function(obj) { return ($1(obj) && $3(obj)); }; }
    | EXP OR EXP
        { $$ = function(obj) { return ($1(obj) || $3(obj)); }; }
    | NOT EXP
        { $$ = function(obj) { return !($2(obj)); }; }
    | OPEN EXP CLOSE
        { $$ = $2; }
    | ARGS
        { $$ = function(obj) { return parser.processArgs(obj, $1)(obj); }; }
    ;

ARGS
    : ARG ARGS
        { $$ = [ $1, $2]; }
    | OP ARGS
        { $$ = [ $1, $2]; }
    | ARG
        { $$ = [ $1 ]; }
    | OP
        { $$ = [ $1 ]; }
    ;

OP
    : NEG ARG
        {{
            $2.not = true;
            $$ = $2;
        }}
    | NEG ARG QUAL ARG
        {{
            $$ = {
                "not": true,
                "operator": $2.operand,
                "operand": $4.operand
            };
        }}
    | ARG QUAL ARG
        {{
            $$ = {
                "not": false,
                "operator": $1.operand,
                "operand": $3.operand
            };
        }}
    ;

ARG
    : QUOTE TERMS QUOTE
        {{
            $$ = {
                "not": false,
                "operator": null,
                "operand": $2.join(" ")
            };
        }}
    | TERM
        {{
            $$ = {
                "not": false,
                "operator": null,
                "operand": $1
            };
        }}
    ;

TERMS
    : TERM TERMS
        { $$ = [ $1 ].concat($2); }
    | TERM
        { $$ = [ $1 ]; }
    ;

TERM
    : WORD DOT TERM
        { $$ = $1 + $2 + $3; }
    | WORD
        { $$ = $1; }
    ;

%%

parser.processArgs = function(obj, args) {
    if (args.length > 1)
    {
        if (args[0].operator)
            return function(obj) { return (parser.matchArg(obj, args[0]) && parser.processArgs(args[1])(obj)); };
        else
            return function(obj) { return (parser.matchArg(obj, args[0]) || parser.processArgs(args[1])(obj)); };
    }
    else
    {
        return function(obj) { return parser.matchArg(obj, args[0]); };
    }
}

/* Override Later */
parser.matchArg = function(obj, arg) {
    return true;
}

Upvotes: 2

Related Questions