Reputation: 59
I am trying to create a simple scripting language. To begin with, I just want to support statements like
i = 5;
i += 3;
out(i);
So I created the following grammar for jison:
// Lexer section: turns the raw input text into the token names used by the grammar rules below.
// Whitespace is discarded; every other rule returns a token name.
// NOTE(review): CHAR_SEQUENCE is defined as [A-Z]* - upper-case letters only, and because of
// the star it can also match the empty string.
// NOTE(review): no rule in this section returns the ';', '"' or 'out(' tokens that the grammar
// rules reference - confirm how those are expected to be produced.
%lex
%%
\s+ { /* ignore */ }
"=" { return '='; }
"+=" { return '+='; }
"-=" { return '-='; }
"*=" { return '*='; }
"/=" { return '/='; }
"." { return '.'; }
"(" { return '('; }
")" { return ')'; }
"{" { return '{'; }
"}" { return '}'; }
[0-9]+ { return 'NUMBER'; }
[A-Z]* { return 'CHAR_SEQUENCE'; }
<<EOF>> { return 'EOF'; }
/lex
%%
// Start rule: a program is a StatementList followed by end of input.
// The action returns a function; calling that function invokes every collected
// statement closure in order.
Program
: StatementList EOF
{
return function()
{
// $1 is the array built by StatementList; each element is called with no arguments.
for(var i = 0; i < $1.length; i++)
{
$1[i]();
}
};
}
;
// Left-recursive list of statements. The empty alternative yields an empty array and
// each further Statement value is appended with concat, producing a new array.
StatementList
: StatementList Statement
{ $$ = $1.concat($2); }
|
{ $$ = []; }
;
// A statement is either an assignment or an output call.
// No action is given here; presumably this relies on the parser generator's default
// action to pass the child value through - TODO confirm.
Statement
: AssignStatement
| VariableOutput
;
// Assignment operators. Each alternative yields a two-argument function (left, right)
// that applies the operation by calling the corresponding method on left with right.
// The methods themselves (set, add, remove, multiplicate, divide) are defined outside
// this grammar.
Operator
: "="
{ $$ = function(left, right) { left.set(right); }; }
| "+="
{ $$ = function(left, right) { left.add(right); }; }
| "-="
{ $$ = function(left, right) { left.remove(right); }; }
| "*="
{ $$ = function(left, right) { left.multiplicate(right); }; }
| "/="
{ $$ = function(left, right) { left.divide(right); }; }
;
// Output statement. Yields a closure that builds a Tellraw with the text "Output: ",
// appends the looked-up variable's toTellrawExtra() result to t.extra, and calls
// t.tell with an Entities.Player constructed from "@a".
// NOTE(review): $1 is the first symbol of this rule (the 'out(' token); the
// CHAR_SEQUENCE is the second symbol, so vars[$2] may be what was intended - confirm.
VariableOutput
: 'out(' CHAR_SEQUENCE ')' ';'
{
$$ = function()
{
var t = new Tellraw("Output: ");
t.extra.push(vars[$1].toTellrawExtra());
t.tell(new Entities.Player("@a"));
};
}
;
// Assignment statement. Three alternatives, distinguished by the right-hand side:
// another variable, a quoted CHAR_SEQUENCE, or a NUMBER. Each yields a closure that
// creates the target entry in vars if it is still undefined and then applies the
// Operator function ($2) to the target and the right-hand value.
AssignStatement
: CHAR_SEQUENCE Operator CHAR_SEQUENCE ';'
{
$$ = function()
{
// The source variable must already exist.
Util.assert(typeof vars[$3] != 'undefined', "Unknown identifier '"+$3+"'");
// NOTE(review): constructor.call() invokes the constructor without new and without
// a this argument - confirm that this really produces a fresh value of the same type.
if(typeof vars[$1] == 'undefined')
vars[$1] = vars[$3].constructor.call();
$2(vars[$1], vars[$3]);
};
}
| CHAR_SEQUENCE Operator '"' CHAR_SEQUENCE '"' ';'
{
$$ = function()
{
// NOTE(review): in this alternative $3 is the opening quote token; the quoted
// CHAR_SEQUENCE is the fourth symbol ($4) - confirm which one was intended.
if(typeof vars[$1] == 'undefined')
vars[$1] = new Runtime.String($3);
$2(vars[$1], $3);
};
}
| CHAR_SEQUENCE Operator NUMBER ';'
{
$$ = function()
{
// $3 is the NUMBER token value; presumably the matched text rather than a
// numeric value - TODO confirm what Runtime.Integer expects.
if(typeof vars[$1] == 'undefined')
vars[$1] = new Runtime.Integer($3);
$2(vars[$1], $3);
};
}
;
It generates the parser without complaining about the grammar. My problem is that when I do
parser.parse('i=5;out(i);')();
I get this error
Parse error on line 1:
i = 5;out(i);
^
Expecting '=', '+=', '-=', '*=', '/=', got 'CHAR_SEQUENCE'
This totally confuses me :/ There is no rule that expects an operator first. The only rules expecting an operator are the AssignStatement alternatives, but they all expect a CHAR_SEQUENCE as the first symbol.
Am I doing something wrong? Or why does it not work? If you need any further information, feel free to ask :)
Upvotes: 0
Views: 1138
Reputation: 241861
You are expecting `i` to be a `CHAR_SEQUENCE`, but `CHAR_SEQUENCE` is `[A-Z]*`, which is to say, only capital letters. You probably wanted something like `[A-Za-z_][A-Za-z_0-9]*`. So the lexer is not recognizing `i` at all.
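However, it is recognizing an empty `CHAR_SEQUENCE`. In `jison`, unlike `flex`, patterns which can match the empty string will do so, and should almost always be avoided. Because the empty match consumes no input, the lexer returns a second empty `CHAR_SEQUENCE` right after the first, at the point where the parser expects an operator, which is exactly the error you see.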
Upvotes: 1
Reputation: 521
Is it possible that when you use
[A-Z]* { return 'CHAR_SEQUENCE'; }
with `*` instead of `+`, you are treating the empty string as a CHAR_SEQUENCE, so that the parser finds two CHAR_SEQUENCE tokens instead of just one?
Upvotes: 0