Reputation: 539
I am building a small framework that reads my custom made HTML syntax and converts it into HTML code. However, I am stuck at tokenising my code and creating an AST. I understand that the algorithm requires recursive approach but I cannot figure out how to do it properly.
This is my custom code in app.txt
file:
View {
Heading {
}
Text {
}
}
And here is my recursive parser so far:
function parse(source) {
let tag = "";
let children = [];
for (let i = 0; i < source.length; i++) {
const char = source[i];
if (char === "{") {
const child = parse(source.substring(i + 1, source.length));
children.push(child);
} else if (char === "}") {
return {
tag: tag,
children: children
};
} else {
tag += char;
}
}
return;
}
The parses is expected to produce something like this (and should be able to go to any depth):
{
tag: "View",
children: [
{
tag: "Heading",
children: []
},
{
tag: "Text",
children: []
}
]
}
What am I doing wrong? I'll be thankful for any help.
Upvotes: 2
Views: 1734
Reputation: 214959
Let's write your grammar more or less formally:
tag := name '{' children '}'
name := letter | letter name
children := tag | tag children
letter := [a-z]
Then, let's write a parsing function for each rule in the grammar. We need two helper functions: getsym
, which returns the first meaningful (non-whitespace) symbol from the input, and nextsym
, which removes that symbol.
Working example:
function parse(text) {
let chars = [...text]
function getsym() {
while (chars.length > 0 && /\s/.test(chars[0]))
chars.shift()
return chars[0] || ''
}
function nextsym() {
return chars.shift()
}
return tag()
//
function tag() {
let n = name()
if (getsym() !== '{')
throw new SyntaxError()
nextsym()
let c = children(text)
if (getsym() !== '}')
throw new SyntaxError()
nextsym()
return {name: n, children: c}
}
function name() {
let t = letter()
if (t)
return t + name()
return ''
}
function letter() {
if (/[a-z]/i.test(getsym()))
return nextsym()
}
function children() {
if (getsym() === '}')
return []
let t = tag()
return [t, ...children()]
}
}
///
text = ` View {
Heading {
Content {
One {}
Two {}
Three {}
}
}
Text {
More {}
}
}
`
console.log(parse(text))
That being said, if you plan for a more complex grammar, a more practical option would be to use a parser generator like peg.js.
Upvotes: 4