Reputation: 1165
I have the following example queries which represent objects of varying complexity:
abc,def,ghi
abc,\def,ghi
abc,\def{ghi,jkl},mno
abc,\def{ghi,\jkl},mno
abc,\def{ghi,\jkl{mno,pqr,\stu},vwx
In the above examples I've just used 3 letters of the alphabet to represent a word. This word could be of any length.
There's two different types of words. If a word starts with a back slash it is known as an edge. If it doesn't it is known as a field.
Fields and edges can belong to edges. These are defined within edge{ }. This is infinitely recursive, so words can belong to edges which belong to edges which belong to edges etc..
I want to parse this string to convert it into a JavaScript object.
It's hard to explain, so an example is probably better..
For example 1 this should convert to:
[
{
type: "field",
val: "abc"
},
{
type: "field",
val: "def"
},
{
type: "field",
val: "ghi"
},
]
For example 2 this should convert to:
[
{
type: "field",
val: "abc"
},
{
type: "edge",
val: "def",
children: []
},
{
type: "field",
val: "ghi"
}
]
For example 5 this should convert to:
[
{
type: "field",
val: "abc"
},
{
type: "edge",
val: "def",
children: [
{
type: "field",
val: "ghi"
},
{
type: "edge",
val: "jkl",
children: [
{
type: "field",
val: "mno"
},
{
type: "field",
val: "pqr"
},
{
type: "edge",
val: "stu",
children: []
}
]
}
]
},
{
type: "field",
val: "vwx"
}
]
To do this I presume it'll have to crawl through the string character by character, working out what it's currently looking at and building/traversing the object as it goes. Maybe there's some sort of recursive regular expression I could use?
Any thoughts on how this would best be done? Any ideas, concepts etc would be hugely appreciated.
Upvotes: 2
Views: 103
Reputation: 5963
I'd suggest writing a grammar and implementing a recursive descent parser. They're pretty straightforward.
var parse = function(s){
var i = 0;
var peek = function(c){
return s[i] === c;
};
var next = function(c){
var found = s[i];
if(c && s[i] !== c){
throw 'expected ' + c + ' at char ' + i + ': ' + s.slice(i,20);
}
i += 1;
return found;
};
var word = function(){
var w = '';
if(!/[a-z]/.test(s[i])){
throw 'expected a word at char ' + i + ': ' + s.slice(i,20);
}
while(s[i] && /[a-z]/.test(s[i])){
w += s[i];
i += 1;
}
return w;
};
var edge = function(){
next('\\');
var val = word();
var e = {
type: 'edge',
val: val
};
if(peek('{')){
next('{');
e.children = fields_or_edges();
next('}');
}
return e;
};
var field = function(){
return {
type: 'field',
val: word()
};
};
var fields_or_edges = function(){
var stuff = [];
var thing;
do {
if(peek('\\')){
thing = edge();
}else{
thing = field();
}
stuff.push(thing);
}while(peek(',') && next());
return stuff;
};
return fields_or_edges();
};
Let's test it.
[
'abc,def,ghi',
'abc,\\def,ghi',
'abc,\\def{ghi,jkl},mno',
'abc,\\def{ghi,\\jkl},mno',
'abc,\\def{ghi,\\jkl{mno,pqr,\\stu},vwx' // uncaught exception: expected } at char 35:
].forEach(function(s){
console.log(JSON.stringify(parse(s), null, 4));
});
Note that if you run this, the parser picks up a an error at char 35 in the final test string: no closing '}'.
Upvotes: 1
Reputation: 3599
Here's a single recursive function. The only real problem is that this will not work for high code point characters.
function parse (string) {
var array = []
, children = ''
, recursive = false
, level = 0
, object = { type: 'field', val: ''}
for (var i = 0; i < string.length; i++) {
var ch = string.charAt(i)
if (recursive) {
if (ch === '}' && level === 0) {
object.children = parse(children)
array.push(object)
object = {type: 'field', val: ''}
recursive = false
continue
}else if (ch === '}') {
level--
children += ch
} else if (ch === '{') {
level++
children += ch
} else {
children += ch
continue
}
}
if (ch === ',' && object.val.length > 0) {
if (object.type == 'edge') {
object.children = []
array.push(object)
object = {type: 'field', val: ''}
} else {
array.push(object)
object = {type: 'field', val: ''}
}
} else if (ch === '{') {
recursive = true
continue
} else if (ch === '\u005C') {
object.type = 'edge'
} else {
object.val += ch
}
}
if (object.val.length > 0) {
array.push(object)
}
return array
}
Upvotes: 1
Reputation: 549
Here is how I would do it without regexp:
function parseArray(s) {
var w = "", i = 0, c, array = [];
for (i = 0; i < s.length; i++) {
c = s.atChar(i);
if (c == ",") {
array.push(parseObject(w));
} else {
w += c;
}
}
}
function parseObject(s) {
if (s.length === 0) {
return null;
}
if (s.atChar(0) !== "\\") {
return {
type: "field",
value: w
};
}
var v, a, c, status = 0;
for (var i = 1; i < s.length; i++) {
c = s.atChar(i);
if (status === 0) {
if (c === "{") {
status = 1;
} else {
v += c;
}
} else {
if (c === "}") {
break;
}
a += c;
}
}
return {
type: "edge",
value: v,
children: parseArray(a)
};
}
There might be some javascript mistakes, there a still invalid queries to be dealt with, but at thinks it's a good start. I've got no idea about recursive regexp, but it is not that long to write such a function.
Upvotes: 0