Reputation: 931
This is the string. I need to validate the string in this is in the correct format and create a JSON. Function can be anything like add, multiply, OR, AND etc. And there can ne n number of arguments.
Valid string
'AND(OR(abc,bcd,amd),SUM(efg,hig,4gm,4uf,hht),lmb)';
Invalid strings exmaple
'AND(OR(abc,bcd),SUM(efg,,lmb)';
'AND(OR(abc,),SUM(),lmb)';
'AND(OR(abc,bcd),SUM(asd,s)ig,lmb)';
Expected JSON
{
value: 'AND',
inputs: [
{
value: 'AND',
inputs: [
{value: abc},
{value: bcd}
]
},
{
value: 'AND',
inputs: [
{value: efg},
{value: hig}
]
},
{
value: lmb
}
]
}
I have tried by char separation but I couldn't validate the nested strings.
[...stringData].forEach((char) => {
if (char === '(' || char === ',' || char === ')') {
let obj = {
value: splitString,
type: standardFunctions[splitString] ? 'function' : 'param',
};
result.push(obj);
splitString = '';
} else {
splitString += char;
}
});
Upvotes: 1
Views: 365
Reputation: 3178
You can use regex to validate it and another to tokenize it.
Then a recursive function to create the hierarchy.
This would likely need some more safeguards against potential errors, but it's a step in a possible direction.
const validInput = 'AND(AND(abc,bcd),AND(efg,hig),lmb)';
const invalidInput = 'AND(AND(abc,bcd),AND(efg,h)ig,lmb)';
const validator = /^([a-zA-Z]+\([^\)]+\),|[a-z]|,)+$/;
const functionSplit = /^([a-zA-Z]+)(\()(.*)(\))$/;
const tokenizer = /([a-zA-Z]+\([a-z,]+\)|[a-z]+)+/g;
function tokenize(string) {
try {
return recurse(string);
} catch {
return null;
}
function recurse(string) {
const funcTokens = string.match(functionSplit)
if (funcTokens) {
const [input, func, brace, inner] = funcTokens
return {
value: func,
inputs: recurse(inner),
};
} else {
if (!validator.test(string)) throw new Error('');
const tokens = string.match(tokenizer);
return tokens.length > 1 ? tokens.map(recurse) : {value: tokens[0]};
}
}
}
console.log('Invalid: ', tokenize(invalidInput));
console.log('Valid:', tokenize(validInput));
Upvotes: 1
Reputation: 18639
Well, I think I've solved it.
Here is a nice recursive solution built around a tokenStream
, which uses a regex to tokenize and iterate over the string.
parseExpr
parses an expression by:
(
, then parsing more expressions by calling itself (separated by commas), and finally a )
If it fails to parse the expression, it returns null
, and parse
will throw in that case.
function parseExpr(tokenStream){
const id = parseID(tokenStream)
if(!id)
return null
const isFn = parseDelim(tokenStream, '(')
if(isFn){
const args = []
while(!parseDelim(tokenStream, ')')){
if(args.length > 0){
if(!parseDelim(tokenStream, ','))
return null
}
const expr = parseExpr(tokenStream)
if(!expr)
return null
args.push(expr)
}
return {
value: id,
inputs: args
}
}else{
return {
value: id
}
}
}
function parseID(tokenStream){
if(tokenStream.ended)
return null
const [, id, delim] = tokenStream.next
if(id === null)
return null
advanceTokenStream(tokenStream)
return id
}
function parseDelim(tokenStream, target){
if(tokenStream.ended)
return null
const [, id, delim] = tokenStream.next
if(delim === null || delim !== target)
return null
advanceTokenStream(tokenStream)
return delim
}
function advanceTokenStream(tokenStream){
const {done, value} = tokenStream.iter.next()
tokenStream.ended = done
tokenStream.next = value
}
function parse(str){
if(!/^(?:[a-zA-Z0-9]+|[(),])*$/.test(str))
throw new Error('Tokenization failed')
const tokenStream = {
iter: str.matchAll(/(?:([a-zA-Z0-9]+)|([(),]))/gy)
}
advanceTokenStream(tokenStream)
const res = parseExpr(tokenStream)
if(res === null)
throw new Error('Parsing failed')
if(!tokenStream.ended)
throw new Error('Input has stuff after the expression')
return res
}
console.log(parse('AND(AND(abc,bcd),AND(efg,hig),lmb)'))
console.log(parse('AND(OR(abc,bcd,amd),SUM(efg,hig,4gm,4uf,hht),lmb)'))
console.log(parse('AND(AND(abc,bcd),AND(efg,h)ig,lmb)'))
Upvotes: 1