Prashanth Damam
Prashanth Damam

Reputation: 931

How to check if the string is valid and has valid nested parentheses in javascript

This is the string. I need to validate the string in this is in the correct format and create a JSON. Function can be anything like add, multiply, OR, AND etc. And there can ne n number of arguments.

Valid string

'AND(OR(abc,bcd,amd),SUM(efg,hig,4gm,4uf,hht),lmb)';

Invalid strings exmaple

'AND(OR(abc,bcd),SUM(efg,,lmb)';
'AND(OR(abc,),SUM(),lmb)';
'AND(OR(abc,bcd),SUM(asd,s)ig,lmb)';

Expected JSON

{
    value: 'AND',
    inputs: [
        {
            value: 'AND',
            inputs: [
                {value: abc},
                {value: bcd}
            ]
        },
        {
            value: 'AND',
            inputs: [
                {value: efg},
                {value: hig}
            ]
        },
        {
            value: lmb
        }
    ]
}

I have tried by char separation but I couldn't validate the nested strings.

[...stringData].forEach((char) => {
  if (char === '(' || char === ',' || char === ')') {
    let obj = {
      value: splitString,
      type: standardFunctions[splitString] ? 'function' : 'param',
    };
    result.push(obj);
    splitString = '';
  } else {
    splitString += char;
  }
});

Upvotes: 1

Views: 365

Answers (2)

Brother58697
Brother58697

Reputation: 3178

You can use regex to validate it and another to tokenize it.

Then a recursive function to create the hierarchy.

This would likely need some more safeguards against potential errors, but it's a step in a possible direction.

const validInput = 'AND(AND(abc,bcd),AND(efg,hig),lmb)';
const invalidInput = 'AND(AND(abc,bcd),AND(efg,h)ig,lmb)';

const validator = /^([a-zA-Z]+\([^\)]+\),|[a-z]|,)+$/;
const functionSplit = /^([a-zA-Z]+)(\()(.*)(\))$/;
const tokenizer = /([a-zA-Z]+\([a-z,]+\)|[a-z]+)+/g;

function tokenize(string) {
  try {
    return recurse(string);
  } catch {
    return null;
  }

  function recurse(string) {
    const funcTokens = string.match(functionSplit)
    if (funcTokens) {
      const [input, func, brace, inner] = funcTokens
      return {
        value: func,
        inputs: recurse(inner),
      };
    } else {
      if (!validator.test(string)) throw new Error('');
      const tokens = string.match(tokenizer);
      return tokens.length > 1 ? tokens.map(recurse) : {value: tokens[0]};
    }
  }
}
console.log('Invalid: ', tokenize(invalidInput));
console.log('Valid:', tokenize(validInput));

Upvotes: 1

FZs
FZs

Reputation: 18639

Well, I think I've solved it.

Here is a nice recursive solution built around a tokenStream, which uses a regex to tokenize and iterate over the string.

parseExpr parses an expression by:

  • Parsing an indentifier token (name).
  • If the next character is a (, then parsing more expressions by calling itself (separated by commas), and finally a )

If it fails to parse the expression, it returns null, and parse will throw in that case.

function parseExpr(tokenStream){
  const id = parseID(tokenStream)
  if(!id)
    return null
  const isFn = parseDelim(tokenStream, '(')
  if(isFn){
    const args = []
    while(!parseDelim(tokenStream, ')')){
      if(args.length > 0){
        if(!parseDelim(tokenStream, ','))
          return null
      }
      const expr = parseExpr(tokenStream)
      if(!expr)
        return null
      args.push(expr)
    }
    return {
      value: id,
      inputs: args
    }
  }else{
    return {
      value: id
    }
  }
}

function parseID(tokenStream){
  if(tokenStream.ended)
    return null
  const [, id, delim] = tokenStream.next
  if(id === null)
    return null
  
  advanceTokenStream(tokenStream)
  return id
}
function parseDelim(tokenStream, target){
  if(tokenStream.ended)
    return null
  const [, id, delim] = tokenStream.next
  if(delim === null || delim !== target)
    return null
  
  advanceTokenStream(tokenStream)
  return delim
}

function advanceTokenStream(tokenStream){
  const {done, value} = tokenStream.iter.next()
  tokenStream.ended = done 
  tokenStream.next = value
}

function parse(str){
  if(!/^(?:[a-zA-Z0-9]+|[(),])*$/.test(str))
    throw new Error('Tokenization failed')
  
  const tokenStream = {
    iter: str.matchAll(/(?:([a-zA-Z0-9]+)|([(),]))/gy)
  }
  advanceTokenStream(tokenStream)

  const res = parseExpr(tokenStream)
  if(res === null)
    throw new Error('Parsing failed')
  if(!tokenStream.ended)
    throw new Error('Input has stuff after the expression')
  return res
}

console.log(parse('AND(AND(abc,bcd),AND(efg,hig),lmb)'))
console.log(parse('AND(OR(abc,bcd,amd),SUM(efg,hig,4gm,4uf,hht),lmb)'))
console.log(parse('AND(AND(abc,bcd),AND(efg,h)ig,lmb)'))

Upvotes: 1

Related Questions