regexp word boundary for strings enclosed in non alnum chars

Question

I have looked at various posts on the subject but cannot find a satisfactory answer.

I need a regexp to match a string like #xxx# - that is, a string which may begin and end with characters that are not in a-z A-Z 0-9 - but only where it sits within word boundaries: it must be preceded by the start of the string (^) or a char that is not in a-z A-Z 0-9, and followed by the end of the string ($) or a char that is not in a-z A-Z 0-9.

I want to use this with replace, with case-insensitive and global matching. I am looking for a solution of the form:

regexp for #xxx#:

'#xxx#'.replace(regexp, 'bla') => 'bla'
'#xxx#,#xXx#)'.replace(regexp, 'bla') => 'bla,bla)'
'(#xXx#, #xxx#)'.replace(regexp, 'bla') => '(bla, bla)'

and:

'a#xxx#'.replace(regexp, 'bla') => 'a#xxx#'
'#xXx#0'.replace(regexp, 'bla') => '#xXx#0'
'hello'.replace(regexp, 'bla') => 'hello'

regexp for xxx:

'xxx'.replace(regexp, 'bla') => 'bla'
'xxx,xXx)'.replace(regexp, 'bla') => 'bla,bla)'
'(xXx, xxx),'.replace(regexp, 'bla') => '(bla, bla),'

and:

'axxx'.replace(regexp, 'bla') => 'axxx'
'xXx0'.replace(regexp, 'bla') => 'xXx0'
'hello'.replace(regexp, 'bla') => 'hello'

etc

I tried various solutions (e.g. (?!\w)#xxx#(?!\w)) but cannot get any of them to work.

Basically I'm looking for \b that works when the string has non alnum chars around.

Any help ?

kofifus · Accepted Answer

I'm not sure a regexp solution is possible, so I went with a JavaScript solution like this:

/**
 * Tells whether `c` falls inside one of the ranges '0'-'9', 'A'-'Z' or 'a'-'z'.
 *
 * The check is a plain string comparison, so it is intended for
 * single-character strings.
 *
 * @param {string} c - Character to classify.
 * @returns {boolean} `true` when `c` lies in one of the three ranges.
 */
const isAlnumChar = (c) => {
  // Inclusive range test on the character itself.
  const within = (lo, hi) => lo <= c && c <= hi;
  return within('0', '9') || within('A', 'Z') || within('a', 'z');
};

/**
 * Case-insensitively replaces every "whole word" occurrence of `f` in `s`
 * with `r`.
 *
 * An occurrence counts only when it is not directly preceded and not directly
 * followed by an alphanumeric character (as decided by `isAlnumChar`); the
 * start and end of `s` also count as boundaries. Unlike `\b`, this works when
 * `f` itself begins or ends with non-alphanumeric characters (e.g. `#xxx#`).
 *
 * @param {string} s - Text to search.
 * @param {string} f - Text to find (matched case-insensitively).
 * @param {string} r - Replacement inserted verbatim for each accepted match.
 * @returns {string} `s` with all accepted occurrences replaced; `s` unchanged
 *   when `f` is empty or never matches.
 */
const replace = (s, f, r) => {
  const flen = f.length;
  // An empty needle "matches" at every index without ever advancing the
  // scan position, which would loop forever; treat it as nothing to replace.
  if (flen === 0) return s;

  // NOTE: indices found in the lower-cased copy are applied to `s`, which
  // assumes lower-casing preserves length. That holds for ASCII; a few
  // Unicode characters (e.g. U+0130) lower-case to more than one code unit.
  const lcs = s.toLowerCase();
  const lcf = f.toLowerCase();

  let res = '';
  let pos = 0; // start of the part of `s` not yet copied into `res`
  let next = lcs.indexOf(lcf, pos);

  while (next !== -1) {
    const end = next + flen;
    const boundedLeft = next === 0 || !isAlnumChar(s[next - 1]);
    const boundedRight = end === s.length || !isAlnumChar(s[end]);

    if (boundedLeft && boundedRight) {
      res += s.substring(pos, next) + r;
      pos = end;
      next = lcs.indexOf(lcf, pos);
    } else {
      // Rejected candidate: resume one character later rather than past the
      // whole candidate, so a valid occurrence overlapping it is not missed
      // (e.g. '##' inside 'a###').
      next = lcs.indexOf(lcf, next + 1);
    }
  }
  return res + s.substring(pos);
};


// Demo: each [text, needle] pair is run through replace() with the
// replacement 'bla' and the result is printed, one line per pair.
for (const [text, needle] of [
  // Needle wrapped in non-alphanumeric characters: replaced.
  ['#xxx#', '#xxx#'], // bla
  ['#xxx#,#xXx#)', '#xxx#'], // bla,bla)
  ['(#xXx#, #xxx#)', '#xxx#'], // (bla, bla)

  // Alphanumeric neighbour or no occurrence: left untouched.
  ['a#xxx#', '#xxx#'], // a#xxx#
  ['#xXx#0', '#xxx#'], // #xXx#0
  ['hello', '#xxx#'], // hello

  // Purely alphanumeric needle: replaced.
  ['xxx', 'xxx'], // bla
  ['xxx,xXx)', 'xxx'], // bla,bla)
  ['(xXx, xxx),', 'xxx'], // (bla, bla),

  // Alphanumeric neighbour or no occurrence: left untouched.
  ['axxx', 'xxx'], // axxx
  ['xXx0', 'xxx'], // xXx0
  ['hello', 'xxx'], // hello
]) {
  console.log(replace(text, needle, 'bla'));
}

regexp word boundary for strings enclosed in non alnum chars

Answers (2)

Related Questions