Reputation: 19295
I have looked at various posts on the subject but cannot find a satisfactory answer.
I need a regexp to match a string like #xxx# - that is, a string which may itself begin and end with characters that are not in a-z A-Z 0-9 - but only where it sits within "word boundaries": it must be preceded by the start of the string (^) or a char that is not in a-z A-Z 0-9, and followed by the end of the string ($) or a char that is not in a-z A-Z 0-9.
I want to use this with replace, matching case-insensitively and globally. I am looking for a solution in the form of:
regexp for #xxx#:
'#xxx#'.replace(regexp, 'bla') => 'bla'
'#xxx#,#xXx#)'.replace(regexp, 'bla') => 'bla,bla)'
'(#xXx#, #xxx#)'.replace(regexp, 'bla') => '(bla, bla)'
and:
'a#xxx#'.replace(regexp, 'bla') => 'a#xxx#'
'#xXx#0'.replace(regexp, 'bla') => '#xXx#0'
'hello'.replace(regexp, 'bla') => 'hello'
regexp for xxx:
'xxx'.replace(regexp, 'bla') => 'bla'
'xxx,xXx)'.replace(regexp, 'bla') => 'bla,bla)'
'(xXx, xxx),'.replace(regexp, 'bla') => '(bla, bla),'
and:
'axxx'.replace(regexp, 'bla') => 'axxx'
'xXx0'.replace(regexp, 'bla') => 'xXx0'
'hello'.replace(regexp, 'bla') => 'hello'
etc
I tried various solutions (e.g. (?!\w)#xxx#(?!\w)) but cannot get any of them to work.
Basically I'm looking for an equivalent of \b that still works when the search string itself starts or ends with non-alphanumeric chars.
Any help ?
Upvotes: 0
Views: 75
Reputation: 19295
I'm not sure a regexp solution is possible, so I went with a JavaScript solution like this:
/**
 * Tells whether `c` is an ASCII letter or digit (0-9, A-Z, a-z).
 *
 * @param {string} c - The character to classify.
 * @returns {boolean} `true` when `c` falls in one of the three ASCII alphanumeric ranges.
 */
const isAlnumChar = (c) => (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');

/**
 * Replaces every case-insensitive occurrence of `f` in `s` with `r`, but only
 * where the occurrence is delimited on both sides by the start/end of the
 * string or by a character that is not an ASCII letter or digit.
 *
 * `f` and `r` are treated as literal text (no regex or `$1`-style expansion).
 *
 * @param {string} s - The text to search.
 * @param {string} f - The literal text to look for; an empty string matches nothing.
 * @param {string} r - The literal replacement text.
 * @returns {string} `s` with every delimited occurrence of `f` replaced by `r`.
 */
const replace = (s, f, r) => {
  const flen = f.length;
  // An empty needle is "found" at every index without ever advancing the scan,
  // which would loop forever; treat it as nothing to replace.
  if (flen === 0) return s;

  // NOTE(review): indices found in the lower-cased copy are applied to `s`,
  // which assumes lower-casing does not change the string's length.
  const lcs = s.toLowerCase();
  const lcf = f.toLowerCase();

  let res = '';
  let copied = 0; // index in `s` up to which text has already been emitted into `res`
  let next = lcs.indexOf(lcf);

  while (next !== -1) {
    const end = next + flen;
    const boundedLeft = next === 0 || !isAlnumChar(s[next - 1]);
    const boundedRight = end === s.length || !isAlnumChar(s[end]);

    if (boundedLeft && boundedRight) {
      res += s.substring(copied, next) + r;
      copied = end;
      next = lcs.indexOf(lcf, end);
    } else {
      // Rejected candidate: resume one character later (not a full needle
      // length later) so an overlapping, valid occurrence is not skipped.
      next = lcs.indexOf(lcf, next + 1);
    }
  }

  return res + s.substring(copied);
};
// Demo run: each row is [input text, search string]; every hit is replaced by 'bla'.
[
  ['#xxx#', '#xxx#'],
  ['#xxx#,#xXx#)', '#xxx#'],
  ['(#xXx#, #xxx#)', '#xxx#'],
  ['a#xxx#', '#xxx#'],
  ['#xXx#0', '#xxx#'],
  ['hello', '#xxx#'],
  ['xxx', 'xxx'],
  ['xxx,xXx)', 'xxx'],
  ['(xXx, xxx),', 'xxx'],
  ['axxx', 'xxx'],
  ['xXx0', 'xxx'],
  ['hello', 'xxx'],
].forEach(([text, needle]) => {
  console.log(replace(text, needle, 'bla'));
});
Upvotes: 0
Reputation: 214967
Not sure if I understand correctly, but to restrict the pattern so that it is
"preceded and followed by ^ or $ or a char that is not in a-z A-Z 0-9",
you can use /(^|[^0-9a-zA-Z])pattern goes here([^0-9a-zA-Z]|$)/:
- (^|[^0-9a-zA-Z]) will match the beginning of string or a char that is not in 0-9a-zA-Z;
- ([^0-9a-zA-Z]|$) matches the end of string or a char that is not in 0-9a-zA-Z.
Testing cases:
1) for #xxx#:
// Sample inputs for the '#xxx#' search string, including inputs with several
// occurrences and mixed case. `var` is kept because `samples` is declared
// again by the companion snippet for 'xxx'.
var samples = ['#xxx#',
  '#xxx#)',
  '(#xxx#,',
  'a#xxx#',
  '#xxx#0',
  'hello',
  '#xxx#,#xXx#)',
  '(#xXx#, #xxx#)'];

// The `g` and `i` flags give global, case-insensitive replacement.
// The trailing boundary is a lookahead, so the delimiter is not consumed and can
// still serve as the leading boundary of the next occurrence.
// The leading boundary is still a consumed group, restored through `$1`.
var results = samples.map(s => s.replace(/(^|[^0-9a-zA-Z])#xxx#(?=[^0-9a-zA-Z]|$)/gi, '$1bla'));
console.log(results);
2) for xxx:
// Sample inputs for the 'xxx' search string, including inputs with several
// occurrences and mixed case. `var` is kept because `samples` is also declared
// by the companion snippet for '#xxx#'.
var samples = ['xxx',
  'xxx)',
  '(xxx,',
  'axxx',
  'xxx0',
  'hello',
  'xxx,xXx)',
  '(xXx, xxx),'];

// The `g` and `i` flags give global, case-insensitive replacement.
// The trailing boundary is a lookahead, so the delimiter is not consumed and can
// still serve as the leading boundary of the next occurrence.
// The leading boundary is still a consumed group, restored through `$1`.
var results = samples.map(s => s.replace(/(^|[^0-9a-zA-Z])xxx(?=[^0-9a-zA-Z]|$)/gi, '$1bla'));
console.log(results);
Upvotes: 1