Richard
Richard

Reputation: 826

Regex Javascript How To Imitate Negative Lookbehind from PHP Regex

I have the following regex, (?<!#)hazcon\s?0, and the sample test messages below. The trouble I am having is converting it from PHP to JavaScript, since JavaScript does not support negative lookbehind. I would like to match any hazcon 0 or hazcon0 while ignoring any instances of #hazcon0 and #hazcon 0.

#hazcon0 <- don't match this

#hazcon 0 <- don't match this

hazcon 0

hazcon 0

blah blah blah blah hazcon0 blah blah

blah blah blah blah hazcon 0 blah blah

If there is a better way to do this I would like to understand!

Upvotes: 0

Views: 89

Answers (2)

Wiktor Stribiżew
Wiktor Stribiżew

Reputation: 627468

The reversed-string approach can fail in specific situations (for example, when you need to match consecutive double apostrophes that are not preceded by an opening bracket). The most reliable way to emulate a negative lookbehind is to use the XRegExp library with some extensions.

// Simulating infinite-length leading lookbehind in JavaScript. Uses XRegExp.
// Captures within lookbehind are not included in match results. Lazy
// repetition in lookbehind may lead to unexpected results.

(function (XRegExp) {

    // Recognises an optional leading mode modifier such as "(?i)" followed by
    // one "(?<=...)" or "(?<!...)" group that spans the rest of the string.
    // Groups: 1 = mode modifier (may be empty), 2 = "=" or "!", 3 = group body.
    var LOOKBEHIND_SYNTAX = /^((?:\(\?[\w$]+\))?)\(\?<([=!])([\s\S]*)\)$/;

    /**
     * Compiles a lookbehind description into a regex plus a polarity flag.
     *
     * @param {string} lb Lookbehind written as "(?<=...)" or "(?<!...)",
     *     optionally preceded by a mode modifier. Any other pattern is
     *     compiled as given and treated as a positive condition.
     * @returns {{lb: RegExp, type: boolean}} `lb` is tested against the text
     *     to the left of a candidate match; `type` is true for a positive
     *     lookbehind and false for a negative one.
     */
    function prepareLb(lb) {
        var parsed = LOOKBEHIND_SYNTAX.exec(lb);
        if (!parsed) {
            // No lookbehind group supplied: use the pattern unchanged, as positive.
            return { lb: XRegExp(lb), type: true };
        }
        return {
            // The body is anchored with $ so it must end where the left context
            // ends; the trailing (?!\s) allows use of (?m) in the lookbehind.
            lb: XRegExp(parsed[1] + "(?:" + parsed[3] + ")$(?!\\s)"),
            type: parsed[2] === "="
        };
    }

    /**
     * Walks the matches of `regex` in `str` from left to right and hands every
     * match whose left context satisfies the lookbehind to `visit`.
     *
     * After an accepted match the scan resumes past it (at least one character
     * forward, so empty matches cannot stall the loop); after a rejected match
     * it resumes one character past the match start.
     *
     * @param {string} str Subject string.
     * @param {string} lb Lookbehind description (see prepareLb).
     * @param {RegExp} regex Main pattern.
     * @param {function(Array): boolean} visit Called with each accepted match;
     *     returning true ends the scan.
     */
    function eachAccepted(str, lb, regex, visit) {
        var prepared = prepareLb(lb);
        var cursor = 0;
        var found;
        for (;;) {
            found = XRegExp.exec(str, regex, cursor);
            if (!found) {
                return;
            }
            if (prepared.type === prepared.lb.test(str.slice(0, found.index))) {
                if (visit(found)) {
                    return;
                }
                cursor = found.index + (found[0].length || 1);
            } else {
                cursor = found.index + 1;
            }
        }
    }

    /**
     * Returns the first match of `regex` in `str` that satisfies the
     * lookbehind, or null when there is none.
     */
    XRegExp.execLb = function (str, lb, regex) {
        var hit = null;
        eachAccepted(str, lb, regex, function (found) {
            hit = found;
            return true;
        });
        return hit;
    };

    /**
     * Reports whether `str` contains a match of `regex` that satisfies the
     * lookbehind.
     */
    XRegExp.testLb = function (str, lb, regex) {
        return Boolean(XRegExp.execLb(str, lb, regex));
    };

    /**
     * Returns the index of the first match that satisfies the lookbehind,
     * or -1 when there is none.
     */
    XRegExp.searchLb = function (str, lb, regex) {
        var hit = XRegExp.execLb(str, lb, regex);
        if (hit) {
            return hit.index;
        }
        return -1;
    };

    /**
     * Collects the matched text of every match that satisfies the lookbehind.
     *
     * @returns {string[]} Matched substrings in order of appearance.
     */
    XRegExp.matchAllLb = function (str, lb, regex) {
        var collected = [];
        eachAccepted(str, lb, regex, function (found) {
            collected.push(found[0]);
            return false;
        });
        return collected;
    };

    /**
     * Replaces matches of `regex` that satisfy the lookbehind. Only the first
     * such match is replaced unless `regex` is global.
     *
     * @returns {string} `str` with the accepted matches replaced.
     */
    XRegExp.replaceLb = function (str, lb, regex, replacement) {
        var pieces = "";
        var consumed = 0;
        eachAccepted(str, lb, regex, function (found) {
            var text = found[0];
            // The replacement runs on the matched text alone, so it doesn't
            // work correctly if a lookahead in regex looks outside of the match.
            pieces += str.slice(consumed, found.index) + XRegExp.replace(text, regex, replacement);
            consumed = found.index + text.length;
            return !regex.global;
        });
        return pieces + str.slice(consumed);
    };

}(XRegExp));

// DEMO
// Runs each sample through testLb with a "not preceded by #" lookbehind and
// prints "<sample>: <true|false>" into the page, one result per line.
(function () {
    var samples = ["hazcon 0", "hazcon0", "#hazcon0", "#hazcon 0"];
    samples.forEach(function (sample, position) {
        var line = sample + ": " + XRegExp.testLb(sample, "(?<!#)", XRegExp("\\bhazcon\\s*0\\b"));
        if (position === 0) {
            // The first result replaces whatever the body held before.
            document.body.innerHTML = line;
        } else {
            document.body.innerHTML += "<br/>" + line;
        }
    });
}());
<script src="https://cdnjs.cloudflare.com/ajax/libs/xregexp/2.0.0/xregexp-all-min.js"></script>

See JavaScript Regex Lookbehind Redux:

The code above is less than 0.5 KB after minification and gzipping. It provides a collection of functions that make it simple to emulate leading lookbehind:
    XRegExp.execLb
    XRegExp.testLb
    XRegExp.searchLb
    XRegExp.matchAllLb
    XRegExp.replaceLb

Upvotes: 0

Paul Draper
Paul Draper

Reputation: 83393

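Instead of requiring that there not be an octothorp, require that there is either the beginning of the string (^) or a character other than an octothorp ([^#]). Note that the preceding character, when there is one, becomes part of the overall match (it is captured in group 1).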

(^|[^#])hazcon\s?0

Upvotes: 3

Related Questions