Unknown Coder
Unknown Coder

Reputation: 6741

Advanced JavaScript RegExp Replacement Within HTML tags

I have javascript code that works pretty well like:

// Matches every occurrence of the literal text "MyName" (global flag).
var rgx = /MyName/g;
// The body's markup as a single string -- tags and attribute values included.
var curInnerHTML = document.body.innerHTML;
// The replacement runs over that whole string, so matches inside attribute
// values are rewritten just like matches in text content.
curInnerHTML = curInnerHTML.replace(rgx, "<span><span class='myName'>MyNameReplace</span></span>");

The problem is that it's matching the regex even where the text is contained within HTML attributes and the like. How can I modify the regex so that it will only match within the text content of the HTML? For example, in this string

    <div class="someclass" title="MyName">
MyName
</div>

it currently results like (note the change in the title attribute):

        <div class="someclass" title="<span><span class='myName'>MyNameReplace</span></span>">
<span><span class='myName'>
    MyNameReplace</span></span>
    </div>

But I need it to be (leave the title attribute untouched):

    <div class="someclass" title="MyName">
<span><span class='myName'>MyNameReplace</span></span>
</div>

Upvotes: 0

Views: 250

Answers (1)

T.J. Crowder
T.J. Crowder

Reputation: 1074979

Your best bet, and it's a lot easier than it sounds, is not to try to use regex to parse HTML, but to take advantage of the fact that the browser has already parsed it into a DOM, and recursively process the text nodes.

Here's an off-the-cuff example:

// We use this div's `innerHTML` to parse the markup of each replacement.
// It is a detached scratch element shared by every call to `doReplacement`.
const div = document.createElement('div');

/**
 * Recursive-descent function that processes all text nodes within the
 * node you give it and its descendants, replacing each text node with the
 * DOM produced by running `rex` / `text` over its (HTML-escaped) content.
 *
 * Relies on the shared scratch element `div` and on `insertChildrenBefore`.
 *
 * @param {Node} node - Element or text node to process; other node types
 *     are ignored.
 * @param {RegExp|string} rex - Pattern handed to `String.prototype.replace`.
 *     Note that it is applied *after* `&` and `<` have been escaped, so it is
 *     matched against the escaped text.
 * @param {string} text - Replacement markup. It is parsed via `innerHTML`,
 *     so it must be trusted HTML.
 * @returns {void}
 */
function doReplacement(node, rex, text) {
    // What kind of node did we get?
    switch (node.nodeType) {
        case Node.ELEMENT_NODE:
            // Probably best to leave `script` elements alone.
            // You'll probably find you want to add to this list
            // (`object`, `applet`, `style`, ...)
            if (node.nodeName.toUpperCase() !== "SCRIPT") {
                // It's an element we want to process, start with its
                // *last* child and work backward, since part of what
                // we're doing inserts into the DOM.
                let sibling;
                // `child` is reassigned on every iteration, so it has to be
                // declared with `let`; a `const` here throws a TypeError as
                // soon as the loop tries to advance.
                for (let child = node.lastChild; child; child = sibling) {
                    // Before we change this node, grab a reference to the
                    // one that precedes it
                    sibling = child.previousSibling;

                    // Recurse
                    doReplacement(child, rex, text);
                }
            }
            break;
        case Node.TEXT_NODE:
            // A text node -- let's do our replacements!
            // The first two deal with the fact that the text node
            // may have less-than symbols or ampersands in it.
            // The third, of course, does your replacement.
            div.innerHTML = node.nodeValue
                                .replace(/&/g, "&amp;")
                                .replace(/</g, "&lt;")
                                .replace(rex, text);

            // Now, the `div` has real live DOM elements for the replacement.
            // Insert them in front of this text node...
            insertChildrenBefore(div, node);
            // ...and remove the text node.
            node.parentNode.removeChild(node);
            break;
    }
}

/**
 * Inserts all of the children of the given container in front of the
 * given reference node, keeping their original order.
 *
 * @param {Node} container - Node whose children are inserted.
 * @param {Node} refNode - Reference node; its `parentNode` receives the
 *     children, each placed immediately before `refNode`.
 * @returns {void}
 */
function insertChildrenBefore(container, refNode) {
    let sibling;
    const parent = refNode.parentNode;
    // `child` is reassigned on every iteration, so it has to be declared
    // with `let`; a `const` here throws a TypeError once the loop advances.
    for (let child = container.firstChild; child; child = sibling) {
        // Grab the next child *before* inserting: the insert changes
        // what `child.nextSibling` refers to.
        sibling = child.nextSibling;
        parent.insertBefore(child, refNode);
    }
}

Which you'd call like this:

// Process everything under <body>: each "MyName" found in a text node is
// replaced with the given markup; attribute values are never touched.
doReplacement(
    document.body,
    /MyName/g,
    "<span><span class='myName'>MyNameReplace</span></span>"
);

Live Example:

// We use this div's `innerHTML` to parse the markup of each replacement.
// It is a detached scratch element shared by every call to `doReplacement`.
const div = document.createElement('div');

/**
 * Walks `node` and everything beneath it, swapping each text node for the
 * DOM that results from applying `rex` / `text` to its escaped content.
 *
 * Uses the shared scratch element `div` and `insertChildrenBefore`.
 *
 * @param {Node} node - Element or text node to process; any other node
 *     type is left alone.
 * @param {RegExp|string} rex - Pattern passed to `String.prototype.replace`
 *     (applied after `&` and `<` have been escaped).
 * @param {string} text - Replacement markup, parsed through `innerHTML`.
 * @returns {void}
 */
function doReplacement(node, rex, text) {
    const kind = node.nodeType;

    if (kind === Node.TEXT_NODE) {
        // Escape ampersands and less-than signs first so the node's own
        // text survives the trip through `innerHTML`, then apply the
        // caller's replacement.
        const escaped = node.nodeValue
            .replace(/&/g, "&amp;")
            .replace(/</g, "&lt;");
        div.innerHTML = escaped.replace(rex, text);

        // The scratch div now holds real nodes: put them where the text
        // node is, then drop the text node itself.
        insertChildrenBefore(div, node);
        node.parentNode.removeChild(node);
        return;
    }

    // Only elements are descended into, and `script` is skipped outright
    // (you will likely want to exclude `style`, `object`, ... as well).
    if (kind !== Node.ELEMENT_NODE || node.nodeName.toUpperCase() === "SCRIPT") {
        return;
    }

    // Go from the last child back to the first. The previous sibling is
    // captured before recursing because processing a text child removes it
    // from the DOM.
    let current = node.lastChild;
    while (current) {
        const previous = current.previousSibling;
        doReplacement(current, rex, text);
        current = previous;
    }
}

/**
 * Places each child of `container`, in order, directly before `refNode`.
 *
 * @param {Node} container - Node whose children are inserted.
 * @param {Node} refNode - Reference node; the children go into its
 *     `parentNode`, immediately ahead of it.
 * @returns {void}
 */
function insertChildrenBefore(container, refNode) {
    const target = refNode.parentNode;
    let current = container.firstChild;
    while (current) {
        // Remember the following child before inserting this one; the
        // insert changes this node's sibling links.
        const upcoming = current.nextSibling;
        target.insertBefore(current, refNode);
        current = upcoming;
    }
}

// Run the replacement over <body> 800 ms after this script executes.
// NOTE(review): the delay is presumably there so the unmodified text is
// visible before it changes -- the source does not say why.
setTimeout(() => {
    doReplacement(
        document.body,
        /MyName/g,
        "<span><span class='myName'>MyNameReplace</span></span>"
    );
}, 800);
<p>MyName</p>
<p>This is MyName in a sentence.</p>
<p>This is <strong>MyName nested</strong></p>
<p>How 'bout <strong><em>making MyName nested more deeply</em></strong></p>
<p>This is MyName in an element with &lt; and &amp; in it.</p>

Upvotes: 3

Related Questions