\b metacharacter not working correctly

Question

I know this code works and it has before but it now has stopped working. I am working on a person highlighter tool but if I type in a word then type in mark, it highlights the actual mark element. Here is my code:

// Question's original (faulty) highlighter: for every element with class
// "NoteOp", runs a regex replacement of the search term over its innerHTML.
// tagC is the search term immediately followed by a 9-character class name
// (e.g. "testYelColBox").
function Search (tagC) {
var notes = document.getElementsByClassName("NoteOp");
for (var i = 0; i < notes.length; i++) {
var n = notes[i];
var tagOut = tagC
// Split the argument: everything but the last 9 characters is the search
// term, the last 9 characters are the class name.
var tagFront = tagOut.slice(0, -9);
var tagLast = tagOut.slice(-9);
// NOTE(review): inside a string literal "\b" is a backspace character, not
// the regex word-boundary escape; "\\b" would be needed for that.
// NOTE(review): the replacement shown is just "$1" and tagLast is unused;
// presumably the surrounding mark markup was lost when this page was
// converted to text -- confirm against the original post.
// The replacement runs on the serialized HTML, so tag names inside the
// markup are matched as well as the visible text.
n.innerHTML = n.innerHTML.replace(new RegExp("\b(" + tagFront + ")\b", "gim"), "$1");
// Applies bold to every mark element on the page when the Bold flag is set.
if(window.Bold === "Yes") {
    $("mark").css("font-weight", "bold");
}
}
}

tagFront is the search term while tagLast is a class that always has 9 letters. Any problems seen in the coding?

An example of tagC would be:

testYelColBox

...and the text I'm searching looks like this:


This is a test paragraph uses to TeSt filters.
Random words, I need to see if it will mess up mark

Main question: Why does my code mark an HTML element even though my code has a \b metacharacter selector?

trincot · Accepted Answer

Your problem seems to be this:

If you first highlight a word, it works correctly. But now your HTML contains `<mark>` tags, so if you then search a second time with the search word "mark", the tag itself gets a nested `<mark>`, which is undesired and makes your HTML invalid.

Why this happens

The \b escape matches any position in the search string where the character sequence switches from an alphanumerical character to a non-alphanumerical character or vice versa. This means \b also matches the position right after the `<` of `<mark>`, and the position right after the `k` (because of the space that follows).

Solution

Do a controlled replacement by only applying it to text nodes, not to HTML elements. For this you need to iterate over the nodes, check their type, and when they are text nodes, perform the replacement. As the replacement involves the insertion of an HTML element, you should actually split that text node into what comes before, the mark element, and what comes after.

Here is code that does all this:

/**
 * Removes all highlighting from the elements with class "NoteOp" by
 * stripping every opening and closing <mark> tag from their HTML while
 * keeping the text that was inside the tags.
 */
function clear() {
    var notes = document.getElementsByClassName("NoteOp");
    for (var i = 0; i < notes.length; i++) {
        var n = notes[i];
        // Remove all opening/closing mark tags. The \b after "mark" keeps
        // other tag names that merely start with "mark" untouched, and
        // [^>]* skips over any attributes such as the class.
        n.innerHTML = n.innerHTML.replace(/<\/?mark\b[^>]*>/gi, "");
    }
}

/**
 * Highlights every whole-word, case-insensitive occurrence of a search term
 * inside the elements with class "NoteOp" by wrapping it in a <mark> element.
 *
 * Only text nodes that are direct children of a note are searched; all other
 * child nodes are copied unchanged, so tag names and previously inserted
 * <mark> elements are never matched.
 *
 * @param {string} tagC - The search term immediately followed by a
 *     9-character class name for the <mark> element (e.g. "testYelColBox").
 *     Inputs of 9 characters or fewer carry no search term and are ignored.
 */
function highlight(tagC) {
    // Sanity check
    if (tagC.length <= 9) return; // ignore wrong input
    var notes = document.getElementsByClassName("NoteOp");
    // Split into parts before entering loop:
    var tagFront = tagC.slice(0, -9);
    var tagLast = tagC.slice(-9);
    // Escape tagFront characters that could conflict with regex syntax, so
    // the search term is always matched literally:
    tagFront = tagFront.replace(/[.*+?^${}()|[\]\\\/]/g, "\\$&");
    // The backslash must be doubled: "\b" in a string literal is a backspace
    // character, whereas the regex needs the two characters \ and b.
    var regex = new RegExp("\\b(" + tagFront + ")\\b", "gim");
    // Create a template of the highlight that can be cloned
    var mark = document.createElement('mark');
    mark.setAttribute('class', tagLast);
    // Loop over notes
    for (var i = 0; i < notes.length; i++) {
        // Create a span that will have the contents after replacements
        var span = document.createElement('span');
        // Go through all child nodes of this note
        var nodes = notes[i].childNodes;
        for (var j = 0; j < nodes.length; j++) {
            var node = nodes[j];
            if (node.nodeType === 3) {
                // Only if text node, perform replacement
                var parts = node.textContent.split(regex);
                // As regex has a capture group, the matched text is included
                // in the result: odd indexes are matches, even indexes are
                // the text in between.
                for (var k = 0; k < parts.length; k++) {
                    if (k % 2) {
                        // Add highlighted text
                        mark.textContent = parts[k];
                        span.appendChild(mark.cloneNode(true));
                    } else {
                        // Add text part that did not match as such
                        span.appendChild(document.createTextNode(parts[k]));
                    }
                }
            } else {
                // Non-text nodes are just copied as they are
                span.appendChild(node.cloneNode(true));
            }
        }
        // Replace note contents with new contents
        notes[i].innerHTML = span.innerHTML;
    }
    // Setting style for CSS class should happen outside of the loop
    $("mark").css("font-weight", window.Bold === "Yes" ? "bold" : "normal");
}

// Demo wiring: look up the text input and the two buttons by their ids.
var inp = document.querySelector('#inp');
var btnMark = document.querySelector('#mark');
var btnClear = document.querySelector('#clear');

// The Clear button calls clear() directly.
btnClear.onclick = clear;
// The Mark button highlights the typed text, appending the 9-character
// class name that highlight() expects at the end of its argument.
btnMark.onclick = function () {
    var tagC = inp.value + 'YelColBox';
    highlight(tagC);
};

Type text to be highlighted and press Mark:






This is a test paragraph uses to TeSt filters.
Random words, I need to see if it will mess up mark

\b metacharacter not working correctly

Answers (1)

Why this happens

Solution

Related Questions