enigrify
enigrify

Reputation: 301

Javascript diacritic-insensitive search

I'm making a text-search feature for a web browser (like ⌘ + F), and I'm trying to make the search diacritic-insensitive, but I couldn't figure out how to do it.

So basically if there's text on the page like this:

يُرِدُ

and I search:

يرد

it should still work.

This is the pure javascript that I found and am currently using:

// Running total of matches found by the current search. The highlight pass
// adds to it and derives span ids from it, uiWebview_RemoveAllHighlights
// resets it to 0, and uiWebview_ScrollTo reads it to locate a span by id.
var uiWebview_SearchResultCount = 0;

/**
 * Recursively wraps every occurrence of `keyword` found in the text nodes
 * under `element` in a highlighted <span class="uiWebviewHighlight">.
 *
 * Matching lowercases the node text, so `keyword` must already be lowercase.
 * Each span receives the id "SEARCH WORD<n>"; within one text node the ids
 * count down from the running total in uiWebview_SearchResultCount, which is
 * increased by the number of matches found in that node.
 *
 * @param {Node} element - Node to process; falsy values are ignored.
 * @param {string} keyword - Lowercase text to look for. An empty keyword is
 *     ignored, because it would match at index 0 forever.
 */
function uiWebview_HighlightAllOccurencesOfStringForElement(element,keyword) {
    // Guard against the endless loop an empty needle would cause below.
    if (!element || !keyword) {
        return;
    }

    if (element.nodeType === 3) {        // Text node
        // First pass: count the matches on plain strings (no DOM allocation)
        // so that the ids can be handed out in descending order afterwards.
        let matchCount = 0;
        let remaining = element.nodeValue;
        while (true) {
            const at = remaining.toLowerCase().indexOf(keyword);
            if (at < 0) break;
            matchCount++;
            remaining = remaining.slice(at + keyword.length);
        }

        uiWebview_SearchResultCount += matchCount;
        let nextId = uiWebview_SearchResultCount;

        // Second pass: split the text node around each match and wrap the match.
        let node = element;
        while (true) {
            const value = node.nodeValue;
            const idx = value.toLowerCase().indexOf(keyword);
            if (idx < 0) break;             // no further match in this node

            // One SPAN per match: the matched text followed by U+200D (zero
            // width joiner). NOTE(review): the joiner presumably keeps cursive
            // shaping intact across the span boundary - confirm.
            const span = document.createElement("span");
            span.appendChild(document.createTextNode(value.slice(idx, idx + keyword.length)));
            span.appendChild(document.createTextNode("\u200D"));

            span.setAttribute("class","uiWebviewHighlight");
            span.style.backgroundColor="#007DC8a3";
            span.style.borderRadius="3px";

            nextId--;
            span.setAttribute("id", "SEARCH WORD"+nextId);

            // Everything after the match moves into a new text node, which
            // becomes the node scanned by the next iteration.
            const tail = document.createTextNode(value.slice(idx + keyword.length));
            node.deleteData(idx, value.length - idx);

            const next = node.nextSibling;
            node.parentNode.insertBefore(span, next);
            node.parentNode.insertBefore(tail, next);
            node = tail;
        }
    } else if (element.nodeType === 1) { // Element node
        if (element.style.display !== "none" && element.nodeName.toLowerCase() !== 'select') {
            // Walk the children backwards: highlighting inserts new siblings
            // after the current child, which must not shift unvisited indexes.
            for (let i = element.childNodes.length - 1; i >= 0; i--) {
                uiWebview_HighlightAllOccurencesOfStringForElement(element.childNodes[i],keyword);
            }
        }
    }
}

/**
 * Main entry point to start a search: clears the highlights of any previous
 * search, then highlights every occurrence of `keyword` under document.body.
 * The keyword is lowercased before it is handed to the recursive helper.
 *
 * @param {string} keyword - Text to search for. A null, undefined or empty
 *     keyword only clears the existing highlights.
 */
function uiWebview_HighlightAllOccurencesOfString(keyword) {
    uiWebview_RemoveAllHighlights();
    // An empty needle matches at index 0 of every string, so there is nothing
    // meaningful to highlight; a missing keyword cannot be lowercased at all.
    if (keyword == null || keyword === "") {
        return;
    }
    uiWebview_HighlightAllOccurencesOfStringForElement(document.body, keyword.toLowerCase());
}

/**
 * Helper that recursively removes the highlight spans in an element and its
 * children.
 *
 * A span whose class attribute is exactly "uiWebviewHighlight" is unwrapped:
 * its first child is moved in front of it and the span (with anything still
 * inside it) is removed. Any other element is searched child by child and,
 * if one of its direct children was unwrapped, normalized afterwards.
 *
 * @param {Node} element - Node to clean; falsy values and non-element nodes
 *     are left alone.
 * @returns {boolean} true only when `element` itself was a highlight span
 *     that got unwrapped; false in every other case.
 */
function uiWebview_RemoveAllHighlightsForElement(element) {
    // Only element nodes can be, or contain, highlight spans.
    if (!element || element.nodeType !== 1) {
        return false;
    }

    if (element.getAttribute("class") === "uiWebviewHighlight") {
        // Lift the matched text back out in front of the span, then drop the span.
        const matchedText = element.removeChild(element.firstChild);
        const container = element.parentNode;
        container.insertBefore(matchedText, element);
        container.removeChild(element);
        return true;
    }

    // Visit children last-to-first because unwrapping changes the child list.
    let unwrappedAny = false;
    let i = element.childNodes.length;
    while (i-- > 0) {
        unwrappedAny = uiWebview_RemoveAllHighlightsForElement(element.childNodes[i]) || unwrappedAny;
    }
    if (unwrappedAny) {
        element.normalize();
    }
    return false;
}

// The main entry point to remove the highlights: resets the global match
// counter to 0, then removes every highlight span found under document.body.
function uiWebview_RemoveAllHighlights() {
    uiWebview_SearchResultCount = 0;
    uiWebview_RemoveAllHighlightsForElement(document.body);
}

/**
 * Scrolls a highlighted match into view.
 *
 * The target is the element whose id is "SEARCH WORD" followed by
 * (uiWebview_SearchResultCount - idx); nothing happens when no such element
 * exists.
 *
 * @param {number} idx - Value subtracted from the current match total to
 *     obtain the numeric part of the id.
 */
function uiWebview_ScrollTo(idx) {
    const target = document.getElementById("SEARCH WORD" + (uiWebview_SearchResultCount - idx));
    if (!target) {
        return;
    }
    target.scrollIntoView();
}

It also has the problem of splitting words apart when the search term is only part of a word.

I found this, and it does exactly what I'm looking for (including diacritic-insensitive search), but it's written in jQuery and I'm not sure how to integrate it into my code. How would I implement it?

Upvotes: 1

Views: 134

Answers (1)

Gabriele Petrioli
Gabriele Petrioli

Reputation: 196296

Something like this could help:

/**
 * Returns `text` with Arabic diacritical marks removed.
 *
 * The text is first normalized to NFD so that precomposed letters are split
 * into a base letter plus combining marks; the marks are then stripped.
 * Covered combining marks:
 *   U+0610-U+061A  honorific and small-letter signs
 *   U+064B-U+065F  tanwin, harakat, shadda, sukun and the marks after them
 *   U+0670         superscript alef
 *   U+06D6-U+06DC, U+06DF-U+06E4, U+06E7, U+06E8, U+06EA-U+06ED
 *                  Quranic annotation marks
 *
 * @param {string} [text=''] - Text to clean.
 * @returns {string} The NFD-normalized text without those marks.
 */
function removeArabicDiacritics(text = '') {
  return text
    .normalize('NFD')
    .replace(/[\u0610-\u061A\u064B-\u065F\u0670\u06D6-\u06DC\u06DF-\u06E4\u06E7\u06E8\u06EA-\u06ED]/g, '');
}

// Sample inputs for the demo: one string written with Arabic diacritics and
// one written without any.
const withDiacritics = 'يُرِدُ';
const noDiacritics = 'يرد';


/**
 * Tells whether `text` contains `query` once both have been passed through
 * removeArabicDiacritics.
 *
 * @param {string} text - Text to search in.
 * @param {string} query - Text to search for.
 * @returns {boolean} true when the cleaned text includes the cleaned query.
 */
function insensitiveArabicSearch(text, query) {
  return removeArabicDiacritics(text).includes(removeArabicDiacritics(query));
}

// Demo: search the sample text for the sample query and print the outcome.
const found = insensitiveArabicSearch(withDiacritics, noDiacritics);

console.log(`Does ${withDiacritics} contain ${noDiacritics} ?`, found ? 'Yes' : 'No');

Upvotes: 1

Related Questions