Reputation: 301
I'm building a text-search feature for a web browser (like ⌘ + F), and I'm trying to make the search diacritic-insensitive, but I can't figure out how to do it.
So basically if there's text on the page like this:
يُرِدُ
and I search:
يرد
it should still work.
This is the plain JavaScript that I found and am currently using:
// Running total of highlighted matches. It is reset to 0 when highlights are
// removed, increased as each text node is highlighted, and used to derive the
// "SEARCH WORD<n>" ids of the highlight spans.
var uiWebview_SearchResultCount = 0;
/**
 * Recursively wraps every occurrence of `keyword` in the text nodes under
 * `element` in a <span class="uiWebviewHighlight">.
 *
 * Every span receives the id "SEARCH WORD<n>". Within one text node the ids
 * decrease in document order, starting at (uiWebview_SearchResultCount - 1)
 * after the node's matches have been added to the counter.
 *
 * @param {Node} element - Node to search; falsy values are ignored.
 * @param {string} keyword - Text to find. It is compared against the
 *     lower-cased node text, so it must itself be lower-case to match.
 *     An empty keyword is ignored.
 */
function uiWebview_HighlightAllOccurencesOfStringForElement(element,keyword) {
  // indexOf("") is always 0, so an empty keyword would "match" forever and
  // the search loop below would never terminate.
  if (!element || !keyword) return;

  if (element.nodeType === 3) { // Text node
    var spans = [];
    var node = element;
    while (true) {
      var value = node.nodeValue;
      var idx = value.toLowerCase().indexOf(keyword);
      if (idx < 0) break; // no further match in the remaining text

      // Wrap the matched characters (original casing) in a highlight span.
      var span = document.createElement("span");
      span.appendChild(document.createTextNode(value.substr(idx, keyword.length)));
      // Zero-width joiner after the match -- presumably so cursive letters
      // keep joining across the span boundary; TODO confirm.
      span.appendChild(document.createTextNode("\u200D"));
      span.setAttribute("class","uiWebviewHighlight");
      span.style.backgroundColor="#007DC8a3";
      span.style.borderRadius="3px";

      // Split the node: keep the text before the match, then insert the span
      // and a new text node holding everything after the match.
      var rest = document.createTextNode(value.substr(idx + keyword.length));
      node.deleteData(idx, value.length - idx);
      var next = node.nextSibling;
      node.parentNode.insertBefore(span, next);
      node.parentNode.insertBefore(rest, next);

      spans.push(span);
      node = rest; // continue searching in the remainder
    }

    // Number the spans only once the total for this node is known, so no
    // separate counting pass over the text is needed.
    uiWebview_SearchResultCount += spans.length;
    for (var k = 0; k < spans.length; k++) {
      spans[k].setAttribute("id", "SEARCH WORD" + (uiWebview_SearchResultCount - 1 - k));
    }
  } else if (element.nodeType === 1) { // Element node
    // Skip elements hidden through an inline display:none and <select> lists.
    if (element.style.display != "none" && element.nodeName.toLowerCase() != 'select') {
      // Walk backwards: spans and text nodes are inserted after the child
      // being processed, so they are never visited again.
      for (var i = element.childNodes.length - 1; i >= 0; i--) {
        uiWebview_HighlightAllOccurencesOfStringForElement(element.childNodes[i], keyword);
      }
    }
  }
}
/**
 * Main entry point to start a search: clears any previous highlights, then
 * highlights every case-insensitive occurrence of `keyword` under <body>.
 *
 * @param {string} keyword - Text to search for. An empty or missing keyword
 *     only clears the existing highlights.
 */
function uiWebview_HighlightAllOccurencesOfString(keyword) {
  uiWebview_RemoveAllHighlights();
  // An empty keyword matches at every position (indexOf("") === 0), which
  // would make the highlighter loop forever; null/undefined has no
  // toLowerCase(). Clearing the old highlights is all there is to do.
  if (!keyword) return;
  uiWebview_HighlightAllOccurencesOfStringForElement(document.body, keyword.toLowerCase());
}
/**
 * Helper: recursively unwraps highlight spans found at or below `element`.
 *
 * A span whose class attribute is exactly "uiWebviewHighlight" is replaced by
 * its first child; any other child of the span is discarded together with it.
 * A parent that lost at least one span is normalized afterwards.
 *
 * @param {Node} element - Node to clean; falsy values and non-element nodes
 *     are ignored.
 * @returns {boolean} true only when `element` itself was a highlight span and
 *     has been removed, false otherwise.
 */
function uiWebview_RemoveAllHighlightsForElement(element) {
  // Only element nodes can be, or contain, highlight spans.
  if (!element || element.nodeType !== 1) {
    return false;
  }

  if (element.getAttribute("class") === "uiWebviewHighlight") {
    // Move the matched text back in front of the span, then drop the span.
    var matched = element.removeChild(element.firstChild);
    var holder = element.parentNode;
    holder.insertBefore(matched, element);
    holder.removeChild(element);
    return true;
  }

  // Walk the children from last to first: unwrapping a span replaces it in
  // place, so the indexes still to be visited stay valid.
  var unwrappedAny = false;
  var pos = element.childNodes.length;
  while (pos-- > 0) {
    if (uiWebview_RemoveAllHighlightsForElement(element.childNodes[pos])) {
      unwrappedAny = true;
    }
  }

  // Re-join the text nodes that the highlighter had split apart.
  if (unwrappedAny) {
    element.normalize();
  }
  return false;
}
// Main entry point to remove the highlights: clears the match counter and
// unwraps every highlight span found under <body>.
function uiWebview_RemoveAllHighlights() {
// Reset the running total so the next search starts counting from zero.
uiWebview_SearchResultCount = 0;
// Recursively replace each highlight span with the text it wrapped.
uiWebview_RemoveAllHighlightsForElement(document.body);
}
/**
 * Scrolls a highlighted search result into view.
 *
 * The target is the element whose id is "SEARCH WORD" followed by
 * (uiWebview_SearchResultCount - idx). Nothing happens when no such element
 * exists.
 *
 * @param {number} idx - Result number, subtracted from the current match
 *     count to obtain the numeric part of the span id.
 */
function uiWebview_ScrollTo(idx) {
  var target = document.getElementById("SEARCH WORD" + (uiWebview_SearchResultCount - idx));
  if (!target) {
    return;
  }
  target.scrollIntoView();
}
It also has a problem: when the search term is only part of a word, the highlighted part is visually split off from the rest of the word.
I found this, and it does exactly what I'm looking for (including diacritic-insensitive search), but it is written in jQuery and I'm not sure how to port it into my code. How would I implement it?
Upvotes: 1
Views: 134
Reputation: 196296
Something like this could help:
/**
 * Strips Arabic diacritical marks from a string.
 *
 * The text is first put into Unicode NFD form so that precomposed letters are
 * split into a base letter plus combining marks; the marks are then removed.
 * The returned string therefore stays in NFD form.
 *
 * @param {string} [text=''] - Text to clean.
 * @returns {string} The text without the combining marks U+064B–U+065F and
 *     without the superscript alef U+0670.
 */
function removeArabicDiacritics(text = '') {
  return text
    .normalize('NFD')
    // U+064B–U+065F: all combining marks of the Arabic block (fathatan ...
    // wavy hamza below); U+0670: superscript (dagger) alef.
    .replace(/[\u064B-\u065F\u0670]/g, '');
}
// Sample data: the text from the question, with and without vowel marks.
const withDiacritics = 'يُرِدُ';
const noDiacritics = 'يرد';
/**
 * Tells whether `text` contains `query` once Arabic diacritics have been
 * removed from both strings.
 *
 * @param {string} text - Text to search in.
 * @param {string} query - Text to look for.
 * @returns {boolean} true when the cleaned query is a substring of the
 *     cleaned text.
 */
function insensitiveArabicSearch(text, query) {
  // Clean both sides the same way, text first, then do a plain substring test.
  const [haystack, needle] = [text, query].map((value) => removeArabicDiacritics(value));
  return haystack.includes(needle);
}
// Demo: run the diacritic-insensitive search on the sample strings and print
// the outcome as Yes/No.
const found = insensitiveArabicSearch(withDiacritics, noDiacritics);
console.log(`Does ${withDiacritics} contain ${noDiacritics} ?`, found ? 'Yes' : 'No');
Upvotes: 1