Reputation: 132
How can you highlight, in pure JS only, a limited subset of the matches in a piece of text, so that at most x highlights occur per search term?
// Search terms to look for, and the cap on highlights per term.
var matches = ['fox', 'dog'],
    MaxHighlights = 2;
The quick brown fox jumps over the lazy dog but the lazy dog is quick of the mark to catch the brown fox. In general the fox versus the dog is not a good match.
The quick brown fox jumps over the lazy dog but the lazy dog is quick of the mark to catch the brown fox. In general the fox versus the dog is not a good match.
For extra points I'd preferably only highlight one match per sentence.
The quick brown fox jumps over the lazy dog but the lazy dog is quick of the mark to catch the brown fox. In general the fox versus the dog is not a good match.
I was using this as a base for my highlighting attempts: http://www.the-art-of-web.com/javascript/search-highlight
Upvotes: 4
Views: 373
Reputation: 12389
My solution uses replace() with word-boundaries in pattern and global modifier g.
The advantage of replace() is that a callback function can be passed as the replacement. I hope you like it; I found it very interesting, as I haven't done much with JS yet. So please correct any errors you find :)
// test it
// Demo input: the words to highlight, the highlight cap and the sample text.
var WordsToMatch = ['fox', 'dog'],
    MaxHighlights = 2, // no limit = 0
    TestStr = [
        'The quick brown fox jumps over the lazy dog but the lazy dog is ',
        'quick of the mark to catch the brown fox. In general the ',
        'fox versus the dog is not a good match.'
    ].join('');

// Write the highlighted sample text into the page.
document.write(highlight(TestStr, WordsToMatch, MaxHighlights));
// --- JOHNNY 5's WORD HIGHLIGHTER ---
// highlight words in str using a callback function
/**
 * Wrap occurrences of the given words in <b> tags, highlighting at most
 * `limit` occurrences of each word.
 *
 * Words are matched literally (regex metacharacters are escaped),
 * case-insensitively and only as whole words (between word boundaries).
 *
 * @param {string} str - Text to search.
 * @param {string[]} words - Words to highlight; empty words are ignored.
 * @param {number} limit - Maximum highlights per word; 0 or less means no limit.
 * @returns {string} The text with the selected matches wrapped in <b>...</b>.
 */
function highlight (str, words, limit)
{
    var result = str;
    for (var i = 0; i < words.length; i++)
    {
        // Escape regex metacharacters so that e.g. "a.b" does not match "axb"
        // and "c++" does not throw a SyntaxError when compiled.
        var literal = String(words[i]).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
        if (literal === "") {
            // An empty word would match at every word boundary and
            // litter the text with empty <b></b> pairs.
            continue;
        }
        var pattern = new RegExp("\\b" + literal + "\\b", "gi");
        // Number of matches of the current word seen so far.
        var seen = 0;
        result = result.replace(pattern, function (w) {
            seen++;
            return ((limit <= 0) || (seen <= limit)) ? "<b>" + w + "</b>" : w;
        });
    }
    return result;
}
The callback function will return highlighted matches as replacement until the limit is reached.
Output:
The quick brown fox jumps over the lazy dog but the lazy dog is quick of the mark to catch the brown fox. In general the fox versus the dog is not a good match.
EDIT: And now I see, there are extra points available...
For extra points I'd preferably only highlight one match per sentence.
That was a bit more challenging, and I hope it works as it should in most cases. It's not so trivial to determine what a sentence is. I decided to keep it simple and consider the split sequence to be a definable punctuation mark (var sep_punct), followed by one or more whitespace characters, if there's an uppercase letter or a digit ahead.
// Sample input for the per-sentence highlighter.
var WordsToMatch = ['fox', 'dog'],
    TestStr = [
        'The quick brown fox jumps over the lazy dog but the lazy dog is ',
        'quick of the mark to catch the brown fox. In general the ',
        'fox versus the dog is not a good match.'
    ].join('');
// --- JOHNNY 5's FIRST WORD IN SENTENCE HIGHLIGHTER ---
// highlight first occurence of word in each sentence
/**
 * Wrap the first occurrence of each word in every sentence in <b> tags.
 *
 * A sentence boundary is one of the punctuation marks in `sep_punct`,
 * followed by one or more whitespace characters, with an upper-case ASCII
 * letter or a digit ahead. Words are matched literally (regex
 * metacharacters are escaped), case-insensitively and as whole words.
 *
 * @param {string} str - Text to search.
 * @param {string[]} words - Words to highlight; empty words are ignored.
 * @returns {string} The text with the selected matches wrapped in <b>...</b>.
 */
function higlight_first_w_in_sentence(str, words)
{
    // punctuation marks that may end a sentence
    var sep_punct = '[.;?!]';
    var boundary = new RegExp(sep_punct + "\\s+(?=[A-Z0-9])", "g");

    // Remember the separators so the text can be rebuilt unchanged;
    // match() returns null when the text contains no sentence boundary.
    var sep = str.match(boundary) || [];
    var snt = str.split(boundary);

    // Compile one non-global pattern per word up front: without the "g"
    // flag, replace() only touches the first match in a sentence.
    var patterns = [];
    for (var k = 0; k < words.length; k++)
    {
        // escape metacharacters so the word is matched literally
        var literal = String(words[k]).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
        if (literal !== "") {
            patterns.push(new RegExp("\\b" + literal + "\\b", "i"));
        }
    }

    var ret = "";
    for (var i = 0; i < snt.length; i++)
    {
        var sentence = snt[i];
        for (var j = 0; j < patterns.length; j++)
        {
            // $& is the text matched by the whole pattern
            sentence = sentence.replace(patterns[j], "<b>$&</b>");
        }
        ret += sentence;
        // there is exactly one separator between two consecutive sentences
        if (i < sep.length) {
            ret += sep[i];
        }
    }
    return ret;
}
// Demo: write the per-sentence highlighted test string into the page.
document.write(higlight_first_w_in_sentence(TestStr, WordsToMatch));
Output:
The quick brown fox jumps over the lazy dog but the lazy dog is quick of the mark to catch the brown fox. In general the fox versus the dog is not a good match.
Upvotes: 4
Reputation: 14931
I haven't done JavaScript in a while, so this code might seem rusty:
// Demo input. Declared with var: assigning to undeclared names creates
// implicit globals and throws a ReferenceError in strict mode.
var matches = ['fox', 'dog'];
var originalContent = 'The quick brown fox jumps over the lazy dog but the lazy dog is quick of the mark to catch the brown fox. In general the fox versus the dog is not a good match.';

// Write both variants into the page, separated by a line break.
document.write(
    highlight(originalContent, matches, 2)
    + '<br>' +
    preferredHighlight(originalContent, matches, 2)
);
/**
 * Wrap occurrences of the given terms in <b> tags, highlighting at most
 * `max` occurrences of each individual term.
 *
 * Terms are matched literally (regex metacharacters are escaped),
 * case-sensitively and only as whole words.
 *
 * @param {string} input - Text to search.
 * @param {string[]} matches - Terms to highlight; empty terms are ignored.
 * @param {number} max - Maximum number of highlights per term.
 * @returns {string} The text with the selected matches wrapped in <b>...</b>.
 */
function highlight(input, matches, max){
    // Per-term match counters. A prototype-less object is used as the
    // dictionary so that no term can collide with an inherited property.
    var matchesStatistics = Object.create(null);
    var alternatives = [];
    for(var i = 0, c = matches.length; i < c; i++){
        var term = String(matches[i]);
        if(term === ''){
            continue;
        }
        matchesStatistics[term] = 0;
        // escape metacharacters so the term is matched literally
        alternatives.push(term.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));
    }
    // With nothing to search for, the pattern would degenerate to \b(?:)\b,
    // which matches the empty string at every word boundary.
    if(alternatives.length === 0){
        return input;
    }
    var re = new RegExp('\\b(?:' + alternatives.join('|') + ')\\b', 'g'); // Words regex
    return input.replace(re, function(group0){
        matchesStatistics[group0]++;
        return matchesStatistics[group0] > max ? group0 : '<b>' + group0 + '</b>';
    });
}
function preferredHighlight(input, matches, max){
var sentenceRe = new RegExp('[\\s\\S]*?(?:[.?!]|$)', 'g'); // Sentence regex
var wordRe = new RegExp('\\b(?:' + matches.join('|') + ')\\b', 'g'); // Words regex
var highlightedContent = input.replace(sentenceRe, function(sentence){
var matchesStatistics = 0;
modifiedSentence = sentence.replace(wordRe, function(group0){
matchesStatistics++;
if(matchesStatistics > max){
return group0;
}else{
return '<b>' + group0 + '</b>';
}
});
return modifiedSentence;
});
return highlightedContent;
}
Output:
The quick brown fox jumps over the lazy dog but the lazy dog is quick of the mark to catch the brown fox. In general the fox versus the dog is not a good match.
The quick brown fox jumps over the lazy dog but the lazy dog is quick of the mark to catch the brown fox. In general the fox versus the dog is not a good match.
We use `.join('|')` to join the array elements, so for this example our regex will look like `\\b(?:fox|dog)\\b`. We use `\b` to make sure we match only `fox` and not something like `firefox`. Double escaping is needed because the pattern is written as a string literal. And of course we set the `g` modifier to "replace all".
The sentence regex consists of:
`[\\s\\S]*?`: match anything, ungreedily, zero or more times.
`(?:[.?!]|$)`: match either `.`, `?`, `!` or the end of the input.
`g` modifier: match all.
The idea is to create an array to remember how many matches we had for certain words. So when the following code is run in our case:
// Start every search term at zero matches; the terms themselves are the keys.
var matchesStatistics = new Array();
// i and c are declared with var so they do not become implicit globals;
// the length is read once and cached in c.
for(var i = 0, c = matches.length; i < c; i++){
    matchesStatistics[matches[i]] = 0;
}
We would have an array that looks like:
Array(
"fox" => 0,
"dog" => 0
)
We then match our words, and use a function as callback to check how many we have matched and if we should highlight or not.
Basically, we first match each sentence and then replace the words within that sentence. The highlights are limited here too: at most max matches are highlighted per sentence.
References:
Upvotes: 2