Reputation: 1063
I have put a general title for a good search, but my question is a bit more specific.
I have got one array:
// Keywords; keywords[i] is paired with terms[i] in the parallel `terms` array.
var keywords = [
  'Anglo-Saxon English',
  'American English',
  'British English',
  'Canadian English',
  'Fast British English',
  'Austrian German',
  'American Football',
  'British English',
  'Fast British English',
  'Blue Jeep',
  'Hot Summer',
  'Mild Summer',
  'Hot Brazilian Summer'
];
and another array:
// Terms; terms[i] is the term paired with keywords[i] in the parallel `keywords` array.
var terms = [
  'english',
  'english',
  'english',
  'english',
  'english',
  'german',
  'football',
  'british english',
  'british english',
  'jeep',
  'summer',
  'summer',
  'summer'
];
Every keyword corresponds to the term at the same index, and every keyword contains its respective term. For example, the 'Anglo-Saxon English' keyword corresponds to the 'english' term, the 'American Football' keyword corresponds to the 'football' term, etc. However, the keywords array contains some duplicates. In this example the duplicate keywords are 'British English', which corresponds to the 'english' term as well as the 'british english' term, and 'Fast British English', which corresponds to the 'english' term as well as the 'british english' term. There can be any number of duplicates (I have used 2 for simplicity). All keywords have their respective terms in the same order in the arrays, therefore the length of both arrays is the same.
My question is: how can I keep only one element among the duplicates, namely the one with the more specific corresponding term? For instance, 'british english' is a more specific term than just 'english', therefore I want to remove the duplicate 'British English' keyword (keywords[2]) together with its respective term 'english' (terms[2]) from both arrays, and keep only the 'British English' keyword (keywords[7]) with its term 'british english' (terms[7]).
UPDATE: With the solution offered by Tibos below, I came up with a working fiddle: http://jsfiddle.net/ZqEhQ/. However, here 'Fast British English' and 'British English' still get picked up by the 'Languages' category instead of being picked up by the 'Car' category, which has the more specific 'british english' term versus just the 'english' term of the 'Languages' category. Any ideas?
Upvotes: 0
Views: 402
Reputation: 27853
Having items depend on one another's order in different arrays is generally a bad idea because it is very difficult to maintain. I would suggest using a different structure for your data:
// Sample records, written as compact [keyword, term, category] rows and
// expanded into { keyword, term, category } objects.
var data = [
  ['Anglo-Saxon English', 'english', 'Language'],
  ['American English', 'english', 'Language'],
  ['Fast British English', 'english', 'Sport'],
  ['British English', 'english', 'Language'],
  ['British English', 'british english', 'Language'],
  ['Fast British English', 'british english', 'Sport'],
  ['Canadian English', 'french', 'Sport']
].map(function (row) {
  return { keyword: row[0], term: row[1], category: row[2] };
});
Since your final data contains unique keywords, i would use yet another structuring of the data to hold it:
Expected output:
// Expected result: each unique keyword mapped to its most specific term.
// (Properties must be comma-separated for this to be a valid object literal.)
var uniques = {
  "American English": "english",
  "Anglo-Saxon English": "english",
  "British English": "british english",
  "Canadian English": "french",
  "Fast British English": "british english"
};
Some way to get from input to expected output:
// Map each keyword to the most specific term seen for it so far.
var uniques = {};
data.forEach(function (entry) {
  var currentTerm = uniques[entry.keyword];
  if (!isMoreSpecific(entry.term, currentTerm)) {
    return; // the stored term is at least as specific; keep it
  }
  uniques[entry.keyword] = entry.term;
});
/**
 * Decides whether `term` should replace `reference`.
 *
 * @param {string} term - Candidate term.
 * @param {string} [reference] - Term currently stored, if any.
 * @returns {boolean} true when `reference` is falsy (nothing stored yet) or
 *   when `term` contains `reference` as a substring.
 */
function isMoreSpecific(term, reference) {
  if (!reference) {
    return true;
  }
  return term.indexOf(reference) >= 0;
}
You can obviously change the isMoreSpecific function if you don't agree with my definition, or if your logic for defining specificity changes. You could even inline it, though I prefer the function for clarity in this case.
Note: the solution above can be quite easily adapted to work with the two arrays you have originally. Simply iterate using a for loop over one array to build the uniques object, then rebuild the arrays from it.
Solution for category inclusion with the keyword:
// Key the lookup on the (keyword, category) pair, serialized to a JSON string,
// so the same keyword in different categories is tracked separately.
var uniques = {};
data.forEach(function (entry) {
  var pairKey = JSON.stringify({key:entry.keyword, cat:entry.category});
  var storedTerm = uniques[pairKey];
  if (!isMoreSpecific(entry.term, storedTerm)) {
    return;
  }
  uniques[pairKey] = entry.term;
});

// Decode the serialized keys back into a keyword -> category map.
var keywordcategory = {};
for (var serialized in uniques) {
  var obj = JSON.parse(serialized);
  var keywordName = obj.key;
  keywordcategory[keywordName] = obj.cat;
}
DEMO: http://jsbin.com/ODoDIXi/1/edit
If you can assume that the same keyword is only in one category, there is no need for serialization:
// Map each keyword to { term, category } for the most specific term seen,
// assuming a keyword only ever belongs to one category.
var uniques = {};
data.forEach(function(item){
  // No entry exists yet the first time a keyword is seen, so the stored term
  // must be read defensively; isMoreSpecific treats a falsy reference as
  // "nothing stored yet".
  var existing = uniques[item.keyword];
  var existingTerm = existing ? existing.term : undefined;
  if (isMoreSpecific(item.term, existingTerm)) {
    uniques[item.keyword] = { term : item.term, category : item.category };
  }
});
// you can now remove the unnecessary term information from the uniques map and keep just the category:
for (var key in uniques) {
  uniques[key] = uniques[key].category;
}
Upvotes: 2
Reputation: 3256
Just as Tibos said, you need to restructure your data. It's not good to have 2 arrays.
// Sample records, written as compact [keyword, term] rows and expanded into
// { keyword, term } objects.
var data = [
  ['Anglo-Saxon English', 'english'],
  ['British English', 'english'],
  ['British English', 'british english'],
  ['Fast British English', 'british english'],
  ['Canadian English', 'french']
].map(function (row) {
  return { keyword: row[0], term: row[1] };
});
Add a unique data array:
// Result array: receives one entry per unique keyword (pushed in STEP 2).
var uniqueData = [];
STEP 1 - Extract all keywords into a uniqueKeywords array
// Collect each distinct keyword once, in order of first appearance.
var uniqueKeywords = data.reduce(function (seen, entry) {
  var alreadyListed = seen.indexOf(entry.keyword) !== -1;
  if (!alreadyListed) {
    seen.push(entry.keyword);
  }
  return seen;
}, []);
STEP 2- For each keyword find all corresponding data objects, and only add most relevant one to uniqueData
/**
 * Picks the entry with the longest `term` string.
 *
 * @param {Array<{term: string}>} array - Candidate entries.
 * @returns {Object|undefined} The first entry whose term is strictly longer
 *   than every earlier one (ties keep the earlier entry); `undefined` for an
 *   empty array.
 */
var extractMostRelevant = function (array) {
  var first = array[0];
  return array.reduce(function (best, candidate) {
    // The first element is the starting value, so it is never compared.
    if (candidate === first) {
      return best;
    }
    return candidate.term.length > best.term.length ? candidate : best;
  }, first);
};
// For every distinct keyword, gather its entries and keep only the most relevant one.
uniqueKeywords.forEach(function (keyword) {
  var matches = data.filter(function (entry) {
    return entry.keyword === keyword;
  });
  uniqueData.push(extractMostRelevant(matches));
});
There you go, now you have 2 arrays: data and uniqueData.
Upvotes: 1
Reputation: 215059
I'm not sure I understood correctly, but still...
Let's start with this small function:
/**
 * Drops every entry that is contained, as a substring, in some other
 * (non-equal) entry of the same array.
 *
 * @param {string[]} ary - Strings to filter.
 * @returns {string[]} New array with only the entries not contained in another entry.
 */
function removeLessSpecific(ary) {
  var isCoveredByAnother = function (candidate) {
    return ary.some(function (other) {
      // Loose inequality is kept on purpose to match the original comparison.
      return other != candidate && other.indexOf(candidate) !== -1;
    });
  };
  return ary.filter(function (candidate) {
    return !isCoveredByAnother(candidate);
  });
}
When applied to say
["american football","english","british english","football","german"]
it returns only more specific or "standalone" terms
["american football","british english","german"]
Now let's convert your arrays into a mapping structure:
// Group the parallel arrays: each entry of `keywords` maps to the list of
// `terms` entries found at the same indexes.
// Declared with `var` so it is not an implicit global (which throws in strict
// mode / ES modules).
var mapping = {};
keywords.forEach(function(kw, i) {
  // Own-property check so names inherited from Object.prototype (e.g.
  // "constructor") are not mistaken for an existing list.
  if (!Object.prototype.hasOwnProperty.call(mapping, kw)) {
    mapping[kw] = [];
  }
  mapping[kw].push(terms[i]);
});
The mapping will be like this:
{
"Anglo-Saxon English":["english"],
"American English":["english"],
"British English":["english","british english"], etc
Finally, iterate over the mapping, remove less specific keywords and populate new arrays:
// Rebuild two parallel arrays from `mapping`, keeping only the most specific
// values for each key.
// NOTE: the naming follows this answer's convention, not the question's:
// `newTerms` receives the keys of `mapping` (entries of the original
// `keywords` array) and `newKw` receives the surviving values (entries of the
// original `terms` array).
// Declared with `var` so neither is an implicit global (which throws in
// strict mode / ES modules).
var newTerms = [];
var newKw = [];
Object.keys(mapping).forEach(function(term) {
  var kwords = mapping[term];
  removeLessSpecific(kwords).forEach(function(kw) {
    newTerms.push(term);
    newKw.push(kw);
  });
});
As a side note, your naming looks a bit confusing to me. In your example, the first array must be "terms" (=proper names) and the second one - "keywords".
Upvotes: 1
Reputation: 3196
Original Array : [1,3,2,1,4,5,6,4,3,5,6,2,3,4,1,4,6,4,10,3,10,"a","a"]
Duplicates removed : [1,10,2,3,4,5,6,"a"]
/**
 * Returns a new array holding each distinct value of this array once.
 *
 * The result is in the default `Array.prototype.sort` order, which compares
 * elements as strings (so 10 sorts before 2). The receiver is left untouched:
 * sorting is done on a copy.
 *
 * NOTE(review): this extends a native prototype with an enumerable property,
 * which is generally discouraged; kept because callers invoke it as a method.
 *
 * @returns {Array} Sorted array without duplicate values.
 */
Array.prototype.removeDuplicates = function () {
  var sorted = this.slice().sort();
  var temp = [];
  for (var i = 0; i < sorted.length; i++) {
    // Skip an element only when a real next element equals it; the bounds
    // check stops the last element from being compared with `undefined`.
    if (i + 1 < sorted.length && sorted[i] === sorted[i + 1]) {
      continue;
    }
    temp.push(sorted[i]);
  }
  return temp;
};
OR
var duplicatesArray = ['mike', 'shibu', 'shibu', 'alex'];
// Keep a value only at the position where it first occurs in the array.
var uniqueArray = duplicatesArray.filter(function (value, index) {
  var firstIndex = duplicatesArray.indexOf(value);
  return firstIndex === index;
});
Upvotes: 0