Reputation: 37
If the following represents my array,
1 UserA | Bob Smith | 12345 | hello
2 UserA | Bob Smith | 12345 |
3 UserA | Bob Smith | 123 |
4 UserA | Bob Smith
5 UserB | Bob Smith | 12333 | hello
and I have the following object to compare with,:
UserA | Bob Smith | 2222
I'd like it to "best match" with the 4th row on my user array.
UserA | Bob Smith | 2222 | hello
also matches up with the 4th row.
What can I do to get the best match?
I can loop around and do a if-else but this seems very dirty hoping someone has a clever solution.
My current approach:
Try matching
UserA | Bob Smith | 2222 | hello
It returns nothing, so cut the last one and try again
UserA | Bob Smith | 2222
It returns nothing, so cut the last one and try again
UserA | Bob Smith
Matches 4th row, returns true! Thoughts??
Additional info:
[
{"Title": "UserA", "Name": "Bob Smith", "Number": 1234, "output": "hello"},
{"Title": "UserA", "Name": "Bob Smith", "Number": 1234},
{"Title": "UserA", "Name": "Bob Smith", "Number": 123},
{"Title": "UserA", "Name": "Bob Smith"},
{"Title": "UserA", "Name": "Bob Smith", "Number": 12333, "output": "hello"}
]
They all contain Title, Name, Number. Some may or may not contain additional info like "output"
Object I wanna match
{"Title": "UserA", "Name": "Bob Smith", "Number": 122, "output": "hello"}
will match with the 4th array object as it is matching left-to-right and that is the "best match"
Upvotes: 1
Views: 2836
Reputation: 32532
My take
/**
@func bestMatch - returns best matching object
@desc This function takes an array of objects (haystack) and compares
each property of needle to the property of haystack[n].
haystack[n] gets a "score" based on how many properties exist and
match the properties of needle, and js custom sort method is
used, based off the score, so that the first element in the
sorted haystack should have the highest score and therefore
"win" and be the best match
@param1 Array of objects to match against (haystack)
@param2 Object to find matches for (needle)
@return Object from haystack that is closest match against needle
**/
function bestMatch(h,n) {
return h.sort(function(a,b){
var c=0,d=0,p;
for (p in n) {
if (n.hasOwnProperty(p)) {
c+=Number((a[p]||0)&&a[p]===n[p]);
d+=Number((b[p]||0)&&b[p]===n[p]);
}
}
return (d<c)?-1:1;return 0;
})[0];
}
Example
var data = [
{"Title": "UserA", "Name": "Bob Smith", "Number": 1234, "output": "hello"},
{"Title": "UserA", "Name": "Bob Smith", "Number": 1234},
{"Title": "UserA", "Name": "Bob Smith", "Number": 123},
{"Title": "UserA", "Name": "Bob Smith", "Number": 12333, "output": "hello"},
{"Title": "UserA", "Name": "Bob Smith"}
];
var input= {"Title": "UserA", "Name": "Bob Smith", "Number": 12333};
bestMatch(data,input);
// return: {"Title":"UserA","Name":"Bob Smith","Number":12333,"output":"hello"}
Upvotes: 2
Reputation: 1620
If I understood correctly, you're trying to make a match evaluation criteria where undefined values are better than distinct values, so that
{"Title": "UserA", "Name": "Bob Smith", "Number": 2222}
is closer to
{"Title": "UserA", "Name": "Bob Smith"}
than
{"Title": "UserA", "Name": "Bob Smith", "Number": 123}
I'd suggest counting matches as positive, different values as negative, and undefined as neutral:
function bestMatch(array, object) {
if (!array) {array = [];}
if (!object) {object = {};}
var best_index = -1, best_value = 0;
for (var i = 0; i < array.length; i++) {
if (array[i]) {
var matches = 0, total = 0;
for (var p in object) {
total++;
if (array[i][p]) {
if (array[i][p] == object[p]) {
matches++;
} else {
matches--;
}
}
}
var value = matches/total;
if (value > best_value) {
best_value = value;
best_index = i;
}
}
}
return best_index;
}
var bestIndex = bestMatch(
[
{"Title": "UserA", "Name": "Bob Smith", "Number": 1234, "output": "hello"},
{"Title": "UserA", "Name": "Bob Smith", "Number": 1234},
{"Title": "UserA", "Name": "Bob Smith", "Number": 123},
{"Title": "UserA", "Name": "Bob Smith"},
{"Title": "UserA", "Name": "Bob Smith", "Number": 12333, "output": "hello"}
],
{"Title": "UserA", "Name": "Bob Smith", "Number": 2222}
);
console.log(bestIndex); // 3
Upvotes: 0
Reputation: 350270
Going from left to right through object properties is not reliable as object properties are not guaranteed to be returned in a specific order.
You could instead assign a score to four situations that may occur for a certain object in your data set (the haystack), in comparison with the object you want to find a match for (the needle):
If you assign a score to each of these four, you can add them for all the properties that either of the two has (the needle and the row in the haystack): this gives a score for that particular row.
Then choose as best matching row, the one with the highest score.
function bestMatch(needle, haystack) {
return haystack
.map( row =>
[...new Set(Object.keys(needle).concat(Object.keys(row)))]
.reduce( (score, key, i) =>
score + (!(key in needle) ? -10
: !(key in row ) ? - 5
: needle[key] !== row[key] ? -30
: 5), 0) )
.reduce((best, score, i) => score > best[0] ? [score, i] : best, [-9999, i])
.pop();
}
// Sample data
var haystack = [
{"Title": "UserA", "Name": "Bob Smith", "Number": 1234, "output": "hello"},
{"Title": "UserA", "Name": "Bob Smith", "Number": 1234},
{"Title": "UserA", "Name": "Bob Smith", "Number": 123},
{"Title": "UserA", "Name": "Bob Smith"},
{"Title": "UserA", "Name": "Bob Smith", "Number": 12333, "output": "hello"}
];
var needle = {"Title": "UserA", "Name": "Bob Smith", "Number": 122,"output": "hello"};
// Get best match
var i = bestMatch(needle, haystack);
// Show results
console.log('best match at index ', i);
console.log(haystack[i]);
You can see in the code the 4 scores given to the 4 situations mentioned above. You can tweak these to your liking. You might even give different scores depending on the name of the property, so that an equal value for "Title" will give more score points that for equal "Number" values.
Upvotes: 1
Reputation: 6883
Here an example of doing this. It build simple index of matches and then sort them by relevance in descending order (the first is the best). Code:
function searchByRelevance(search, data) {
let props = Object.getOwnPropertyNames(search);
return data.map((value) => { // Build match index
let match = 0;
for(let prop of props) {
if (value[prop] !== search[prop]) {
break;
}
match++;
}
return {
value,
match,
tail: match
? Object.getOwnPropertyNames(value).length - match
: Infinity,
};
})
.filter((item) => item.match) // Filter matched items only
.sort((a, b) => { // Sort by relevance
if (a.match !== b.match) {
return -(a.match - b.match);
}
if (a.tail !== b.tail) {
return a.tail - b.tail;
}
return 0;
})
.map((item) => item.value) // Get values from index items
;
}
// Example with time test
console.time('generate');
let set = [
{"Title": "UserA", "Name": "Bob Smith", "Number": 1234, "output": "hello"},
{"Title": "UserA", "Name": "Bob Smith", "Number": 1234},
{"Title": "UserA", "Name": "Bob Smith", "Number": 123},
{"Title": "UserA", "Name": "Bob Smith"},
{"Title": "UserA", "Name": "Bob Smith", "Number": 12333, "output": "hello"}
];
let data = [];
for(let i = 0; i < 10000; i++) {
data = [...data, ...set];
}
console.timeEnd('generate');
let search = {"Title": "UserA", "Name": "Bob Smith", "Number": 1234, "output": "hello"};
console.time('search');
let matches = searchByRelevance(search, data);
console.timeEnd('search');
console.log(matches); // Matched items sorted by relevance
It took ~17 ms to search in collection of 50K documents like in your example.
Upvotes: 0
Reputation: 69
Instead of looping through each time, you could try mapping a function that returns a score for each item in the array and then get the index with the highest score?
var arr = [
{"Title": "UserA", "Name": "Bob Smith", "Number": 1234, "output": "hello"},
{"Title": "UserA", "Name": "Bob Smith", "Number": 1234},
{"Title": "UserA", "Name": "Bob Smith", "Number": 123},
{"Title": "UserA", "Name": "Bob Smith"},
{"Title": "UserA", "Name": "Bob Smith", "Number": 12333, "output": "hello"}
];
var target = {"Title": "UserA", "Name": "Bob Smith", "Number": 122, "output": "hello"};
var highest = 0;
var index = undefined;
function score(obj, el, i) {
var s = 0;
Object.keys(obj).forEach(key => s += el[key] === obj[key] ? 1 : 0);
if (s > highest) {
highest = s;
index = i;
}
}
arr.forEach((el, i) => score(target, el, i));
This should leave highest equal to the highest score and index equal to the index of that item in the array.
Upvotes: 1
Reputation: 11140
Linear search:
<script>
function FindBestMatch( a, item )
{
var bestIndex = null;
var bestNumMatch = -1;
var bestNumMismatch = 0;
var numItemElements = item.length;
for (var i in a)
{
for (var numMatch=0; numMatch<numItemElements; numMatch++)
{
if (a[i][numMatch]!=item[numMatch]) break;
}
var numMismatch = a[i].length - numMatch;
if (numMatch==numItemElements && !numMismatch) return i;
if (numMatch>bestNumMatch
|| (numMatch==bestNumMatch && numMismatch<bestNumMismatch))
{
bestIndex = i;
bestNumMatch = numMatch;
bestNumMismatch = numMismatch;
}
}
return bestIndex;
}
var myArray = [
[ 'UserA', 'Bob Smith', 12345, 'hello' ],
[ 'UserA', 'Bob Smith', 12345 ],
[ 'UserA', 'Bob Smith', 123 ],
[ 'UserA', 'Bob Smith' ],
[ 'UserA', 'Bob Smith', 12345, 'hello' ]
];
var someItem = [ 'UserA', 'Bob Smith', 2222 ];
var i = FindBestMatch(myArray,someItem);
alert("The best match is number "+i+":\n\n"+myArray[i].join(" | "));
</script>
Upvotes: 0
Reputation: 3562
If you could restructure your object to something like
[
{item: UserA | Bob Smith | 12345 | hello},
{item: UserA | Bob Smith | 12345 | },
{item: UserA | Bob Smith | 123 |},
{item: UserA | Bob Smith},
{item: UserB | Bob Smith | 12333 | hello},
]
you could use .filter() function to get the matching record such as
Array.prototype.filteredItem = function(key, value) {
return this.filter(function(f) { return f[key] === value; })
}
Then you could use the filteredItem as:
var result =myArr.filteredItem("item", "UserB | Bob Smith | 12333 | hello");
Upvotes: 0