J. Doe
J. Doe

Reputation: 37

How to find the best match for an object in an array?

If the following represents my array,

1 UserA | Bob Smith | 12345 | hello
2 UserA | Bob Smith | 12345 |
3 UserA | Bob Smith | 123   |
4 UserA | Bob Smith
5 UserB | Bob Smith | 12333 | hello

and I have the following object to compare with,:

UserA | Bob Smith | 2222 

I'd like it to "best match" with the 4th row on my user array.

UserA | Bob Smith | 2222 | hello also matches up with the 4th row.

What can I do to get the best match?

I can loop around and do a if-else but this seems very dirty hoping someone has a clever solution.


My current approach:

Try matching

UserA | Bob Smith | 2222 | hello

It returns nothing, so cut the last one and try again

UserA | Bob Smith | 2222

It returns nothing, so cut the last one and try again

UserA | Bob Smith 

Matches 4th row, returns true! Thoughts??

Additional info:

[
{"Title": "UserA", "Name": "Bob Smith", "Number": 1234, "output": "hello"},
{"Title": "UserA", "Name": "Bob Smith", "Number": 1234},
{"Title": "UserA", "Name": "Bob Smith", "Number": 123},
{"Title": "UserA", "Name": "Bob Smith"},
{"Title": "UserA", "Name": "Bob Smith", "Number": 12333, "output": "hello"}
]

They all contain Title, Name, Number. Some may or may not contain additional info like "output"

Object I wanna match

{"Title": "UserA", "Name": "Bob Smith", "Number": 122, "output": "hello"}

will match with the 4th array object as it is matching left-to-right and that is the "best match"

Upvotes: 1

Views: 2836

Answers (7)

CrayonViolent
CrayonViolent

Reputation: 32532

My take

/**
 @func   bestMatch - returns best matching object
 @desc   This function takes an array of objects (haystack) and compares
         each property of needle to the property of haystack[n]. 
         haystack[n] gets a "score" based on how many properties exist and
         match the properties of needle, and js custom sort method is 
         used, based off the score, so that the first element in the 
         sorted haystack should have the highest score and therefore 
         "win" and be the best match 
 @param1 Array of objects to match against (haystack)
 @param2 Object to find matches for (needle)
 @return Object from haystack that is closest match against needle
 **/
function bestMatch(h,n) {
  return h.sort(function(a,b){
    var c=0,d=0,p;
    for (p in n) {
      if (n.hasOwnProperty(p)) {
        c+=Number((a[p]||0)&&a[p]===n[p]);
        d+=Number((b[p]||0)&&b[p]===n[p]);
      }  
    }
    return (d<c)?-1:1;return 0;
  })[0];
}

Example

var data = [
  {"Title": "UserA", "Name": "Bob Smith", "Number": 1234, "output": "hello"},
  {"Title": "UserA", "Name": "Bob Smith", "Number": 1234},
  {"Title": "UserA", "Name": "Bob Smith", "Number": 123},
  {"Title": "UserA", "Name": "Bob Smith", "Number": 12333, "output": "hello"},
  {"Title": "UserA", "Name": "Bob Smith"}
];

var input=  {"Title": "UserA", "Name": "Bob Smith", "Number": 12333};

bestMatch(data,input); 
// return: {"Title":"UserA","Name":"Bob Smith","Number":12333,"output":"hello"}

Upvotes: 2

Javier Rey
Javier Rey

Reputation: 1620

If I understood correctly, you're trying to make a match evaluation criteria where undefined values are better than distinct values, so that
{"Title": "UserA", "Name": "Bob Smith", "Number": 2222}
is closer to
{"Title": "UserA", "Name": "Bob Smith"}
than
{"Title": "UserA", "Name": "Bob Smith", "Number": 123}
I'd suggest counting matches as positive, different values as negative, and undefined as neutral:

function bestMatch(array, object) {
  if (!array) {array = [];}
  if (!object) {object = {};}
  var best_index = -1, best_value = 0;
  for (var i = 0; i < array.length; i++) {
    if (array[i]) {
      var matches = 0, total = 0;
      for (var p in object) {
        total++;
        if (array[i][p]) {
          if (array[i][p] == object[p]) {
            matches++;
          } else {
            matches--;
          }
        }
      }
      var value = matches/total;
      if (value > best_value) {
        best_value = value;
        best_index = i;
      }
    }
  }
  return best_index;
}

var bestIndex = bestMatch(
  [
    {"Title": "UserA", "Name": "Bob Smith", "Number": 1234, "output": "hello"},
    {"Title": "UserA", "Name": "Bob Smith", "Number": 1234},
    {"Title": "UserA", "Name": "Bob Smith", "Number": 123},
    {"Title": "UserA", "Name": "Bob Smith"},
    {"Title": "UserA", "Name": "Bob Smith", "Number": 12333, "output": "hello"}
  ],
  {"Title": "UserA", "Name": "Bob Smith", "Number": 2222}
);

console.log(bestIndex); // 3

Upvotes: 0

trincot
trincot

Reputation: 350270

Going from left to right through object properties is not reliable as object properties are not guaranteed to be returned in a specific order.

You could instead assign a score to four situations that may occur for a certain object in your data set (the haystack), in comparison with the object you want to find a match for (the needle):

  • the needle has a key that the row in the haystack does not have;
  • the row in the haystack has a key that the needle does not have;
  • they share a key, but the corresponding values are different;
  • they share a key, and the corresponding values are the same

If you assign a score to each of these four, you can add them for all the properties that either of the two has (the needle and the row in the haystack): this gives a score for that particular row.

Then choose as best matching row, the one with the highest score.

function bestMatch(needle, haystack) {
    return haystack
        .map( row => 
            [...new Set(Object.keys(needle).concat(Object.keys(row)))]
                .reduce( (score, key, i) => 
                    score + (!(key in needle)         ? -10 
                           : !(key in row   )         ? - 5
                           : needle[key] !== row[key] ? -30 
                           :                              5), 0) )
        .reduce((best, score, i) => score > best[0] ? [score, i] : best, [-9999, i])
        .pop();
}

// Sample data
var haystack = [
    {"Title": "UserA", "Name": "Bob Smith", "Number": 1234, "output": "hello"},
    {"Title": "UserA", "Name": "Bob Smith", "Number": 1234},
    {"Title": "UserA", "Name": "Bob Smith", "Number": 123},
    {"Title": "UserA", "Name": "Bob Smith"},
    {"Title": "UserA", "Name": "Bob Smith", "Number": 12333, "output": "hello"}
];
var needle = {"Title": "UserA", "Name": "Bob Smith", "Number": 122,"output": "hello"};
// Get best match
var i = bestMatch(needle, haystack);
// Show results
console.log('best match at index ', i);
console.log(haystack[i]);

You can see in the code the 4 scores given to the 4 situations mentioned above. You can tweak these to your liking. You might even give different scores depending on the name of the property, so that an equal value for "Title" will give more score points that for equal "Number" values.

Upvotes: 1

Paul Rumkin
Paul Rumkin

Reputation: 6883

Here an example of doing this. It build simple index of matches and then sort them by relevance in descending order (the first is the best). Code:

function searchByRelevance(search, data) {
    let props = Object.getOwnPropertyNames(search);

    return data.map((value) => { // Build match index
      let match = 0;
      for(let prop of props) {
        if (value[prop] !== search[prop]) {
          break;
        }

        match++;
      }

      return {
        value,
        match,
        tail: match
          ? Object.getOwnPropertyNames(value).length - match
          : Infinity,
      };
    })
    .filter((item) => item.match) // Filter matched items only
    .sort((a, b) => { // Sort by relevance
      if (a.match !== b.match) {
        return -(a.match - b.match);
      }

      if (a.tail !== b.tail) {
        return a.tail - b.tail;
      }

      return 0;
    })
    .map((item) => item.value) // Get values from index items
    ;
}

// Example with time test

console.time('generate');
let set = [
  {"Title": "UserA", "Name": "Bob Smith", "Number": 1234, "output": "hello"},
  {"Title": "UserA", "Name": "Bob Smith", "Number": 1234},
  {"Title": "UserA", "Name": "Bob Smith", "Number": 123},
  {"Title": "UserA", "Name": "Bob Smith"},
  {"Title": "UserA", "Name": "Bob Smith", "Number": 12333, "output": "hello"}
];

let data = [];

for(let i = 0; i < 10000; i++) {
  data = [...data, ...set];
}

console.timeEnd('generate');

let search = {"Title": "UserA", "Name": "Bob Smith", "Number": 1234, "output": "hello"};

console.time('search');

let matches = searchByRelevance(search, data);

console.timeEnd('search');

console.log(matches); // Matched items sorted by relevance

It took ~17 ms to search in collection of 50K documents like in your example.

Upvotes: 0

Wren
Wren

Reputation: 69

Instead of looping through each time, you could try mapping a function that returns a score for each item in the array and then get the index with the highest score?

var arr = [
{"Title": "UserA", "Name": "Bob Smith", "Number": 1234, "output": "hello"},
{"Title": "UserA", "Name": "Bob Smith", "Number": 1234},
{"Title": "UserA", "Name": "Bob Smith", "Number": 123},
{"Title": "UserA", "Name": "Bob Smith"},
{"Title": "UserA", "Name": "Bob Smith", "Number": 12333, "output": "hello"}
];

var target = {"Title": "UserA", "Name": "Bob Smith", "Number": 122, "output": "hello"};

var highest = 0;
var index = undefined;

function score(obj, el, i) {
  var s = 0;
  Object.keys(obj).forEach(key => s += el[key] === obj[key] ? 1 : 0);
  if (s > highest) {
    highest = s;
    index = i;
  }
}

arr.forEach((el, i) => score(target, el, i));

This should leave highest equal to the highest score and index equal to the index of that item in the array.

Upvotes: 1

RocketNuts
RocketNuts

Reputation: 11140

Linear search:

<script>

function FindBestMatch( a, item )
{
    var bestIndex = null;
    var bestNumMatch = -1;
    var bestNumMismatch = 0;
    var numItemElements = item.length;
    for (var i in a)
    {
        for (var numMatch=0; numMatch<numItemElements; numMatch++)
        {
            if (a[i][numMatch]!=item[numMatch]) break;
        }
        var numMismatch = a[i].length - numMatch;
        if (numMatch==numItemElements && !numMismatch) return i;
        if (numMatch>bestNumMatch
          || (numMatch==bestNumMatch && numMismatch<bestNumMismatch))
        {
            bestIndex = i;
            bestNumMatch = numMatch;
            bestNumMismatch = numMismatch;
        }
    }
    return bestIndex;
}

var myArray = [
[ 'UserA', 'Bob Smith', 12345, 'hello' ],
[ 'UserA', 'Bob Smith', 12345 ],
[ 'UserA', 'Bob Smith', 123 ],
[ 'UserA', 'Bob Smith' ],
[ 'UserA', 'Bob Smith', 12345, 'hello' ]
];

var someItem = [ 'UserA', 'Bob Smith', 2222 ];

var i = FindBestMatch(myArray,someItem);

alert("The best match is number "+i+":\n\n"+myArray[i].join(" | "));

</script>

Upvotes: 0

rlcrews
rlcrews

Reputation: 3562

If you could restructure your object to something like

[
{item:  UserA | Bob Smith | 12345 | hello},
{item:  UserA | Bob Smith | 12345 | },
{item:  UserA | Bob Smith | 123   |},
{item:  UserA | Bob Smith},
{item:  UserB | Bob Smith | 12333 | hello},
]

you could use .filter() function to get the matching record such as

Array.prototype.filteredItem = function(key, value) {
    return this.filter(function(f) { return f[key] === value; })
}

Then you could use the filteredItem as:

var result =myArr.filteredItem("item", "UserB | Bob Smith | 12333 | hello");

Upvotes: 0

Related Questions