Reputation: 9732
Apologies about the vague titles, wasn't really sure how else to describe the problem.
I recently ran into a case where I have to loop through an array of objects to compare multiple value, I opted for using a for loop in a for loop to compare each object with every other object.
While this works fine on small arrays, once my array gets a little bigger (say 10,000 objects) performance tends to be a big issue.
This array contains these kind of objects:
[{ char: '_', from: 0, to: 2, chrLength: 2 },
{ char: '_', from: 0, to: 7, chrLength: 7 },
{ char: 'a', from: 1, to: 3, chrLength: 2 },
{ char: 'a', from: 1, to: 6, chrLength: 5 },
{ char: '_', from: 2, to: 7, chrLength: 5 },
{ char: 'a', from: 3, to: 6, chrLength: 3 }]
The idea is that I can only select objects where the from
and to
don't overlap with any other object. (from
and to
are indexes in another array)
So for the example array, possible outcomes would be:
[{ char: '_', from: 0, to: 2, chrLength: 2 },
{ char: 'a', from: 1, to: 3, chrLength: 2 },
{ char: 'a', from: 1, to: 6, chrLength: 5 },
{ char: 'a', from: 3, to: 6, chrLength: 3 }]
The way I handled it was as follows:
var canUse = true,
posibilities = [];
for(i = 0; i < l; i++) {
canUse = true;
for(var j = 0; j < l; j++) {
if((results[i].from < results[j].from && results[i].to > results[j].to)) {
canUse = false;
break;
}
}
if(canUse) posibilities.push(results[i]);
}
Seeing performance is quite terrible with larger arrays, I'm wondering if there is a better solution to do this?
Upvotes: 2
Views: 153
Reputation: 45106
Here is the idea (Demo):
(from + to)/2
.k
items into your tree and are about to insert k+1
. Your steps are:k+1
covers the root - ignore it.k+1
is covered by the root - remove the root from the tree and make another attempt.k+1
's mid to the root's one.Code
function process() {
console.log('Processing results of length: ' + l);
console.time('Processing');
var comparator = function(a, b) { //Comparator to build a tree
return a.mid - b.mid;
},
isAinB = function(a, b) { //util function to check if a is inside b
return b.from < a.from && b.to > a.to;
},
rbtree = new RBTree(comparator), //Build an empty tree
i = results.length - 1, item, posibilities = [];
function check(root, x) { //Recursive checker
var data;
if(!root) { //Either tree is empty or we've reached a leaf
rbtree.insert(x);
return;
}
data = root.data;
if(isAinB(data, x)) { //4
return;
}
if(isAinB(x, data)) { //5
rbtree.remove(data);
check(rbtree._root, x);
return;
}
check(root[comparator(data, x) > 0 ? 'left' : 'right'], x); //6
}
for(; i >= 0; i--) {
item = results[i];
item.mid = (item.from + item.to)/2; //2
check(rbtree._root, item); //3
}
rbtree.each(function(item) { //7
posibilities.push(item);
});
console.timeEnd('Processing');
console.log(posibilities.length);
}
BTW I've used this RBTree implementation. Not sure if it is the best one :)
Upvotes: 1
Reputation: 700840
Start by sorting the objects on the chrLength
property. When you look for objects that keep an object from being included, you only have to check the objects that are at least two characters shorter.
results.sort(function(x, y){ return x.chrLength - y.chrLength; });
var posibilities = [];
for (var i = 0; i < l; i++) {
var canUse = true, len = results[i].chrLength - 2;
for (var j = 0; results[j].chrLength <= len; j++) {
if((results[i].from < results[j].from && results[i].to > results[j].to)) {
canUse = false;
break;
}
}
if(canUse) posibilities.push(results[i]);
}
With your example data, this reduces the number of checks from 36 in the in the original code to only 8.
Comparison: http://jsfiddle.net/Guffa/5jsSb/
You can make an array where each item is an array of objects with the same chrLength
, and then sort each array on the from
property. That way you can easily skip to the point where the objects start to overlap, and stop comparing as soon as they don't overlap any more:
var map = [];
for (var i = 0; i < l; i++) {
var ch = results[i].chrLength;
while (map.length <= ch) map.push([]);
map[ch].push(results[i]);
}
for (var i = 1; i < map.length; i++) {
map[i].sort(function(x, y){ return x.from - y.from; });
}
var posibilities = [];
for (var i = 0; i < l; i++) {
var canUse = true, len = results[i].chrLength - 2, from = results[i].from, to = results[i].to;
for (var j = 1; canUse && j <= len; j++) {
if (map[j][map[j].length - 1].from > from) {
var k;
for (k = 0; map[j][k].from <= from; k++);
for (;k < map[j].length && map[j][k].from < to; k++) {
if (map[j][k].to < to) {
canUse = false;
break;
}
}
}
}
if(canUse) posibilities.push(results[i]);
}
This splits the checks for the from
and the to
properties in two stages, so the number of complete checks (where map[j][k].to < to
is evaluated) is actually smaller than the total number of objects.
Disclaimer: Naturally you would need to verify that the code does the right thing. I have checked that the result has the same number of items, but I haven't compared each item.
Upvotes: 1
Reputation: 3379
Well for starters, once canUse
is false
you don't need to carry on with the inner loop.
You can either add a break;
or change the second for loop to be:
for (var j = 0; canUse && (j < l); j++)
and you'll probably see a useful speed up.
Upvotes: 0