Reputation: 123
Here's the problem. Say I have these strings:
I want these strings to be grouped like this:
The point is to separate name of the item from its attributes(color, memory capacity etc).
I used this algorithm for finding longest common substring: link
Can you guys share your ideas? No code or implementation needed. Thank you.
Edited:
this.data = _.sortBy(this.data, function(item) {
return item.title;
});
var i = 0;
var groups = {};
var len = this.data.length - 1;
while(i < len) {
var key = this.lcs(this.data[i][this.attr], this.data[i+1][this.attr]) || this.data[i][this.attr];
groups[key] = true;
i++;
while(this.data[i][this.attr].startsWith(key) && i < len) {
i++;
}
}
console.log(groups)
This works great(tested only adding keys). But I want to add samsung s3 galaxy to list too. Thanks for help guys!
Upvotes: 2
Views: 2080
Reputation: 23955
If you just want to simply group by longest common prefix (that means "apple ipad mini" will be chosen even though "apple ipad" would yield a larger group), then maybe something like this?
sort the list
i = 0
while i < end of list:
key = longest common prefix of list[i] & list[i + 1]
or list[i] if the common prefix is less than (1?) words or i is the last index
groups[key] = list[i++]
while key is prefix of list[i]:
add list[i++] to groups[key]
Upvotes: 1
Reputation: 386560
An attempt to solve the problem with comparing two strings with same words and a look up if the length of the words is smaller then the previous path.
function groupObject(i, l) {
return { item: i, length: l };
}
function group(r, a, i, o) {
var rr = r.item.split(' '),
aa = a.split(' '),
j = 0,
key, keys = [];
while (aa[j] === rr[j]) {
keys.push(aa[j]);
j++;
}
if (keys.length < r.length && i < o.length - 1) {
return group(groupObject(o[i + 1], 0), a, Number.MAX_VALUE, o);
}
key = keys.join(' ');
if (!key || keys.length < r.length && i === o.length - 1) {
key = a;
}
grouped[key] = grouped[key] || [];
grouped[key].push(a);
return groupObject(a, keys.length);
}
var data = ['apple ipad mini 32gb', 'apple ipad mini 64gb', 'apple ipad air 64gb', 'apple ipad air 32gb', 'panasonic gh4', 'samsung s2 galaxy', 'samsung s2 galaxy red', 'samsung s3 galaxy'],
grouped = {};
data.reduce(group, groupObject(data[1], 0));
document.write('<pre>' + JSON.stringify(grouped, 0, 4) + '</pre>');
Upvotes: 2