Reputation: 1536
In redis I have a large data set with 100 000 users and growing. To make a leaderboard, I scan the entire database getting all hashes for each user. Then I go through one by one to get the score. Later, I do the sorting and trimming in javascript. My question is, what would be a faster way to do the same thing? Current queries take a few seconds. My instinct is to store the data in JS and only run the query once.
getLeaderboards: function(player){
var self = this;
async.waterfall([
function(callback){
client.smembers("usr", function(err, replies){
var pvpleaders = [];
var expleaders = [];
var lastUser = false;
for(var i = 0; i < replies.length; i++){
var name = replies[i].toString();
if(i == replies.length - 1){
lastUser = true;
}
if(name.charAt(0) != "_"){
callback(err, name, pvpleaders, expleaders, lastUser);
}
}
});
},
function(name, pvpleaders, expleaders, lastUser, callback){
client.hgetall("u:" + name, function(err, reply){
if(reply){
kills = parseInt(reply.kills);
exp = parseInt(reply.exp);
remortexp = parseInt(reply.remortexp) || 0;
if(kills > 0){
//log.info(name+' '+kills);
pvpleaders.push([kills, name]);
}
if(exp + remortexp > 0){
//log.info(name+' '+exp);
expleaders.push([exp + remortexp, exp, remortexp, name]);
}
}
if(lastUser){
log.info('user user');
callback(err, pvpleaders, expleaders);
}
});
}
], function (err, pvpleaders, expleaders) {
if(err){log.info(err);}
pvpleaders.sort(function(a, b) { if (a[0] === b[0]) { return 0; } else { return (a[0] > b[0]) ? -1 : 1; } });
pvpleaders.splice(30, pvpleaders.length);
expleaders.sort(function(a, b) { if (a[0] === b[0]) { return 0; } else { return (a[0] > b[0]) ? -1 : 1; } });
expleaders.splice(30, expleaders.length);
var leaderboards = JSON.stringify({pvp: pvpleaders, exp: expleaders});
player.sendLeaderboards(leaderboards);
});
},
Upvotes: 1
Views: 725
Reputation: 50072
Use the Sorted Set data structure - leaderboards are a classic use case for it.
Set the memeber's value to the player ID and the score as the player's score. You can then perform efficient ranges (the z[rev]rangebyscore commands) server-side and the results are already sorted. In most cases, this will be much faster than any client-side post processing because of the savings in network traffic.
Upvotes: 1
Reputation: 3022
Basically your question is about how to design a efficient scoreboard, if my understanding is correct.
A simple solution would be using ZADD in redis. it will create a sorted set. The time of ZADD complexity is O( M * log(N)) and you use ZRANGE to get the result, whose time complexity is O(log(N)+M).
A more efficient solution need you to think about more about your data. Is there is range of your data? for example, if all your user's score would fall in 0 to 10000? If so, you could use bucket sort here and cache the result in redis. Inserting and searching are both O(1)(because the number of bucket is fixed number). Only for the top 100, you maintain a sorted data structure. In this way, you have function "Show top 100 scoreboard" and "Search the ranking of given user" and both are constant time complexity.
Upvotes: 1