Reputation: 360
I have HashSet's where each stores T, I have written a test application that compares the different relation algorithms that I can think of, however I'm not all pleased with the results that I am getting.
Does there exist more efficient ways of achieving object relations, then the once that I am testing?
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Diagnostics;
namespace LINQTests
{
class Program
{
static void Main(string[] args)
{
HashSet<User> UserTable = new HashSet<User>();
HashSet<UserProperty> UserPropertyTable = new HashSet<UserProperty>();
#region Lets create some dummy data.
Console.WriteLine("Please wait while creating dummy data, this can take a while...");
Console.WriteLine("");
int rows = 1000000;
for(int x = 0; x < rows; x++)
{
Random rnd = new Random();
// Add new user.
User user = new User(string.Format("Joachim-{0}", x));
if(!UserTable.Add(user))
{
// throw new Exception.
}
else
{
UserProperty age = new UserProperty(user, "Age", rnd.Next(25, 30).ToString());
if(!UserPropertyTable.Add(age))
{
// throw new Exception.
}
UserProperty sex = new UserProperty(user, "Sex", "Male");
if (!UserPropertyTable.Add(sex))
{
// throw new Exception.
}
UserProperty location = new UserProperty(user, "Location", "Norway");
if (!UserPropertyTable.Add(location))
{
// throw new Exception.
}
}
}
#endregion
#region Lets do some query tests.
IEnumerable<User> Users;
Stopwatch stopwatch = new Stopwatch();
int matches = 0;
// Lets find all users who are of age 29.
Console.WriteLine("Finding all users who are of age 29");
Console.WriteLine("");
Console.WriteLine("---------------------------------------------------");
Console.WriteLine("{0,-20} | {1,6} | {2,9}", "Search Strategy", "Found", "Time");
Console.WriteLine("---------------------------------------------------");
// Join test.
stopwatch.Start();
Users = (from user in UserTable
join property in UserPropertyTable on user.Id equals property.UserId
where property.Key == "Age" && property.Value == "29"
select user);
matches = Users.Count();
stopwatch.Stop();
Console.WriteLine("{0,-20} | {1,6} | {2,6} ms.", "Joining Tables", matches, stopwatch.ElapsedMilliseconds);
// Dictionary test.
stopwatch.Restart();
var DictionarySearch = (from t in UserPropertyTable where t.Key == "Age" && t.Value == "29" select t).ToDictionary(x => x.UserId);
Users = (from t in UserTable where DictionarySearch.ContainsKey(t.Id) select t);
matches = Users.Count();
stopwatch.Stop();
Console.WriteLine("{0,-20} | {1,6} | {2,6} ms.", "Dictionary Contain", matches, stopwatch.ElapsedMilliseconds);
// HashSet test.
stopwatch.Restart();
var HashsetSearch = new HashSet<Guid>(from t in UserPropertyTable where t.Key == "Age" && t.Value == "29" select t.UserId);
Users = (from t in UserTable where HashsetSearch.Contains(t.Id) select t);
matches = Users.Count();
stopwatch.Stop();
Console.WriteLine("{0,-20} | {1,6} | {2,6} ms.", "HashSet Contain", matches, stopwatch.ElapsedMilliseconds);
// Following takes so long that we wont run them!
//// Array test.
//stopwatch.Restart();
//var ArrayMatch = (from t in UserPropertyTable where t.Key == "Age" && t.Value == "29" select t.UserId).ToArray();
//Users = (from t in UserTable where ArrayMatch.Contains(t.Id) select t);
//matches = Users.Count();
//stopwatch.Stop();
//Console.WriteLine("{0,-20} | {1,6} | {2,6} ms.", "Array Contain", matches, stopwatch.ElapsedMilliseconds);
//// List test.
//stopwatch.Restart();
//var ListMatch = (from t in UserPropertyTable where t.Key == "Age" && t.Value == "29" select t.UserId).ToList();
//Users = (from t in UserTable where ListMatch.Contains(t.Id) select t);
//matches = Users.Count();
//stopwatch.Stop();
//Console.WriteLine("{0,-20} | {1,6} | {2,6} ms.", "List Contain", matches, stopwatch.ElapsedMilliseconds);
Console.WriteLine("---------------------------------------------------");
#endregion
Console.WriteLine("");
Console.WriteLine("Hit return to exit...");
Console.Read();
}
}
public class User
{
public User(string UserName)
{
this.Id = Guid.NewGuid();
this.UserName = UserName;
}
public Guid Id { get; set; }
public string UserName { get; set; }
public override bool Equals(object obj)
{
User other = obj as User;
if (other == null)
return false;
return this.Id == other.Id;
}
public override int GetHashCode()
{
return Id.GetHashCode();
}
}
public class UserProperty
{
public UserProperty(User user, string key, string value)
{
this.Id = Guid.NewGuid();
this.UserId = user.Id;
this.Key = key;
this.Value = value;
}
public Guid Id { get; private set; }
public Guid UserId {get; private set;}
public string Key { get; set; }
public string Value { get; set; }
public override bool Equals(object obj)
{
UserProperty other = obj as UserProperty;
if (other == null)
return false;
return this.UserId == other.UserId && this.Key == other.Key;
}
public override int GetHashCode()
{
return string.Format("{0}-{1}", this.UserId, this.Key).GetHashCode();
}
}
}
Upvotes: 4
Views: 2099
Reputation: 171246
Here are a few things you can do:
UserId.GHC() * 37 ^ Name.GHC()
should do.property.Key == "Age" && property.Value == "29"
predicate in the measurements? Will that need to be run for each execution? You can try to optimize that by using integer values instead of strings. Maybe you can precompute an index for UserPropertyTable
so that you can get the matching elements in constant time.This should give you 1-10x speedup depending on how much you implement. If you need to go even faster than that I would need to ask some questions first. Often you can find specialized tricks that only apply to particular situations.
Upvotes: 2
Reputation: 8295
This will make the linq/join comparable to the other methods:
var properties = UserPropertyTable
.Where(p=>p.Key == "Age" && p.Value == "29")
.ToArray();
Users = (from user in UserTable
join property in properties
on user.Id equals property.UserId
select user);
Here is the fastest (~2X) I could achieve:
var filteredUserIds = new HashSet<Guid>(
UserPropertyTable
.Where(p=>p.Key == "Age" && p.Value == "29")
.Select(p=>p.UserId));
Users = (from user in UserTable
where filteredUserIds.Contains(user.Id)
select user);
With the output
--------------------------------------------------- Search Strategy | Found | Time --------------------------------------------------- My method | 210366 | 157 ms. Dictionary Contain | 210366 | 325 ms. HashSet Contain | 210366 | 325 ms. ---------------------------------------------------
Upvotes: 2