Reputation: 1042
For my project, I have to generate a list of unique strings.
Everything works fine, but my problem is that it is very slow at the end.
I've tried using Parallel-Loops, but I found out that my ConcurrentBag<T>
, which I was using, is also slow.
Now I'm using a simple for-loop and List<T>
and it is now a little bit faster, but also really slow.
Here's my code:
private List<string> Generate(int start, int end, bool allowDupes)
{
var list = new List<string>();
var generator = new StringGenerator(LowerCase, UpperCase, Digits, NumberOfCharacters);
for (var i = start; i < end; i++)
{
StringBuilder sb;
while (true)
{
sb = new StringBuilder();
for (var j = 0; j < NumberOfSegments; j++)
{
sb.Append(generator.GenerateRandomString());
if (j < NumberOfSegments - 1)
{
sb.Append(Delimiter);
}
}
if (!allowDupes)
{
if (list.Contains(sb.ToString()))
{
continue;
}
}
break;
}
list.Add(sb.ToString());
GeneratedStringCount = i + 1;
}
return new List<string>(list);
}
I've also talked to my teacher and he would use the same algorithm for generating these strings.
Do you know a better solution? (The GenerateRandomString()
Method in StringGenerator
is simple and does not consume much performance. list.Contains(xy)
is consuming alot of resources. [Performance Analysis in Visual Studio])
Upvotes: 2
Views: 817
Reputation: 1
public static String GenerateEightCode( int codeLenght, Boolean isCaseSensitive)
{
char[] chars = GetCharsForCode(isCaseSensitive);
byte[] data = new byte[1];
RNGCryptoServiceProvider crypto = new RNGCryptoServiceProvider();
crypto.GetNonZeroBytes(data);
data = new byte[codeLenght];
crypto.GetNonZeroBytes(data);
StringBuilder sb = new StringBuilder(codeLenght);
foreach (byte b in data)
{
sb.Append(chars[b % (chars.Length)]);
}
string key = sb.ToString();
if (codeLenght == 8)
key = key.Substring(0, 4) + "-" + key.Substring(4, 4);
else if (codeLenght == 16)
key = key.Substring(0, 4) + "-" + key.Substring(4, 4) + "-" + key.Substring(8, 4) + "-" + key.Substring(12, 4);
return key.ToString();
}
private static char[] GetCharsForCode(Boolean isCaseSensitive)
{
// all - abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890
char[] chars = new char[58];
if (isCaseSensitive)
{
chars = "abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ123456789".ToCharArray();//počet unikátních kombinací 4 - 424 270, 8 - 1 916 797 311, 16 - 7.99601828013E+13
}
else
{
chars = new char[35];
chars = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789".ToCharArray();//počet unikátních kombinací 4 - 52 360, 8 - 23 535 820, 16 - 4 059 928 950
}
return chars;
}
Upvotes: 0
Reputation: 18877
List.Contains
is slow. Use a HashSet
instead.
private List<string> Generate(int start, int end, bool allowDupes)
{
var strings = new HashSet<string>();
var list = new List<string>();
var generator = new StringGenerator(LowerCase, UpperCase, Digits, NumberOfCharacters);
for (var i = start; i < end; i++)
{
while (true)
{
string randomString = GetRandomString();
if (allowDupes || strings.Add(randomString))
{
list.Add(randomString);
break;
}
}
GeneratedStringCount = i + 1;
}
return new List<string>(list);
}
private string GetRandomString()
{
var segments = Enumerable.Range(1, NumberOfSegments)
.Select(_ => generator.GenerateRandomString());
var result = string.Join(Delimeter, segments);
return result;
}
This still has the chance for slow performance, but you could remedy that with a smart GenerateRandomString
function.
Upvotes: 2