Reputation: 662
I have method for computing Levenshtein distance
public static LevenshteinMatches LevenshteinSingleThread(this string str, string expression, int maxDistance) {
if (str.Length > expression.Length + 1) {
int len = expression.Length;
long strLen = str.Length - len + 1;
int[] results = new int[strLen];
int[][,] dimension = new int[strLen][,];
for (int i = 0; i < strLen; i++) {
dimension[i] = new int[len + 1, len + 1];
}
string source = str;
source = source.ToUpper();
expression = expression.ToUpper();
for (int i = 0; i < strLen; i++) {
results[i] = SqueareLevenshtein(ref dimension[i], str.Substring(i, len).ToUpper(), expression, len);
}
LevenshteinMatches matches = new LevenshteinMatches();
for (int i = 0; i < strLen; i++) {
if (results[i] <= maxDistance) {
matches.addMatch(str.Substring(i, len), Math.Round((1.0 - ((double)results[i] / len)) * 100.0, 2), i, len, results[i]);
}
}
return matches;
}
else {
LevenshteinMatch match = str.LevenshteinCPU(expression, maxDistance);
if (match != null)
return new LevenshteinMatches(match);
else
return new LevenshteinMatches();
}
}
What should I do to make it async?
Or should i leave this method and just call it some different way?
Here is my attempt to make it async; Not sure what is wrong but I can't get any results - thread is inf working but it should take only few ms.
public static async Task<LevenshteinMatches> LevenshteinSingleThread(this string str, string expression, int maxDistance) {
return await Task.Factory.StartNew(() => {
if (str.Length > expression.Length + 1) {
int len = expression.Length;
long strLen = str.Length - len + 1;
int[] results = new int[strLen];
int[][,] dimension = new int[strLen][,];
for (int i = 0; i < strLen; i++) {
dimension[i] = new int[len + 1, len + 1];
}
string source = str;
source = source.ToUpper();
expression = expression.ToUpper();
for (int i = 0; i < strLen; i++) {
results[i] = SqueareLevenshtein(ref dimension[i], str.Substring(i, len).ToUpper(), expression, len);
}
LevenshteinMatches matches = new LevenshteinMatches();
for (int i = 0; i < strLen; i++) {
if (results[i] <= maxDistance) {
matches.addMatch(str.Substring(i, len), Math.Round((1.0 - ((double)results[i] / len)) * 100.0, 2), i, len, results[i]);
}
}
return matches;
}
else {
LevenshteinMatch match = str.LevenshteinCPU(expression, maxDistance);
if (match != null)
return new LevenshteinMatches(match);
else
return new LevenshteinMatches();
}
});
}
rest of the code link
And that's how I call it:
string s = "xcjavxzcbvmrmummuuutmtumuumtryumtryumtrutryumtryumtrymutryumtyumtryumtrmutyumtrurtmutymurtmyutrymut";
s = string.Concat(Enumerable.Repeat(s, 4000));
var watch = System.Diagnostics.Stopwatch.StartNew();
var ret = s.LevenshteinSingleThread("jas", 1);
var res = ret.Result;
watch.Stop();
var elapsedMs = watch.ElapsedMilliseconds;
Upvotes: 2
Views: 615
Reputation: 127543
There is nothing async about your function, it is entirely CPU bound work. Changing the signature to async Task<LevenshteinMatches>
and never using await
in the function should have caused a warning in the compiler.
Instead of "making it async" if the thing you are really after is making it work in parallel then just call the code in parallel.
string s = "xcjavxzcbvmrmummuuutmtumuumtryumtryumtrutryumtryumtrymutryumtyumtryumtrmutyumtrurtmutymurtmyutrymut";
s = string.Concat(Enumerable.Repeat(s, 4000));
var expressions = new[] {"jas", "cbv"}
var tasks = new List<Task<LevenshteinMatches>>()
foreach(var expression in expressions)
{
var task = new Task.Run(()=> message.LevenshteinSingleThread(expression, 1)); //Start multiple threads
tasks.Add(task);
}
LevenshteinMatches[] results = Task.WaitAll(tasks); //Wait for all the threads to end.
You could even make parts of the internal function multi-threaded by using Parallel.For(
to make some of the for loops parallel, just be careful that any collections you call Add
on are either synchronized inside a lock
or are thread safe collections.
For example if SqueareLevenshtein
is thread safe internally you could do
Parallel.For(0, strLen, i => {
//Are you sure ref is needed here?
results[i] = SqueareLevenshtein(ref dimension[i], str.Substring(i, len).ToUpper(), expression, len);
});
LevenshteinMatches matches = new LevenshteinMatches();
Parallel.For(0, strLen; i => {
if (results[i] <= maxDistance) {
lock(matches)
{
matches.addMatch(str.Substring(i, len), Math.Round((1.0 - ((double)results[i] / len)) * 100.0, 2), i, len, results[i]);
}
}
});
return matches;
Upvotes: 5