Reputation: 79
I have an application that recursively walks a very large (6 TB) folder. To speed things up, I create a new thread for each recursion. At one point my thread count was in excess of 12,000. As the task gets closer to completion, my internal thread count drops, but in Task Manager the thread count keeps climbing. I think that indicates that the threads are not being garbage collected when they finish.
At one point, my internal thread count showed 5575 threads while the Windows resource monitor showed the task using 33,023 threads.
// Entry point: reads the root folder from the application settings and
// starts the first walker thread on it.
static void Main(string[] args)
{
    string rootFolder = Properties.Settings.Default.rootFolder;

    // The root walker is the first thread tracked by the internal counter.
    threadcount = 1;

    Thread rootWalker = new Thread(new ParameterizedThreadStart(needle));
    rootWalker.Start(rootFolder);
}
// Thread body: gathers the statistics for one folder and writes them to the
// database. The parameter is typed object because the method is used as a
// ParameterizedThreadStart target; it is cast to the folder path string.
static void needle(object objFolderName)
{
    // The thread's work ends once the row for this folder has been written.
    addToDB(getFolderData((string)objFolderName));
}
// Collects the statistics for one folder (subfolder count, file count, total
// file size) and starts a new walker thread for every subfolder.
//
// folderName: path of the folder to inspect.
// Returns:    a FolderData describing this folder only; subfolders are
//             reported by the threads started here.
//
// Listing and file-size failures are deliberately ignored (best effort): an
// unreadable folder is reported as empty and an unreadable file adds 0 bytes.
private static FolderData getFolderData(string folderName)
{
    string[] directories = new string[] { };
    string[] files = new string[] { };

    try
    {
        directories = Directory.GetDirectories(folderName);
    }
    catch (Exception)
    {
        // cannot list subfolders, so treat the folder as having none
    }

    try
    {
        files = Directory.GetFiles(folderName);
    }
    catch (Exception)
    {
        // cannot list files, so treat the folder as having none
    }

    long folderSize = 0;
    foreach (string file in files)
    {
        try
        {
            folderSize += new FileInfo(file).Length;
        }
        catch (Exception)
        {
            // cannot access file so skip
        }
    }

    FolderData folderData = new FolderData(folderName, directories.Length, files.Length, folderSize);

    // Subfolders are visited in sorted order.
    Array.Sort(directories);
    foreach (string directory in directories)
    {
        Console.Write(" " + threadcount + " ");

        // threadcount is shared by every walker thread, so it is updated
        // atomically; a plain ++/-- can lose updates under contention.
        // NOTE(review): assumes threadcount is a static int/long field — confirm.
        Interlocked.Increment(ref threadcount);

        Thread walker = new Thread(new ParameterizedThreadStart(needle));
        walker.Start(directory);
    }

    // This folder's walk is finished, so take it out of the internal count.
    // The caller may still do work (needle writes to the database) after this
    // point, so the counter can drop before the thread has actually exited.
    Interlocked.Decrement(ref threadcount);
    return folderData;
}
Upvotes: 0
Views: 74
Reputation: 79
Thanks to matt-dot-net's suggestion, I spent a few hours researching the TPL (Task Parallel Library), and it was well worth it.
Here is my new code. It works blazingly fast, does not peg the CPU (uses 41% which is a lot but still plays nice in the sandbox), uses only about 160MB of memory (instead of nearly all of the 4GB available) and uses a maximum of about 70 threads.
You'd almost think I knew what I was doing. But the .NET TPL handles all the hard stuff, like determining the correct number of threads and making sure they clean up after themselves.
/// <summary>
/// Walks the configured root folder with one task per subfolder and stores one
/// database row per folder (path, subfolder count, file count, bytes used).
/// </summary>
class Program {
    // Serializes the inserts: addToDB is called from the folder tasks, which
    // run concurrently.
    static readonly object padlock = new object();

    /// <summary>
    /// Clears the rows of the previous run, then processes the root folder and
    /// blocks until the whole tree has been walked.
    /// </summary>
    static void Main(string[] args)
    {
        // NOTE(review): this deletes from SCRPT_APP.S_DRIVE_FOLDERS while addToDB
        // inserts into FOLDERS — confirm both names refer to the same table.
        using (OracleConnection ora = new OracleConnection(Properties.Settings.Default.ora))
        {
            ora.Open();
            using (OracleCommand clearTable = new OracleCommand("DELETE FROM SCRPT_APP.S_DRIVE_FOLDERS", ora))
            {
                clearTable.ExecuteNonQuery();
            }
        }

        string folderName = Properties.Settings.Default.rootFolder;

        // Wait is like Join: it blocks until the root task, and therefore every
        // subfolder task it waits on, has finished.
        Task processRoot = Task.Factory.StartNew(getFolderData, folderName);
        processRoot.Wait();
    }

    /// <summary>
    /// Recursively processes one folder: totals the sizes of its files, runs a
    /// task for every subfolder, waits for those tasks, then writes this
    /// folder's own row to the database.
    /// </summary>
    /// <param name="objFolderName">
    /// Folder path as a string; typed object so the method can serve directly
    /// as a task body that receives its state argument.
    /// </param>
    private static void getFolderData(object objFolderName)
    {
        string folderName = (string)objFolderName;
        Console.WriteLine(folderName);

        string[] directories = listOrEmpty(Directory.GetDirectories, folderName);
        string[] files = listOrEmpty(Directory.GetFiles, folderName);

        long folderSize = 0;
        foreach (string file in files)
        {
            try
            {
                folderSize += new FileInfo(file).Length;
            }
            catch (Exception)
            {
                // cannot access file so skip
            }
        }

        FolderData folderData = new FolderData(folderName, directories.Length, files.Length, folderSize);

        // Start one task per subfolder, in sorted order.
        Array.Sort(directories);
        Task[] dirTasks = new Task[directories.Length];
        for (int d = 0; d < directories.Length; d++)
        {
            dirTasks[d] = Task.Factory.StartNew(getFolderData, directories[d]);
        }

        // Wait for every subfolder to finish before recording this folder.
        Task.WaitAll(dirTasks);
        addToDB(folderData);
    }

    // Runs a directory-listing call and returns an empty array when it throws,
    // so an unreadable folder is treated as empty (deliberate best effort).
    private static string[] listOrEmpty(Func<string, string[]> list, string folderName)
    {
        try
        {
            return list(folderName);
        }
        catch (Exception)
        {
            return new string[] { };
        }
    }

    /// <summary>
    /// Inserts one row describing <paramref name="folderData"/> into the FOLDERS
    /// table. Calls are serialized through <c>padlock</c>, and the connection
    /// and command are disposed even when the insert throws.
    /// </summary>
    /// <param name="folderData">Statistics of the folder to record.</param>
    private static void addToDB(FolderData folderData)
    {
        lock (padlock)
        {
            using (OracleConnection ora = new OracleConnection(Properties.Settings.Default.ora))
            {
                ora.Open();
                using (OracleCommand addFolderData = new OracleCommand(
                    "INSERT INTO FOLDERS " +
                    "(PATH, FOLDERS, FILES, SPACE_USED) " +
                    "VALUES " +
                    "(:PATH, :FOLDERS, :FILES, :SPACE_USED) ",
                    ora))
                {
                    addFolderData.BindByName = true;
                    addFolderData.Parameters.Add(":PATH", OracleDbType.Varchar2);
                    addFolderData.Parameters.Add(":FOLDERS", OracleDbType.Int32);
                    addFolderData.Parameters.Add(":FILES", OracleDbType.Int32);
                    addFolderData.Parameters.Add(":SPACE_USED", OracleDbType.Int64);
                    addFolderData.Prepare();
                    addFolderData.Parameters[":PATH"].Value = folderData.FolderName;
                    addFolderData.Parameters[":FOLDERS"].Value = folderData.FolderCount;
                    addFolderData.Parameters[":FILES"].Value = folderData.FileCount;
                    addFolderData.Parameters[":SPACE_USED"].Value = folderData.Size;
                    addFolderData.ExecuteNonQuery();
                }
            }
        }
    }
}
}
Upvotes: 1