Reputation: 23
I have a problem reading text files of 1–20 KB each. The folder contains about 400,000 files. I've limited the program to read only the first line of each text file, but it is still slow. Before reading a file, the program gets the file name from the folder that I choose and checks whether it is one I want; it then reads the first line, checks whether it is correct, and finally copies the file to the destination I want.
/// <summary>
/// Helpers for choosing a folder, listing the log files in it whose names
/// start with a fixed year/month prefix, and checking the first line of a
/// log file against an expected value.
/// </summary>
class FileChoose
{
    // File names must start with these four + two characters to be selected.
    private const string WantedYear = "2013";
    private const string WantedMonth = "08";

    // readLog returns DrawingLength characters of the first line, starting at DrawingStart.
    private const int DrawingStart = 1;
    private const int DrawingLength = 8;

    /// <summary>
    /// Shows a folder browser dialog (without the "New Folder" button).
    /// </summary>
    /// <returns>
    /// The selected folder path, or an empty string when the dialog is
    /// closed with anything other than OK.
    /// </returns>
    public string chooseFolder()
    {
        // The dialog is a disposable component; release it as soon as the
        // selection has been read.
        using (FolderBrowserDialog dialog = new FolderBrowserDialog())
        {
            dialog.ShowNewFolderButton = false;
            if (dialog.ShowDialog() == DialogResult.OK)
            {
                return dialog.SelectedPath;
            }
        }
        return "";
    }

    /// <summary>
    /// Lists the "*.log" files directly inside <paramref name="path"/> whose
    /// file name starts with the wanted year and month.
    /// </summary>
    /// <param name="path">Folder to scan (top level only).</param>
    /// <returns>
    /// Full paths of the matching files. The list is empty when
    /// <paramref name="path"/> is null or empty (for example the value
    /// returned by <see cref="chooseFolder"/> when the dialog is cancelled).
    /// </returns>
    public List<string> getFileName(string path)
    {
        List<string> listPath = new List<string>();
        if (string.IsNullOrEmpty(path))
        {
            return listPath;
        }

        int prefixLength = WantedYear.Length + WantedMonth.Length;

        // EnumerateFiles streams the entries, so only the matching paths are
        // kept in memory instead of an array holding every file in the folder.
        foreach (string item in Directory.EnumerateFiles(path, "*.log"))
        {
            string fileName = Path.GetFileName(item);

            // Names too short to carry the year/month prefix cannot match;
            // skip them instead of letting Substring throw.
            if (fileName.Length < prefixLength)
            {
                continue;
            }

            string year = fileName.Substring(0, WantedYear.Length);
            string month = fileName.Substring(WantedYear.Length, WantedMonth.Length);
            if (year == WantedYear && month == WantedMonth)
            {
                listPath.Add(item);
            }
        }
        return listPath;
    }

    /// <summary>
    /// Checks whether the value read from the first line of the file at
    /// <paramref name="path"/> equals <paramref name="drawing"/>.
    /// </summary>
    /// <param name="drawing">Expected value.</param>
    /// <param name="path">Path of the log file to read.</param>
    /// <returns>True when the value read by <see cref="readLog"/> equals <paramref name="drawing"/>.</returns>
    public bool isDrawing(string drawing, string path)
    {
        return readLog(path) == drawing;
    }

    /// <summary>
    /// Reads the first line of the file at <paramref name="path"/> and returns
    /// the eight characters that start at index 1 of that line.
    /// </summary>
    /// <param name="path">Path of the log file to read.</param>
    /// <returns>
    /// The extracted eight characters, or an empty string when the file is
    /// empty or its first line is shorter than nine characters.
    /// </returns>
    /// <remarks>
    /// Exceptions raised while opening or reading the file are not caught here.
    /// </remarks>
    public string readLog(string path)
    {
        // Dispose the reader so the file handle is released right away; this
        // method is called once per candidate file.
        using (StreamReader sr = new StreamReader(path))
        {
            string line = sr.ReadLine();
            if (line == null || line.Length < DrawingStart + DrawingLength)
            {
                return "";
            }
            return line.Substring(DrawingStart, DrawingLength);
        }
    }
}
Main class
/// <summary>
/// Main form. Scans two fixed log folders, selects the files whose first line
/// carries the wanted drawing number, and copies them to the destination
/// folders on drive D:.
/// </summary>
public partial class Form1 : Form
{
    // Value the first line of a log file must carry for the file to be copied.
    private const string DrawingNumber = "96642678";

    // Destination for files found under pathGlobal.
    private const string CopyRoot = "D:\\OMT_NEW";

    // Destination root for files found under pathGlobal2; each file goes into
    // a sub-folder named after the first eight characters of its file name.
    private const string SeparateRoot = "D:\\OMTSeparate";

    // Length of the file-name prefix used as sub-folder name by copyFileSeparate.
    private const int SeparateFolderNameLength = 8;

    public Form1()
    {
        InitializeComponent();
    }

    public string pathGlobal = "D:\\logfileProductionline\\RD Team\\Production logfile\\Grundfos\\OMT2-1";
    public string pathGlobal2 = "D:\\logfileProductionline\\RD Team\\Production logfile\\Grundfos\\OMT3";
    List<string> listPath = new List<string>();
    List<string> listPath2 = new List<string>();
    List<string> listFile = new List<string>();
    FileChoose.FileChoose folder = new FileChoose.FileChoose();
    public string folderPath;

    // Copies that failed since the last clear (button2_Click clears it at the
    // start of a run); one "source: reason" entry per failed file.
    private readonly List<string> failedCopies = new List<string>();

    /// <summary>
    /// Lets the user pick a folder and stores the result in <see cref="folderPath"/>.
    /// </summary>
    private void button1_Click(object sender, EventArgs e)
    {
        this.folderPath = folder.chooseFolder();
    }

    /// <summary>
    /// Copies every matching log file from <see cref="pathGlobal"/> into the
    /// flat destination folder and every matching log file from
    /// <see cref="pathGlobal2"/> into per-prefix sub-folders, then reports
    /// the outcome.
    /// </summary>
    private void button2_Click(object sender, EventArgs e)
    {
        failedCopies.Clear();

        listPath = folder.getFileName(pathGlobal);
        foreach (var item in listPath)
        {
            if (folder.isDrawing(DrawingNumber, item))
            {
                copyFile(item);
            }
        }

        listPath2 = folder.getFileName(pathGlobal2);
        foreach (var item in listPath2)
        {
            if (folder.isDrawing(DrawingNumber, item))
            {
                copyFileSeparate(item);
            }
        }

        // Only claim success when every copy actually worked; previously the
        // failures were swallowed and "Success" was shown unconditionally.
        if (failedCopies.Count == 0)
        {
            MessageBox.Show("Success", "Success");
        }
        else
        {
            MessageBox.Show(
                failedCopies.Count + " file(s) could not be copied. First failure: " + failedCopies[0],
                "Completed with errors");
        }
        label1.Text = "Copied files are in " + CopyRoot + " and " + SeparateRoot;
    }

    /// <summary>
    /// Copies <paramref name="item"/> into a sub-folder of the separate
    /// destination root named after the first eight characters of the file name.
    /// </summary>
    /// <param name="item">Full path of the file to copy.</param>
    public void copyFileSeparate(string item)
    {
        string fileName = Path.GetFileName(item);
        if (fileName.Length < SeparateFolderNameLength)
        {
            // No eight-character prefix to name the sub-folder after.
            failedCopies.Add(item + ": file name is shorter than " + SeparateFolderNameLength + " characters");
            return;
        }

        string folderName = fileName.Substring(0, SeparateFolderNameLength);
        CopyInto(item, Path.Combine(SeparateRoot, folderName));
    }

    /// <summary>
    /// Copies <paramref name="item"/> into the flat destination folder.
    /// </summary>
    /// <param name="item">Full path of the file to copy.</param>
    public void copyFile(string item)
    {
        CopyInto(item, CopyRoot);
    }

    /// <summary>
    /// Copies one file into a destination directory, creating the directory
    /// when needed. A file that already exists at the destination is left
    /// untouched; I/O and permission failures are recorded in
    /// <see cref="failedCopies"/> instead of being silently discarded.
    /// </summary>
    private void CopyInto(string sourceFile, string destinationDirectory)
    {
        try
        {
            // CreateDirectory creates missing parent folders and does nothing
            // when the directory already exists, so no Exists check is needed.
            Directory.CreateDirectory(destinationDirectory);

            string destination = Path.Combine(destinationDirectory, Path.GetFileName(sourceFile));

            // File.Copy without overwrite throws when the target exists; the
            // original code swallowed that, i.e. existing files were skipped.
            if (File.Exists(destination))
            {
                return;
            }

            File.Copy(sourceFile, destination);
        }
        catch (IOException ex)
        {
            failedCopies.Add(sourceFile + ": " + ex.Message);
        }
        catch (UnauthorizedAccessException ex)
        {
            failedCopies.Add(sourceFile + ": " + ex.Message);
        }
    }
}
Upvotes: 1
Views: 143
Reputation: 4777
That's a lot of files.
Step one: use System.IO.Directory.EnumerateFiles rather than GetFiles. Then have getFileName return IEnumerable<string> using yield return. This saves you from having to allocate space for 400,000 file names up front. Opening the files is still going to take a lot of time. You can run the opens and reads on multiple threads; how much you can do, and how much it helps, depends a lot on your processor and disk subsystem. Run some tests on a much smaller test case and use that to estimate the time required, and make sure to add some sort of progress indicator so you have an idea of how things are going. It might also be useful to write occasional checkpoint files that record your progress, so that if something goes wrong you don't have to restart from the beginning.
Upvotes: 3