Reputation: 371
I have found a library that lets me query CSV files with LINQ. I understand the principles and my code works well, but I have performance problems with big CSV files.
http://www.codeproject.com/Articles/25133/LINQ-to-CSV-library
Now I want to access specific single items from my _dataTable object. I get them like this:
public class CsvFile
{
    // Row sequence returned by cc.Read for the configured file path and file description.
    private IEnumerable<DataRow> _dataTable = cc.Read<DataRow>(_filePath, _inputFileDescription);

    /// <summary>
    /// Returns the value of a single item, addressed by row and column position.
    /// </summary>
    /// <param name="row">Zero-based position of the row inside the row sequence.</param>
    /// <param name="column">Zero-based position of the item inside that row.</param>
    /// <returns>The Value of the selected item.</returns>
    public string GetItem(int row, int column)
    {
        // Pick the row first, then the item inside it.
        DataRow selectedRow = _dataTable.ElementAt<DataRow>(row);
        DataRowItem selectedItem = selectedRow.ElementAt<DataRowItem>(column);
        return selectedItem.Value;
    }
}
When I now call the method in a loop like this:
var file1 = new CsvFile("C:\\dev_csvcompare\\Master.csv", ';', true);
for (int column = 0; column < 10000; column++)
{
    // Placeholder access only: the real loop is more complicated than this.
    string dummy = file1.GetItem(1, column);
}
it gets very slow, because every call enumerates the IEnumerable again, which re-opens the file stream and re-reads the file.
In the documentation (link), under "Deferred Reading", they say I can access the IEnumerable "_dataTable" with a foreach loop (this works fine), but that is not an option in my case because I need access to specific items in the CSV.
Is there a way to keep the file stream open (or otherwise avoid re-reading the file) so that the performance improves?
EDIT (my code; it may contain a lot of nonsense, as I am not very experienced with .NET, C# and OOP):
/// <summary>
/// Compares two CSV files by key: walks both key columns in sorted order and appends one
/// OutputValues entry to _outputList per step. An entry describes either a key present in
/// both files (with the values of the compared columns and their comparison result) or a
/// key present in only one of the files.
/// </summary>
/// <param name="key1">Index of the key column in file 1.</param>
/// <param name="key2">Index of the key column in file 2.</param>
/// <param name="col1">Index of the column to compare in file 1.</param>
/// <param name="col2">Index of the column to compare in file 2.</param>
public void Compare(int key1, int key2, int col1, int col2)
{
    // Every key is paired with the row it came from *before* sorting. GetItem addresses rows
    // in file order, so the position inside the sorted list must never be used as a row index.
    List<KeyValuePair<string, int>> sortedKeyColFile1 = SortKeysWithRows(_file1.GetCol(key1));
    List<KeyValuePair<string, int>> sortedKeyColFile2 = SortKeysWithRows(_file2.GetCol(key2));

    string lastKeyCol1 = null;
    string lastKeyCol2 = null;
    int file1counter = 0;
    int file2counter = 0;

    while (file1counter < sortedKeyColFile1.Count || file2counter < sortedKeyColFile2.Count)
    {
        // Either list may already be exhausted; only index a list that still has items.
        bool hasKey1 = file1counter < sortedKeyColFile1.Count;
        bool hasKey2 = file2counter < sortedKeyColFile2.Count;
        string currentKey1 = hasKey1 ? sortedKeyColFile1[file1counter].Key : null;
        string currentKey2 = hasKey2 ? sortedKeyColFile2[file2counter].Key : null;

        // Keep a direct reference to the new entry instead of re-indexing _outputList, so the
        // method also works when _outputList already contains entries from an earlier call.
        OutputValues entry = new OutputValues(key1, key2, col1, col2);
        _outputList.Add(entry);

        if (hasKey1 && hasKey2 && currentKey1 == currentKey2)
        {
            // Key is in both files.
            int rowF1 = sortedKeyColFile1[file1counter].Value;
            int rowF2 = sortedKeyColFile2[file2counter].Value;

            entry.RedundantKeyF1 = lastKeyCol1 == currentKey1;
            entry.RedundantKeyF2 = lastKeyCol2 == currentKey2;
            lastKeyCol1 = currentKey1;
            lastKeyCol2 = currentKey2;

            // Fetch each value once; GetItem can be expensive on large files.
            string valueF1 = _file1.GetItem(rowF1, col1);
            string valueF2 = _file2.GetItem(rowF2, col2);
            entry.ValF1 = valueF1;
            entry.ValF2 = valueF2;
            entry.LineNumF1 = rowF1;
            entry.LineNumF2 = rowF2;
            // Compare the values (the keys match at this point).
            entry.CompareResult = CompareString(valueF1, valueF2);

            file1counter++;
            file2counter++;
        }
        else if (hasKey2 && (!hasKey1 || string.Compare(currentKey1, currentKey2) > 0))
        {
            // The current key of file 2 is not in file 1 (or file 1 is exhausted).
            entry.LineNumF2 = sortedKeyColFile2[file2counter].Value;
            entry.RedundantKeyF2 = lastKeyCol2 == currentKey2;
            lastKeyCol2 = currentKey2;
            file2counter++;
        }
        else
        {
            // The current key of file 1 is not in file 2 (or file 2 is exhausted).
            // hasKey1 is guaranteed here: the loop condition requires at least one list to
            // have items, and the branch above handles every case where file 1 is exhausted.
            entry.LineNumF1 = sortedKeyColFile1[file1counter].Value;
            entry.RedundantKeyF1 = lastKeyCol1 == currentKey1;
            lastKeyCol1 = currentKey1;
            file1counter++;
        }
    }
}

// Pairs each key with its zero-based row index in the file and sorts the pairs by key,
// using the same default string comparison as List<string>.Sort(); ties keep file order.
private static List<KeyValuePair<string, int>> SortKeysWithRows(List<string> keys)
{
    List<KeyValuePair<string, int>> paired = new List<KeyValuePair<string, int>>(keys.Count);
    for (int row = 0; row < keys.Count; row++)
    {
        paired.Add(new KeyValuePair<string, int>(keys[row], row));
    }
    paired.Sort((left, right) =>
    {
        int byKey = string.Compare(left.Key, right.Key);
        return byKey != 0 ? byKey : left.Value.CompareTo(right.Value);
    });
    return paired;
}
//And here the important part of the csv-file class, maybe not so interesting
/// <summary>
/// Reads a csv-file through the LINQ to CSV library and keeps its rows in memory.
/// </summary>
public class CsvFile
{
    private string _filePath = null;
    private char _separator = ',';
    private bool _hasHeader = true;
    private CsvContext _cc = null;
    private CsvFileDescription _inputFileDescription = null;
    private List<string> _headers = null;
    // Holds a List<DataRow> at runtime (see the constructor), so repeated enumeration and
    // index-based lookups such as ElementAt do not touch the file again.
    private IEnumerable<DataRow> _dataTable = null;

    /// <summary>
    /// Constructor for a new CsvFile object.
    /// The constructor initialises the object and reads all rows of the file into memory.
    /// </summary>
    /// <param name="filePath">Full path of the csv-file</param>
    /// <param name="separator">Separator of the csv-file, eg: ';' or ',' or '\t'</param>
    /// <param name="hasHeader">Is true if the first line of the csv-file contains the headers</param>
    public CsvFile(string filePath, char separator, bool hasHeader = true)
    {
        // Throws an exception if something is wrong with the file. The stream is only a
        // probe, so dispose it immediately instead of leaking the open handle.
        using (File.OpenRead(filePath))
        {
        }

        _filePath = filePath;
        _separator = separator;
        _hasHeader = hasHeader;
        _cc = new CsvContext();
        _inputFileDescription = new CsvFileDescription
        {
            SeparatorChar = separator,
            FirstLineHasColumnNames = hasHeader
        };

        // Read returns a deferred sequence that re-opens and re-reads the file on every
        // enumeration. Copy it into a list once so later accesses are served from memory.
        // Trade-off: the whole file is held in memory for the lifetime of this object.
        _dataTable = new List<DataRow>(_cc.Read<DataRow>(_filePath, _inputFileDescription));

        if (hasHeader)
        {
            ParseHeaders();
        }
    }

    /// <summary>
    /// Returns the values of one column, in row order.
    /// </summary>
    /// <param name="col">Zero-based index of the column</param>
    /// <returns>A new list with one entry per row</returns>
    public List<string> GetCol(int col)
    {
        List<string> column = new List<string>();
        foreach (DataRow x in _dataTable)
        {
            column.Add(x[col].Value);
        }
        return column;
    }

    /// <summary>
    /// Reads the first line of the file and splits it at the separator into _headers.
    /// _headers stays null when the file is empty.
    /// </summary>
    private void ParseHeaders()
    {
        // using guarantees the reader is closed even if ReadLine or Split throws.
        using (System.IO.StreamReader file = new System.IO.StreamReader(_filePath))
        {
            if (!file.EndOfStream)
            {
                _headers = new List<string>(file.ReadLine().Split(_separator));
            }
        }
    }
}
Upvotes: 0
Views: 774
Reputation: 10427
Try this:
public class CsvFile
{
    // Row sequence returned by cc.Read for the configured file path and file description.
    private IEnumerable<DataRow> rows = cc.Read<DataRow>(_filePath, _inputFileDescription);
    //...

    /// <summary>
    /// Exposes the row sequence so callers can iterate over it themselves.
    /// </summary>
    public IEnumerable<DataRow> Rows
    {
        get
        {
            return rows;
        }
    }
}
And then:
CsvFile file1 = new CsvFile("C:\\dev_csvcompare\\Master.csv", ';', true);
// A single foreach enumerates the rows once instead of once per item access.
foreach (DataRow row in file1.Rows)
{
    // row[1] is a DataRowItem, not a string; the cell text is in its Value property.
    string dummy = row[1].Value;
}
Upvotes: 0