Reputation: 2525
I want to compare two input csv files to see if there were rows that were added or removed. What is the best way to go about this. I am not using column names because the names of the columns are not consistent for all files.
private void compare_btn_Click(object sender, EventArgs e)
{
string firstFile = firstExcel_txt.Text;
var results = ReadExcel(openFileDialog1);
string secondFile = secondExcel_txt.Text;
var results2 = ReadExcel(openFileDialog2);
}
Reading:
public object ReadExcel(OpenFileDialog openFileDialog)
{
var _excelFile = new ExcelQueryFactory(openFileDialog.FileName);
var _info = from c in _excelFile.WorksheetNoHeader() select c;
string header1, header2, header3;
foreach (var item in _info)
{
header1 = item.ElementAt(0);
header2 = item.ElementAt(1);
header3 = item.ElementAt(2);
}
return _info;
}
any help on how i could do that would be great.
Upvotes: 1
Views: 29809
Reputation: 1047
I was recently working on the same problem statement, where I had to compare two Excel files and print any differences between the two files.
I adopted the approach of exporting both Excel files to a DataTable and then comparing them cell by cell.
C# code snippet using Syncfusion.XlsIO:
public static DataTable SaveExcelToDataTable(string filePath)
{
//Create an instance of ExcelEngine
ExcelEngine excelEngine = new ExcelEngine();
//Initialize application
IApplication application = excelEngine.Excel;
application.DefaultVersion = ExcelVersion.Xlsx;
//Open existing workbook with data entered
FileStream inputStream = new FileStream(filePath, FileMode.Open, FileAccess.Read);
IWorkbook workbook = application.Workbooks.Open(inputStream);
//Access first worksheet from the workbook instance
IWorksheet worksheet = workbook.Worksheets[0];
//Export Excel to DataTable
DataTable dataTable = worksheet.ExportDataTable(worksheet.UsedRange, ExcelExportDataTableOptions.ColumnNames);
return dataTable;
}
public static bool CompareDataTables(DataTable dt1, DataTable dt2)
{
if (dt1.Rows.Count != dt2.Rows.Count || dt1.Columns.Count != dt2.Columns.Count)
return false;
for (int i = 0; i < dt1.Rows.Count; i++)
{
for (int c = 0; c < dt1.Columns.Count; c++)
{
if (!Equals(dt1.Rows[i][c], dt2.Rows[i][c]))
return false;
}
}
return true;
}
Upvotes: -1
Reputation: 10862
I suggest that you calculate a Hash for each row of the excel file, then you can go ahead and compare the hash of each row to see if it matches any hash on the other file (see comments in source code)
I have also provided some classes to store the contents of your Excel files
using System.Security.Cryptography;
private void compare_btn_Click(object sender, EventArgs e)
{
string firstFile = firstExcel_txt.Text;
ExcelInfo file1 = ReadExcel(openFileDialog1);
string secondFile = secondExcel_txt.Text;
ExcelInfo file2 = ReadExcel(openFileDialog2);
CompareExcels(file1,file2) ;
}
public void CompareExcels(ExcelInfo fileA, ExcelInfo fileB)
{
foreach(ExcelRow rowA in fileA.excelRows)
{
//If the current hash of a row of fileA does not exists in fileB then it was removed
if(! fileB.ContainsHash(rowA.hash))
{
Console.WriteLine("Row removed" + rowA.ToString());
}
}
foreach(ExcelRow rowB in fileB.excelRows)
{
//If the current hash of a row of fileB does not exists in fileA then it was added
if(! fileA.ContainsHash(rowB.hash))
{
Console.WriteLine("Row added" + rowB.ToString());
}
}
}
public Class ExcelRow
{
public List<String> lstCells ;
public byte[] hash
public ExcelRow()
{
lstCells = new List<String>() ;
}
public override string ToString()
{
string resp ;
resp = string.Empty ;
foreach(string cellText in lstCells)
{
if(resp != string.Empty)
{
resp = resp + "," + cellText ;
}
else
{
resp = cellText ;
}
}
return resp ;
}
public void CalculateHash()
{
byte[] rowBytes ;
byte[] cellBytes ;
int pos ;
int numRowBytes ;
//Determine how much bytes are required to store a single excel row
numRowBytes = 0 ;
foreach(string cellText in lstCells)
{
numRowBytes += NumBytes(cellText) ;
}
//Allocate space to calculate the HASH of a single row
rowBytes= new byte[numRowBytes]
pos = 0 ;
//Concatenate the cellText of each cell, converted to bytes,into a single byte array
foreach(string cellText in lstCells)
{
cellBytes = GetBytes(cellText) ;
System.Buffer.BlockCopy(cellBytes, 0, rowBytes, pos, cellBytes.Length);
pos = cellBytes.Length ;
}
hash = new MD5CryptoServiceProvider().ComputeHash(rowBytes);
}
static int NumBytes(string str)
{
return str.Length * sizeof(char);
}
static byte[] GetBytes(string str)
{
byte[] bytes = new byte[NumBytes(str)];
System.Buffer.BlockCopy(str.ToCharArray(), 0, bytes, 0, bytes.Length);
return bytes;
}
}
public Class ExcelInfo
{
public List<ExcelRow> excelRows ;
public ExcelInfo()
{
excelRows = new List<ExcelRow>();
}
public bool ContainsHash(byte[] hashToLook)
{
bool found ;
found = false ;
foreach(ExcelRow eRow in excelRows)
{
found = EqualHash(eRow.hash, hashToLook) ;
if(found)
{
break ;
}
}
return found ;
}
public static EqualHash(byte[] hashA, byte[] hashB)
{
bool bEqual ;
int i ;
bEqual = false;
if (hashA.Length == hashB.Length)
{
i = 0;
while ((i < hashA.Length) && (hashA[i] == hashB[i]))
{
i++ ;
}
if (i == hashA.Length)
{
bEqual = true;
}
}
return bEqual ;
}
}
public ExcelInfo ReadExcel(OpenFileDialog openFileDialog)
{
var _excelFile = new ExcelQueryFactory(openFileDialog.FileName);
var _info = from c in _excelFile.WorksheetNoHeader() select c;
ExcelRow excelRow ;
ExcelInfo resp ;
resp = new ExcelInfo() ;
foreach (var item in _info)
{
excelRow = new ExcelRow() ;
//Add all the cells (with a for each)
excelRow.lstCells.Add(item.ElementAt(0));
excelRow.lstCells.Add(item.ElementAt(1));
....
//Add the last cell of the row
excelRow.lstCells.Add(item.ElementAt(N));
//Calculate the hash of the row
excelRow.CalculateHash() ;
//Add the row to the ExcelInfo object
resp.excelRows.Add(excelRow) ;
}
return resp ;
}
Upvotes: 2
Reputation: 13388
Most accurate way would be to convert them both to byte arrays, check for differences when both are converted to an array, use following link for a simple example on how to convert excel sheets to a byte arrays:
Now you have converted both of your excel sheets to a byte[], you should check them for differences by checking if the byte arrays are equal yes or no.
The check can be done by several ways like the following using linq
:
using System.Linq; //SequenceEqual
byte[] FirstExcelFileBytes = null;
byte[] SecondExcelFileBytes = null;
FirstExcelFileBytes = GetFirstExcelFile();
SecondExcelFileBytes = GetSecondExcelFile();
if (FirstExcelFileBytes.SequenceEqual<byte>(SecondExcelFileBytes) == true)
{
MessageBox.Show("Arrays are equal");
}
else
{
MessageBox.Show("Arrays don't match");
}
There are enough other ways to find compare byte arrays, you should do some research about which will suit you best.
Use the following link, to check for things like Row added
, row removed
etc.
Upvotes: 0