Reputation: 11393
If i have two data tables with same structure,the same primary key and the same number of columns .
How to compare their content
and detect the cells which are not the same in the two data tables ?
ex:
TB_Offline
emp_num(key) salary ov
455 3000 67.891
677 5000 89.112
778 6000 12.672
TB_Online
emp_num(key) salary ov
455 3000 67.891
677 5000 50.113
778 5500 12.672
I want to get result like this(or some structure to show the differences) :
emp_num(key)| salary_off |salary_on|s1 | ov_off | ov_on |s2
677 | 5000 | 5000 | 1 | 89.112 | 50.113 | 0
778 | 6000 | 5500 | 0 | 12.672 | 12.672 | 1
NOTE:
455 doesn't exist in the result because it was the exact in all columns among the two datatables.
Upvotes: 7
Views: 12164
Reputation: 47
Solution : Absolute Basic You can create Table structure Dynamically as well.
using System;
using System.Collections.Generic;
using System.Data;
using System.Linq;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
namespace WebApplication1
{
public partial class Default : System.Web.UI.Page
{
protected void Page_Load(object sender, EventArgs e)
{
//Table 1
DataTable table1 = new DataTable();
table1.Columns.Add("emp_num(key)", typeof(Int32));
table1.Columns.Add("salary", typeof(float));
table1.Columns.Add("ov", typeof(float));
table1.Rows.Add(455, 3000, 67.56);
table1.Rows.Add(456, 4000, 77.56);
table1.Rows.Add(457, 6000, 87.56);
grdTable1.DataSource = table1;
grdTable1.DataBind();
//Table 2
DataTable table2 = new DataTable();
table2.Columns.Add("emp_num(key)", typeof(Int32));
table2.Columns.Add("salary", typeof(float));
table2.Columns.Add("ov", typeof(float));
table2.Rows.Add(455, 3000, 67.56);
table2.Rows.Add(456, 4000, 27.56);
table2.Rows.Add(457, 5000, 87.56);
grdTable2.DataSource = table2;
grdTable2.DataBind();
//Compare
DataTable result = new DataTable();
result.Columns.Add("emp_num(key)", typeof(Int32));
result.Columns.Add("salary_Table1", typeof(float));
result.Columns.Add("salary_Table2", typeof(float));
result.Columns.Add("same_Salary", typeof(string));
result.Columns.Add("ov_Table1", typeof(float));
result.Columns.Add("ov_Table2", typeof(float));
result.Columns.Add("Same_OV", typeof(string));
foreach (DataRow t1r in table1.Rows)
{
foreach (DataRow t2r in table2.Rows)
{
var array1 = t1r.ItemArray;
var array2 = t2r.ItemArray;
//Check ID Column to decide if compare needs to be done
if (array1[0].ToString() == array2[0].ToString())
{
if (array1.SequenceEqual(array2))
{
break;
}
else
{
int SalSame = 0;
int OvSame = 0;
if (array1[1].ToString() == array2[1].ToString())
{
SalSame = 1;
}
if (array1[2].ToString() == array2[2].ToString())
{
OvSame = 1;
}
result.Rows.Add(array1[0].ToString(), array1[1].ToString(), array2[1].ToString(), SalSame, array1[2].ToString(), array2[2].ToString(), OvSame);
}
}
}
}
grdresult.DataSource = result;
grdresult.DataBind();
}
}
}
<%@ Page Language="C#" AutoEventWireup="true" CodeBehind="Default.aspx.cs" Inherits="WebApplication1.Default" %>
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head runat="server">
<title></title>
</head>
<body>
<form id="form1" runat="server">
<div>
<h3>First Table </h3>
<asp:GridView runat="server" ID="grdTable1"></asp:GridView>
<h3>Second Table </h3>
<asp:GridView runat="server" ID="grdTable2"></asp:GridView>
<h3>Result Table </h3>
<asp:GridView runat="server" ID="grdresult"></asp:GridView>
</div>
</form>
</body>
</html>
<div>
<h3>First Table </h3>
<div>
<table cellspacing="0" rules="all" border="1" id="grdTable1" style="border-collapse:collapse;">
<tr>
<th scope="col">emp_num(key)</th><th scope="col">salary</th><th scope="col">ov</th>
</tr><tr>
<td>455</td><td>3000</td><td>67.56</td>
</tr><tr>
<td>456</td><td>4000</td><td>77.56</td>
</tr><tr>
<td>457</td><td>6000</td><td>87.56</td>
</tr>
</table>
</div>
<h3>Second Table </h3>
<div>
<table cellspacing="0" rules="all" border="1" id="grdTable2" style="border-collapse:collapse;">
<tr>
<th scope="col">emp_num(key)</th><th scope="col">salary</th><th scope="col">ov</th>
</tr><tr>
<td>455</td><td>3000</td><td>67.56</td>
</tr><tr>
<td>456</td><td>4000</td><td>27.56</td>
</tr><tr>
<td>457</td><td>5000</td><td>87.56</td>
</tr>
</table>
</div>
<h3>Result Table </h3>
<div>
<table cellspacing="0" rules="all" border="1" id="grdresult" style="border-collapse:collapse;">
<tr>
<th scope="col">emp_num(key)</th><th scope="col">salary_Table1</th><th scope="col">salary_Table2</th><th scope="col">same_Salary</th><th scope="col">ov_Table1</th><th scope="col">ov_Table2</th><th scope="col">Same_OV</th>
</tr><tr>
<td>456</td><td>4000</td><td>4000</td><td>1</td><td>77.56</td><td>27.56</td><td>0</td>
</tr><tr>
<td>457</td><td>6000</td><td>5000</td><td>0</td><td>87.56</td><td>87.56</td><td>1</td>
</tr>
</table>
</div>
Upvotes: 0
Reputation: 6570
Here is my take on it:
using System;
using System.Collections.Generic;
using System.Data;
using System.Linq;
namespace SO_DataTableCompare
{
class Program
{
static void Main(string[] args)
{
/// Build data and test the underlying method.
Dictionary<string,Type> columns = new Dictionary<string, Type>();
columns.Add("emp_num", typeof(int));
columns.Add("salary", typeof(int));
columns.Add("ov", typeof(double));
DataTable left = new DataTable();
foreach (KeyValuePair<string,Type> column in columns)
{
left.Columns.Add(column.Key, column.Value);
}
left.Rows.Add(455, 3000, 67.891);
left.Rows.Add(677, 5000, 89.112);
left.Rows.Add(778, 6000, 12.672);
left.Rows.Add(9001, 5500, 12.672);
left.Rows.Add(4, null, 9.2);
//left.Dump("Left");
DataTable right = new DataTable();
right.Columns.Add("outlier", typeof(string));
foreach (KeyValuePair<string, Type> column in columns)
{
right.Columns.Add(column.Key, column.Value);
}
right.Columns.Add("float", typeof(float));
right.Rows.Add(0, 455, 3000, 67.891, 5);
right.Rows.Add(1, 677, 5000, 50.113, 5);
right.Rows.Add(2, 778, 5500, 12.672, 6);
right.Rows.Add(2, 9000, 5500, 12.672, 6);
right.Rows.Add(3, 4, 10, 9.2, 7);
//right.Dump("Right");
// Compare.
DataTable results = Compare(left, right, "emp_num");
//results.Dump("Results"); // Fancy table output via LINQPad.
// Get the comparison columns for display.
List<string> comparedColumns = new List<string>();
foreach (DataColumn column in results.Columns)
{
comparedColumns.Add(column.ColumnName);
}
// Display the comparison rows.
Console.WriteLine(string.Join(", ", comparedColumns));
foreach (DataRow row in results.Rows)
{
Console.WriteLine(string.Join(", ", row.ItemArray));
}
Console.ReadKey();
}
private static DataTable Compare(DataTable left, DataTable right, string keyColumn, string suffix1="_off",string suffix2="_on")
{
var columns = left.Columns.OfType<DataColumn>().Select(c => c.ColumnName).ToList();
var updated = left.Rows.OfType<DataRow>()
.Join(right.Rows.OfType<DataRow>(), row => row[keyColumn], row => row[keyColumn], (row1, row2) => new { key = row1[keyColumn], row1, row2 })
.Where(o => o.row2!=null && !DataRowSame(o.row1, o.row2, columns));
//var deleted = left.Rows.OfType<DataRow>().Except(right.Rows.OfType<DataRow>(), new DataRowKeyComparer(keyColumn));
//var inserted = right.Rows.OfType<DataRow>().Except(left.Rows.OfType<DataRow>(), new DataRowKeyComparer(keyColumn));
var result = new DataTable();
result.Columns.Add(keyColumn, left.Columns[keyColumn].DataType);
int k = 0;
foreach (var name in columns.Where(c=>c!=keyColumn))
{
k++;
result.Columns.Add(name + suffix1, left.Columns[name].DataType);
result.Columns.Add(name + suffix2, right.Columns[name].DataType);
result.Columns.Add("s"+k, typeof(int));
}
result.BeginLoadData();
foreach (var upd in updated)
{
var vals = new[] { upd.key }.Concat(
columns.Where(c => c != keyColumn)
.Select(c => new
{
l = upd.row1[c],
r = upd.row2[c]
})
.SelectMany(o => new object[] { o.l, o.r, object.Equals(o.l, o.r) ? 1 : 0 })
).ToArray();
result.LoadDataRow(vals, LoadOption.OverwriteChanges);
}
result.EndLoadData();
return result;
}
private static bool DataRowSame(DataRow row1, DataRow row2, List<string> columns)
{
foreach (var name in columns)
{
if (!object.Equals(row1[name], row2[name])) return false;
}
return true;
}
}
internal class DataRowKeyComparer : IEqualityComparer<DataRow>
{
private string keyColumn;
public DataRowKeyComparer(string keyColumn)
{
this.keyColumn = keyColumn;
}
public bool Equals(DataRow x, DataRow y)
{
return object.Equals(x[keyColumn], y[keyColumn]);
}
public int GetHashCode(DataRow obj)
{
return obj.GetHashCode();
}
}
}
Upvotes: 1
Reputation: 71
Here's an implementation in a fairly generic method that compares two DataTables and returns another DataTable with the differences shown.
Populating the DataTables...
/// Build data and test the underlying method. public void Main() { Dictionary columns = new Dictionary(); columns.Add("emp_num", typeof(int)); columns.Add("salary", typeof(int)); columns.Add("ov", typeof(double)); DataTable left = new DataTable(); foreach(KeyValuePair column in columns) { left.Columns.Add(column.Key, column.Value); } left.Rows.Add(455, 3000, 67.891); left.Rows.Add(677, 5000, 89.112); left.Rows.Add(778, 6000, 12.672); left.Rows.Add(9001, 5500, 12.672); left.Rows.Add(4, null, 9.2); //left.Dump("Left"); DataTable right = new DataTable(); right.Columns.Add("outlier", typeof(string)); foreach (KeyValuePair column in columns) { right.Columns.Add(column.Key, column.Value); } right.Columns.Add("float", typeof(float)); right.Rows.Add(0, 455, 3000, 67.891, 5); right.Rows.Add(1, 677, 5000, 50.113, 5); right.Rows.Add(2, 778, 5500, 12.672, 6); right.Rows.Add(2, 9000, 5500, 12.672, 6); right.Rows.Add(3, 4, 10, 9.2, 7); //right.Dump("Right"); // Compare. DataTable results = Compare(left, right, "emp_num"); //results.Dump("Results"); // Fancy table output via LINQPad. // Get the comparison columns for display. List comparedColumns = new List(); foreach (DataColumn column in results.Columns) { comparedColumns.Add(column.ColumnName); } // Display the comparison rows. Console.WriteLine(string.Join(", ", comparedColumns)); foreach(DataRow row in results.Rows) { Console.WriteLine(string.Join(", ", row.ItemArray)); } }
Generic Method: DataTable Compare(DataTable, DataTable)
/// Compares the values of each row in the provided DataTables and returns any rows that have a difference based on a provided 'key' column. /// the 'pre' data. /// the 'post' data. /// Name of the column to use for matching rows. /// New DataTable populated with difference rows only. public DataTable Compare(DataTable left, DataTable right, string keyColumn) { const string Pre = "_Pre"; const string Post = "_Post"; DataColumn leftKey = left.Columns.Contains(keyColumn) ? left.Columns[keyColumn] : null; DataColumn rightKey = right.Columns.Contains(keyColumn) ? right.Columns[keyColumn] : null; if (leftKey == null || rightKey == null) { return null; } // Get the matching columns between the two tables for doing comparisons. List comparisonColumns = new List(); DataTable results = new DataTable(); // Adding the key column to the front for sake of ease of viewing. results.Columns.Add(new DataColumn(leftKey.ColumnName, leftKey.DataType)); foreach (DataColumn column in left.Columns) { if(column == leftKey) { continue; } // Remove any columns that are not present in the compare table. foreach (DataColumn compareColumn in right.Columns) { if (column.ColumnName == compareColumn.ColumnName && column.DataType == compareColumn.DataType) { comparisonColumns.Add(column.ColumnName); results.Columns.Add(new DataColumn(column.ColumnName + Pre, column.DataType)); results.Columns.Add(new DataColumn(column.ColumnName + Post, column.DataType)); break; } } } foreach (DataRow leftRow in left.Rows) { object key = leftRow.Field(leftKey); string filterExpression = string.Format("{0} = {1}", keyColumn, key); DataRow rightRow = right.Select(filterExpression).SingleOrDefault(); // Need a row for a comparison to be valid. if (rightRow == null) { continue; } List comparison = new List(); comparison.Add(key); bool isDiff = false; foreach (string comparisonColumn in comparisonColumns) { object pre = leftRow.ItemArray[left.Columns.IndexOf(comparisonColumn)]; comparison.Add(pre); object post = rightRow.ItemArray[right.Columns.IndexOf(comparisonColumn)]; comparison.Add(post); // Only need the row if the values differ in at least one column. isDiff |= (pre == null && post != null) || (pre != null && post == null) || (!pre.Equals(post)); } if (isDiff) { results.Rows.Add(comparison.ToArray()); } } return results; }
Output:
emp_num, salary_Pre, salary_Post, ov_Pre, ov_Post 677, 5000, 5000, 89.112, 50.113 778, 6000, 5500, 12.672, 12.672 4, , 10, 9.2, 9.2
Upvotes: 7
Reputation: 2881
Here is one way to achieve your task:
var dt1 = new DataTable();
dt1.Columns.Add("emp_num", typeof(int));
dt1.Columns.Add("salary", typeof(int));
dt1.Columns.Add("ov", typeof(double));
dt1.Columns[0].Unique = true;
dt1.Rows.Add(455, 3000, 67.891);
dt1.Rows.Add(677, 6000, 50.113);
dt1.Rows.Add(778, 5500, 12.650);
dt1.Rows.Add(779, 5500, 12.672);
var dt2 = new DataTable();
dt2.Columns.Add("emp_num", typeof(int));
dt2.Columns.Add("salary", typeof(int));
dt2.Columns.Add("ov", typeof(double));
dt2.Columns[0].Unique = true;
dt2.Rows.Add(455, 3000, 67.891);
dt2.Rows.Add(677, 5000, 50.113);
dt2.Rows.Add(778, 5500, 12.672);
dt2.Rows.Add(779, 5500, 12.672);
var dtListValues1 = new List<List<string>>();
for (int j = 0; j < dt2.Rows.Count; j++)
{
var list = new List<string>();
for (var i = 0; i < dt2.Columns.Count; i++)
{
list.Add(dt2.Rows[j][i].ToString());
list.Add("===");
list.Add(dt1.Rows[j][i].ToString());
list.Add("||");
if(dt2.Rows[j][i].ToString() == dt1.Rows[j][i].ToString())
{
list.Add("true");
}
else
{
list.Add("false");
}
}
dtListValues1.Add(list);
}
var rowsWithDifferentCells = dtListValues1.Where(x => x.Contains("false"));
foreach (var item in dtListValues1)
{
Console.WriteLine("Row-->> "+ string.Join(",",item));
}
Console.WriteLine("----------------------------------------");
foreach (var item in rowsWithDifferentCells)
{
Console.WriteLine("Row with different cell-->> "+string.Join(",", item));
}
I put all test data in a List<List<string>>()
, in the process I also made check if the values are the same. Then I put a filter on false, to show me only the rows which contain cells with different values.
You just need to put this code in a ConsoleApp. and test it.
For sure is not the best approach,but is a quick way to check your data.
Upvotes: 3
Reputation: 18646
According you the new requirements you might try this.
It first gets the primary key name - the Unique
column, then the other columns names, creates a new DataTable
with new column names, fills the values and creates an expression to compare the them:
void Main()
{
// create some test data
var dt1 = new DataTable();
dt1.Columns.Add("emp_num", typeof(int));
dt1.Columns.Add("salary", typeof(int));
dt1.Columns.Add("ov", typeof(double));
dt1.Columns[0].Unique = true;
dt1.Rows.Add(455, 3000, 67.891);
dt1.Rows.Add(677, 5000, 89.112);
dt1.Rows.Add(778, 6000, 12.672);
var dt2 = new DataTable();
dt2.Columns.Add("emp_num", typeof(int));
dt2.Columns.Add("salary", typeof(int));
dt2.Columns.Add("ov", typeof(double));
dt2.Columns[0].Unique = true;
dt2.Rows.Add(455, 3000, 67.891);
dt2.Rows.Add(677, 5000, 50.113);
dt2.Rows.Add(778, 5500, 12.672);
dt2.Rows.Add(779, 5500, 12.672);
var result = CompareDataTables(dt1, dt2);
result.Dump("Result");
}
CompareDataTables
method:
static DataTable CompareDataTables(DataTable dt1, DataTable dt2)
{
var keyName = dt1.Columns.Cast<DataColumn>().Single (x => x.Unique).ColumnName;
var dt1Cols = dt1.Columns.Cast<DataColumn>().Where (x => !x.Unique).Select (x =>x.ColumnName );
var dt2Cols = dt1.Columns.Cast<DataColumn>().Where (x => !x.Unique).Select (x =>x.ColumnName );
// get keys from both data tables
var keys = new HashSet<int>(dt1.AsEnumerable().Select (x => (int)x[keyName]));
keys.UnionWith(dt2.AsEnumerable().Select (x => (int)x[keyName]));
keys.Dump("keys");
// create a new data table that will hold the results
var result = new DataTable();
result.Columns.Add(keyName, typeof(int));
result.Columns[0].Unique = true;
// initialize data and comparison columns
foreach (var name in dt1Cols)
{
result.Columns.Add(name + "_off", dt1.Columns[name].DataType);
result.Columns.Add(name + "_on", dt1.Columns[name].DataType);
result.Columns.Add(name + "_same", typeof(bool), name + "_off = " + name + "_on");
}
foreach (var key in keys)
{
// get a row from each data table with the current key
var rowOff = dt1.Select(keyName + " = " + key).FirstOrDefault();
var rowOn = dt2.Select(keyName + " = " + key).FirstOrDefault();
// create a new row
var newRow = result.NewRow();
// fill the new row with off data
if (rowOff != null)
{
newRow[keyName] = rowOff[keyName];
foreach (var name in dt1Cols)
{
newRow[name + "_off"] = rowOff[name];
}
}
// fill the new row with on data
if (rowOn != null)
{
foreach (var name in dt1Cols)
{
newRow[name + "_on"] = rowOn[name];
}
newRow[keyName] = rowOn[keyName];
}
// add the row to the result data table
result.Rows.Add(newRow);
}
return result;
}
It's not bullet proof. It'd be a good idea to check if the data tables have the same structure.
Upvotes: 2
Reputation: 18646
You can get all keys first, then create a new Summary
objects, put the data there and let it do the comparison work. Finally you can do whatever you want with it:
void Main()
{
var dt1 = new DataTable();
dt1.Columns.Add("emp_num", typeof(int));
dt1.Columns.Add("salary", typeof(int));
dt1.Columns.Add("ov", typeof(double));
dt1.Rows.Add(455, 3000, 67.891);
dt1.Rows.Add(677, 5000, 89.112);
dt1.Rows.Add(778, 6000, 12.672);
var dt2 = new DataTable();
dt2.Columns.Add("emp_num", typeof(int));
dt2.Columns.Add("salary", typeof(int));
dt2.Columns.Add("ov", typeof(double));
dt2.Rows.Add(455, 3000, 67.891);
dt2.Rows.Add(677, 5000, 50.113);
dt2.Rows.Add(778, 5500, 12.672);
dt2.Rows.Add(779, 5500, 12.672);
var keys = new HashSet<int>(dt1.AsEnumerable().Select (x => (int)x["emp_num"]));
keys.UnionWith(dt2.AsEnumerable().Select (x => (int)x["emp_num"]));
keys.Dump("emp_num (keys)");
var results = keys.Select (emp_num =>
{
var rowOff = dt1.Select("emp_num = " + emp_num).FirstOrDefault();
var rowOn = dt2.Select("emp_num = " + emp_num).FirstOrDefault();
return new Summary(emp_num, rowOff, rowOn);
});
results.Dump("Summary");
}
Summary
helper class:
class Summary
{
public Summary(int emp_num, DataRow rowOff, DataRow rowOn)
{
this.emp_num = emp_num;
if (rowOff != null)
{
salary_off = (int)rowOff["salary"];
ov_off = (double)rowOff["ov"];
}
if (rowOn != null)
{
salary_on = (int)rowOn["salary"];
ov_on = (double)rowOn["ov"];
}
}
public int emp_num;
public int salary_off ;
public int salary_on;
public bool salarySame { get { return salary_off == salary_on; } }
public double ov_off ;
public double ov_on;
public bool ovSame { get { return ov_off == ov_on; } }
}
Upvotes: 3
Reputation: 1289
In SQL query, you can JOIN two tables and using SELECT-CASE statement to determine the difference value for each column: e.g
SELECT
TB_Online.emp_num,
TB_Online.salary_on,
TB_Offline.salary_off,
SELECT CASE TB_Online.salary_on = Tb_offline.salary_off then 0 ELSE 1 END AS salary_same
...
FROM TB_Online INNER JOIN TB_Offline
ON TB_Online.emp_num = TB_Offline.emp_num
To remove all rows have exact values, you can SELECT again base on above result table
Upvotes: -1