Reputation: 129
I have written a code to compare two worksheets WS1 and Ws2. The code reads the primary key of each row from ws1 and finds the corresponding row with the same primary key in ws2 then all the other column attributes are matched between the two worksheets and reported accordingly.
The code is :
Sub DataComparator(ws1 As Worksheet, ws2 As Worksheet)
Dim ws1row As Long, ws2row As Long, ws1col As Integer, ws2col As Integer
Dim maxrow As Long, maxcol As Integer, colval1 As String, colval2 As String
Dim difference As Long, reportrow As Long, reportcol As Long, flag As Boolean
Dim row As Long, col As Long, pki As Long, pk As String, counter As Long
Dim PctDone As Single, cell1 As String, cell2 As String, bfailed As Boolean
TestDataComparator.FrameProgress.Visible = True
TestDataComparator.LabelProgress.Visible = True
'UserForm1.Visible = True
'Application.ScreenUpdating = False
DoEvents
With ws1.UsedRange
ws1row = .Rows.Count
ws1col = .Columns.Count
End With
With ws2.UsedRange
ws2row = .Rows.Count
ws2col = .Columns.Count
End With
maxrow = ws1row
maxcol = ws1col
pk = UCase(TestDataComparator.TextBox1.Value)
For col = 1 To maxcol
If pk = UCase(ws1.Cells(1, col).Formula) Then
pki = col
End If
Next col
If maxrow < ws2row Then maxrow = ws2row
If maxcol < ws2col Then maxcol = ws2col
difference = 0
reportrow = 0
For row = 2 To maxrow
keyval = ws1.Cells(row, 1).Formula
flag = False
bfailed = False
'reportcol = 1
For col = 2 To maxcol
'If col = pki Then
'Exit For
'End If
counter = counter + 1
cell1 = ""
cell2 = ""
cell1 = ws1.Cells(row, col).Formula
On Error Resume Next
'Set Rng = Range("A2:" & Cells(ws2row, "A").Address)
cell2 = Application.WorksheetFunction.VLookup(keyval, ws2.UsedRange, col, False)
If Err.Number <> 0 Then bfailed = True
On Error GoTo 0
If bfailed = True Then
Exit For
End If
If cell1 <> cell2 Then
flag = True
'difference = difference + 1
diffcolname = ws1.Cells(1, col)
ws1.Cells(row, col).Interior.Color = RGB(255, 255, 0)
ws1.Cells(1, col).Interior.Color = RGB(255, 0, 0)
ws1.Cells(row, col).Font.Bold = True
ws1.Cells(1, pki).Interior.Color = RGB(0, 255, 0)
ws1.Cells(row, pki).Interior.Color = RGB(255, 255, 0)
ws1.Cells(row, pki).Font.Color = RGB(255, 0, 0)
ws1.Cells(row, pki).Font.Bold = True
End If
Next col
If flag = True Then
reportrow = reportrow + 1
End If
PctDone = counter / (maxrow * maxcol)
TestDataComparator.FrameProgress.Caption = "Progress..." & Format(PctDone, "0%")
TestDataComparator.LabelProgress.Width = PctDone * (TestDataComparator.FrameProgress.Width - 10)
DoEvents
Next row
TestDataComparator.Totalcount.Value = row - 2
TestDataComparator.mismatchCount.Value = reportrow
TestDataComparator.mismatchCount.Font = Bold
difference = 0
For col = 1 To maxcol
If ws1.Cells(1, col).Interior.Color = RGB(255, 0, 0) Then
difference = difference + 1
TestDataComparator.AttributeNameList.AddItem (ws1.Cells(1, col))
End If
Next col
TestDataComparator.FrameProgress.Visible = False
TestDataComparator.LabelProgress.Visible = False
'TestDataComparator.PleaseWait.Visible = False
MsgBox difference & " columns contain different data! ", vbInformation, "Comparing Two Worksheets"
Application.ScreenUpdating = True
End Sub
I want the vlookup function to search for the match only in the entire column of WS2 which has the primary key (index pki) rather than ws2.UsedRange. Please provide suggestions. Is there any option which will perform better than vlookup? The use of ws2.UsedRange is making it difficult to search in large datasets that is why I want to reduce search space. My dataset has above 40K rows and 155 columns in excel. Also suggest me a formula for calculating the progress in the progress bar if you think it is not appropriate.
Sample data from OP's comment:
Name Height Weight
Jane 5'6'' 78
Mike 5'4'' 89
Monica 5'2'' 56
Upvotes: 0
Views: 429
Reputation:
I've reduced your VLOOKUP
for every column to a single MATCH
to verify that it exists and one MATCH
to set the WS2
row where the match occurs. Everything else is done with direct addressing.
Sub DataComparator(ws1 As Worksheet, ws2 As Worksheet)
Dim ws1row As Long, ws2row As Long, ws1col As Long, ws2col As Long
Dim maxrow As Long, maxcol As Long, colval1 As String, colval2 As String
Dim difference As Long, reportrow As Long, reportcol As Long, flag As Boolean
Dim rw As Long, cl As Long, pki As Long, pk As String, counter As Long
Dim cell1 As String, cell2 As String, bfailed As Boolean
Dim iPCT As Long, ws2rw As Long, rWS1cr As Range, rWS2cr As Range, keyval As Variant, app As Application
Set app = Application
'UserForm1.Visible = True
app.ScreenUpdating = False
'DoEvents
With ws1.Cells(1, 1).CurrentRegion
Set rWS1cr = .Cells
ws1row = .Rows.Count
ws1col = .Columns.Count
End With
With ws2.Cells(1, 1).CurrentRegion
Set rWS2cr = .Cells
ws2row = .Rows.Count
ws2col = .Columns.Count
End With
maxrow = ws1row
maxcol = ws1col
'pk = UCase(TestDataComparator.TextBox1.Value)
For cl = 1 To maxcol
If pk = UCase(rWS1cr.Cells(1, cl).Value) Then
pki = cl
Exit For
End If
Next cl
If maxrow < ws2row Then maxrow = ws2row
If maxcol < ws2col Then maxcol = ws2col
difference = 0
reportrow = 0
With rWS1cr
For rw = 2 To maxrow
keyval = ws1.Cells(rw, 1).Value
If Not IsError(app.Match(keyval, rWS2cr.Columns(1), 0)) Then
ws2rw = app.Match(keyval, rWS2cr.Columns(1), 0)
flag = False
For cl = 2 To maxcol
counter = counter + 1
cell1 = vbNullString
cell2 = vbNullString
cell1 = .Cells(rw, cl).Value
cell2 = rWS2cr.Cells(ws2rw, cl).Value
If cell1 <> cell2 Then
flag = True
'diffcolname = .Cells(1, cl)
.Cells(rw, cl).Interior.Color = RGB(255, 255, 0)
.Cells(1, cl).Interior.Color = RGB(255, 0, 0)
.Cells(rw, cl).Font.Bold = True
.Cells(1, pki).Interior.Color = RGB(0, 255, 0)
.Cells(rw, pki).Interior.Color = RGB(255, 255, 0)
.Cells(rw, pki).Font.Color = RGB(255, 0, 0)
.Cells(rw, pki).Font.Bold = True
End If
Next cl
reportrow = reportrow - CLng(flag)
If iPCT <> CLng((rw / maxrow) * 100) Then
iPCT = CLng((rw / maxrow) * 100)
app.StatusBar = "Progress - " & Format(iPCT, "0\%")
End If
End If
Next rw
For cl = 1 To maxcol
If .Cells(1, cl).Interior.Color = RGB(255, 0, 0) Then
difference = difference + 1
'TestDataComparator.AttributeNameList.AddItem (ws1.Cells(1, col))
End If
Next cl
MsgBox difference & " columns contain different data! ", vbInformation, "Comparing Two Worksheets"
End With
difference = 0
app.ScreenUpdating = True
app.StatusBar = vbNullString
Set app = Nothing
End Sub
I prefer .CurrentRegion
to .UsedRange
as I find it more reliable. This code was not tested but it does compile and I had to comment out a number of external references to get that to happen.
Upvotes: 0
Reputation: 1175
I think using a Dictionary (aka Hashtable in other languages) can make it faster. You will need to reference the Microsoft Scripting Runtime library.
You need to read ws2 key values with their row numbers into the Dictionary in one loop before you start going through ws1 row by row. Then in your loop you just look up the value in your dictionary to get its row number on ws2. Something like this:
Dim ws2keys As Dictionary
Set ws2keys = New Dictionary
' assuming you have a header row
For row = 2 To ws2.UsedRange.Rows.Count
keyValue = ws1.Cells(row, 1).Value
If keyValue <> "" Then ws2keys.Add(keyValue, row)
Next
' your dictionary is ready
Then in your loop, instead of using VLookup when going row by row on ws1:
ws2RowIndex = ws2keys.Item(ws1KeyValueYouAreLookingFor)
(The code might not be perfect, I do not have anything Microsoft related on this machine to check the syntax, sorry.)
Upvotes: 2