Reputation: 61
I have text in two cells with different text. I'm attempting to identify the diff (difference between the text: text that is added or missing) between the two cells
A1
I have a paragraph of text.B1
contains a paragraph that is similar, however there are minor differences.I'm attempting to identify the difference between these strings, please help me to identify those differences in both cells with color using VBA
Upvotes: 3
Views: 8731
Reputation: 14764
I have a solution for your problem and have uploaded a workbook with your sample string pairs. Here is the workbook.
My code is based on the Needleman–Wunsch algorithm, which was first developed in 1970 and is still used today to align DNA sequences in science tech. However I modified the alogrithm and added additional post-processing to work with your sample data string pairs.
Here is how to work the process. Enter your two strings to compare in A1 and A2.
Press Alt-F8 and run the macro, AlignStrings
.
The results will be displayed in cells A5 and A6.
Note that other sample string pairs can be found further down the sheet, beginning in cell A21.
Here is the code from the workbook that accomplishes the string pair alignment and highlighting of the differences:
Public Sub AlignStrings()
Dim a() As Byte, b() As Byte, a_$, b_$, i&, j&, d&, u&, l&, x&, y&, f&()
Const GAP = -1
Const PAD = "_"
a = [a1].Text: b = [a2].Text
[a3:a6].Clear
[a1:a6].Font.Name = "Courier New"
ReDim f(0 To UBound(b) \ 2 + 1, 0 To UBound(a) \ 2 + 1)
For i = 1 To UBound(f, 1)
For j = 1 To UBound(f, 2)
x = j - 1: y = i - 1
If a(x * 2) = b(y * 2) Then
d = 1 + f(y, x)
u = 0 + f(y, j)
l = 0 + f(i, x)
Else
d = -1 + f(y, x)
u = GAP + f(y, j)
l = GAP + f(i, x)
End If
f(i, j) = Max(d, u, l)
Next
Next
i = UBound(f, 1): j = UBound(f, 2)
On Error Resume Next
Do
x = j - 1: y = i - 1
d = f(y, x)
u = f(y, j)
l = f(i, x)
Select Case True
Case Err
If y < 0 Then GoTo left Else GoTo up
Case d >= u And d >= l Or Mid$(a, j, 1) = Mid$(b, i, 1)
diag:
a_ = Mid$(a, j, 1) & a_
b_ = Mid$(b, i, 1) & b_
i = i - 1: j = j - 1
Case u > l
up:
a_ = PAD & a_
b_ = Mid$(b, i, 1) & b_
i = i - 1
Case l > u
left:
a_ = Mid$(a, j, 1) & a_
b_ = PAD & b_
j = j - 1
End Select
Loop Until i < 1 And j < 1
DecorateStrings a_, b_, [a5], [a6], PAD
End Sub
Private Function Max(a&, b&, c&) As Long
Max = a
If b > a Then Max = b
If c > b Then Max = c
End Function
Private Sub DecorateStrings(a$, b$, rOutA As Range, rOutB As Range, PAD$)
Dim i&, j&
FloatArtifacts a, b, PAD
FloatArtifacts b, a, PAD
rOutA = a
rOutB = b
For i = 1 To Len(a)
If Mid$(a, i, 1) <> Mid$(b, i, 1) Then
If Mid$(a, i, 1) <> PAD Then
rOutA.Characters(i, 1).Font.Color = vbRed
End If
End If
Next
For i = 1 To Len(b)
If Mid$(a, i, 1) <> Mid$(b, i, 1) Then
If Mid$(b, i, 1) <> PAD Then
rOutB.Characters(i, 1).Font.Color = vbRed
End If
End If
Next
End Sub
Private Sub FloatArtifacts(s1$, s2$, PAD$)
Dim c&, k&, i&, p&
For i = 1 To Len(s1)
c = InStr(i, s1, PAD)
If c Then
k = 0
Do
k = k + 1
If Mid$(s1, c + k, 1) <> PAD Then
If Mid$(s2, c, 1) = Mid$(s1, c + k, 1) Then
p = InStr(c + k, s1, PAD)
If p < (c + k + 6) And p > 0 Then
Mid$(s1, c, 1) = Mid$(s1, c + k, 1)
Mid$(s1, c + k, 1) = PAD
i = c
Exit Do
Else
i = c + k
Exit Do
End If
Else
i = c + k
Exit Do
End If
End If
If c + k > Len(s1) Then Exit Do
Loop
Else
Exit For
End If
Next
End Sub
Upvotes: 8