my Name is Ammu
my Name is Ammu

Reputation: 61

Compare (diff) strings in two cells by character

I have text in two cells with different text. I'm attempting to identify the diff (difference between the text: text that is added or missing) between the two cells

  1. In A1I have a paragraph of text.
  2. B1 contains a paragraph that is similar, however there are minor differences.

I'm attempting to identify the difference between these strings, please help me to identify those differences in both cells with color using VBA

Upvotes: 3

Views: 8731

Answers (1)

Excel Hero
Excel Hero

Reputation: 14764

I have a solution for your problem and have uploaded a workbook with your sample string pairs. Here is the workbook.

My code is based on the Needleman–Wunsch algorithm, which was first developed in 1970 and is still used today to align DNA sequences in science tech. However I modified the alogrithm and added additional post-processing to work with your sample data string pairs.

Here is how to work the process. Enter your two strings to compare in A1 and A2.

Press Alt-F8 and run the macro, AlignStrings.

The results will be displayed in cells A5 and A6.

Note that other sample string pairs can be found further down the sheet, beginning in cell A21.

Here is the code from the workbook that accomplishes the string pair alignment and highlighting of the differences:

Public Sub AlignStrings()
    Dim a() As Byte, b() As Byte, a_$, b_$, i&, j&, d&, u&, l&, x&, y&, f&()
    Const GAP = -1
    Const PAD = "_"

    a = [a1].Text: b = [a2].Text
    [a3:a6].Clear
    [a1:a6].Font.Name = "Courier New"

    ReDim f(0 To UBound(b) \ 2 + 1, 0 To UBound(a) \ 2 + 1)

    For i = 1 To UBound(f, 1)
        For j = 1 To UBound(f, 2)
            x = j - 1: y = i - 1
            If a(x * 2) = b(y * 2) Then
                d = 1 + f(y, x)
                u = 0 + f(y, j)
                l = 0 + f(i, x)
            Else
                d = -1 + f(y, x)
                u = GAP + f(y, j)
                l = GAP + f(i, x)
            End If
            f(i, j) = Max(d, u, l)
        Next
    Next

    i = UBound(f, 1): j = UBound(f, 2)
    On Error Resume Next
    Do
        x = j - 1: y = i - 1
        d = f(y, x)
        u = f(y, j)
        l = f(i, x)
        Select Case True
            Case Err
                If y < 0 Then GoTo left Else GoTo up
            Case d >= u And d >= l Or Mid$(a, j, 1) = Mid$(b, i, 1)
diag:
                a_ = Mid$(a, j, 1) & a_
                b_ = Mid$(b, i, 1) & b_
                i = i - 1: j = j - 1
            Case u > l
up:
                a_ = PAD & a_
                b_ = Mid$(b, i, 1) & b_
                i = i - 1
            Case l > u
left:
                a_ = Mid$(a, j, 1) & a_
                b_ = PAD & b_
                j = j - 1
        End Select
    Loop Until i < 1 And j < 1

    DecorateStrings a_, b_, [a5], [a6], PAD

End Sub


Private Function Max(a&, b&, c&) As Long
    Max = a
    If b > a Then Max = b
    If c > b Then Max = c
End Function


Private Sub DecorateStrings(a$, b$, rOutA As Range, rOutB As Range, PAD$)
    Dim i&, j&

    FloatArtifacts a, b, PAD
    FloatArtifacts b, a, PAD

    rOutA = a
    rOutB = b

    For i = 1 To Len(a)
        If Mid$(a, i, 1) <> Mid$(b, i, 1) Then
            If Mid$(a, i, 1) <> PAD Then
                rOutA.Characters(i, 1).Font.Color = vbRed
            End If
        End If
    Next
    For i = 1 To Len(b)
        If Mid$(a, i, 1) <> Mid$(b, i, 1) Then
            If Mid$(b, i, 1) <> PAD Then
                rOutB.Characters(i, 1).Font.Color = vbRed
            End If
        End If
    Next

End Sub


Private Sub FloatArtifacts(s1$, s2$, PAD$)
    Dim c&, k&, i&, p&
    For i = 1 To Len(s1)
        c = InStr(i, s1, PAD)
        If c Then
            k = 0
            Do
                k = k + 1
                If Mid$(s1, c + k, 1) <> PAD Then
                    If Mid$(s2, c, 1) = Mid$(s1, c + k, 1) Then
                        p = InStr(c + k, s1, PAD)
                        If p < (c + k + 6) And p > 0 Then
                            Mid$(s1, c, 1) = Mid$(s1, c + k, 1)
                            Mid$(s1, c + k, 1) = PAD
                            i = c
                            Exit Do
                        Else
                            i = c + k
                            Exit Do
                        End If
                    Else
                        i = c + k
                        Exit Do
                    End If
                End If
                If c + k > Len(s1) Then Exit Do
            Loop
        Else
            Exit For
        End If
    Next
End Sub

Upvotes: 8

Related Questions