sdpitzer
sdpitzer

Reputation: 11

Compare two words and return the number of letter differences

The macro is written to return the number of letter differences (insertions, replacements, or deletions) of two words (case sensitive).

It is suppose to format and output in phrases
1-2 Letters off,
1-2 Letters off, Same Starting Letter,
3-4 Letters off,
3-4 Letters off, Same Starting Letter and
5 or more letters off, CHECK

enter image description here

enter image description here

It is only outputting
1-2 Letters off, Same Starting Letter,
3-4 Letters off, Same Starting Letter and
5 or more Letters off, CHECK

I would like the formatting to stay the same for now.

Sub Test_HW_Formatter()
'declare the variables
Dim i As Integer
Dim j As Integer
Dim k As Integer
Dim testNames As Integer
Dim responses As Integer
Dim printRow As Integer
Dim name As String
Dim count As Integer
Dim coding As String
Dim statLetter As Boolean
Dim tempCount As Integer
Dim tempResp As String
'the queues for the entries, the respective counts, and respective codes
Dim words As Object
Set words = CreateObject("System.Collections.Queue")
Dim counts As Object
Set counts = CreateObject("System.Collections.Queue")
Dim codes As Object
Set codes = CreateObject("System.Collections.Queue")

'set the variables
printRow = 3
testNames = Selection.Columns.count
responses = Selection.Rows.count - 1

Cells(4, 3).Value = Selection(4)
startLetter = True
'make the header
Cells(1, 1).Value = "Name"
Cells(1, 2).Value = "Response"
Cells(1, 3).Value = "Count"
Cells(1, 4).Value = "Code"
Cells(1, 5).Value = "Agency close matches"
Cells(1, 6).Value = "N=" + Trim(Str(responses))
Cells(1, 6).Interior.Color = RGB(255, 255, 204)
Cells(1, 6).HorizontalAlignment = xlCenter
For i = 1 To 5
    Cells(1, i).Interior.Color = RGB(1, 139, 175)
    Cells(1, i).Font.Color = RGB(255, 255, 255)
    Cells(1, i).HorizontalAlignment = xlCenter
Next i

'get the information and put it in the queues
For i = 0 To (testNames - 1)
    name = Selection(i + 1).Value
    For j = 1 To responses
        count = 1
        If Not Selection(j * testNames + i + 1) = "" Then
            For k = 1 To (responses - j)
                If Not Selection((j + k) * testNames + i + 1).Value = "" Then
                    If Trim(UCase(Selection(j * testNames + i + 1).Value)) = Trim(UCase(Selection((j + k) * testNames + i + 1).Value)) Then
                        count = count + 1
                        Selection((j + k) * testNames + i + 1).Value = ""
                    End If
                End If
            Next k
            'get the coding
            coding = ""
            ld = Levenshtein(name, Trim(UCase(Selection(j * testNames + i + 1))))
        If Mid(testName, 1, 1) = Mid(sample, 1, 1) Then
            startLetter = True
        Else
            startLetter = False
            End If 'if for starting letter
            Select Case ld
            Case 0
                coding = "Exact Match"
            Case 1
                If startLetter = True Then
                    coding = "1-2 Letters off, Same Starting Letter"
                Else
                    coding = "1-2 Letters off"
                End If
            Case 2
                If startLetter = True Then
                    coding = "1-2 Letters off, Same Starting Letter"
                Else
                    coding = "1-2 Letters off"
                End If
            Case 3
                If startLetter = True Then
                    coding = "3-4 Letters off, Same Starting Letter"
                Else
                    coding = "3-4 Letters off"
                End If
            Case 4
                If startLetter = True Then
                    coding = "3-4 Letters off, Same Starting Letter"
                Else
                coding = "3-4 Letters off"
                End If
            Case Else
                coding = "5 or more Letters off, CHECK"
            End Select
            'enqueue the values
            tempResp = UCase(Mid(Selection(j * testNames + i + 1).Value, 1, 1)) + LCase(Mid(Selection(j * testNames + i + 1).Value, 2, Len(Selection(j * testNames + i + 1).Value)))
            words.enqueue (tempResp)
            counts.enqueue (count)
            codes.enqueue (coding)
        End If 'if the cell is not blank
    Next j
    'print the queues from the ith column
    'start the section header
    Cells(printRow, 1).Value = name
    Cells(printRow, 1).Font.Color = RGB(255, 255, 255)
    For k = 1 To 5
        Cells(printRow, k).Interior.Color = RGB(1, 139, 175)
        Cells(printRow, k).HorizontalAlignment = xlCenter
    Next k
    tempCount = counts.count
    Cells(150, 20 + i).Value = tempCount
    For k = 1 To tempCount
        Cells(printRow + k, 2).Value = words.dequeue
        Cells(printRow + k, 3).Value = counts.dequeue
        Cells(printRow + k, 4).Value = codes.dequeue
        If Cells(printRow + k, 4).Value = "Exact Match" Then
            Cells(printRow + k, 4).Interior.Color = RGB(236, 239, 218)
        End If
    Next k
    printRow = printRow + tempCount + 2
Next i

End Sub

Upvotes: 0

Views: 141

Answers (1)

Tim Williams
Tim Williams

Reputation: 166271

Edited to add counting replicates of the same name, and skip empty values:

Sub Test_HW_Formatter()

    Dim arr, numReps As Long, ws As Worksheet, col As Long, c As Range
    Dim nm As String, rep As Long, cmp As String
    Dim i As Long, dict As Object, tmp
    
    arr = Selection.Value                    'inputs
    numReps = UBound(arr, 1) - 1             'reps per column
    
    Set ws = Selection.Parent                'sheet with selection
    With ws.Range("A1:E1")
        .Value = Array("Name", "Response", "Count", "Code", "Agency Close match")
        doHeaders .Cells
    End With
    ws.Range("F1").Value = "N=" & numReps
    
    
    Set c = ws.Range("A3")                   'start of output sections
    For col = 1 To UBound(arr, 2)            'loop columns of selection
        
        nm = arr(1, col)
        c.Value = nm
        doHeaders c.Resize(1, 5)             'format headers
        i = 0
        Set dict = CreateObject("scripting.dictionary")
        
        For rep = 1 To numReps               'loop values to compare
            
            cmp = arr(rep + 1, col)
            If Len(cmp) > 0 Then
                If Not dict.exists(cmp) Then
                    i = i + 1
                    dict.Add cmp, i
                    c.Offset(i, 1).Value = cmp
                    c.Offset(i, 2) = 1
                    c.Offset(i, 3).Value = MatchCoding(nm, cmp) 'now in separate function
                Else
                    'increment count for existing line
                    c.Offset(dict(cmp), 2).Value = c.Offset(dict(cmp), 2).Value + 1
                End If
            
            End If 'not zero-length
        Next rep
        
        Set c = c.Offset(i + 2, 0) 'next set
    Next col
    
End Sub

'return a string summarizing how closeley two terms match
Function MatchCoding(nm As String, cmp As String)
    Dim ld As Long, firstMatch As Boolean
    firstMatch = (Left(nm, 1) = Left(cmp, 1))
                    
    ld = Levenshtein(nm, cmp)
    
    Select Case ld
        Case 0: MatchCoding = "Exact Match"
        Case 1, 2: MatchCoding = "1-2 Letters off"
        Case 3, 4: MatchCoding = "3-4 Letters off"
        Case Else: MatchCoding = "5 or more Letters off, CHECK"
    End Select
    If ld > 0 And ld < 5 Then MatchCoding = MatchCoding & _
            IIf(firstMatch, ", Same Starting Letter", "")
End Function

'utility sub for formatting headers
Sub doHeaders(rng As Range)
    With rng
        .Interior.Color = RGB(1, 139, 175)
        .Font.Color = RGB(255, 255, 255)
        .HorizontalAlignment = xlCenter
    End With
End Sub

Upvotes: 1

Related Questions