Nick
Nick

Reputation: 45

Delete value from array then resize

For reference, here is the whole code.

  Sub CalculateOutliers()

                Dim n As Long
                Dim mean As Double
                Dim SD As Double
                Dim k As Long
                Dim DataSet As Variant
                Dim LowConf As Double
                Dim HighConf As Double


            '--------------------------------------------------------
                DataSet = Selection.Value
            'Copies highlighted data into DataSet variable
            'Cell A1 is (1,1) Because it starts at 0 which is out of range
            '--------------------------------------------------------



            '--------------------------------------------------------
                n = Selection.CountLarge
            'Counts number of entries
            '--------------------------------------------------------



            '--------------------------------------------------------
             'DEFINES 95(LowConf) AND 99(HighConf) PERCENT CONFIDENCES BASED ON
             'SAMPLE SIZE

                If n <= 5 Then
                        LowConf = 1.72
                        HighConf = 1.76
                End If

                If n = 6 Then
                        LowConf = 1.89
                        HighConf = 1.97
                End If

                If n = 7 Then
                        LowConf = 2.02
                        HighConf = 2.14
                End If

                If n = 8 Then
                        LowConf = 2.13
                        HighConf = 2.28
                End If

                If n = 9 Then
                        LowConf = 2.21
                        HighConf = 2.39
                End If

                If n = 10 Then
                        LowConf = 2.29
                        HighConf = 2.48
                End If

                If n = 11 Then
                        LowConf = 2.36
                        HighConf = 2.56
                End If

                If n = 12 Then
                        LowConf = 2.41
                        HighConf = 2.64
                End If

                If n = 13 Then
                        LowConf = 2.46
                        HighConf = 2.7
                End If

                If n = 14 Then
                        LowConf = 2.51
                        HighConf = 2.75
                End If

                If n = 15 Then
                        LowConf = 2.55
                        HighConf = 2.81
                End If

                If n = 16 Then
                        LowConf = 2.59
                        HighConf = 2.85
                End If

                If n = 17 Then
                        LowConf = 2.62
                        HighConf = 2.9
                End If

                If n = 18 Then
                        LowConf = 2.65
                        HighConf = 2.93
                End If

                If n = 19 Then
                        LowConf = 2.68
                        HighConf = 2.97
                End If

                If n = 20 Then
                        LowConf = 2.71
                        HighConf = 3
                End If

                If n = 21 Then
                        LowConf = 2.73
                        HighConf = 3.03
                End If

                If n = 22 Then
                        LowConf = 2.76
                        HighConf = 3.06
                End If

                If n = 23 Then
                        LowConf = 2.78
                        HighConf = 3.08
                End If

                If n = 24 Then
                        LowConf = 2.8
                        HighConf = 3.11
                End If

                If n = 25 Then
                        LowConf = 2.82
                        HighConf = 3.14
                End If

                If n = 26 Then
                        LowConf = 2.84
                        HighConf = 3.16
                End If

                If n = 27 Then
                        LowConf = 2.86
                        HighConf = 3.18
                End If

                If n = 28 Then
                        LowConf = 2.88
                        HighConf = 3.2
                End If

                If n = 29 Then
                        LowConf = 2.89
                        HighConf = 3.22
                End If

                If n = 30 Then
                        LowConf = 2.91
                        HighConf = 3.24
                End If

                If n <= 35 And n > 30 Then
                        LowConf = 2.98
                        HighConf = 3.32
                End If

                If n <= 40 And n > 35 Then
                        LowConf = 3.04
                        HighConf = 3.38
                End If

                If n <= 45 And n > 40 Then
                        LowConf = 3.09
                        HighConf = 3.44
                End If

                If n <= 50 And n > 45 Then
                        LowConf = 3.13
                        HighConf = 3.48
                End If

                If n <= 60 And n > 50 Then
                        LowConf = 3.2
                        HighConf = 3.56
                End If

                If n <= 70 And n > 60 Then
                        LowConf = 3.26
                        HighConf = 3.62
                End If

                If n <= 80 And n > 70 Then
                        LowConf = 3.31
                        HighConf = 3.67
                End If

                If n <= 90 And n > 80 Then
                        LowConf = 3.35
                        HighConf = 3.72
                End If

                If n <= 100 And n > 90 Then
                        LowConf = 3.38
                        HighConf = 3.75
                End If

                If n <= 150 And n > 100 Then
                        LowConf = 3.52
                        HighConf = 3.89
                End If

                If n <= 200 And n > 150 Then
                        LowConf = 3.61
                        HighConf = 3.98
                End If

                If n <= 300 And n > 200 Then
                        LowConf = 3.72
                        HighConf = 4.09
                End If

                If n <= 400 And n > 300 Then
                        LowConf = 3.8
                        HighConf = 4.17
                End If

                If n <= 500 And n > 400 Then
                        LowConf = 3.86
                        HighConf = 4.32
                End If

                If n > 500 Then
                        MsgBox "Sample size cannot exceed 500."
                End If
            '--------------------------------------------------------



            '--------------------------------------------------------
                If n < 50 Then
                    k = Int(n / 10)
                Else
                    k = 5
                End If
            'determines k = number of possible outliers
            '--------------------------------------------------------

            Dim i As Long

            For i = 1 To k
            '--------------------------------------------------------
                mean = Application.WorksheetFunction.Average(DataSet)
            'Calculates mean of Data Set
            '--------------------------------------------------------



            '--------------------------------------------------------
                SD = Application.WorksheetFunction.StDev(DataSet)
            'Calculates Standard Deviation of Data Set
            '--------------------------------------------------------



            '--------------------------------------------------------
                Dim Suspect As Double


                If (Abs(Application.WorksheetFunction.Max(DataSet) - mean)) > (Abs(Application.WorksheetFunction.Min(DataSet) - mean)) Then

                    Suspect = Application.WorksheetFunction.Max(DataSet)

                End If

                If (Abs(Application.WorksheetFunction.Max(DataSet) - mean)) < (Abs(Application.WorksheetFunction.Min(DataSet) - mean)) Then

                    Suspect = Application.WorksheetFunction.Min(DataSet)

                End If
            'Defines what the most outlying value is
            '--------------------------------------------------------



            '--------------------------------------------------------
                Dim Retest As Boolean

                If (Abs(Suspect - mean) / SD) > LowConf Then
                    MsgBox "95% outlier: " & Suspect
                    Retest = True
                End If

                If (Abs(Suspect - mean) / SD) > HighConf Then
                    MsgBox "99% outlier: " & Suspect
                    Retest = True
                End If

                If Retest = True Then

            '--------------------------------------------------------
            Next i


            End Sub

Talking about this chunk of code, I need to delete Suspect from DataSet then shrink DataSet by 1 but I am not sure how to go about this. Is there a function that will delete the Max or Min value and is there a way to resize DataSet given that it is a variant?

                        Dim Retest As Boolean

                        If (Abs(Suspect - mean) / SD) > LowConf Then

                            MsgBox "95% outlier: " & Suspect
                            Retest = True

                    End If

                        If (Abs(Suspect - mean) / SD) > HighConf Then

                            MsgBox "99% outlier: " & Suspect
                            Retest = True

                    End If

                        If Retest = True Then

Upvotes: 0

Views: 676

Answers (2)

paul bica
paul bica

Reputation: 10715

Here is how I would implement the solution (similar to @Tim's comment)

Sub calculateOutliers()
    Dim retest As Boolean, dataSet As Variant, n As Long, found As Range
    Dim lowConf As Double, highConf As Double, suspect As Double, tmp As Double
    Dim mean As Double, stdDev As Double, dataMin As Double, dataMax As Double

    retest = True
    While retest
        dataSet = Selection.Value
        n = Selection.CountLarge
        If Not IsEmpty(dataSet) Then
            updateConfidences dataSet, n, lowConf, highConf
            retest = False
            With Application.WorksheetFunction
                mean = .Average(dataSet)
                stdDev = .StDev(dataSet)
                dataMin = .Min(dataSet)
                dataMax = .Max(dataSet)
            End With
            suspect = IIf(Abs(dataMax - mean) > Abs(dataMin - mean), dataMax, dataMin)
            tmp = Abs(suspect - mean) / stdDev
            Set found = Selection.Find(What:=suspect, LookAt:=xlWhole)
            If tmp > lowConf Then
                With Selection
                    found.Interior.Color = RGB(255, 255, 0)
                    .Value = removeItemFromArray(dataSet, suspect)
                End With
                retest = True
            End If
            If tmp > highConf Then
                found.Interior.Color = RGB(255, 0, 0)
                retest = True
            End If
        End If
    Wend
End Sub

'updates lowConf and highConf (byref)
Private Sub updateConfidences(ByRef dataSet As Variant, ByVal n As Long, _
                              ByRef lowConf As Double, ByRef highConf As Double)

    'DEFINES 95(LowConf) AND 99(HighConf) PERCENT CONFIDENCES BASED ON SAMPLE SIZE
    Select Case True
        Case n <= 5:   lowConf = 1.72:   highConf = 1.76
        Case n = 6:    lowConf = 1.89:   highConf = 1.97
        Case n = 7:    lowConf = 2.02:   highConf = 2.14
        Case n = 8:    lowConf = 2.13:   highConf = 2.28
        Case n = 9:    lowConf = 2.21:   highConf = 2.39
        Case n = 10:   lowConf = 2.29:   highConf = 2.48
        Case n = 11:   lowConf = 2.36:   highConf = 2.56
        Case n = 12:   lowConf = 2.41:   highConf = 2.64
        Case n = 13:   lowConf = 2.46:   highConf = 2.7
        Case n = 14:   lowConf = 2.51:   highConf = 2.75
        Case n = 15:   lowConf = 2.55:   highConf = 2.81
        Case n = 16:   lowConf = 2.59:   highConf = 2.85
        Case n = 17:   lowConf = 2.62:   highConf = 2.9
        Case n = 18:   lowConf = 2.65:   highConf = 2.93
        Case n = 19:   lowConf = 2.68:   highConf = 2.97
        Case n = 20:   lowConf = 2.71:   highConf = 3
        Case n = 21:   lowConf = 2.73:   highConf = 3.03
        Case n = 22:   lowConf = 2.76:   highConf = 3.06
        Case n = 23:   lowConf = 2.78:   highConf = 3.08
        Case n = 24:   lowConf = 2.8:    highConf = 3.11
        Case n = 25:   lowConf = 2.82:   highConf = 3.14
        Case n = 26:   lowConf = 2.84:   highConf = 3.16
        Case n = 27:   lowConf = 2.86:   highConf = 3.18
        Case n = 28:   lowConf = 2.88:   highConf = 3.2
        Case n = 29:   lowConf = 2.89:   highConf = 3.22
        Case n = 30:   lowConf = 2.91:   highConf = 3.24
        Case n <= 35:  lowConf = 2.98:   highConf = 3.32
        Case n <= 40:  lowConf = 3.04:   highConf = 3.38
        Case n <= 45:  lowConf = 3.09:   highConf = 3.44
        Case n <= 50:  lowConf = 3.13:   highConf = 3.48
        Case n <= 60:  lowConf = 3.2:    highConf = 3.56
        Case n <= 70:  lowConf = 3.26:   highConf = 3.62
        Case n <= 80:  lowConf = 3.31:   highConf = 3.67
        Case n <= 90:  lowConf = 3.35:   highConf = 3.72
        Case n <= 100: lowConf = 3.38:   highConf = 3.75
        Case n <= 150: lowConf = 3.52:   highConf = 3.89
        Case n <= 200: lowConf = 3.61:   highConf = 3.98
        Case n <= 300: lowConf = 3.72:   highConf = 4.09
        Case n <= 400: lowConf = 3.8:    highConf = 4.17
        Case n <= 500: lowConf = 3.86:   highConf = 4.32
        Case n > 500:  MsgBox "Sample size cannot exceed 500."
    End Select
End Sub

Private Function removeItemFromArray(ByRef initialArray As Variant, _
                                     ByVal suspect As Double) As Variant

    Const dex As String = "#,###.00000"
    Dim i As Long, j As Long, dim1 As Long, dim2 As Long, arrayCopy As Variant

    dim1 = UBound(initialArray, 1)
    dim2 = UBound(initialArray, 2)

    ReDim arrayCopy(1 To dim1, 1 To dim2)

    For i = 1 To dim1
        For j = 1 To dim2
            If Format(initialArray(i, j), dex) <> Format(suspect, dex) Then
                arrayCopy(i, j) = initialArray(i, j)
            End If
        Next j
    Next i

    removeItemFromArray = arrayCopy

End Function

enter image description hereenter image description here

Upvotes: 1

xidgel
xidgel

Reputation: 3145

One approach is to sort the data, either in excel before reading it into the vba routine, or in vba see for example Chip Pearson. With sorted data, you can remove outliers by changing array bounds, e.g. DataSet(3) to DataSet(n-5) has removed 2 outliers on the low side and 5 on the high side.

Hope that helps

Upvotes: 1

Related Questions