Catalin Vasilescu
Catalin Vasilescu

Reputation: 51

Insert a new value if there is a duplicate

I have a table with multiple duplicates (Log in) for an id. Normally after each "log in" there must be a "log out". If yes, then I do not have to do anything. If after a "log in" there is no "log out" then I have to create one at the and of the day (23:59:59).

I have the following table:

  Id      Status     Date    
  A      Log in      01.01.2018  01:44:03
  A      Log out     01.01.2018  02:57:03
  C      Log in      01.01.2018  01:55:03
 ser     Log in      01.01.2018  01:59:55
 ser     Log out     03.01.2018  01:59:55
  M      Log in      04.01.2018  01:59:55

The table should look like this:

 Id      Status     Date    
 A      Log in      01.01.2018  01:44:03
 A      Log out     01.01.2018  02:57:03
 C      Log in      01.01.2018  01:59:03
 C      Log out     01.01.2018  23:59:59  
ser     Log in      01.01.2018  01:59:55
ser     Log out     03.01.2018  01:59:55
 M      Log in      04.01.2018  01:59:55
 M      Log out     04.01.2018  23:59:59

A formula like this

=IF(OR(AND(A2=A3,B2="Log in",B3="Log out"),AND(A2=A1,B2="Log Out",B1="Log in")),"Keep","You need to insert a log out")

could help me to see if afer a "log in" exists a "log out", however she does not help me to insert a a new line in sheet. Any idea how I can do this? Do you think I need vba?

*if there is a "log out" after a "log out" for the same id, both "log outs" will be removed

Upvotes: 1

Views: 109

Answers (2)

z32a7ul
z32a7ul

Reputation: 3777

You can use VBA for this:

Option Explicit

' Tools > References > Microsoft Scripting Runtime

' dctIds
'   Id => dctSessions
'           LogIn => dctSession
'                      "Id" => String
'                      "LogIn" => Date
'                      "LogOut" => Date

Public Sub ExtendData()
    Dim dctIds As Dictionary: Set dctIds = New Dictionary
    ReadData dctIds, ThisWorkbook.Worksheets("Input")
    WriteData_v1 dctIds, ThisWorkbook.Worksheets("Output_v1")
    WriteData_v2 dctIds, ThisWorkbook.Worksheets("Output_v2")
End Sub

Private Sub ReadData(dctIds As Dictionary, ewsInput As Worksheet)
    ' Assumption: header in first row, data starts in second row
    Dim r As Long: For r = 2 To ewsInput.UsedRange.Row + ewsInput.UsedRange.Rows.Count - 1
        ' Assumption: Id is in first column
        Dim strId As String: strId = ewsInput.Cells(r, 1).Value
        ' Assumption: Status is in second column
        Dim strStatus As String: strStatus = ewsInput.Cells(r, 2).Value
        ' Assumption: Date is in third column, and ms precision is enough, change data type to Double and use Value2 instead of Value otherwise
        Dim datDate As Date: datDate = ewsInput.Cells(r, 3).Value

        Dim dctSessions As Dictionary
        If dctIds.Exists(strId) = False Then
            Set dctSessions = New Dictionary
            dctIds.Add strId, dctSessions
        Else
            Set dctSessions = dctIds(strId)
        End If

        If strStatus = "Log in" Then
            Dim dctSessionNew As Dictionary: Set dctSessionNew = New Dictionary
            dctSessionNew.Add "Id", strId
            dctSessionNew.Add "Status", strStatus
            dctSessionNew.Add "LogIn", datDate
            dctSessions.Add datDate, dctSessionNew
        ElseIf strStatus = "Log out" Then
            Dim dctSessionLast As Dictionary: Set dctSessionLast = Nothing
            Dim varSessionFound As Variant: For Each varSessionFound In dctSessions.Items
                Dim dctSessionFound As Dictionary: Set dctSessionFound = varSessionFound
                If dctSessionLast Is Nothing Then
                    Set dctSessionLast = dctSessionFound
                ElseIf dctSessionLast("LogIn") <= dctSessionFound("LogIn") Then
                    Set dctSessionLast = dctSessionFound
                End If
            Next varSessionFound
            If Not dctSessionLast Is Nothing Then
                dctSessionLast.Add "LogOut", datDate
            Else
'                Debug.Print "No Log in before Log out in row " & r
                Dim dctSessionOvernight As Dictionary: Set dctSessionOvernight = New Dictionary
                dctSessionOvernight.Add "Id", strId
                dctSessionOvernight.Add "Status", strStatus
                dctSessionOvernight.Add "LogIn", DateValue(datDate) + TimeSerial(0, 0, 0)
                dctSessionOvernight.Add "LogOut", datDate
                dctSessions.Add dctSessionOvernight("LogIn"), dctSessionOvernight

            End If
        Else
            Debug.Print "Invalid Status in row " & r
        End If
    Next r
End Sub

Private Sub WriteData_v1(dctIds As Dictionary, ewsOutput As Worksheet)
    ' Assumption: header in first row, data starts in second row
    Dim r As Long: r = 2
    Dim varSessions As Variant: For Each varSessions In dctIds.Items
        Dim dctSessions As Dictionary: Set dctSessions = varSessions
        Dim varSession As Variant: For Each varSession In dctSessions.Items
            Dim dctSession As Dictionary: Set dctSession = varSession
            ' Assumption: Id is in first column
            ewsOutput.Cells(r, 1).Value = dctSession("Id")
            ' Assumption: Status is in second column
            ewsOutput.Cells(r, 2).Value = dctSession("Status")
            ' Assumption: Date is in third column, and ms precision is enough, change data type to Double and use Value2 instead of Value otherwise
            ewsOutput.Cells(r, 3).Value = dctSession("LogIn")
            r = r + 1
            ' Assumption: Id is in first column
            ewsOutput.Cells(r, 1).Value = dctSession("Id")
            ' Assumption: Status is in second column
            ewsOutput.Cells(r, 2).Value = dctSession("Status")
            ' Assumption: Date is in third column, and ms precision is enough, change data type to Double and use Value2 instead of Value otherwise
            With ewsOutput.Cells(r, 3)
                If dctSessions.Exists("LogOut") Then
                    .Value = dctSession("LogOut")
                Else
                    .Value = DateValue(dctSession("LogIn")) + TimeSerial(23, 59, 59)
                End If
            End With
            r = r + 1
        Next varSession
    Next varSessions
End Sub


Private Sub WriteData_v2(dctIds As Dictionary, ewsOutput As Worksheet)
    ' Assumption: header in first row, data starts in second row
    Dim r As Long: r = 2
    Dim varSessions As Variant: For Each varSessions In dctIds.Items
        Dim dctSessions As Dictionary: Set dctSessions = varSessions
        Dim varSession As Variant: For Each varSession In dctSessions.Items
            Dim dctSession As Dictionary: Set dctSession = varSession
            ' Assumption: Id is in first column
            ewsOutput.Cells(r, 1).Value = dctSession("Id")
            ' Assumption: Status is in second column
            ewsOutput.Cells(r, 2).Value = dctSession("Status")
            ' Assumption: LogIn is in third column, and ms precision is enough, change data type to Double and use Value2 instead of Value otherwise
            ewsOutput.Cells(r, 3).Value = dctSession("LogIn")
            ' Assumption: LogOut is in fourth column, and ms precision is enough, change data type to Double and use Value2 instead of Value otherwise
            With ewsOutput.Cells(r, 4)
                If dctSessions.Exists("LogOut") Then
                    .Value = dctSession("LogOut")
                Else
                    .Value = DateValue(dctSession("LogIn")) + TimeSerial(23, 59, 59)
                End If
            End With
            r = r + 1
        Next varSession
    Next varSessions
End Sub

As you can see, my macro can create to outputs:

v1: The way you asked for: the original rows + additional rows to close sessions at the end of the day

v2: The table format recommended by me and others: each session is a row with two dates (log in and log out), where the second date is the end of the day, if missing from the original table

It will look like this after running the macro:

Log In and Out Picture

Note: The header (Id, Status, etc.) was not created by the macro but manually.

Update:

Having read the OP's comments to PEH's solution, I modified the error handling code ("No Log in before Log out in row x").

This way, the code will also enter log in dates if it finds log out dates. This is useful because if you allow overnight sessions, it's not enough to close log in events that will end the next day but you should also open the sessions that were started the day before.

Sessions that last several days are still not managed by this code (it would require to analyse the logs of all days).

Regarding the three consecutive log outs: this should be considered an error, and should not be hidden by a program code because it requires further investigation (Why did it happen?).

Upvotes: 2

Pᴇʜ
Pᴇʜ

Reputation: 57683

Imagine the following data. The blue column is what we would assume the code should do:

enter image description here

Option Explicit

Public Sub AddMissingLogoutLines()
    Dim ws As Worksheet
    Set ws = ThisWorkbook.Worksheets("Data")

    Dim iRow As Long
    iRow = 2 'start in row 2

    Do Until ws.Cells(iRow, "A").Value = vbNullString
        If ws.Cells(iRow, "B").Value = "Log in" Then 'we are in a login line …
            If ws.Cells(iRow + 1, "B").Value = "Log out" And ws.Cells(iRow + 1, "A").Value = ws.Cells(iRow, "A").Value Then
                'login line followed by its logout line
                'this is what we want so we do nothing
            Else 'login line followed by a login line or a mismatching logout line
                'logout is missing add it
                ws.Rows(iRow + 1).Insert Shift:=xlDown
                ws.Cells(iRow + 1, "A").Value = ws.Cells(iRow, "A").Value
                ws.Cells(iRow + 1, "B").Value = "Log out"
                ws.Cells(iRow + 1, "C").Value = DateValue(ws.Cells(iRow, "C").Value) + (1 - (1 / 24 / 60 / 60))
            End If
            iRow = iRow + 2
        Else  'we are in a logout line …
            If ws.Cells(iRow + 1, "B").Value = "Log out" Then 'logout line followed by a logout line
                'logout after logout so delete both
                ws.Range(iRow & ":" & iRow + 1).Delete
            Else 'everything is ok go to next line
                iRow = iRow + 1
                'if you want to remove single `log out` lines with no login line too, then replace the iRow = iRow + 1 above with ws.Rows(iRow).Delete here
            End If
        End If
    Loop
End Sub

After the code is run we see that 2 Log out lines were deleted and the 2 missing Log out lines that were missing for the Log in lines were created.

enter image description here

Upvotes: 2

Related Questions