iwuzborn
iwuzborn

Reputation: 13

R code to identify whether individuals in groups have previously been in groups together

I have a dataset of individuals who belong to different groups over time, sometimes with the same group members, sometimes with new members.

I'm trying to create a variable that gives, for each individual in a group, the number of times they have previously been in a group with any of the current group members. Each earlier shared group counts separately, so someone met in two earlier groups adds 2. The Date column should be used to keep the count chronological (i.e. shared membership in groups with a later date must not be counted).

Example data:

# Example input: one row per (individual, group) membership.
# Note that group 44 is listed last but has the earliest date.
SampleData <- tribble(
  ~ID,   ~GROUP_NUM, ~Date,
  "abc", 22,         "2022-01-15",
  "def", 22,         "2022-01-15",
  "ghi", 22,         "2022-01-15",
  "jkl", 22,         "2022-01-15",
  "abc", 14,         "2022-02-19",
  "mno", 14,         "2022-02-19",
  "pqr", 14,         "2022-02-19",
  "stv", 14,         "2022-02-19",
  "abc", 18,         "2022-05-11",
  "stv", 18,         "2022-05-11",
  "wxy", 18,         "2022-05-11",
  "zzz", 18,         "2022-05-11",
  "abc", 35,         "2022-10-06",
  "def", 35,         "2022-10-06",
  "pqr", 35,         "2022-10-06",
  "bbb", 35,         "2022-10-06",
  "abc", 44,         "2021-04-14",
  "stv", 44,         "2021-04-14",
  "pqr", 44,         "2021-04-14",
  "bbb", 44,         "2021-04-14"
)

Here's my desired output:

# Desired output: the input rows plus NPrevKnown, the number of earlier
# co-memberships shared with members of the row's current group.
AimedData <- tribble(
  ~ID,   ~GROUP_NUM, ~Date,        ~NPrevKnown,
  "abc", 22,         "2022-01-15", 0,
  "def", 22,         "2022-01-15", 0,
  "ghi", 22,         "2022-01-15", 0,
  "jkl", 22,         "2022-01-15", 0,
  "abc", 14,         "2022-02-19", 2,
  "mno", 14,         "2022-02-19", 0,
  "pqr", 14,         "2022-02-19", 2,
  "stv", 14,         "2022-02-19", 2,
  "abc", 18,         "2022-05-11", 2,
  "stv", 18,         "2022-05-11", 2,
  "wxy", 18,         "2022-05-11", 0,
  "zzz", 18,         "2022-05-11", 0,
  "abc", 35,         "2022-10-06", 4,
  "def", 35,         "2022-10-06", 1,
  "pqr", 35,         "2022-10-06", 3,
  "bbb", 35,         "2022-10-06", 2,
  "abc", 44,         "2021-04-14", 0,
  "stv", 44,         "2021-04-14", 0,
  "pqr", 44,         "2021-04-14", 0,
  "bbb", 44,         "2021-04-14", 0
)

Upvotes: 1

Views: 42

Answers (1)

Brandon Rose MD MPH
Brandon Rose MD MPH

Reputation: 722

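Not the most efficient, but this might work! It adds two columns: NPrevKnown (the count) and WhoPrevKnown (IDs of earlier group-mates, separated by " | ").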

# Example input: one row per (individual, group) membership.
SampleData <- dplyr::tribble(
  ~ID,   ~GROUP_NUM, ~Date,
  "abc", 22,         "2022-01-15",
  "def", 22,         "2022-01-15",
  "ghi", 22,         "2022-01-15",
  "jkl", 22,         "2022-01-15",
  "abc", 14,         "2022-02-19",
  "mno", 14,         "2022-02-19",
  "pqr", 14,         "2022-02-19",
  "stv", 14,         "2022-02-19",
  "abc", 18,         "2022-05-11",
  "stv", 18,         "2022-05-11",
  "wxy", 18,         "2022-05-11",
  "zzz", 18,         "2022-05-11",
  "abc", 35,         "2022-10-06",
  "def", 35,         "2022-10-06",
  "pqr", 35,         "2022-10-06",
  "bbb", 35,         "2022-10-06",
  "abc", 44,         "2021-04-14",
  "stv", 44,         "2021-04-14",
  "pqr", 44,         "2021-04-14",
  "bbb", 44,         "2021-04-14"
)

# Parse the ISO date strings into Date objects so the `<` / `==` comparisons
# further down operate on dates.
SampleData[["Date"]] <- as.Date(SampleData[["Date"]])

# Working copy that receives the derived columns; SampleData stays untouched.
AimedData <- SampleData

# NPrevKnown: for every row, the number of times the row's individual has
# shared a strictly earlier group with any member of the row's current group.
# Every earlier co-membership counts once, so a companion met in two earlier
# groups contributes 2.
#
# Rows whose group has no other member, and individuals with no earlier
# groups, both yield 0.
AimedData$NPrevKnown <- vapply(seq_len(nrow(AimedData)), function(ROW) {
  individual <- AimedData$ID[ROW]

  # Current companions: same group number and date, excluding the individual.
  others <- unique(AimedData$ID[which(
    AimedData$GROUP_NUM == AimedData$GROUP_NUM[ROW] &
      AimedData$Date == AimedData$Date[ROW] &
      AimedData$ID != individual
  )])

  # Memberships of the individual in groups dated strictly before this one.
  old_groups <- AimedData[which(
    AimedData$Date < AimedData$Date[ROW] &
      AimedData$ID == individual
  ), ]

  # Companions from each earlier group. Duplicates are kept on purpose: they
  # are what makes repeated co-memberships count more than once.
  # seq_len() gives an empty sequence when there are no earlier groups, and
  # unlist(list()) is NULL, so the sum below is 0 in that case.
  old_pals <- unlist(lapply(seq_len(nrow(old_groups)), function(i) {
    AimedData$ID[which(
      AimedData$GROUP_NUM == old_groups$GROUP_NUM[i] &
        AimedData$Date == old_groups$Date[i] &
        AimedData$ID != individual
    )]
  }))

  sum(old_pals %in% others)
}, integer(1))

# WhoPrevKnown: for every row, the IDs of current group members with whom the
# row's individual has shared a strictly earlier group, joined with " | ".
# An ID appears once per earlier shared group, so the number of entries equals
# the count obtained from `old_pals %in% others`.
#
# Rows whose group has no other member, and individuals with no earlier
# groups, both yield the empty string "".
AimedData$WhoPrevKnown <- vapply(seq_len(nrow(AimedData)), function(ROW) {
  individual <- AimedData$ID[ROW]

  # Current companions: same group number and date, excluding the individual.
  others <- unique(AimedData$ID[which(
    AimedData$GROUP_NUM == AimedData$GROUP_NUM[ROW] &
      AimedData$Date == AimedData$Date[ROW] &
      AimedData$ID != individual
  )])

  # Memberships of the individual in groups dated strictly before this one.
  old_groups <- AimedData[which(
    AimedData$Date < AimedData$Date[ROW] &
      AimedData$ID == individual
  ), ]

  # Companions from each earlier group, duplicates kept (one per shared group).
  old_pals <- unlist(lapply(seq_len(nrow(old_groups)), function(i) {
    AimedData$ID[which(
      AimedData$GROUP_NUM == old_groups$GROUP_NUM[i] &
        AimedData$Date == old_groups$Date[i] &
        AimedData$ID != individual
    )]
  }))

  # Keep only earlier companions who are also in the current group; without
  # this filter the column would list people unrelated to the current group.
  paste0(old_pals[old_pals %in% others], collapse = " | ")
}, character(1))

Upvotes: 0

Related Questions