Reputation: 81
I have a dataset containing an ID variable, a date, and several agents (see example below). The agents have been tested several times per patient and I want to filter for every ID the first one to appear and remove all the other tests appearing within 4 weeks after the first. After this, I again want to filter the first one and remove all the others appearing within 4 weeks - throughout the whole dataset. I also generated variables showing the week, month and year.
ID <- rep(1, times = 20)
Date <- c("2010-12-09", "2010-12-09", "2010-12-09", "2010-12-09", "2010-12-09", "2010-12-09", "2010-12-09", "2010-12-09", "2010-12-27", "2010-12-27", "2010-12-27", "2010-12-27", "2011-01-14", "2011-01-14", "2011-01-14", "2011-01-14", "2011-01-14", "2011-01-14", "2011-01-14", "2011-01-14")
Agent <- c("Agent1", "Agent2", "Agent3", "Agent4", "Agent1", "Agent2", "Agent3", "Agent4", "Agent1", "Agent2", "Agent3", "Agent4", "Agent1", "Agent2", "Agent3", "Agent4", "Agent1", "Agent2", "Agent3", "Agent4")
df <- data.frame(ID, Date, Agent)
ID Date Week Month Year Agent
1 1 2010-12-09 49 12 2010 Agent1
2 1 2010-12-09 49 12 2010 Agent2
3 1 2010-12-09 49 12 2010 Agent3
4 1 2010-12-09 49 12 2010 Agent4
5 1 2010-12-09 49 12 2010 Agent1
6 1 2010-12-09 49 12 2010 Agent2
7 1 2010-12-09 49 12 2010 Agent3
8 1 2010-12-09 49 12 2010 Agent4
9 1 2010-12-27 52 12 2010 Agent1
10 1 2010-12-27 52 12 2010 Agent2
11 1 2010-12-27 52 12 2010 Agent3
12 1 2010-12-27 52 12 2010 Agent4
13 1 2011-01-14 2 1 2011 Agent1
14 1 2011-01-14 2 1 2011 Agent2
15 1 2011-01-14 2 1 2011 Agent3
16 1 2011-01-14 2 1 2011 Agent4
17 1 2011-01-14 2 1 2011 Agent1
18 1 2011-01-14 2 1 2011 Agent2
19 1 2011-01-14 2 1 2011 Agent3
20 1 2011-01-14 2 1 2011 Agent4
and what I need is this:
ID Date Week Month Year Agent
1 1 2010-12-09 49 12 2010 Agent1
2 1 2010-12-09 49 12 2010 Agent2
3 1 2010-12-09 49 12 2010 Agent3
4 1 2010-12-09 49 12 2010 Agent4
13 1 2011-01-14 2 1 2011 Agent1
14 1 2011-01-14 2 1 2011 Agent2
15 1 2011-01-14 2 1 2011 Agent3
16 1 2011-01-14 2 1 2011 Agent4
I'm happy about any help!
Upvotes: 5
Views: 661
Reputation: 147
You can try the following. It uses data.table. I hope that you don't mind that I do not include Year, Month, Week and row index.
ID <- rep(1, times = 20)
Date <- c("2010-12-09", "2010-12-09", "2010-12-09", "2010-12-09", "2010-12-09", "2010-12-09", "2010-12-09", "2010-12-09", "2010-12-27", "2010-12-27", "2010-12-27", "2010-12-27", "2011-01-14", "2011-01-14", "2011-01-14", "2011-01-14", "2011-01-14", "2011-01-14", "2011-01-14", "2011-01-14")
Agent <- c("Agent1", "Agent2", "Agent3", "Agent4", "Agent1", "Agent2", "Agent3", "Agent4", "Agent1", "Agent2", "Agent3", "Agent4", "Agent1", "Agent2", "Agent3", "Agent4", "Agent1", "Agent2", "Agent3", "Agent4")
df <- data.frame(ID, Date, Agent)
library(data.table)
df=as.data.table(df)
# convert Date
df$Date=as.Date(df$Date)
# drop duplicate lines
df=unique(df)
df$weeks_from_min_date=5 # checked rows will have weeks_from_min<=0
while (max(df$weeks_from_min_date)>0){
# get min date per Agent-ID of not checked rows
min_date_per_Agent_ID=df[weeks_from_min_date>=5][,.(min_date=min(Date)),by=.(Agent,ID)]
# join with df
df=merge(df,min_date_per_Agent_ID)
# update weeks_from_min_date
df$weeks_from_min_date=difftime( df$Date,df$min_date, units = "weeks")
df=df[df$weeks_from_min_date<=0 | df$weeks_from_min_date>4 ]
df$min_date=NULL
}
df$weeks_from_min_date=NULL
keycol <-c("Date","Agent")
setorderv(df, keycol)
df
Upvotes: 0
Reputation: 388817
You can subtract the minimum Date
for each ID
to create a new group which consists of 4 weeks of data and select rows with minimum date in it for each ID
, group
and Agent
.
library(dplyr)
df %>%
mutate(Date = as.Date(Date)) %>%
group_by(ID) %>%
mutate(group = ceiling(as.integer(difftime(Date, min(Date), units = 'week')/4))) %>%
group_by(ID, group, Agent) %>%
slice(which.min(Date))
# ID Date Agent group
# <dbl> <date> <chr> <dbl>
#1 1 2010-12-09 Agent1 0
#2 1 2010-12-09 Agent2 0
#3 1 2010-12-09 Agent3 0
#4 1 2010-12-09 Agent4 0
#5 1 2011-01-14 Agent1 1
#6 1 2011-01-14 Agent2 1
#7 1 2011-01-14 Agent3 1
#8 1 2011-01-14 Agent4 1
Upvotes: 1