Reputation: 2486
Example data consists of 4 columns:
Group
response
time
match.
In a Treatmentgroup column there are three possibilities of treatment which are:
treatment1
control
treatment2
In response column there is unique response to each ID in a given time. Time is shown in a "Time" column. In a "Match" column there is grouping information. Different groups are:
group_1
group_2
So every treatmentGroup(control, treatment1...) belongs also to matched group (group_1, group_2)
The task is to calculate a new column based on grouping information in a "TreatmentGroup" column and matching information in a "Match" column. New column will contain response variable of treatmentX - control of that matched group.
For example: response of treatment 2 - control(1.2 -1.8 = -0.6) and next row 1.4-2.0=-0.6. So treatment response is compared to control response at a given time (0 or 1). Example data and result table(manually calculated):
TreatmentGroup ID Response Time Match
1 treatment2 ID1 1.2 0 group_1
2 treatment2 ID1 1.4 1 group_1
3 control ID2 1.8 0 group_1
4 control ID2 2.0 1 group_1
5 treatment1 ID3 1.5 0 group_1
6 treatment1 ID3 1.8 1 group_1
7 treatment2 ID4 0.2 0 group_2
8 treatment2 ID4 0.3 1 group_2
9 control ID5 2.5 0 group_2
10 control ID5 2.8 1 group_2
11 treatment1 ID6 3.2 0 group_2
12 treatment1 ID6 3.5 1 group_2
TreatmentGroup ID Response Time Match Paired_sub
1 treatment2 ID1 1.2 0 group_1 -0.6
2 treatment2 ID1 1.4 1 group_1 -0.6
3 control ID2 1.8 0 group_1 0.0
4 control ID2 2.0 1 group_1 0.0
5 treatment1 ID3 1.5 0 group_1 -0.3
6 treatment1 ID3 1.8 1 group_1 0.2
7 treatment2 ID4 0.2 0 group_2 -2.3
8 treatment2 ID4 0.3 1 group_2 -2.5
9 control ID5 2.5 0 group_2 0.0
10 control ID5 2.8 1 group_2 0.0
11 treatment1 ID6 3.2 0 group_2 0.7
12 treatment1 ID6 3.5 1 group_2 0.7
What would be the best approaches(or answer) for this kind of problem?
Code for generating example tables:
df <- data.frame("TreatmentGroup"=c("treatment2", "treatment2", "control", "control", "treatment1", "treatment1"),
"ID" = c("ID1","ID1", "ID2","ID2","ID3","ID3", "ID4","ID4", "ID5","ID5", "ID6","ID6"),
"Response"=c(1.2, 1.4, 1.8, 2.0, 1.5, 1.8, 0.2,0.3,2.5,2.8,3.2,3.5),
"Time" = c(0,1,0,1,0,1),
"Match" = c("group_1", "group_1","group_1", "group_1","group_1", "group_1","group_2", "group_2","group_2", "group_2","group_2", "group_2")
)
result <- data.frame("TreatmentGroup"=c("treatment2", "treatment2", "control", "control", "treatment1", "treatment1"),
"ID" = c("ID1","ID1", "ID2","ID2","ID3","ID3", "ID4","ID4", "ID5","ID5", "ID6","ID6"),
"Response"=c(1.2, 1.4, 1.8, 2.0, 1.5, 1.8, 0.2,0.3,2.5,2.8,3.2,3.5),
"Time" = c(0,1,0,1,0,1),
"Match" = c("group_1", "group_1","group_1", "group_1","group_1", "group_1","group_2", "group_2","group_2", "group_2","group_2", "group_2"),
"Paired_sub" = c(-0.6,-0.6,0,0,-0.3, 0.2,-2.3,-2.5, 0,0, 0.7,0.7)
)
Upvotes: 0
Views: 237
Reputation: 887213
You can use base R
as well
indx <- with(df, paste(Time, Match))
within(df, {indx2 <- TreatmentGroup=='control'
Paired_sub <- Response - setNames(Response[indx2],
indx[indx2])[setNames(indx, 1:nrow(df))]})[,-7]
# TreatmentGroup ID Response Time Match Paired_sub
#1 treatment2 ID1 1.2 0 group_1 -0.6
#2 treatment2 ID1 1.4 1 group_1 -0.6
#3 control ID2 1.8 0 group_1 0.0
#4 control ID2 2.0 1 group_1 0.0
#5 treatment1 ID3 1.5 0 group_1 -0.3
#6 treatment1 ID3 1.8 1 group_1 -0.2
#7 treatment2 ID4 0.2 0 group_2 -2.3
#8 treatment2 ID4 0.3 1 group_2 -2.5
#9 control ID5 2.5 0 group_2 0.0
#10 control ID5 2.8 1 group_2 0.0
#11 treatment1 ID6 3.2 0 group_2 0.7
#12 treatment1 ID6 3.5 1 group_2 0.7
Or using split/unsplit
unsplit(
lapply(split(df, list(df$Match, df$Time), drop=TRUE), function(x)
transform(x, Paired_sub = Response - Response[TreatmentGroup=='control'])),
list(df$Match, df$Time))
# TreatmentGroup ID Response Time Match Paired_sub
#1 treatment2 ID1 1.2 0 group_1 -0.6
#2 treatment2 ID1 1.4 1 group_1 -0.6
#3 control ID2 1.8 0 group_1 0.0
#4 control ID2 2.0 1 group_1 0.0
#5 treatment1 ID3 1.5 0 group_1 -0.3
#6 treatment1 ID3 1.8 1 group_1 -0.2
#7 treatment2 ID4 0.2 0 group_2 -2.3
#8 treatment2 ID4 0.3 1 group_2 -2.5
#9 control ID5 2.5 0 group_2 0.0
#10 control ID5 2.8 1 group_2 0.0
#11 treatment1 ID6 3.2 0 group_2 0.7
#12 treatment1 ID6 3.5 1 group_2 0.7
Another option in base R
would be to merge
the df
with subset
of df
with control
TreatmentGroup
df$id <- 1:nrow(df)
df1 <- merge(df[,-c(1:2)], subset(df, TreatmentGroup=='control')[,-c(1:2,6)],
by=c('Time', 'Match'), sort=FALSE)
df$Paired_sub <- with(df1[order(df1$id),], Response.x-Response.y)
df$Paired_sub
#[1] -0.6 -0.6 0.0 0.0 -0.3 -0.2 -2.3 -2.5 0.0 0.0 0.7 0.7
Upvotes: 2
Reputation: 70276
Many options to do this, one of them is with dplyr:
require(dplyr)
df %>%
group_by(Match, Time) %>%
mutate(Paired_sub = Response - Response[TreatmentGroup == "control"])
#Source: local data frame [12 x 6]
#Groups: Match, Time
#
# TreatmentGroup ID Response Time Match Paired_sub
#1 treatment2 ID1 1.2 0 group_1 -0.6
#2 treatment2 ID1 1.4 1 group_1 -0.6
#3 control ID2 1.8 0 group_1 0.0
#4 control ID2 2.0 1 group_1 0.0
#5 treatment1 ID3 1.5 0 group_1 -0.3
#6 treatment1 ID3 1.8 1 group_1 -0.2
#7 treatment2 ID4 0.2 0 group_2 -2.3
#8 treatment2 ID4 0.3 1 group_2 -2.5
#9 control ID5 2.5 0 group_2 0.0
#10 control ID5 2.8 1 group_2 0.0
#11 treatment1 ID6 3.2 0 group_2 0.7
#12 treatment1 ID6 3.5 1 group_2 0.7
The equivalent data.table approach would be:
require(data.table)
setDT(df)[, Paired_sub := Response - Response[TreatmentGroup == "control"], by = list(Match, Time)]
One more option, using base R:
df <- do.call(rbind, lapply(split(df, interaction(df$Match, df$Time)), function(dd) {
dd$Paired_sub <- with(dd, Response - Response[TreatmentGroup == "control"])
dd}))
rownames(df) <- NULL
Only the ordering should be different here, the numbers in paired_sub are hopefully the same as in the other answers.
Upvotes: 5