Reputation: 77
I need to assign a unique trial number (1, 2, 3, ...) to each trial in a dataset. Here is an example:
"source","ID","cultivar","design"
"PDMR_vol_12","CF027","Ambassador","RCBD"
"PDMR_vol_12","CF027","Ambassador","RCBD"
"PDMR_vol_12","CF027","Ambassador","RCBD"
"PDMR_vol_12","CF027","Ambassador","RCBD"
"PDMR_vol_7","CF026","ASG2000","RCBD"
"PDMR_vol_7","CF026","ASG2000","RCBD"
"PDMR_vol_7","CF026","ASG2000","RCBD"
"PDMR_vol_7","CF026","P26R61","RCBD"
"PDMR_vol_7","CF026","P26R61","RCBD"
"PDMR_vol_7","CF026","P26R61","RCBD"
"PDMR_vol_4","CF011","Roane","SP"
"PDMR_vol_4","CF011","Roane","SP"
"PDMR_vol_4","CF011","Tomahawk","SP"
"PDMR_vol_4","CF011","Tomahawk","SP"
"PDMR_vol_4","CF011","Everest","SP"
"PDMR_vol_4","CF011","Everest","SP"
The conditional columns are:
# Columns that jointly identify a trial under an RCBD design
unique_trials_RCBD <- c("source", "ID", "cultivar", "design")
# Same columns without the third one ("cultivar"), used for SP designs
unique_trials_SP <- unique_trials_RCBD[-3]
Using a conditional group_by on these columns, we almost get the correct result, except that (PDMR_vol_7, CF026) is treated as a single trial instead of being split into two trials.
#' Group a data frame by the columns that define a trial.
#'
#' @param data A data frame containing the grouping columns.
#' @param some_condition A single logical. `TRUE` groups by the columns named
#'   in `unique_trials_RCBD`; any other value groups by the columns named in
#'   `unique_trials_SP`.
#' @return `data`, grouped by the selected columns.
doAGroupBy <- function(data, some_condition) {
  # isTRUE() is a strict scalar test, so NA or a vector of length > 1 falls
  # through to the SP columns instead of raising an error inside `if`.
  group_args <- if (isTRUE(some_condition)) {
    unique_trials_RCBD
  } else {
    unique_trials_SP
  }
  # across(all_of()) replaces the superseded group_by_at(vars()) and marks
  # group_args explicitly as an external character vector of column names.
  data %>%
    group_by(across(all_of(group_args)))
}
# Group with the FALSE branch of doAGroupBy(), then store each row's
# group number in trial_number
a <- mutate(doAGroupBy(data, FALSE), trial_number = cur_group_id())
In total, there should be 4 trials. Any ideas on how to improve this code? Thanks.
Upvotes: 0
Views: 62
Reputation: 8811
If I understood the problem correctly, this should work:
# Example data from the question, built column by column: each value is
# repeated for the number of consecutive rows in which it appears.
df <- tibble::tibble(
  source = rep(c("PDMR_vol_12", "PDMR_vol_7", "PDMR_vol_4"), times = c(4, 6, 6)),
  ID = rep(c("CF027", "CF026", "CF011"), times = c(4, 6, 6)),
  cultivar = rep(
    c("Ambassador", "ASG2000", "P26R61", "Roane", "Tomahawk", "Everest"),
    times = c(4, 3, 3, 2, 2, 2)
  ),
  design = rep(c("RCBD", "SP"), times = c(10, 6))
)
df %>%
  # Auxiliary variable: keep cultivar only for RCBD designs, so it separates
  # trials there and is NA (ignored) for every other design
  mutate(aux = if_else(design == "RCBD", cultivar, NA_character_)) %>%
  # Group by source, ID, design and aux
  group_by(source, ID, design, aux) %>%
  # Number each group; cur_group_id() replaces the deprecated no-argument
  # group_indices() and yields the same group numbers
  mutate(trial = cur_group_id())
# A tibble: 16 x 6
# Groups: source, ID, design, aux [4]
source ID cultivar design aux trial
<chr> <chr> <chr> <chr> <chr> <int>
1 PDMR_vol_12 CF027 Ambassador RCBD Ambassador 1
2 PDMR_vol_12 CF027 Ambassador RCBD Ambassador 1
3 PDMR_vol_12 CF027 Ambassador RCBD Ambassador 1
4 PDMR_vol_12 CF027 Ambassador RCBD Ambassador 1
5 PDMR_vol_7 CF026 ASG2000 RCBD ASG2000 3
6 PDMR_vol_7 CF026 ASG2000 RCBD ASG2000 3
7 PDMR_vol_7 CF026 ASG2000 RCBD ASG2000 3
8 PDMR_vol_7 CF026 P26R61 RCBD P26R61 4
9 PDMR_vol_7 CF026 P26R61 RCBD P26R61 4
10 PDMR_vol_7 CF026 P26R61 RCBD P26R61 4
11 PDMR_vol_4 CF011 Roane SP NA 2
12 PDMR_vol_4 CF011 Roane SP NA 2
13 PDMR_vol_4 CF011 Tomahawk SP NA 2
14 PDMR_vol_4 CF011 Tomahawk SP NA 2
15 PDMR_vol_4 CF011 Everest SP NA 2
16 PDMR_vol_4 CF011 Everest SP NA 2
Upvotes: 1