Reputation: 71
I have the following dataframe
, I'm not sure how I would do it in dplyr
.
df_test <- data.frame(group=c("A","A","B","C","C"), var=c("1","1","1","2","3"),var2=c("a","a","b","c","c"),flag=c("1","0","0","1","0"))
I want to know for each the variable group and var whether there was a flag and the distinct value of var 2 it corresponds to?
df_result<-data.frame(group=c("A","B","C","C"),var=c("1","1","2","3"),flag_yes=c("1","0","1","0"),var2_distinct=c("a","na","c","na"))
Upvotes: 1
Views: 245
Reputation: 5798
Base R Solution:
data.frame(do.call("rbind", lapply(split(df_test, paste0(df_test$group, df_test$var)),
function(x){
x$flag_yes <- ifelse(sum(as.integer(x$flag)) >= 1, 1, 0)
x$var2_distinct <- as.character(ifelse(x$flag == 1, as.character(x$var2), ""))
return(x)
}
)
),
row.names = NULL
)
Upvotes: 0
Reputation: 887991
We can group by 'group', 'var', check any
'flag' is 1 and paste
the 'var2' elements that corresponds to 'flag' 1 and change the ""
to NA
with na_if
library(dplyr)
df_test %>%
group_by(group, var) %>%
summarise(flag_yes = +(any(flag == "1")),
var2_distinct = na_if(toString(var2[flag == "1"]), ""))
# A tibble: 4 x 4
# Groups: group [3]
# group var flag_yes var2_distinct
# <fct> <fct> <int> <chr>
#1 A 1 1 a
#2 B 1 0 <NA>
#3 C 2 1 c
#4 C 3 0 <NA>
If we need the number of distinct elements as well
df_test %>%
group_by(group, var) %>%
summarise(flag_yes = +(any(flag == "1")),
var2_distinct = na_if(toString(var2[flag == "1"]), ""),
num_distinct = n_distinct(var2[flag == "1"]))
# A tibble: 4 x 5
# Groups: group [3]
# group var flag_yes var2_distinct num_distinct
# <fct> <fct> <int> <chr> <int>
#1 A 1 1 a 1
#2 B 1 0 <NA> 0
#3 C 2 1 c 1
#4 C 3 0 <NA> 0
Or using data.table
library(data.table)
setDT(df_test)[, .(flag_yes = +(any(flag == "1")),
var2_dstinct = na_if(toString(var2[flag == "1"]), "")), .(group, var)]
# group var flag_yes var2_dstinct
#1: A 1 1 a
#2: B 1 0 <NA>
#3: C 2 1 c
#4: C 3 0 <NA>
Upvotes: 2
Reputation: 389355
We can group_by
group
and var
, create a flag_yes
if there is any
flag
as 1 and get the corresponding var2
value.
library(dplyr)
df_test %>%
group_by(group, var) %>%
summarise(flag_yes = as.integer(any(flag == 1)),
var2_distinct = toString(var2[flag == 1]))
# group var flag_yes var2_distinct
# <fct> <fct> <int> <chr>
#1 A 1 1 "a"
#2 B 1 0 ""
#3 C 2 1 "c"
#4 C 3 0 ""
Upvotes: 2