Reputation: 71
Here is some sample data:
# Sample data: one row per movie outing, with three categorical descriptors
# (movie_type, snack_type, event_type) and the numeric total_cost that is
# compared across groups.
movie_df <- data.frame(
  ID = as.numeric(1:10),
  movie_type = c(
    "Action", "Horror", "Comedy", "Thriller", "Comedy",
    "Action", "Thriller", "Horror", "Action", "Comedy"
  ),
  snack_type = c(
    "Chocolate", "Popcorn", "Candy", "Popcorn", "Popcorn",
    "Candy", "Chocolate", "Candy", "Popcorn", "Chocolate"
  ),
  event_type = c(
    "Solo", "Family", "Date", "Friends", "Solo",
    "Family", "Date", "Date", "Friends", "Friends"
  ),
  total_cost = c(50, 35, 20, 50, 30, 60, 25, 35, 20, 50)
)
What I want to do is go through each column and compare each group to the rest of the groups on `total_cost`. For example, I want to see how `movie_type == 'Action'` compares to `movie_type != 'Action'` for `total_cost`. I want to do that for every type in `movie_type`, then for every type in `snack_type` and `event_type`.
What I ultimately want to get to is the table below, where `sd` = standard deviation. Ideally this will be done with a `tidyverse` method in R (e.g. `dplyr` or `tidyr`):
> results_df
# A tibble: 11 x 11
Group Grp_1 Grp_2 Grp_1_mean Grp_2_mean Grp_1_sd Grp_2_sd Grp_1_n Grp_2_n Mean_Diff `t-test`
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 movie_type Action Rest of group 43.3 35 20.8 11.5 3 7 8.33 2.84
2 movie_type Horror Rest of group 35 38.1 0 16.0 2 8 -3.12 -2.21
3 movie_type Thriller Rest of group 37.5 37.5 17.7 14.6 2 8 0 0
4 movie_type Comedy Rest of group 33.3 39.3 15.3 14.6 3 7 -5.95 -2.22
5 snack_type Chocolate Rest of group 41.7 35.7 14.4 14.8 3 7 5.95 2.26
6 snack_type Candy Rest of group 38.3 37.1 20.2 12.9 3 7 1.19 0.407
7 snack_type Popcorn Rest of group 33.8 40 12.5 15.8 4 6 -6.25 -2.60
8 event_type Date Rest of group 26.7 42.1 7.64 14.1 3 7 -15.5 -7.25
9 event_type Family Rest of group 47.5 35 17.7 13.4 2 8 12.5 3.86
10 event_type Friends Rest of group 40 36.4 17.3 14.1 3 7 3.57 1.28
11 event_type Solo Rest of group 40 36.9 14.1 15.1 2 8 3.12 1.04
Upvotes: 0
Views: 262
Reputation: 5232
It's the same logic as in Daniel's answer, using `purrr::map` and `purrr::map2`.
library(dplyr)
library(tibble)
library(purrr)
library(stringr)
# Categorical columns whose levels are each compared against all other rows.
needed_cols <- c("movie_type", "snack_type", "event_type")

# Output column names: the four per-group statistics for side 1 (the level),
# followed by the same four for side 2 (the rest):
# group_1, mean_1, sd_1, n_1, group_2, mean_2, sd_2, n_2.
new_names <- str_c(
  rep(c("group", "mean", "sd", "n"), times = 2),
  "_",
  rep(1:2, each = 4)
)

# Summarise total_cost in movie_df for the rows where column `col` equals
# `level` versus every other row.
#
# col   : name of a column of movie_df, as a string.
# level : a single value of that column, as a string.
#
# Returns a one-row tibble whose columns are named by `new_names`.
compare_level <- function(col, level) {
  stats <- movie_df %>%
    # .data[[col]] looks the column up by its string name inside the data
    # mask, so it cannot accidentally resolve to an object outside movie_df.
    mutate(group = ifelse(.data[[col]] == level, level, "rest_of_group")) %>%
    group_by(group) %>%
    summarize(mean = mean(total_cost), sd = sd(total_cost), n = n())

  # Pick each side explicitly by its label instead of relying on the order in
  # which summarize() happens to return the two groups.
  level_row <- stats[match(level, stats$group), ]
  rest_row <- stats[match("rest_of_group", stats$group), ]

  # Rename before binding so bind_cols() never sees duplicated column names.
  names(level_row) <- new_names[1:4]
  names(rest_row) <- new_names[5:8]
  bind_cols(level_row, rest_row)
}

my_data <- needed_cols %>%
  map(function(col) {
    # as.character() so factor columns yield plain string levels.
    unique(as.character(movie_df[[col]])) %>%
      map(function(level) compare_level(col, level)) %>%
      bind_rows() %>%
      # Record which column this set of comparisons came from.
      mutate(group = col)
  }) %>%
  bind_rows() %>%
  # Prefix matching: a bare "n" pattern would also match the mean_* columns.
  select(
    group,
    starts_with("group_"),
    starts_with("mean_"),
    starts_with("sd_"),
    starts_with("n_")
  ) %>%
  mutate(mean_diff = mean_1 - mean_2)
Upvotes: 1
Reputation: 4358
Sorry it's not in pipes, but in base R we can:
# For every level of every grouping column, compare total_cost for the rows at
# that level against all remaining rows, producing one result row per level.
#
# lapply() walks the selected columns directly; apply() would first coerce the
# data frame to a matrix. Columns are selected by name rather than position so
# the code does not depend on column order.
results_df <- do.call(
  rbind,
  # Flatten the per-column lists by one level only; the resulting names
  # (<column><index>, e.g. "movie_type1") become the row names after rbind().
  unlist(
    lapply(
      movie_df[c("movie_type", "snack_type", "event_type")],
      function(u) {
        lapply(unique(u), function(x) {
          in_group <- u == x
          data.frame(
            group1 = as.character(x),
            group2 = "rest",
            grp1_mean = mean(movie_df$total_cost[in_group]),
            grp2_mean = mean(movie_df$total_cost[!in_group]),
            grp1_sd = sd(movie_df$total_cost[in_group]),
            grp2_sd = sd(movie_df$total_cost[!in_group])
          )
        })
      }
    ),
    recursive = FALSE
  )
)
# add mean differences
results_df$meandiff <- with(results_df, grp1_mean - grp2_mean)
> results_df
group1 group2 grp1_mean grp2_mean grp1_sd grp2_sd meandiff
movie_type1 Action rest 43.33333 35.00000 20.816660 11.54701 8.333333
movie_type2 Horror rest 35.00000 38.12500 0.000000 16.02175 -3.125000
movie_type3 Comedy rest 33.33333 39.28571 15.275252 14.55695 -5.952381
movie_type4 Thriller rest 37.50000 37.50000 17.677670 14.63850 0.000000
snack_type1 Chocolate rest 41.66667 35.71429 14.433757 14.84042 5.952381
snack_type2 Popcorn rest 33.75000 40.00000 12.500000 15.81139 -6.250000
snack_type3 Candy rest 38.33333 37.14286 20.207259 12.86375 1.190476
event_type1 Solo rest 40.00000 36.87500 14.142136 15.10381 3.125000
event_type2 Family rest 47.50000 35.00000 17.677670 13.36306 12.500000
event_type3 Date rest 26.66667 42.14286 7.637626 14.09998 -15.476190
event_type4 Friends rest 40.00000 36.42857 17.320508 14.05770 3.571429
Upvotes: 0