Reputation: 434
I have a dataset (my_data) which is a list of length 50. Each element of this list consists of 4 lists of 2 values each, which are generated from a specified bivariate Normal distribution. My goal is to get the mean of each component of each inner list across the entire data. For instance, I want the mean of the first components of the first lists across all 50 elements of my_data, the mean of the second components of the first lists across all 50 elements of my_data, and so on. I have achieved this with the following code.
# Simulate the nested data set: 50 outer elements, each holding one draw
# (in absolute value) for every mean vector in `mean_list`.
set.seed(4991)
mean_list <- list(c(0, 0), c(3, 3), c(6, 6), c(0, 0))
# Covariance matrix shared by all draws
cov_mat <- matrix(c(1, 0.8, 0.8, 1), 2)
# Preallocate the outer list instead of growing it on every iteration
my_data <- vector("list", 50)
# 50 iterations
for (k in seq_along(my_data)) {
  seg_data <- vector("list", length(mean_list))
  for (i in seq_along(mean_list)) {
    # mvrnorm() lives in MASS; qualifying the call lets the script run
    # without a prior library(MASS). abs() makes every value non-negative.
    seg_data[[i]] <- abs(
      MASS::mvrnorm(1, mu = mean_list[[i]], Sigma = cov_mat)
    )
  }
  my_data[[k]] <- seg_data
}
# For each of the four inner vectors, gather its first and second component
# across every element of `my_data`, then tabulate the eight means.
# Vectors are preallocated to the final length rather than grown in the loop.
first_list1 <- numeric(length(my_data))
first_list2 <- numeric(length(my_data))
second_list1 <- numeric(length(my_data))
second_list2 <- numeric(length(my_data))
third_list1 <- numeric(length(my_data))
third_list2 <- numeric(length(my_data))
fourth_list1 <- numeric(length(my_data))
fourth_list2 <- numeric(length(my_data))
# seq_along() yields an empty sequence for an empty list, unlike 1:length()
for (p in seq_along(my_data)) {
  first_list1[p] <- my_data[[p]][[1]][1]
  first_list2[p] <- my_data[[p]][[1]][2]
  second_list1[p] <- my_data[[p]][[2]][1]
  second_list2[p] <- my_data[[p]][[2]][2]
  third_list1[p] <- my_data[[p]][[3]][1]
  third_list2[p] <- my_data[[p]][[3]][2]
  fourth_list1[p] <- my_data[[p]][[4]][1]
  fourth_list2[p] <- my_data[[p]][[4]][2]
}
# One row per inner vector, one column per component.
# TRUE is spelled out because T is an ordinary, reassignable variable.
data.frame(
  first_elements = c(
    mean(first_list1, na.rm = TRUE),
    mean(second_list1, na.rm = TRUE),
    mean(third_list1, na.rm = TRUE),
    mean(fourth_list1, na.rm = TRUE)
  ),
  second_elements = c(
    mean(first_list2, na.rm = TRUE),
    mean(second_list2, na.rm = TRUE),
    mean(third_list2, na.rm = TRUE),
    mean(fourth_list2, na.rm = TRUE)
  )
)
I am having trouble finding an easier way of coding this. I am sure it is possible with the dplyr package in R, but I have not been able to figure it out yet. Is it possible to find the elementwise means without having to create so many variables?
Upvotes: 0
Views: 40
Reputation: 2402
Maybe this? I recommend coercing the nested list into a data frame. This will require setting some names for the columns and the list id. Once it's a data frame, performing those calculations is easy.
library(tidyverse)
# Reproducible simulation of the nested input: 50 outer elements, each a list
# with one absolute-valued bivariate draw per entry of `mean_list`.
set.seed(4991)
mean_list <- list(c(0, 0), c(3, 3), c(6, 6), c(0, 0))
# Covariance matrix
cov_mat <- matrix(c(1, 0.8, 0.8, 1), 2)
# 50 iterations; the draws happen in the same order as a nested for loop
my_data <- lapply(seq_len(50), function(k) {
  lapply(mean_list, function(mu) {
    abs(MASS::mvrnorm(1, mu = mu, Sigma = cov_mat))
  })
})
# Labels: one id per outer list element, one column name per inner vector
list_id_names <- as.character(seq_along(my_data))
col_names <- paste0("x", seq_along(my_data[[1]]))
# Flatten the nested list into a single tibble; each outer element becomes a
# small tibble with columns x1..x4, and its position is kept in `list_id`
df <- my_data %>%
  set_names(list_id_names) %>%
  map(function(seg) as_tibble(set_names(seg, col_names))) %>%
  bind_rows(.id = "list_id") %>%
  mutate(list_id = as.integer(list_id))
df
#> # A tibble: 100 × 5
#> list_id x1 x2 x3 x4
#> <int> <dbl> <dbl> <dbl> <dbl>
#> 1 1 1.09 3.12 4.01 0.768
#> 2 1 1.77 3.51 4.61 0.273
#> 3 2 0.483 4.22 5.18 1.53
#> 4 2 0.171 4.85 5.20 0.385
#> 5 3 0.572 4.54 5.02 0.450
#> 6 3 0.848 4.67 4.18 0.378
#> 7 4 1.09 3.25 4.35 0.460
#> 8 4 0.393 1.96 4.65 1.29
#> 9 5 1.00 2.58 5.39 3.16
#> 10 5 0.178 2.43 5.29 2.82
#> # … with 90 more rows
# Average within each list element: the rows sharing a list_id are collapsed
# into one row holding the mean of every remaining column, so the result has
# one row per list element (not one mean across all elements).
df %>%
  group_by(list_id) %>%
  summarise(across(everything(), mean))
#> # A tibble: 50 × 5
#> list_id x1 x2 x3 x4
#> <int> <dbl> <dbl> <dbl> <dbl>
#> 1 1 1.43 3.32 4.31 0.521
#> 2 2 0.327 4.54 5.19 0.958
#> 3 3 0.710 4.61 4.60 0.414
#> 4 4 0.742 2.60 4.50 0.877
#> 5 5 0.589 2.51 5.34 2.99
#> 6 6 1.09 4.73 6.07 0.725
#> 7 7 0.405 3.27 4.90 1.04
#> 8 8 0.772 1.61 5.58 0.563
#> 9 9 0.343 2.18 6.87 0.460
#> 10 10 1.59 3.02 4.40 1.04
#> # … with 40 more rows
Created on 2022-09-14 by the reprex package (v2.0.1)
Upvotes: 0