Arnoneel Sinha
Arnoneel Sinha

Reputation: 434

How to get elementwise means of lists within a list in R

I have a dataset (`my_data`), which is a list of length 50. Each element of this list is itself a list of 4 elements with 2 values each, which are generated from a specified bivariate normal distribution. My goal is to get the mean of each component of each inner element across the entire data. For instance, I want the mean of the first components of the first inner elements over all 50 elements of `my_data`, the mean of the second components of the first inner elements over all 50 elements of `my_data`, and so on. I have achieved this with the following code.

# Fix the RNG seed so the simulated data are reproducible.
set.seed(4991)

# Mean vector of the bivariate normal for each of the 4 segments.
mean_list <- list(c(0, 0), c(3, 3), c(6, 6), c(0, 0))

# Covariance matrix shared by all segments.
cov_mat <- matrix(c(1, .8, .8, 1), 2)

# Number of simulated replicates.
n_iter <- 50

# my_data[[k]][[i]] holds the absolute value of one bivariate normal draw
# (a numeric vector of length 2) for segment i in replicate k.
my_data <- vector("list", n_iter)

for (k in seq_len(n_iter)) {
  seg_data <- vector("list", length(mean_list))
  for (i in seq_along(mean_list)) {
    # mvrnorm() lives in the MASS package; the namespace-qualified call works
    # even when library(MASS) has not been attached.
    seg_data[[i]] <- abs(
      MASS::mvrnorm(1, mu = mean_list[[i]], Sigma = cov_mat)
    )
  }
  my_data[[k]] <- seg_data
}


# One numeric vector per (inner element, component) pair, preallocated with
# one slot per element of my_data instead of being grown inside the loop.
n_rep <- length(my_data)

first_list1  <- numeric(n_rep)
first_list2  <- numeric(n_rep)
second_list1 <- numeric(n_rep)
second_list2 <- numeric(n_rep)
third_list1  <- numeric(n_rep)
third_list2  <- numeric(n_rep)
fourth_list1 <- numeric(n_rep)
fourth_list2 <- numeric(n_rep)

# seq_along() yields an empty sequence for an empty list, whereas
# 1:length(my_data) would iterate over c(1, 0).
for (p in seq_along(my_data)) {
  first_list1[p]  <- my_data[[p]][[1]][1]
  first_list2[p]  <- my_data[[p]][[1]][2]
  second_list1[p] <- my_data[[p]][[2]][1]
  second_list2[p] <- my_data[[p]][[2]][2]
  third_list1[p]  <- my_data[[p]][[3]][1]
  third_list2[p]  <- my_data[[p]][[3]][2]
  fourth_list1[p] <- my_data[[p]][[4]][1]
  fourth_list2[p] <- my_data[[p]][[4]][2]
}

# Rows are the four inner elements; the columns hold the mean of their first
# and second components. TRUE is spelled out because T is an ordinary
# variable that can be reassigned.
data.frame(
  first_elements = c(
    mean(first_list1, na.rm = TRUE),
    mean(second_list1, na.rm = TRUE),
    mean(third_list1, na.rm = TRUE),
    mean(fourth_list1, na.rm = TRUE)
  ),
  second_elements = c(
    mean(first_list2, na.rm = TRUE),
    mean(second_list2, na.rm = TRUE),
    mean(third_list2, na.rm = TRUE),
    mean(fourth_list2, na.rm = TRUE)
  )
)

I am having trouble finding an easier way of coding this. I am sure it is possible with the dplyr package in R, but I have not been able to figure it out yet. Is it possible to find the elementwise means without having to create so many variables?

Upvotes: 0

Views: 40

Answers (1)

Arthur
Arthur

Reputation: 2402

Maybe this? I recommend coercing the nested list into a data frame. This will require setting some names for the columns and the list id. Once it's a data frame, performing those calculations is easy.

library(tidyverse)
  
# seed the RNG so the draws below are reproducible
set.seed(4991)

# one mean vector per segment
mean_list <- list(c(0, 0), c(3, 3), c(6, 6), c(0, 0))

# covariance matrix used for every segment
cov_mat <- matrix(c(1, .8, .8, 1), 2)

# absolute value of a single bivariate normal draw centred at `mu`
draw_segment <- function(mu) {
  abs(MASS::mvrnorm(1, mu = mu, Sigma = cov_mat))
}

# 50 replicates, each a list with one draw per segment; lapply() visits the
# replicates and segments in order, so the random draws are consumed in the
# same sequence as a nested for loop
my_data <- lapply(seq_len(50), function(k) lapply(mean_list, draw_segment))

# names for columns and list elements
# (seq_along(x) is the safe counterpart of 1:length(x), which counts down
# to c(1, 0) when x is empty)
list_id_names <- as.character(seq_along(my_data))
col_names <- paste0("x", seq_along(my_data[[1]]))

# parse into a single data frame
# each element of my_data becomes a tibble with one column per inner vector
# (named via col_names) and one row per vector component; bind_rows() stacks
# the tibbles and stores the originating element name in list_id, which is
# then converted back to an integer
df <- my_data %>%
  set_names(list_id_names) %>%
  map(set_names, col_names) %>%
  map(as_tibble) %>%
  bind_rows(.id = "list_id") %>%
  mutate(list_id = as.integer(list_id))

df
#> # A tibble: 100 × 5
#>    list_id    x1    x2    x3    x4
#>      <int> <dbl> <dbl> <dbl> <dbl>
#>  1       1 1.09   3.12  4.01 0.768
#>  2       1 1.77   3.51  4.61 0.273
#>  3       2 0.483  4.22  5.18 1.53 
#>  4       2 0.171  4.85  5.20 0.385
#>  5       3 0.572  4.54  5.02 0.450
#>  6       3 0.848  4.67  4.18 0.378
#>  7       4 1.09   3.25  4.35 0.460
#>  8       4 0.393  1.96  4.65 1.29 
#>  9       5 1.00   2.58  5.39 3.16 
#> 10       5 0.178  2.43  5.29 2.82 
#> # … with 90 more rows

# average within list element id
# (this averages the two components inside each list element, giving one row
# per list_id)
df %>% 
  group_by(list_id) %>%
  summarize(across(everything(), mean))
#> # A tibble: 50 × 5
#>    list_id    x1    x2    x3    x4
#>      <int> <dbl> <dbl> <dbl> <dbl>
#>  1       1 1.43   3.32  4.31 0.521
#>  2       2 0.327  4.54  5.19 0.958
#>  3       3 0.710  4.61  4.60 0.414
#>  4       4 0.742  2.60  4.50 0.877
#>  5       5 0.589  2.51  5.34 2.99 
#>  6       6 1.09   4.73  6.07 0.725
#>  7       7 0.405  3.27  4.90 1.04 
#>  8       8 0.772  1.61  5.58 0.563
#>  9       9 0.343  2.18  6.87 0.460
#> 10      10 1.59   3.02  4.40 1.04 
#> # … with 40 more rows

# average each component across all list elements
# The question asks for the mean of every component of every inner vector
# over all list elements. Within each list_id the rows of df are component 1
# followed by component 2, so number the rows per list_id and group by that
# index instead. The result has one row per component and one column per
# inner vector (x1..x4). This step was not part of the reprex run above, so
# no output is shown for it.
df %>%
  group_by(list_id) %>%
  mutate(component = row_number()) %>%
  group_by(component) %>%
  summarize(across(all_of(col_names), mean))

Created on 2022-09-14 by the reprex package (v2.0.1)

Upvotes: 0

Related Questions