Emy
Emy

Reputation: 977

r: looping through a list of lists and appending the results to a data frame

I have the following lists, list1 and list2:

library(tidyverse)

# Example data: four small data frames with identical columns. `index` is the
# grouping key (two rows each for groups 1 and 2); `first_column` and
# `second_column` hold the values that are averaged per group further down.
df1a <- data.frame(
  index = c(1, 1, 2, 2),
  first_column = c(1, 2, 3, 4),
  second_column = c(5, 6, 7, 8)
)

df1b <- data.frame(
  index = c(1, 1, 2, 2),
  first_column = c(4, 2, 3, 1),
  second_column = c(8, 6, 7, 5)
)

# dplyr::lst() names each element after the variable passed in, so
# names(list1) is c("df1a", "df1b").
list1 <- dplyr::lst(df1a, df1b)

df2a <- data.frame(
  index = c(1, 1, 2, 2),
  first_column = c(4, 3, 2, 1),
  second_column = c(8, 7, 6, 5)
)

df2b <- data.frame(
  index = c(1, 1, 2, 2),
  first_column = c(8, 2, 6, 5),
  second_column = c(4, 3, 7, 1)
)

# Second named list, built the same way: names(list2) is c("df2a", "df2b").
list2 <- dplyr::lst(df2a, df2b)

This is the function that I want to run on list1 and list2:

# Per-`index` means of `first_column` and `second_column` for one data frame.
#
# subset: data frame with columns `index`, `first_column`, `second_column`.
# name:   label written to the `type` column of every summary row.
#
# The summary is printed; the function's value is whatever print() returns
# (the summary itself, invisibly).
output_mean <- function(subset, name) {
  by_index <- group_by(subset, index)
  group_means <- summarize(
    by_index,
    across(c(first_column, second_column), ~ mean(.x, na.rm = TRUE))
  )
  labelled <- mutate(group_means, type = name)
  print(labelled)
}

Now, I can loop through one list:

# Summarise each data frame in list1 and store the result under the data
# frame's own name.
x <- list()
for (i in names(list1)) {
  # `i` is already a character name, so it can index `x` directly.
  x[[i]] <- output_mean(list1[[i]], i)
}

#> # A tibble: 2 x 4
#>   index first_column second_column type 
#>   <dbl>        <dbl>         <dbl> <chr>
#> 1     1          1.5           5.5 df1a 
#> 2     2          3.5           7.5 df1a 
#> # A tibble: 2 x 4
#>   index first_column second_column type 
#>   <dbl>        <dbl>         <dbl> <chr>
#> 1     1            3             7 df1b 
#> 2     2            2             6 df1b

And then I can put the results into one dataframe:

# Stack the per-data-frame summaries in `x` row-wise into a single tibble.
all1 <- do.call(rbind, x)
all1
#> # A tibble: 4 x 4
#>   index first_column second_column type 
#> * <dbl>        <dbl>         <dbl> <chr>
#> 1     1          1.5           5.5 df1a 
#> 2     2          3.5           7.5 df1a 
#> 3     1          3             7   df1b 
#> 4     2          2             6   df1b

But what if I want to put list1 and list2 into a big_list and loop through it? This is what I have tried:

# Nested list: each element of big_list is itself a named list of data frames.
big_list <- list(list1, list2)

# NOTE: `y` is created here but nothing is ever stored in it below.
y <- list()
for (j in big_list){
  # `x` is reset for every inner list `j`.
  x <- list()
    for (i in names(j)) {
       ps <- output_mean(j[[i]], i)
       x[[paste0(i)]] <- ps
      }

  # `all` is reassigned on every pass of the outer loop, so after the loop it
  # only holds the rows for the last element of big_list.
  all = do.call(rbind, x)
}

The loop runs, but all ends up containing only two of the data frames (the ones from list2). That is understandable, because each pass of the outer loop overwrites all.

all
#> # A tibble: 4 x 4
#>   index first_column second_column type 
#> * <dbl>        <dbl>         <dbl> <chr>
#> 1     1          3.5           7.5 df2a 
#> 2     2          1.5           5.5 df2a 
#> 3     1          5             3.5 df2b 
#> 4     2          5.5           4   df2b

I have tried many different things, but I can't manage to get the four data frames appended together in a single data frame with 8 rows and 4 columns.

Created on 2021-05-06 by the reprex package (v2.0.0)

Upvotes: 2

Views: 1441

Answers (2)

Ronak Shah
Ronak Shah

Reputation: 389355

I would suggest storing big_list as a single concatenated (flat) list instead of a nested list.

# c() joins the two named lists into one flat list of four data frames,
# keeping the element names (df1a, df1b, df2a, df2b).
big_list <- c(list1, list2)

If you do this, your original single-loop code works as it is:

# Summarise every data frame in the flat big_list, keyed by element name.
y <- list()
for (i in names(big_list)) {
  # `i` is already a character name, so it can index `y` directly.
  y[[i]] <- output_mean(big_list[[i]], i)
}

# Row-bind all summaries into one tibble and show it.
all <- do.call(rbind, y)
all

#  index first_column second_column type 
#*  <dbl>        <dbl>         <dbl> <chr>
#1     1          1.5           5.5 df1a 
#2     2          3.5           7.5 df1a 
#3     1          3             7   df1b 
#4     2          2             6   df1b 
#5     1          3.5           7.5 df2a 
#6     2          1.5           5.5 df2a 
#7     1          5             3.5 df2b 
#8     2          5.5           4   df2b 

It is also easier to apply the function using purrr :

# imap_dfr() calls output_mean(element, name) for each element of big_list
# and row-binds the results into one data frame.
purrr::imap_dfr(big_list, output_mean)

and base R :

# Map() pairs each data frame with its name and calls output_mean() on the
# pair; do.call(rbind, ...) then stacks the resulting summaries.
do.call(rbind, Map(output_mean, big_list, names(big_list)))

Upvotes: 1

akrun
akrun

Reputation: 887991

We could initialize 'y' as a list with the same length as big_list and loop over the sequence of 'big_list' (it may also be better to initialize 'x' with the length of the inner list).

# Summarise every data frame in a nested big_list (a list of named lists).
# One combined summary per inner list is stored in `y`, then all are stacked.
y <- vector("list", length(big_list))
for (j in seq_along(big_list)) {
  inner <- big_list[[j]]
  # Preallocate the inner result list too, rather than growing it from list().
  x <- vector("list", length(inner))
  for (i in seq_along(inner)) {
    # The element's name becomes the `type` label of its summary rows.
    x[[i]] <- output_mean(inner[[i]], names(inner)[i])
  }

  y[[j]] <- do.call(rbind, x)
}

out <- do.call(rbind, y)

-output

out
# A tibble: 8 x 4
#  index first_column second_column type 
#  <dbl>        <dbl>         <dbl> <chr>
#1     1          1.5           5.5 df1a 
#2     2          3.5           7.5 df1a 
#3     1          3             7   df1b 
#4     2          2             6   df1b 
#5     1          3.5           7.5 df2a 
#6     2          1.5           5.5 df2a 
#7     1          5             3.5 df2b 
#8     2          5.5           4   df2b 

This can be done more easily with map

library(purrr)
# The outer map_dfr() walks the nested list; the inner imap_dfr() hands each
# data frame and its name to output_mean(). All results are row-bound.
out1 <- map_dfr(big_list, function(inner) imap_dfr(inner, output_mean))

-output

out1
# A tibble: 8 x 4
#  index first_column second_column type 
#  <dbl>        <dbl>         <dbl> <chr>
#1     1          1.5           5.5 df1a 
#2     2          3.5           7.5 df1a 
#3     1          3             7   df1b 
#4     2          2             6   df1b 
#5     1          3.5           7.5 df2a 
#6     2          1.5           5.5 df2a 
#7     1          5             3.5 df2b 
#8     2          5.5           4   df2b 

Upvotes: 2

Related Questions