Reputation: 53
I have a list object in R called list_df with a length of 4 . Each 4 element contains one matrix, with one column and four rows and row names as characters. What I want to do is I would like to get means and upper and lower bounds of these means in confidence interval 95% for every row in dataframe that matches with the names of other rows in other dataframes or the same dataframe within the list. I want to put this information to a dataframe where there are the rownames are still the same, and there are three columns with mean, lower bound of CI and upper bound of CI. If a rowname occurs only in one dataframe, I would like the final mean output to be that value. Lets say if the row name is missing, I would like those na's to have no effect on the means. So if say row "blue" occurs only in element 1 and element 5 and I want to mean(blue) to be average of these two. So if the first matrix is row.names= a b c d with values 1 2 3 4 and second one is a b c c 1 ,2,3,3 I want the final df ouput to be row.names = a b c d values 1,2,3,4 Here is a reproducable example:
df1 <- data.frame(
var1 = c(1, 2, 3, 4),
var2 = c(5, 6, 7, 8)
)
m1 <- as.matrix(df1)
rownames(m1) <- c("red", "green", "black", "white")
df2 <- data.frame(
var1 = c(9, 10, 11, 12),
var2 = c(13, 14, 15, 16)
)
m2 <- as.matrix(df2)
rownames(m2) <- c("red", "green", "black", "white")
df3 <- data.frame(
var1 = c(17, 18, 19, 20),
var2 = c(21, 22, 23, 24)
)
m3 <- as.matrix(df3)
rownames(m3) <- c("red", "green", "white", "white")
df4 <- data.frame(
var1 = c(25, 26, 27, 28),
var2 = c(29, 30, 31, 32)
)
m4 <- as.matrix(df4)
rownames(m4) <- c("black", "green", "white", "white")
# Combine the matrices into a list
list_df <- list(m1, m2, m3, m4)
I want the output to be :
dffinal <- data.frame(
mean = c(mean(red), mean(green), mean(black), mean(white)),
lower = c(lowerCI(red), lowerCI(green), lowerCI(black), lowerCI(white))
upper c(upperCI(red), upperCI(green), upperCI(black), lowerCI(white))
)
#with rownames being like this
rownames(dffinal) <- c("red", "green", "black", "white")
Upvotes: 0
Views: 75
Reputation: 79184
Something like this:
library(dplyr)
library(tidyr)
library(tibble)
bind_rows(list(df1 = df1, df2 = df2, df3 = df3, df4=df4), .id = 'id') %>%
rownames_to_column("colors") %>%
mutate(colors = gsub("([A-Za-z]+).*", "\\1", colors)) %>%
pivot_longer(starts_with("var")) %>%
group_by(id, colors) %>%
# group_by(colors) %>%
summarise( n = sum(value),
mean_value = mean(value, na.rm = TRUE),
se = sd(value, na.rm = TRUE) / sqrt(n),
ci = 1.96 * se,
lb = mean_value - ci,
ub = mean_value + ci)
)
id colors n mean_value se ci lb ub
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 df1 black 10 5 0.894 1.75 3.25 6.75
2 df1 blue 6 3 1.15 2.26 0.737 5.26
3 df1 purple 8 4 1 1.96 2.04 5.96
4 df1 white 12 6 0.816 1.60 4.40 7.60
5 df2 black 26 13 0.555 1.09 11.9 14.1
6 df2 green 24 12 0.577 1.13 10.9 13.1
7 df2 red 22 11 0.603 1.18 9.82 12.2
8 df2 white 28 14 0.535 1.05 13.0 15.0
9 df3 orange 42 21 0.436 0.855 20.1 21.9
10 df3 purple 38 19 0.459 0.899 18.1 19.9
11 df3 white 44 22 0.426 0.836 21.2 22.8
12 df3 yellow 40 20 0.447 0.877 19.1 20.9
13 df4 black 56 28 0.378 0.741 27.3 28.7
14 df4 blue 54 27 0.385 0.754 26.2 27.8
15 df4 pink 58 29 0.371 0.728 28.3 29.7
16 df4 white 60 30 0.365 0.716 29.3 30.7
Upvotes: 1
Reputation: 887681
We could do
library(purrr)
library(dplyr) #version >= 1.1.0
library(qwraps2)
library(tidyr)
map_dfr(list_df, ~ as_tibble(.x) %>%
mutate(color = row.names(.x), .before = 1) %>%
mutate(rn = row_number())) %>%
pivot_longer(cols = c(var1, var2)) %>%
group_by(color, rn) %>%
reframe(value = mean(value)) %>%
group_by(color) %>%
reframe(out = list(mean_ci(value))) %>%
unnest_wider(out)
-output
# A tibble: 4 × 4
color mean lcl ucl
<chr> <qwrps2__> <qwrps2__> <qwrps2__>
1 black 18.0 0.3603241 35.63968
2 green 16.0 NA NA
3 red 11.0 NA NA
4 white 21.5 14.6401261 28.35987
list_df <- list(structure(c(1, 2, 3, 4, 5, 6, 7, 8),
dim = c(4L, 2L), dimnames = list(
c("red", "green", "black", "white"), c("var1", "var2"))),
structure(c(9, 10, 11, 12, 13, 14, 15, 16), dim = c(4L, 2L
), dimnames = list(c("red", "green", "black", "white"), c("var1",
"var2"))), structure(c(17, 18, 19, 20, 21, 22, 23, 24), dim = c(4L,
2L), dimnames = list(c("red", "green", "white", "white"),
c("var1", "var2"))), structure(c(25, 26, 27, 28, 29,
30, 31, 32), dim = c(4L, 2L), dimnames = list(c("black",
"green", "white", "white"), c("var1", "var2"))))
Upvotes: 3