Reputation: 7526
I have dataframe X01
whose columns I should summarize with mean
, max
and min
> head(X01)
B01002e2 B01002e3
1 39.6 47.3
2 37.0 44.8
3 52.6 49.8
4 35.5 26.7
5 39.4 23.9
6 40.8 39.8
My objective is to add min
, max
, and mean
following each column. So far, I have done this manually by rearranging column order, but I will soon have data with many columns which makes this approach very slow:
X01$B01002e2_min <- min(X01$B01002e2, na.rm = TRUE)
X01$B01002e2_max <- max(X01$B01002e2, na.rm = TRUE)
X01$B01002e2_mean <- mean(X01$B01002e2, na.rm = TRUE)
X01$B01002e3_min <- min(X01$B01002e3, na.rm = TRUE)
X01$B01002e3_max <- max(X01$B01002e3, na.rm = TRUE)
X01$B01002e3_mean <- mean(X01$B01002e3, na.rm = TRUE)
X01 <- X01[ , c(1,3,4,5,2,6,7,8)]
> head(X01)
B01002e2 B01002e2_min B01002e2_max B01002e2_mean B01002e3 B01002e3_min B01002e3_max
1 39.6 6 83.7 35.3427547 47.3 8.9 90.8
2 37.0 6 83.7 35.3427547 44.8 8.9 90.8
3 52.6 6 83.7 35.3427547 49.8 8.9 90.8
4 35.5 6 83.7 35.3427547 26.7 8.9 90.8
5 39.4 6 83.7 35.3427547 23.9 8.9 90.8
6 40.8 6 83.7 35.3427547 39.8 8.9 90.8
B01002e3_mean
1 37.6894248
2 37.6894248
3 37.6894248
4 37.6894248
5 37.6894248
6 37.6894248
Is there a solution in R to add these columns after each column being processed in one step, for example with addmargins()
?
dput(head(X01))
structure(list(B01002e2 = c(39.6, 37, 52.6, 35.5, 39.4, 40.8),
B01002e3 = c(47.3, 44.8, 49.8, 26.7, 23.9, 39.8)), .Names = c("B01002e2",
"B01002e3"), row.names = c(NA, 6L), class = "data.frame")
Upvotes: 0
Views: 71
Reputation: 93791
Here's a dplyr
approach:
library(dplyr)
X01 %>% mutate_all(funs(max, mean, min))
B01002e2 B01002e3 B01002e2_max B01002e3_max B01002e2_mean B01002e3_mean B01002e2_min B01002e3_min 1 39.6 47.3 52.6 49.8 40.81667 38.71667 35.5 23.9 2 37.0 44.8 52.6 49.8 40.81667 38.71667 35.5 23.9 3 52.6 49.8 52.6 49.8 40.81667 38.71667 35.5 23.9 4 35.5 26.7 52.6 49.8 40.81667 38.71667 35.5 23.9 5 39.4 23.9 52.6 49.8 40.81667 38.71667 35.5 23.9 6 40.8 39.8 52.6 49.8 40.81667 38.71667 35.5 23.9
If you want to ignore NA
then you can add na.rm=TRUE
:
X01[3,1] = NA
X01 %>% mutate_all(funs(max, mean, min), na.rm=TRUE)
B01002e2 B01002e3 B01002e2_max B01002e3_max B01002e2_mean B01002e3_mean B01002e2_min B01002e3_min 1 39.6 47.3 40.8 49.8 38.46 38.71667 35.5 23.9 2 37.0 44.8 40.8 49.8 38.46 38.71667 35.5 23.9 3 NA 49.8 40.8 49.8 38.46 38.71667 35.5 23.9 4 35.5 26.7 40.8 49.8 38.46 38.71667 35.5 23.9 5 39.4 23.9 40.8 49.8 38.46 38.71667 35.5 23.9 6 40.8 39.8 40.8 49.8 38.46 38.71667 35.5 23.9
If you just want the summary values as a new data frame, you can do this:
X01 %>% summarise_all(funs(max, mean, min), na.rm=TRUE)
B01002e2_max B01002e3_max B01002e2_mean B01002e3_mean B01002e2_min B01002e3_min 1 40.8 49.8 38.46 38.71667 35.5 23.9
Upvotes: 1
Reputation: 93813
Here's an attempt using a functional approach to loop over each column and function:
funs <- c("min","max","mean")
cbind(
dat,
unlist(Map(function(f,d) lapply(d,f), mget(funs, inherits=TRUE), list(dat) ), rec=FALSE)
)
# B01002e2 B01002e3 min.B01002e2 min.B01002e3 max.B01002e2 max.B01002e3 mean.B01002e2 mean.B01002e3
#1 39.6 47.3 35.5 23.9 52.6 49.8 40.81667 38.71667
#2 37.0 44.8 35.5 23.9 52.6 49.8 40.81667 38.71667
#3 52.6 49.8 35.5 23.9 52.6 49.8 40.81667 38.71667
#4 35.5 26.7 35.5 23.9 52.6 49.8 40.81667 38.71667
#5 39.4 23.9 35.5 23.9 52.6 49.8 40.81667 38.71667
#6 40.8 39.8 35.5 23.9 52.6 49.8 40.81667 38.71667
Upvotes: 1