iskandarblue
iskandarblue

Reputation: 7526

Adding summary columns programmatically

I have dataframe X01 whose columns I should summarize with mean, max and min

> head(X01)
  B01002e2 B01002e3
1     39.6     47.3
2     37.0     44.8
3     52.6     49.8
4     35.5     26.7
5     39.4     23.9
6     40.8     39.8

My objective is to add min, max, and mean following each column. So far, I have done this manually by rearranging column order, but I will soon have data with many columns which makes this approach very slow:

X01$B01002e2_min <- min(X01$B01002e2, na.rm = TRUE)
X01$B01002e2_max <- max(X01$B01002e2, na.rm = TRUE)
X01$B01002e2_mean <- mean(X01$B01002e2, na.rm = TRUE)
X01$B01002e3_min <- min(X01$B01002e3, na.rm = TRUE)
X01$B01002e3_max <- max(X01$B01002e3, na.rm = TRUE)
X01$B01002e3_mean <- mean(X01$B01002e3, na.rm = TRUE)

X01 <- X01[ , c(1,3,4,5,2,6,7,8)]

> head(X01)
  B01002e2 B01002e2_min B01002e2_max B01002e2_mean B01002e3 B01002e3_min B01002e3_max
1     39.6            6         83.7    35.3427547     47.3          8.9         90.8
2     37.0            6         83.7    35.3427547     44.8          8.9         90.8
3     52.6            6         83.7    35.3427547     49.8          8.9         90.8
4     35.5            6         83.7    35.3427547     26.7          8.9         90.8
5     39.4            6         83.7    35.3427547     23.9          8.9         90.8
6     40.8            6         83.7    35.3427547     39.8          8.9         90.8
  B01002e3_mean
1    37.6894248
2    37.6894248
3    37.6894248
4    37.6894248
5    37.6894248
6    37.6894248

Is there a solution in R to add these columns after each column being processed in one step, for example with addmargins() ?

dput(head(X01))
    structure(list(B01002e2 = c(39.6, 37, 52.6, 35.5, 39.4, 40.8), 
        B01002e3 = c(47.3, 44.8, 49.8, 26.7, 23.9, 39.8)), .Names = c("B01002e2", 
    "B01002e3"), row.names = c(NA, 6L), class = "data.frame")

Upvotes: 0

Views: 71

Answers (2)

eipi10
eipi10

Reputation: 93791

Here's a dplyr approach:

library(dplyr)

X01 %>% mutate_all(funs(max, mean, min))
  B01002e2 B01002e3 B01002e2_max B01002e3_max B01002e2_mean B01002e3_mean B01002e2_min B01002e3_min
1     39.6     47.3         52.6         49.8      40.81667      38.71667         35.5         23.9
2     37.0     44.8         52.6         49.8      40.81667      38.71667         35.5         23.9
3     52.6     49.8         52.6         49.8      40.81667      38.71667         35.5         23.9
4     35.5     26.7         52.6         49.8      40.81667      38.71667         35.5         23.9
5     39.4     23.9         52.6         49.8      40.81667      38.71667         35.5         23.9
6     40.8     39.8         52.6         49.8      40.81667      38.71667         35.5         23.9

If you want to ignore NA then you can add na.rm=TRUE:

X01[3,1] = NA

X01 %>% mutate_all(funs(max, mean, min), na.rm=TRUE)
  B01002e2 B01002e3 B01002e2_max B01002e3_max B01002e2_mean B01002e3_mean B01002e2_min B01002e3_min
1     39.6     47.3         40.8         49.8         38.46      38.71667         35.5         23.9
2     37.0     44.8         40.8         49.8         38.46      38.71667         35.5         23.9
3       NA     49.8         40.8         49.8         38.46      38.71667         35.5         23.9
4     35.5     26.7         40.8         49.8         38.46      38.71667         35.5         23.9
5     39.4     23.9         40.8         49.8         38.46      38.71667         35.5         23.9
6     40.8     39.8         40.8         49.8         38.46      38.71667         35.5         23.9

If you just want the summary values as a new data frame, you can do this:

X01 %>% summarise_all(funs(max, mean, min), na.rm=TRUE)
  B01002e2_max B01002e3_max B01002e2_mean B01002e3_mean B01002e2_min B01002e3_min
1         40.8         49.8         38.46      38.71667         35.5         23.9

Upvotes: 1

thelatemail
thelatemail

Reputation: 93813

Here's an attempt using a functional approach to loop over each column and function:

funs <- c("min","max","mean")
cbind(
  dat,
  unlist(Map(function(f,d) lapply(d,f), mget(funs, inherits=TRUE), list(dat) ), rec=FALSE)
)
#  B01002e2 B01002e3 min.B01002e2 min.B01002e3 max.B01002e2 max.B01002e3 mean.B01002e2 mean.B01002e3
#1     39.6     47.3         35.5         23.9         52.6         49.8      40.81667      38.71667
#2     37.0     44.8         35.5         23.9         52.6         49.8      40.81667      38.71667
#3     52.6     49.8         35.5         23.9         52.6         49.8      40.81667      38.71667
#4     35.5     26.7         35.5         23.9         52.6         49.8      40.81667      38.71667
#5     39.4     23.9         35.5         23.9         52.6         49.8      40.81667      38.71667
#6     40.8     39.8         35.5         23.9         52.6         49.8      40.81667      38.71667

Upvotes: 1

Related Questions