Ktass
Ktass

Reputation: 47

Creating a dataframe from vectors for mean, min, and max in r

I'm trying to create a new dataframe that contains the mean, min, and max from an existing dataframe (top_SIM_weekly) coupled with another dataframe (df_obs_weekly).

I've included reproducible datasets: dput(top_SIM_weekly)

structure(list(X1 = c(18.3039606, 18.3039606, 18.3039606, 18.3039606, 
    18.3039606, 18.3039606), X2 = c(30.73478281, 30.73478281, 30.73478281, 
    30.73478281, 30.73478281, 30.73478281), X3 = c(50.88565679, 50.88565679, 
    50.88565679, 50.88565679, 50.88565679, 50.88565679), X4 = c(35.40195165, 
    35.40195165, 35.40195165, 35.40195165, 35.40195165, 35.40195165 ), X5 = c(23.12681287, 23.12681287, 23.12681287, 23.12681287, 
    23.12681287, 23.12681287), X6 = c(41.35229054, 41.35229054, 41.35229054, 
    41.35229054, 41.35229054, 41.35229054), X7 = c(67.48505048, 67.48505048, 
    67.48505048, 67.48505048, 67.48505048, 67.48505048), X8 = c(53.04008984, 
    53.04008984, 53.04008984, 53.04008984, 53.04008984, 53.04008984 ), X9 = c(58.26971959, 58.26971959, 58.26971959, 58.26971959, 
    58.26971959, 58.26971959), X10 = c(111.1841342, 111.1841342, 
    111.1841342, 111.1841342, 111.1841342, 111.1841342), X11 = c(100.7146155, 
    100.7146155, 100.7146155, 100.7146155, 100.7146155, 100.7146155 ), X12 = c(224.9074216, 224.9074216, 224.9074216, 224.9074216, 
    224.9074216, 224.9074216), X13 = c(255.2490203, 255.2490203, 
    255.2490203, 255.2490203, 255.2490203, 255.2490203), X14 = c(404.1588161, 
    404.1588161, 404.1588161, 404.1588161, 404.1588161, 404.1588161 ), X15 = c(443.9414483, 443.9414483, 443.9414483, 443.9414483, 
    443.9414483, 443.9414483), X16 = c(886.3440158, 886.3440158, 
    886.3440158, 886.3440158, 886.3440158, 886.3440158), X17 = c(1120.305186, 
    1120.305186, 1120.305186, 1120.305186, 1120.305186, 1120.305186 ), X18 = c(1295.250279, 1295.250279, 1295.250279, 1295.250279, 
    1295.250279, 1295.250279), X19 = c(1227.257398, 1227.257398, 
    1227.257398, 1227.257398, 1227.257398, 1227.257398), X20 = c(462.4336653, 
    462.4336653, 462.4336653, 462.4336653, 462.4336653, 462.4336653 ), X21 = c(533.812084, 533.812084, 533.812084, 533.812084, 533.812084, 
    533.812084), X22 = c(-0.895926113, -0.895926113, -0.895926113, 
    -0.895926113, -0.895926113, -0.895926113), X23 = c(0, 0, 0, 0,  0, 0), X24 = c(0.00587596, 0.00587596, 0.00587596, 0.00587596, 
    0.00587596, 0.00587596), X25 = c(0, 0, 0, 0, 0, 0), X26 = c(0,  0, 0, 0, 0, 0), X27 = c(0, 0, 0, 0, 0, 0), X28 = c(0, 0, 0, 0,  0, 0), X29 = c(0, 0, 0, 0, 0, 0), X30 = c(0, 0, 0, 0, 0, 0), 
        X31 = c(0, 0, 0, 0, 0, 0), X32 = c(0, 0, 0, 0, 0, 0), X33 = c(0, 
        0, 0, 0, 0, 0), X34 = c(0, 0, 0, 0, 0, 0), X35 = c(0, 0, 
        0, 0, 0, 0), X36 = c(0, 0, 0, 0, 0, 0), X37 = c(0, 0, 0, 
        0, 0, 0), X38 = c(0, 0, 0, 0, 0, 0), X39 = c(0, 0, 0, 0, 
        0, 0), X40 = c(0, 0, 0, 0, 0, 0), X41 = c(0.076302837, 0.076302837, 
        0.076302837, 0.076302837, 0.076302837, 0.076302837), X42 = c(0.04497379, 
        0.04497379, 0.04497379, 0.04497379, 0.04497379, 0.04497379
        ), X43 = c(0.47941265, 0.47941265, 0.47941265, 0.47941265, 
        0.47941265, 0.47941265), X44 = c(1.70405395, 1.70405395, 
        1.70405395, 1.70405395, 1.70405395, 1.70405395), X45 = c(1.069342585, 
        1.069342585, 1.069342585, 1.069342585, 1.069342585, 1.069342585
        ), X46 = c(0.642559657, 0.642559657, 0.642559657, 0.642559657, 
        0.642559657, 0.642559657), X47 = c(1.423669878, 1.423669878, 
        1.423669878, 1.423669878, 1.423669878, 1.423669878), X48 = c(2.947726076, 
        2.947726076, 2.947726076, 2.947726076, 2.947726076, 2.947726076
        ), X49 = c(2.083167482, 2.083167482, 2.083167482, 2.083167482, 
        2.083167482, 2.083167482), X50 = c(3.001766438, 3.001766438, 
        3.001766438, 3.001766438, 3.001766438, 3.001766438), X51 = c(4.02263428, 
        4.02263428, 4.02263428, 4.02263428, 4.02263428, 4.02263428
        ), X52 = c(5.236041751, 5.236041751, 5.236041751, 5.236041751, 
        5.236041751, 5.236041751), year = c(2018, 2018, 2018, 2018, 
        2018, 2018), corr = c(0.598566601816163, 0.598566601816163, 
        0.598566601816163, 0.598566601816163, 0.598566601816163, 
        0.598566601816163)), row.names = 272:277, class = "data.frame")

And dput(df_obs_weekly)

structure(list(Epiweek = structure(1L, .Label = "n", class = "factor"), 
X1 = 119L, X2 = 103L, X3 = 96L, X4 = 99L, X5 = 53L, X6 = 91L, 
X7 = 94L, X8 = 101L, X9 = 106L, X10 = 132L, X11 = 134L, X12 = 164L, 
X13 = 189L, X14 = 275L, X15 = 302L, X16 = 416L, X17 = 493L, 
X18 = 201L, X19 = NaN, X20 = NaN, X21 = 410L, X22 = 370L, 
X23 = 426L, X24 = 311L, X25 = 253L, X26 = 264L, X27 = 175L, 
X28 = 158L, X29 = 128L, X30 = 124L, X31 = 96L, X32 = 83L, 
X33 = 69L, X34 = 47L, X35 = 72L, X36 = 64L, X37 = 62L, X38 = 61L, 
X39 = 53L, X40 = 70L, X41 = 75L, X42 = 62L, X43 = 75L, X44 = 44L, 
X45 = 66L, X46 = 86L, X47 = 91L, X48 = 88L, X49 = 86L, X50 = 135L, 
X51 = 95L, X52 = 99L, year = 2018, corr = 1), row.names = 1L, class = "data.frame")

So far I've attempted to create a matrix from vectors for mean, min and max using the following code:

mean <- top_SIM_weekly %>% summarise_if(is.numeric, mean)
max <- top_SIM_weekly %>% summarise_if(is.numeric, max)
min <- top_SIM_weekly %>% summarise_if(is.numeric, min)
y <- rbind(mean,max,min)
row.names(y) <- c("mean","max","min")

but I'm getting the following error :Error: expecting a one sided formula, a function, or a function name. This error wasn't coming up before so I'm not sure what happened. I also would like to switch the rows and columns for the final dataframe with the first row as column names.

Upvotes: 0

Views: 618

Answers (2)

곰가드
곰가드

Reputation: 11

c(mean, sum, min) %>% 
  map_df(function(.x){
          top_SIM_weekly %>% 
            group_by(year,corr) %>% 
            summarise_at(vars(starts_with('X')), .x) %>% 
            return()
        }) %>% 
  bind_rows() 

You can use map_df function with function_names. This code is not fit your request. But I believe it can be help

Upvotes: 0

akrun
akrun

Reputation: 887991

Perhaps, we need

rbind(mean = x1, max = x2, min = x3)

as the mean, max, min objects are not created for to rbind


As the 'year', and 'corr' variables are also numeric, it may be better to use starts_with

library(dplyr)
library(tidyr)
top_SIM_weekly %>%
   summarise(across(starts_with('X'),  ~
           list(c(mean = mean(.), max = max(.), min = min(.))))) %>% 
    unnest(everything())

Another way would be to select the numeric variables, pivot to 'long' format with pivot_longer and do a group by mean, min, max

library(dplyr) #1.0.0
library(tidyr)
library(tibble)
top_SIM_weekly %>%
     select(where(is.numeric))  %>%
     pivot_longer(cols = everything()) %>% 
     group_by(name) %>% 
     summarise(Mean = mean(value), Max = max(value), Min = min(value))%>% 
     column_to_rownames('name') %>% 
     t

Upvotes: 1

Related Questions