Effigy
Effigy

Reputation: 155

DescribeBy formatting output

Since I could not find a suitable solution in other questions, I hope you guys can help me. I would like the output of the describeBy() function from the psych package to not contain the rows "xxxx.Soll" and "xxxx.Transtyp", but just "xxxx.Feuchte" where xxxx is the respective number. Also, it would be nice to have the number in an extra column. I would like to have the results in a neat dataframe so I can export them as .csv or display in a nice table with rmarkdown::paged_table or whatever. This is my df:

df <- structure(list(Datum = structure(c(18703, 18703, 18703, 18703, 
18703, 18703, 18703, 18703, 18724, 18724, 18724, 18724, 18724, 
18724, 18724, 18724, 18730, 18730, 18730, 18730, 18730, 18730, 
18730, 18730, 18744, 18744, 18744, 18744, 18744, 18744, 18744, 
18744, 18758, 18758, 18758, 18758, 18758, 18758, 18758, 18758, 
18774, 18774, 18774, 18774, 18774, 18774, 18774, 18774), class = "Date"), 
    Soll = c("1189", "119", "1192", "1202", "149", "172", "2484", 
    "552", "1189", "119", "1192", "1202", "149", "172", "2484", 
    "552", "1189", "119", "1192", "1202", "149", "172", "2484", 
    "552", "1189", "119", "1192", "1202", "149", "172", "2484", 
    "552", "1189", "119", "1192", "1202", "149", "172", "2484", 
    "552", "1189", "119", "1192", "1202", "149", "172", "2484", 
    "552"), Transtyp = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("2", 
    "5"), class = "factor"), Feuchte = c(33.8375, 35.459375, 
    36.6518518518519, 36.1193548387097, 37.8310344827586, 35.8935483870968, 
    33.3625, 36.5032258064516, 26.775, 26.0064516129032, 30.128125, 
    28.50625, 23.0645161290323, 21.45625, 25.340625, 26.446875, 
    30.4375, 31.0466666666667, 32.15625, 30.715625, 29.9875, 
    31.2290322580645, 29.084375, 29.9387096774194, 12.26875, 
    12.7925925925926, 14.6516129032258, 15.428125, 13.159375, 
    13.70625, 12.89375, 14.4, 14.078125, 10.6387096774194, 13.7896551724138, 
    17.071875, 9.690625, 11.6, 10.21875, 13.225, 19.83125, 17.2851851851852, 
    17.441935483871, 19.15, 20.24375, 22.3125, 14.2741935483871, 
    17.358064516129)), row.names = c(NA, -48L), groups = structure(list(
    Datum = structure(c(18703, 18703, 18703, 18703, 18703, 18703, 
    18703, 18703, 18724, 18724, 18724, 18724, 18724, 18724, 18724, 
    18724, 18730, 18730, 18730, 18730, 18730, 18730, 18730, 18730, 
    18744, 18744, 18744, 18744, 18744, 18744, 18744, 18744, 18758, 
    18758, 18758, 18758, 18758, 18758, 18758, 18758, 18774, 18774, 
    18774, 18774, 18774, 18774, 18774, 18774), class = "Date"), 
    Soll = c("1189", "119", "1192", "1202", "149", "172", "2484", 
    "552", "1189", "119", "1192", "1202", "149", "172", "2484", 
    "552", "1189", "119", "1192", "1202", "149", "172", "2484", 
    "552", "1189", "119", "1192", "1202", "149", "172", "2484", 
    "552", "1189", "119", "1192", "1202", "149", "172", "2484", 
    "552", "1189", "119", "1192", "1202", "149", "172", "2484", 
    "552"), .rows = structure(list(1L, 2L, 3L, 4L, 5L, 6L, 7L, 
        8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 
        19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 
        30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 
        41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), row.names = c(NA, -48L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))

The code I have so far is:

df %>% ungroup() %>%
        select(-c("Datum")) %>%
        describeBy(., list(.$Soll, .$Transtyp))%>%
        do.call("rbind", .)


It just looks really messy and I cant figure out which variable belongs to which group. Also, I tried rownames_to_columns to better work with the names, but I get an error.. Any help is really appreciated! Cheers

Upvotes: 0

Views: 650

Answers (2)

Ronak Shah
Ronak Shah

Reputation: 389175

Wouldn't it be just easier to calculate all the statistics that you need for each group yourself.

library(dplyr)
library(psych)
  
result <-  df %>%
    group_by(Soll, Transtyp) %>%
    summarise(n = n(), 
              mean = mean(Feuchte, na.rm = TRUE), 
              sd = sd(Feuchte, na.rm = TRUE), 
              median = median(Feuchte, na.rm = TRUE),
              mad = mad(Feuchte, na.rm = TRUE), 
              min = min(Feuchte, na.rm = TRUE), 
              max = max(Feuchte, na.rm = TRUE), 
              median = median(Feuchte, na.rm = TRUE), 
              skew = skew(Feuchte, na.rm = TRUE), 
              kurtosis = e1071::kurtosis(Feuchte, na.rm = TRUE), 
              .groups = 'drop'
              )

result

#  Soll  Transtyp     n  mean    sd median   mad   min   max    skew kurtosis
#  <chr> <fct>    <int> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>   <dbl>    <dbl>
#1 1189  5            6  22.9  8.85   23.3  12.1 12.3   33.8 -0.0213    -2.00
#2 119   5            6  22.2 10.1    21.6  13.5 10.6   35.5  0.0916    -2.00
#3 1192  5            6  24.1  9.99   23.8  13.0 13.8   36.7  0.0720    -2.14
#4 1202  5            6  24.5  8.44   23.8  10.1 15.4   36.1  0.157     -1.99
#5 149   5            6  22.3 10.5    21.7  12.5  9.69  37.8  0.200     -1.70
#6 172   5            6  22.7  9.51   21.9  13.0 11.6   35.9  0.159     -1.83
#7 2484  5            6  20.9  9.63   19.8  12.0 10.2   33.4  0.112     -2.07
#8 552   5            6  23.0  9.42   21.9  11.5 13.2   36.5  0.222     -1.91

You may then use write.csv or similar to write the results to csv.

Upvotes: 2

TarJae
TarJae

Reputation: 79184

Are you looking for such a solution?

library(psych)
describeBy(
  df,
 list(df$Soll, df$Transtyp) 
)

output:

 Descriptive statistics by group 
: 1189
: 2
  vars  n mean sd median trimmed mad min max range skew kurtosis se
1   NA NA   NA NA     NA      NA  NA  NA  NA    NA   NA       NA NA
-------------------------------------------------------------------------------------------------------- 
: 119
: 2
  vars  n mean sd median trimmed mad min max range skew kurtosis se
1   NA NA   NA NA     NA      NA  NA  NA  NA    NA   NA       NA NA
-------------------------------------------------------------------------------------------------------- 
: 1192
: 2
  vars  n mean sd median trimmed mad min max range skew kurtosis se
1   NA NA   NA NA     NA      NA  NA  NA  NA    NA   NA       NA NA
-------------------------------------------------------------------------------------------------------- 
: 1202
: 2
  vars  n mean sd median trimmed mad min max range skew kurtosis se
1   NA NA   NA NA     NA      NA  NA  NA  NA    NA   NA       NA NA
-------------------------------------------------------------------------------------------------------- 
: 149
: 2
  vars  n mean sd median trimmed mad min max range skew kurtosis se
1   NA NA   NA NA     NA      NA  NA  NA  NA    NA   NA       NA NA
-------------------------------------------------------------------------------------------------------- 
: 172
: 2
  vars  n mean sd median trimmed mad min max range skew kurtosis se
1   NA NA   NA NA     NA      NA  NA  NA  NA    NA   NA       NA NA
-------------------------------------------------------------------------------------------------------- 
: 2484
: 2
  vars  n mean sd median trimmed mad min max range skew kurtosis se
1   NA NA   NA NA     NA      NA  NA  NA  NA    NA   NA       NA NA
-------------------------------------------------------------------------------------------------------- 
: 552
: 2
  vars  n mean sd median trimmed mad min max range skew kurtosis se
1   NA NA   NA NA     NA      NA  NA  NA  NA    NA   NA       NA NA
-------------------------------------------------------------------------------------------------------- 
: 1189
: 5
          vars n  mean   sd median trimmed   mad   min   max range  skew kurtosis   se
Datum        1 6   NaN   NA     NA     NaN    NA   Inf  -Inf  -Inf    NA       NA   NA
Soll*        2 6  1.00 0.00    1.0    1.00  0.00  1.00  1.00  0.00   NaN      NaN 0.00
Transtyp*    3 6  2.00 0.00    2.0    2.00  0.00  2.00  2.00  0.00   NaN      NaN 0.00
Feuchte      4 6 22.87 8.85   23.3   22.87 12.13 12.27 33.84 21.57 -0.02       -2 3.61
-------------------------------------------------------------------------------------------------------- 
: 119
: 5
          vars n mean    sd median trimmed   mad   min   max range skew kurtosis   se
Datum        1 6  NaN    NA     NA     NaN    NA   Inf  -Inf  -Inf   NA       NA   NA
Soll*        2 6  1.0  0.00   1.00     1.0  0.00  1.00  1.00  0.00  NaN      NaN 0.00
Transtyp*    3 6  2.0  0.00   2.00     2.0  0.00  2.00  2.00  0.00  NaN      NaN 0.00
Feuchte      4 6 22.2 10.15  21.65    22.2 13.53 10.64 35.46 24.82 0.09       -2 4.14
-------------------------------------------------------------------------------------------------------- 
: 1192
: 5
          vars n  mean   sd median trimmed   mad   min   max range skew kurtosis   se
Datum        1 6   NaN   NA     NA     NaN    NA   Inf  -Inf  -Inf   NA       NA   NA
Soll*        2 6  1.00 0.00   1.00    1.00  0.00  1.00  1.00  0.00  NaN      NaN 0.00
Transtyp*    3 6  2.00 0.00   2.00    2.00  0.00  2.00  2.00  0.00  NaN      NaN 0.00
Feuchte      4 6 24.14 9.99  23.79   24.14 12.98 13.79 36.65 22.86 0.07    -2.14 4.08
-------------------------------------------------------------------------------------------------------- 
: 1202
: 5
          vars n mean   sd median trimmed   mad   min   max range skew kurtosis   se
Datum        1 6  NaN   NA     NA     NaN    NA   Inf  -Inf  -Inf   NA       NA   NA
Soll*        2 6  1.0 0.00   1.00     1.0  0.00  1.00  1.00  0.00  NaN      NaN 0.00
Transtyp*    3 6  2.0 0.00   2.00     2.0  0.00  2.00  2.00  0.00  NaN      NaN 0.00
Feuchte      4 6 24.5 8.44  23.83    24.5 10.11 15.43 36.12 20.69 0.16    -1.99 3.44
-------------------------------------------------------------------------------------------------------- 
: 149
: 5
          vars n  mean    sd median trimmed   mad  min   max range skew kurtosis   se
Datum        1 6   NaN    NA     NA     NaN    NA  Inf  -Inf  -Inf   NA       NA   NA
Soll*        2 6  1.00  0.00   1.00    1.00  0.00 1.00  1.00  0.00  NaN      NaN 0.00
Transtyp*    3 6  2.00  0.00   2.00    2.00  0.00 2.00  2.00  0.00  NaN      NaN 0.00
Feuchte      4 6 22.33 10.47  21.65   22.33 12.47 9.69 37.83 28.14  0.2     -1.7 4.27
-------------------------------------------------------------------------------------------------------- 
: 172
: 5
          vars n mean   sd median trimmed   mad  min   max range skew kurtosis   se
Datum        1 6  NaN   NA     NA     NaN    NA  Inf  -Inf  -Inf   NA       NA   NA
Soll*        2 6  1.0 0.00   1.00     1.0  0.00  1.0  1.00  0.00  NaN      NaN 0.00
Transtyp*    3 6  2.0 0.00   2.00     2.0  0.00  2.0  2.00  0.00  NaN      NaN 0.00
Feuchte      4 6 22.7 9.51  21.88    22.7 12.99 11.6 35.89 24.29 0.16    -1.83 3.88
-------------------------------------------------------------------------------------------------------- 
: 2484
: 5
          vars n  mean   sd median trimmed mad   min   max range skew kurtosis   se
Datum        1 6   NaN   NA     NA     NaN  NA   Inf  -Inf  -Inf   NA       NA   NA
Soll*        2 6  1.00 0.00   1.00    1.00   0  1.00  1.00  0.00  NaN      NaN 0.00
Transtyp*    3 6  2.00 0.00   2.00    2.00   0  2.00  2.00  0.00  NaN      NaN 0.00
Feuchte      4 6 20.86 9.63  19.81   20.86  12 10.22 33.36 23.14 0.11    -2.07 3.93
-------------------------------------------------------------------------------------------------------- 
: 552
: 5
          vars n  mean   sd median trimmed   mad   min  max range skew kurtosis   se
Datum        1 6   NaN   NA     NA     NaN    NA   Inf -Inf  -Inf   NA       NA   NA
Soll*        2 6  1.00 0.00    1.0    1.00  0.00  1.00  1.0  0.00  NaN      NaN 0.00
Transtyp*    3 6  2.00 0.00    2.0    2.00  0.00  2.00  2.0  0.00  NaN      NaN 0.00
Feuchte      4 6 22.98 9.42   21.9   22.98 11.52 13.22 36.5 23.28 0.22    -1.91 3.85

Upvotes: 2

Related Questions