Reputation: 155
Since I could not find a suitable solution in other questions, I hope you guys can help me. I would like the output of the describeBy() function from the psych package to not contain the rows "xxxx.Soll" and "xxxx.Transtyp", but just "xxxx.Feuchte" where xxxx is the respective number. Also, it would be nice to have the number in an extra column. I would like to have the results in a neat dataframe so I can export them as .csv or display in a nice table with rmarkdown::paged_table or whatever. This is my df:
df <- structure(list(Datum = structure(c(18703, 18703, 18703, 18703,
18703, 18703, 18703, 18703, 18724, 18724, 18724, 18724, 18724,
18724, 18724, 18724, 18730, 18730, 18730, 18730, 18730, 18730,
18730, 18730, 18744, 18744, 18744, 18744, 18744, 18744, 18744,
18744, 18758, 18758, 18758, 18758, 18758, 18758, 18758, 18758,
18774, 18774, 18774, 18774, 18774, 18774, 18774, 18774), class = "Date"),
Soll = c("1189", "119", "1192", "1202", "149", "172", "2484",
"552", "1189", "119", "1192", "1202", "149", "172", "2484",
"552", "1189", "119", "1192", "1202", "149", "172", "2484",
"552", "1189", "119", "1192", "1202", "149", "172", "2484",
"552", "1189", "119", "1192", "1202", "149", "172", "2484",
"552", "1189", "119", "1192", "1202", "149", "172", "2484",
"552"), Transtyp = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("2",
"5"), class = "factor"), Feuchte = c(33.8375, 35.459375,
36.6518518518519, 36.1193548387097, 37.8310344827586, 35.8935483870968,
33.3625, 36.5032258064516, 26.775, 26.0064516129032, 30.128125,
28.50625, 23.0645161290323, 21.45625, 25.340625, 26.446875,
30.4375, 31.0466666666667, 32.15625, 30.715625, 29.9875,
31.2290322580645, 29.084375, 29.9387096774194, 12.26875,
12.7925925925926, 14.6516129032258, 15.428125, 13.159375,
13.70625, 12.89375, 14.4, 14.078125, 10.6387096774194, 13.7896551724138,
17.071875, 9.690625, 11.6, 10.21875, 13.225, 19.83125, 17.2851851851852,
17.441935483871, 19.15, 20.24375, 22.3125, 14.2741935483871,
17.358064516129)), row.names = c(NA, -48L), groups = structure(list(
Datum = structure(c(18703, 18703, 18703, 18703, 18703, 18703,
18703, 18703, 18724, 18724, 18724, 18724, 18724, 18724, 18724,
18724, 18730, 18730, 18730, 18730, 18730, 18730, 18730, 18730,
18744, 18744, 18744, 18744, 18744, 18744, 18744, 18744, 18758,
18758, 18758, 18758, 18758, 18758, 18758, 18758, 18774, 18774,
18774, 18774, 18774, 18774, 18774, 18774), class = "Date"),
Soll = c("1189", "119", "1192", "1202", "149", "172", "2484",
"552", "1189", "119", "1192", "1202", "149", "172", "2484",
"552", "1189", "119", "1192", "1202", "149", "172", "2484",
"552", "1189", "119", "1192", "1202", "149", "172", "2484",
"552", "1189", "119", "1192", "1202", "149", "172", "2484",
"552", "1189", "119", "1192", "1202", "149", "172", "2484",
"552"), .rows = structure(list(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L,
30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L,
41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -48L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
The code I have so far is:
df %>% ungroup() %>%
select(-c("Datum")) %>%
describeBy(., list(.$Soll, .$Transtyp))%>%
do.call("rbind", .)
It just looks really messy and I cant figure out which variable belongs to which group. Also, I tried rownames_to_columns to better work with the names, but I get an error.. Any help is really appreciated! Cheers
Upvotes: 0
Views: 650
Reputation: 389175
Wouldn't it be just easier to calculate all the statistics that you need for each group yourself.
library(dplyr)
library(psych)
result <- df %>%
group_by(Soll, Transtyp) %>%
summarise(n = n(),
mean = mean(Feuchte, na.rm = TRUE),
sd = sd(Feuchte, na.rm = TRUE),
median = median(Feuchte, na.rm = TRUE),
mad = mad(Feuchte, na.rm = TRUE),
min = min(Feuchte, na.rm = TRUE),
max = max(Feuchte, na.rm = TRUE),
median = median(Feuchte, na.rm = TRUE),
skew = skew(Feuchte, na.rm = TRUE),
kurtosis = e1071::kurtosis(Feuchte, na.rm = TRUE),
.groups = 'drop'
)
result
# Soll Transtyp n mean sd median mad min max skew kurtosis
# <chr> <fct> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 1189 5 6 22.9 8.85 23.3 12.1 12.3 33.8 -0.0213 -2.00
#2 119 5 6 22.2 10.1 21.6 13.5 10.6 35.5 0.0916 -2.00
#3 1192 5 6 24.1 9.99 23.8 13.0 13.8 36.7 0.0720 -2.14
#4 1202 5 6 24.5 8.44 23.8 10.1 15.4 36.1 0.157 -1.99
#5 149 5 6 22.3 10.5 21.7 12.5 9.69 37.8 0.200 -1.70
#6 172 5 6 22.7 9.51 21.9 13.0 11.6 35.9 0.159 -1.83
#7 2484 5 6 20.9 9.63 19.8 12.0 10.2 33.4 0.112 -2.07
#8 552 5 6 23.0 9.42 21.9 11.5 13.2 36.5 0.222 -1.91
You may then use write.csv
or similar to write the results to csv.
Upvotes: 2
Reputation: 79184
Are you looking for such a solution?
library(psych)
describeBy(
df,
list(df$Soll, df$Transtyp)
)
output:
Descriptive statistics by group
: 1189
: 2
vars n mean sd median trimmed mad min max range skew kurtosis se
1 NA NA NA NA NA NA NA NA NA NA NA NA NA
--------------------------------------------------------------------------------------------------------
: 119
: 2
vars n mean sd median trimmed mad min max range skew kurtosis se
1 NA NA NA NA NA NA NA NA NA NA NA NA NA
--------------------------------------------------------------------------------------------------------
: 1192
: 2
vars n mean sd median trimmed mad min max range skew kurtosis se
1 NA NA NA NA NA NA NA NA NA NA NA NA NA
--------------------------------------------------------------------------------------------------------
: 1202
: 2
vars n mean sd median trimmed mad min max range skew kurtosis se
1 NA NA NA NA NA NA NA NA NA NA NA NA NA
--------------------------------------------------------------------------------------------------------
: 149
: 2
vars n mean sd median trimmed mad min max range skew kurtosis se
1 NA NA NA NA NA NA NA NA NA NA NA NA NA
--------------------------------------------------------------------------------------------------------
: 172
: 2
vars n mean sd median trimmed mad min max range skew kurtosis se
1 NA NA NA NA NA NA NA NA NA NA NA NA NA
--------------------------------------------------------------------------------------------------------
: 2484
: 2
vars n mean sd median trimmed mad min max range skew kurtosis se
1 NA NA NA NA NA NA NA NA NA NA NA NA NA
--------------------------------------------------------------------------------------------------------
: 552
: 2
vars n mean sd median trimmed mad min max range skew kurtosis se
1 NA NA NA NA NA NA NA NA NA NA NA NA NA
--------------------------------------------------------------------------------------------------------
: 1189
: 5
vars n mean sd median trimmed mad min max range skew kurtosis se
Datum 1 6 NaN NA NA NaN NA Inf -Inf -Inf NA NA NA
Soll* 2 6 1.00 0.00 1.0 1.00 0.00 1.00 1.00 0.00 NaN NaN 0.00
Transtyp* 3 6 2.00 0.00 2.0 2.00 0.00 2.00 2.00 0.00 NaN NaN 0.00
Feuchte 4 6 22.87 8.85 23.3 22.87 12.13 12.27 33.84 21.57 -0.02 -2 3.61
--------------------------------------------------------------------------------------------------------
: 119
: 5
vars n mean sd median trimmed mad min max range skew kurtosis se
Datum 1 6 NaN NA NA NaN NA Inf -Inf -Inf NA NA NA
Soll* 2 6 1.0 0.00 1.00 1.0 0.00 1.00 1.00 0.00 NaN NaN 0.00
Transtyp* 3 6 2.0 0.00 2.00 2.0 0.00 2.00 2.00 0.00 NaN NaN 0.00
Feuchte 4 6 22.2 10.15 21.65 22.2 13.53 10.64 35.46 24.82 0.09 -2 4.14
--------------------------------------------------------------------------------------------------------
: 1192
: 5
vars n mean sd median trimmed mad min max range skew kurtosis se
Datum 1 6 NaN NA NA NaN NA Inf -Inf -Inf NA NA NA
Soll* 2 6 1.00 0.00 1.00 1.00 0.00 1.00 1.00 0.00 NaN NaN 0.00
Transtyp* 3 6 2.00 0.00 2.00 2.00 0.00 2.00 2.00 0.00 NaN NaN 0.00
Feuchte 4 6 24.14 9.99 23.79 24.14 12.98 13.79 36.65 22.86 0.07 -2.14 4.08
--------------------------------------------------------------------------------------------------------
: 1202
: 5
vars n mean sd median trimmed mad min max range skew kurtosis se
Datum 1 6 NaN NA NA NaN NA Inf -Inf -Inf NA NA NA
Soll* 2 6 1.0 0.00 1.00 1.0 0.00 1.00 1.00 0.00 NaN NaN 0.00
Transtyp* 3 6 2.0 0.00 2.00 2.0 0.00 2.00 2.00 0.00 NaN NaN 0.00
Feuchte 4 6 24.5 8.44 23.83 24.5 10.11 15.43 36.12 20.69 0.16 -1.99 3.44
--------------------------------------------------------------------------------------------------------
: 149
: 5
vars n mean sd median trimmed mad min max range skew kurtosis se
Datum 1 6 NaN NA NA NaN NA Inf -Inf -Inf NA NA NA
Soll* 2 6 1.00 0.00 1.00 1.00 0.00 1.00 1.00 0.00 NaN NaN 0.00
Transtyp* 3 6 2.00 0.00 2.00 2.00 0.00 2.00 2.00 0.00 NaN NaN 0.00
Feuchte 4 6 22.33 10.47 21.65 22.33 12.47 9.69 37.83 28.14 0.2 -1.7 4.27
--------------------------------------------------------------------------------------------------------
: 172
: 5
vars n mean sd median trimmed mad min max range skew kurtosis se
Datum 1 6 NaN NA NA NaN NA Inf -Inf -Inf NA NA NA
Soll* 2 6 1.0 0.00 1.00 1.0 0.00 1.0 1.00 0.00 NaN NaN 0.00
Transtyp* 3 6 2.0 0.00 2.00 2.0 0.00 2.0 2.00 0.00 NaN NaN 0.00
Feuchte 4 6 22.7 9.51 21.88 22.7 12.99 11.6 35.89 24.29 0.16 -1.83 3.88
--------------------------------------------------------------------------------------------------------
: 2484
: 5
vars n mean sd median trimmed mad min max range skew kurtosis se
Datum 1 6 NaN NA NA NaN NA Inf -Inf -Inf NA NA NA
Soll* 2 6 1.00 0.00 1.00 1.00 0 1.00 1.00 0.00 NaN NaN 0.00
Transtyp* 3 6 2.00 0.00 2.00 2.00 0 2.00 2.00 0.00 NaN NaN 0.00
Feuchte 4 6 20.86 9.63 19.81 20.86 12 10.22 33.36 23.14 0.11 -2.07 3.93
--------------------------------------------------------------------------------------------------------
: 552
: 5
vars n mean sd median trimmed mad min max range skew kurtosis se
Datum 1 6 NaN NA NA NaN NA Inf -Inf -Inf NA NA NA
Soll* 2 6 1.00 0.00 1.0 1.00 0.00 1.00 1.0 0.00 NaN NaN 0.00
Transtyp* 3 6 2.00 0.00 2.0 2.00 0.00 2.00 2.0 0.00 NaN NaN 0.00
Feuchte 4 6 22.98 9.42 21.9 22.98 11.52 13.22 36.5 23.28 0.22 -1.91 3.85
Upvotes: 2