Reputation: 11
I have a dataset, which looks like
df <- data.frame(A=rnorm(10), B=rnorm(10), C=rnorm(10), D=rnorm(10), E=rnorm(10))
I need to find the four lowest entries for every row and then build the average across the four.
I tried
df %>% rowwise() %>% mutate(Min = min(A, B, C, D, E))
but I struggle to find a way for the second lowest entry, third lowest entry etc.
As I have 36 column entries and always need the work with the 4 lowest row entries, I tried to work with percentiles(4/36=1/9..) but that seems way to complicated to do.
Is there any easier way I am overlooking?
Upvotes: 0
Views: 220
Reputation: 6206
This sort of thing tends to be easier if you get the data in long form first.
library(tidyr)
library(dplyr)
df %>%
pivot_longer(everything()) %>%
group_by(name) %>%
top_n(-4) %>%
group_by(name) %>%
summarise(mean_val = mean(value))
# A tibble: 5 x 2
name mean_val
<chr> <dbl>
1 A -0.620
2 B -1.87
3 C -1.91
4 D -1.17
5 E -0.36
Upvotes: 2
Reputation: 887158
We may use pmap
to loop over the rows, sort
the vector
(c(...)
), get the first 'n' values with head
and return with the mean
to create the new column 'Average_4' in mutate
library(purrr)
library(dplyr)
df %>%
mutate(Average_4 = pmap_dbl(cur_data(), ~ mean(head(sort(c(...)), 4))))
A B C D E Average_4
1 -0.56047565 1.2240818 -1.0678237 0.42646422 -0.69470698 -0.47413553
2 -0.23017749 0.3598138 -0.2179749 -0.29507148 -0.20791728 -0.23778529
3 1.55870831 0.4007715 -1.0260044 0.89512566 -1.26539635 -0.24887592
4 0.07050839 0.1106827 -0.7288912 0.87813349 2.16895597 0.08260834
5 0.12928774 -0.5558411 -0.6250393 0.82158108 1.20796200 -0.05750290
6 1.71506499 1.7869131 -1.6866933 0.68864025 -1.12310858 -0.10152416
7 0.46091621 0.4978505 0.8377870 0.55391765 -0.40288484 0.27744988
8 -1.26506123 -1.9666172 0.1533731 -0.06191171 -0.46665535 -0.94006136
9 -0.68685285 0.7013559 -1.1381369 -0.30596266 0.77996512 -0.35739914
10 -0.44566197 -0.4727914 1.2538149 -0.38047100 -0.08336907 -0.34557336
Or this can be done in a faster way with fmean/dapply
from collapse
library(collapse)
df$Average_4 <- dapply(df, MARGIN = 1, FUN = \(x) fmean(ss(sort(x), 1:4)))
df$Average_4
#[1] -0.47413553 -0.23778529 -0.24887592 0.08260834 -0.05750290 -0.10152416 0.27744988 -0.94006136 -0.35739914 -0.34557336
set.seed(123)
df <- data.frame(A=rnorm(10), B=rnorm(10), C=rnorm(10), D=rnorm(10), E=rnorm(10))
Upvotes: 0
Reputation: 3755
With Base R,
df$New <- colMeans(apply(df,1,sort)[1:4,])
df
gives,
A B C D E New
1 -0.56047565 1.2240818 -1.0678237 0.42646422 -0.69470698 -0.47413553
2 -0.23017749 0.3598138 -0.2179749 -0.29507148 -0.20791728 -0.23778529
3 1.55870831 0.4007715 -1.0260044 0.89512566 -1.26539635 -0.24887592
4 0.07050839 0.1106827 -0.7288912 0.87813349 2.16895597 0.08260834
5 0.12928774 -0.5558411 -0.6250393 0.82158108 1.20796200 -0.05750290
6 1.71506499 1.7869131 -1.6866933 0.68864025 -1.12310858 -0.10152416
7 0.46091621 0.4978505 0.8377870 0.55391765 -0.40288484 0.27744988
8 -1.26506123 -1.9666172 0.1533731 -0.06191171 -0.46665535 -0.94006136
9 -0.68685285 0.7013559 -1.1381369 -0.30596266 0.77996512 -0.35739914
10 -0.44566197 -0.4727914 1.2538149 -0.38047100 -0.08336907 -0.34557336
Data:
set.seed(123)
df <- data.frame(A=rnorm(10), B=rnorm(10), C=rnorm(10), D=rnorm(10), E=rnorm(10))
Upvotes: 2
Reputation: 2949
Use sort function and subset the second lowest
df %>% rowwise() %>% mutate(Min = sort(c(A, B, C, D, E), FALSE)[1],
Min2 = sort(c(A, B, C, D, E), FALSE)[2])
# A tibble: 10 x 7
# Rowwise:
A B C D E Min Min2
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0.252 -1.69 -0.612 -1.54 1.50 -1.69 -1.54
2 0.642 1.25 -0.372 -1.32 -0.289 -1.32 -0.372
3 -0.347 -1.77 -0.515 -0.676 1.92 -1.77 -0.676
4 -0.515 0.223 -0.342 -0.367 0.519 -0.515 -0.367
5 0.398 -0.561 0.684 -0.514 0.477 -0.561 -0.514
6 0.954 -0.508 0.798 -0.959 0.303 -0.959 -0.508
7 0.260 -0.939 -0.800 -0.122 0.821 -0.939 -0.800
8 -2.05 1.64 0.230 0.666 0.393 -2.05 0.230
9 0.288 -0.653 1.69 1.24 -1.44 -1.44 -0.653
10 0.362 0.839 -0.768 1.18 -0.869 -0.869 -0.768
If you want to find the average of four lowest values:
df %>% rowwise() %>% mutate(Average_Min = mean(sort(c(A, B, C, D, E), FALSE)[1:4]))
# A tibble: 10 x 6
# Rowwise:
A B C D E Average_Min
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 -2.99 0.279 0.999 -0.131 0.128 -0.678
2 -0.0505 -0.0756 -1.09 -0.417 -0.225 -0.452
3 1.30 0.123 0.854 0.652 0.670 0.575
4 -1.35 -1.03 -0.627 -1.65 0.538 -1.16
5 -0.950 0.0897 0.421 -0.677 -0.0553 -0.398
6 1.69 1.32 -0.396 -1.31 0.502 0.0307
7 0.244 -0.308 -0.390 -0.405 -0.640 -0.436
8 -1.39 1.48 0.384 1.36 1.80 0.458
9 0.887 -0.470 1.66 -0.661 0.999 0.189
10 0.0282 -0.866 -1.13 -0.915 0.878 -0.720
Upvotes: 0
Reputation: 388982
You can use c_across
, sort the values, select first 4 of them and get average of them.
library(dplyr)
df <- df %>%
rowwise() %>%
mutate(Average_4 = mean(head(sort(c_across()), 4)))
df
# A B C D E Average_4
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 -0.560 1.22 -1.07 0.426 -0.695 -0.474
# 2 -0.230 0.360 -0.218 -0.295 -0.208 -0.238
# 3 1.56 0.401 -1.03 0.895 -1.27 -0.249
# 4 0.0705 0.111 -0.729 0.878 2.17 0.0826
# 5 0.129 -0.556 -0.625 0.822 1.21 -0.0575
# 6 1.72 1.79 -1.69 0.689 -1.12 -0.102
# 7 0.461 0.498 0.838 0.554 -0.403 0.277
# 8 -1.27 -1.97 0.153 -0.0619 -0.467 -0.940
# 9 -0.687 0.701 -1.14 -0.306 0.780 -0.357
#10 -0.446 -0.473 1.25 -0.380 -0.0834 -0.346
Or in base R -
df$Average_4 <- apply(df, 1, function(x) mean(head(sort(x), 4)))
data
set.seed(123)
df <- data.frame(A=rnorm(10), B=rnorm(10), C=rnorm(10), D=rnorm(10), E=rnorm(10))
Upvotes: 2