Reputation: 306
I have a dataframe that looks like this:
# Example data: five observations (rows) for each of three dates (columns).
# check.names = FALSE keeps the date strings as column names verbatim.
data <- data.frame(
  "01-01-2018" = c(1.2, 3.1, 0.7, -0.3, 2.0),
  "02-01-2018" = c(-0.1, 2.4, 4.9, -3.3, -2.7),
  "03-01-2018" = c(3.4, -2.6, -1.8, 0.1, 0.3),
  check.names = FALSE
)
01-01-2018 02-01-2018 03-01-2018
1 1.2 -0.1 3.4
2 3.1 2.4 -2.6
3 0.7 4.9 -1.8
4 -0.3 -3.3 0.1
5 2.0 -2.7 0.3
For each row, I want to count how many values are greater than the mean of that row.
# Row-wise mean over every column currently in `data`, stored as column `mn`.
data$mn <- apply(X = data, MARGIN = 1, FUN = mean)
01-01-2018 02-01-2018 03-01-2018 mn
1 1.2 -0.1 3.4 1.5000000
2 3.1 2.4 -2.6 0.9666667
3 0.7 4.9 -1.8 1.2666667
4 -0.3 -3.3 0.1 -1.1666667
5 2.0 -2.7 0.3 -0.1333333
My last attempt was the following:
# Working version of the row-by-row attempt.
# The original assigned to `df` instead of the data frame `data`, compared
# each row against the whole `data$mn` vector rather than that row's own mean,
# and indexed a non-existent sixth column inside the loop.

# Value columns only: the helper column `mn` must not be counted as a value.
value_cols <- setdiff(names(data), "mn")

# apply() version: each row is compared with its own mean.
data$events <- apply(data[value_cols], 1, function(x) sum(x > mean(x)))

# Explicit-loop version of the same count, using the stored row means.
uhi_events <- numeric(nrow(data))
for (i in seq_len(nrow(data))) {
  # Logical flags for the values in row i that exceed that row's mean.
  uhi <- data[i, value_cols] > data$mn[i]
  # Summing logicals counts the TRUE entries.
  uhi_events[i] <- sum(uhi)
}
data$uhi_events <- uhi_events
Is there a more efficient option?
EDIT:
What if the threshold comes from another column, say data$c1, that is not obtained through a simple formula? For example, a column holding the row medians:
# Row-wise median over every column currently in `data`, stored as column `md`.
data$md <- apply(X = data, MARGIN = 1, FUN = median)
01-01-2018 02-01-2018 03-01-2018 md
1 1.2 -0.1 3.4 1.2
2 3.1 2.4 -2.6 2.4
3 0.7 4.9 -1.8 0.7
4 -0.3 -3.3 0.1 -0.3
5 2.0 -2.7 0.3 0.3
Upvotes: 1
Views: 775
Reputation: 56229
Using rowMeans and rowSums:
# Compare every cell with the mean of its row (the vector of row means is
# recycled down each column), then count the TRUE entries per row.
data$cnt <- rowSums(
  data > rowMeans(data)
)
data
# 01-01-2018 02-01-2018 03-01-2018 cnt
# 1 1.2 -0.1 3.4 1
# 2 3.1 2.4 -2.6 2
# 3 0.7 4.9 -1.8 1
# 4 -0.3 -3.3 0.1 2
# 5 2.0 -2.7 0.3 2
If the threshold column has already been computed, replace rowMeans with the existing column, e.g. data$c1:
# Positions of every column except the threshold column "c1".
# An exact name comparison is used because grep("c1", ...) is a pattern match
# and would also drop columns such as "c10" or "abc1".
ix <- which(colnames(data) != "c1")
# drop = FALSE keeps a data frame even when only one column remains, which
# rowSums() requires. data$c1 is recycled down each column, so every cell is
# compared with the c1 value of its own row.
data$cnt <- rowSums(data[, ix, drop = FALSE] > data$c1)
Upvotes: 5
Reputation: 25473
Using a dplyr approach:
library(dplyr)

# Example data: five observations (rows) for each of three dates (columns).
data <- data.frame(
  "01-01-2018" = c(1.2, 3.1, 0.7, -0.3, 2.0),
  "02-01-2018" = c(-0.1, 2.4, 4.9, -3.3, -2.7),
  "03-01-2018" = c(3.4, -2.6, -1.8, 0.1, 0.3),
  check.names = FALSE
)

# Per-row median, used as the threshold column below.
data$mm <- apply(X = data, MARGIN = 1, FUN = median)

# Row by row, count how many of the first three columns exceed `mm`.
data %>%
  rowwise() %>%
  mutate(count = sum(c_across(1:3) > mm))
#> # A tibble: 5 × 5
#> # Rowwise:
#> `01-01-2018` `02-01-2018` `03-01-2018` mm count
#> <dbl> <dbl> <dbl> <dbl> <int>
#> 1 1.2 -0.1 3.4 1.2 1
#> 2 3.1 2.4 -2.6 2.4 1
#> 3 0.7 4.9 -1.8 0.7 1
#> 4 -0.3 -3.3 0.1 -0.3 1
#> 5 2 -2.7 0.3 0.3 1
Upvotes: 2
Reputation: 27802
library(data.table)

# Turn the existing data frame into a data.table.
setDT(data)

# For each row, count the columns whose value exceeds the row mean.
# .SD holds the table's columns; the row means are recycled down each column.
data[, above_mean := {
  row_avg <- rowMeans(.SD)
  rowSums(.SD > row_avg)
}]
# 01-01-2018 02-01-2018 03-01-2018 above_mean
# 1: 1.2 -0.1 3.4 1
# 2: 3.1 2.4 -2.6 2
# 3: 0.7 4.9 -1.8 1
# 4: -0.3 -3.3 0.1 2
# 5: 2.0 -2.7 0.3 2
Edit (for the question in the comments):
To compare against the value in the first column instead:
# Restrict .SD to the date-named columns so that helper columns added earlier
# (e.g. above_mean) are not counted as values when compared with column 1.
date_cols <- grep("^[0-9]{2}-[0-9]{2}-[0-9]{4}$", names(data), value = TRUE)
data[, above_col1 := rowSums(.SD > `01-01-2018`), .SDcols = date_cols]
# 01-01-2018 02-01-2018 03-01-2018 above_col1
# 1: 1.2 -0.1 3.4 1
# 2: 3.1 2.4 -2.6 0
# 3: 0.7 4.9 -1.8 1
# 4: -0.3 -3.3 0.1 1
# 5: 2.0 -2.7 0.3 0
Upvotes: 3
Reputation: 4169
Using a user-defined function that sums the result of a logical comparison (sum() coerces the logical vector to an integer vector, so that TRUE = 1 and FALSE = 0):
# For each row, count the values strictly greater than that row's mean.
# Summing a logical vector counts its TRUE entries.
data$uhi_events <- apply(
  data,
  MARGIN = 1,
  FUN = function(row_values) {
    is_above <- row_values > mean(row_values)
    sum(is_above)
  }
)
Upvotes: 3