nflore
nflore

Reputation: 306

How to count number of columns by condition on another column

I have a dataframe that looks like this:

# Example data: five rows, one numeric column per date.
data <- data.frame(
  "01-01-2018" = c(1.2, 3.1, 0.7, -0.3, 2.0),
  "02-01-2018" = c(-0.1, 2.4, 4.9, -3.3, -2.7),
  "03-01-2018" = c(3.4, -2.6, -1.8, 0.1, 0.3),
  check.names = FALSE
)

  01-01-2018  02-01-2018  03-01-2018
1      1.2       -0.1        3.4
2      3.1        2.4       -2.6
3      0.7        4.9       -1.8
4     -0.3       -3.3        0.1
5      2.0       -2.7        0.3

For each row, I want to count how many values are greater than the mean of that row.

# Row-wise mean over every column currently in `data`, stored as column `mn`.
data$mn <- apply(data, MARGIN = 1, FUN = mean)

  01-01-2018 02-01-2018 03-01-2018         mn
1        1.2       -0.1        3.4  1.5000000
2        3.1        2.4       -2.6  0.9666667
3        0.7        4.9       -1.8  1.2666667
4       -0.3       -3.3        0.1 -1.1666667
5        2.0       -2.7        0.3 -0.1333333

My last attempt was the following:

# Columns that hold the measurements. The helper column `mn` is excluded so
# the row mean is never compared with itself.
value_cols <- setdiff(names(data), "mn")

# apply() version: compare each row's values with that row's own mean
# (x[["mn"]]), not with the entire `mn` column.
data$events <- apply(data, 1, function(x) sum(x[value_cols] > x[["mn"]]))

# Explicit-loop version of the same count. The result vector is preallocated
# and seq_len() keeps the loop from running when `data` has zero rows.
uhi_events <- numeric(nrow(data))

for (i in seq_len(nrow(data))) {
  row_values <- unlist(data[i, value_cols])
  # Summing a logical vector counts its TRUE entries.
  uhi_events[i] <- sum(row_values > data$mn[i])
}

data$uhi_events <- uhi_events

Is there a more efficient option?

EDIT:

What if the condition is on another column, let's say data$c1, that is not obtained through a simple formula?

# Row-wise median over every column currently in `data`, stored as column `md`.
data$md <- apply(data, MARGIN = 1, FUN = median)

  01-01-2018 02-01-2018 03-01-2018   md
1        1.2       -0.1        3.4  1.2
2        3.1        2.4       -2.6  2.4
3        0.7        4.9       -1.8  0.7
4       -0.3       -3.3        0.1 -0.3
5        2.0       -2.7        0.3  0.3

Upvotes: 1

Views: 775

Answers (4)

zx8754
zx8754

Reputation: 56229

Using rowMeans and rowSums:

# Flag every cell that exceeds its row mean, then count the flags per row.
row_avg <- rowMeans(data)
is_above_avg <- data > row_avg
data$cnt <- rowSums(is_above_avg)

data
#   01-01-2018 02-01-2018 03-01-2018 cnt
# 1        1.2       -0.1        3.4   1
# 2        3.1        2.4       -2.6   2
# 3        0.7        4.9       -1.8   1
# 4       -0.3       -3.3        0.1   2
# 5        2.0       -2.7        0.3   2

If the threshold column has already been computed, replace rowMeans(data) with the existing column, e.g. data$c1:

# Index of the value columns: everything except the threshold column "c1" and
# any "cnt" column left over from an earlier run. Names are matched exactly
# (not as a regex), so columns such as "c10" are kept.
ix <- which(!colnames(data) %in% c("c1", "cnt"))
# drop = FALSE keeps a data frame even when only one value column remains,
# which rowSums() requires.
data$cnt <- rowSums(data[, ix, drop = FALSE] > data$c1)

Upvotes: 5

PaulS
PaulS

Reputation: 25473

Using a dplyr approach:

library(dplyr)

# Example data: five rows, one numeric column per date.
data <- data.frame(
  "01-01-2018" = c(1.2, 3.1, 0.7, -0.3, 2.0),
  "02-01-2018" = c(-0.1, 2.4, 4.9, -3.3, -2.7),
  "03-01-2018" = c(3.4, -2.6, -1.8, 0.1, 0.3),
  check.names = FALSE
)

# Row-wise median, used as the per-row threshold.
data$mm <- apply(data, MARGIN = 1, FUN = median)

# For each row, count how many of the first three columns exceed `mm`.
data %>%
  rowwise() %>%
  mutate(count = sum(c_across(1:3) > mm))

#> # A tibble: 5 × 5
#> # Rowwise: 
#>   `01-01-2018` `02-01-2018` `03-01-2018`    mm count
#>          <dbl>        <dbl>        <dbl> <dbl> <int>
#> 1          1.2         -0.1          3.4   1.2     1
#> 2          3.1          2.4         -2.6   2.4     1
#> 3          0.7          4.9         -1.8   0.7     1
#> 4         -0.3         -3.3          0.1  -0.3     1
#> 5          2           -2.7          0.3   0.3     1

Upvotes: 2

Wimpel
Wimpel

Reputation: 27802

library(data.table)

# Convert `data` to a data.table by reference.
setDT(data)

# Per row: count the columns whose value exceeds the row mean of all columns.
data[, above_mean := {
  row_avg <- rowMeans(.SD)
  rowSums(.SD > row_avg)
}]
#    01-01-2018 02-01-2018 03-01-2018 above_mean
# 1:        1.2       -0.1        3.4          1
# 2:        3.1        2.4       -2.6          2
# 3:        0.7        4.9       -1.8          1
# 4:       -0.3       -3.3        0.1          2
# 5:        2.0       -2.7        0.3          2

edit for question in comments
compare to value in first column

# Restrict .SD to the date columns. Without .SDcols, helper columns added
# earlier (e.g. above_mean) would also be compared with the first column and
# inflate the count.
date_cols <- c("01-01-2018", "02-01-2018", "03-01-2018")
data[, above_col1 := rowSums(.SD > `01-01-2018`), .SDcols = date_cols]
#    01-01-2018 02-01-2018 03-01-2018      above_col1
# 1:        1.2       -0.1        3.4               1
# 2:        3.1        2.4       -2.6               0
# 3:        0.7        4.9       -1.8               1
# 4:       -0.3       -3.3        0.1               1
# 5:        2.0       -2.7        0.3               0

Upvotes: 3

rg255
rg255

Reputation: 4169

Using a user defined function to sum from a logical operation (logical vector is coerced by sum() to an integer vector such that TRUE = 1 and FALSE = 0)

# For each row, count the values that exceed that row's mean. Summing a
# logical vector counts its TRUE entries.
data$uhi_events <- apply(
  data,
  MARGIN = 1,
  FUN = function(row_values) {
    row_mean <- mean(row_values)
    is_above <- row_values > row_mean
    sum(is_above)
  }
)

Upvotes: 3

Related Questions