Maya_Cent
Maya_Cent

Reputation: 481

Count number of columns with specific value for each distict group

I look for to know how many columns (df[,3:length(df)]) are not all zero for each unique group of column ID. The result would be stored as a new df.

> df
ID     INDV     tra1   tr2   tr3  tra2   tr15   tr1b  
ENS777   1       1.2    0     0   1.6    3.3    0
ENS777   2       1.2    0     0   0.0    3.3    0
ENS777   3       1.2    0     0   0.0    3.3    0
ENS777   4       1.2    0     0   1.6    3.3    0
ENS999   1        0     0     0    0      0     0
ENS999   2        0     0     0    0      0     0
ENS999   3        0     0     0    0      0     0
ENS999   4        0     0     0    0      0     0
ENS888   1       1.2    0     0   1.6    3.3    0
ENS888   2       1.2    0     0   1.6    3.3   1.2
ENS888   3       1.2    0     0   1.6    3.3    0
ENS888   4       1.2    0     0   1.6    3.3    0 

 > df_out
ID      non_zero      
ENS777    3 
ENS999    0
ENS888    4

Upvotes: 1

Views: 157

Answers (3)

Sotos
Sotos

Reputation: 51582

You can use agregate to get the max of non-zero rows. Those rows are found by rowSums(df[3:ncol(df)] > 0),

setNames(aggregate(rowSums(df[3:ncol(df)] > 0) ~ ID, df, max), c('ID', 'non_zeros'))

#      ID non_zeros
#1 ENS777         3
#2 ENS888         4
#3 ENS999         0

Upvotes: 1

TarJae
TarJae

Reputation: 78917

We could use all:

library(dplyr)

df %>% 
    group_by(ID) %>% 
    summarise(across(starts_with("tr"), ~all(. == 0))) %>% 
    rowwise() %>%
    mutate(non_zero = sum(across(starts_with("tr")) == FALSE)) %>% 
    select(ID, non_zero)

Output:

  ID     non_zero
  <chr>     <int>
1 ENS777        3
2 ENS888        4
3 ENS999        0

Upvotes: 1

Yuriy Saraykin
Yuriy Saraykin

Reputation: 8880

tidyverse

library(dplyr)
df %>% 
  group_by(ID) %>% 
  summarise(across(starts_with("tr"), ~sum(.x) != 0)) %>% 
  transmute(ID, non_zero = rowSums(select(., starts_with("tr"))))

# A tibble: 3 x 2
  ID     non_zero
  <chr>     <dbl>
1 ENS777        3
2 ENS888        4
3 ENS999        0

data.table

library(data.table)
library(magrittr)
COLS <- grep("^tr", names(df), value = TRUE)
setDT(df)[, lapply(.SD, function(x) sum(x) != 0), by = ID, .SDcols = COLS] %>% 
  .[, list(ID, non_zero = Reduce(`+`, .SD)), .SDcols=COLS] %>% 
  .[]

or

setDT(df)[, lapply(.SD, function(x) sum(x) != 0), by = ID, .SDcols = COLS] %>% 
  .[, list(ID, non_zero = rowSums(.SD)), .SDcols=COLS] %>% 
  .[]
       ID non_zero
1: ENS777        3
2: ENS999        0
3: ENS888        4

Upvotes: 1

Related Questions