Monika Grigorova
Monika Grigorova

Reputation: 143

Counting conditionally for each row of a data frame

I have a data frame with 97 variables. For variables 32 to 97, I need to count the number of instances of the values 1, 2, 3 and 4 appearing in each row (only for variables from 32 to 97).

Here is the structure of the data frame:

 # Two-row sample of the data (dput output), bound to the name used in the
 # attempt below. Columns 32:97 (ntseo ... ntoov) are the ones to be counted.
 NTG <- structure(list(subject_label = c("01", "01"), 
 event_code = c("b", "16"), site_id = c(5, 5), site_label = c("a", 
 "a"), event_label = c("Baseline", 
 NA), done = c(1L, NA), ndreason = c(NA_integer_, NA_integer_
 ), rater = c("OLD", NA), carerid = c(1L, NA), ntgph = c(4L, 
 NA), nt1ph = c(2L, NA), ntgmh = c(2L, NA), ntgse = c(0L, 
 NA), ntgxv = c(1L, NA), ntgxb = c(0L, NA), ntgxg = c(0L, 
 NA), ntgxh = c(0L, NA), ntgxd = c(0L, NA), ntgxa = c(0L, 
 NA), ntgxm = c(0L, NA), ntgxw = c(0L, NA), ntgxp = c(0L, 
 NA), ntgcd = c(0L, NA), ntgch = c(0L, NA), ntgcs = c(0L, 
 NA), ntgnr = c(0L, NA), ntgii = c(0L, NA), ntsem = c(0L, 
 NA), ntsei = c(0L, NA), ntsev = c(0L, NA), ntsey = c(NA_integer_, 
 NA_integer_), ntseo = c("Interpersonal conflict with co-workers", 
 NA), ntawb = c(0L, NA), ntahd = c(0L, NA), ntaid = c(0L, 
 NA), ntaui = c(0L, NA), ntahe = c(0L, NA), ntaub = c(0L, 
 NA), ntadi = c(1L, NA), ntatb = c(NA_integer_, NA_integer_
 ), ntcic = c(0L, NA), ntcfw = c(0L, NA), ntcfi = c(0L, NA
 ), ntclc = c(0L, NA), ntcnr = c(0L, NA), ntcnw = c(0L, NA
 ), ntses = c(0L, NA), ntsia = c(0L, NA), ntswf = c(1L, NA
 ), ntscn = c(0L, NA), ntssd = c(0L, NA), ntswn = c(0L, NA
 ), ntswe = c(0L, NA), ntssl = c(0L, NA), ntacw = c(0L, NA
 ), ntalb = c(0L, NA), ntacs = c(NA_integer_, NA_integer_), 
 ntafa = c(0L, NA), ntaaw = c(0L, NA), ntmrp = c(0L, NA), 
 ntmdr = c(0L, NA), ntmre = c(0L, NA), ntmfw = c(0L, NA), 
 ntmlt = c(4L, NA), ntmmo = c(0L, NA), ntmwp = c(0L, NA), 
 ntmpp = c(0L, NA), ntmpl = c(0L, NA), ntbaw = c(0L, NA), 
 ntbws = c(1L, NA), ntbwp = c(4L, NA), ntbla = c(0L, NA), 
 ntbow = c(0L, NA), ntbor = c(4L, NA), ntbhh = c(4L, NA), 
 ntbfo = c(0L, NA), ntbii = c(4L, NA), ntblc = c(0L, NA), 
 ntbaa = c(0L, NA), ntbad = c(0L, NA), ntbva = c(0L, NA), 
 ntbpa = c(0L, NA), ntbtt = c(0L, NA), ntbll = c(0L, NA), 
 ntbts = c(1L, NA), ntrca = c(0L, NA), ntrht = c(0L, NA), 
 ntrst = c(0L, NA), ntrct = c(0L, NA), ntrci = c(1L, NA), 
 ntrcm = c(0L, NA), ntoog = c(0L, NA), ntoop = c(0L, NA), 
 ntoof = c(0L, NA), ntooa = c(0L, NA), ntoow = c(0L, NA), 
 ntoov = c(0L, NA), ntgsp = c("a", 
 NA), update_stamp = c("a", NA)), row.names = c(NA, -2L), class = "data.frame")

What I was trying to do is this:

 # Tally every cell in columns 32:97 that equals 1 (NA comparisons are
 # dropped). The tally is one number for the whole sub-frame, so that same
 # total is recycled into every row of the new column.
 NTG[["symptoms_morethanyear"]] <- sum(NTG[, 32:97] == 1, na.rm = TRUE)

However, this counts the matching values across all of these columns and all rows at once, and writes that single total into every row of the new column, instead of producing a separate count for each row.

Upvotes: 0

Views: 41

Answers (1)

Kay
Kay

Reputation: 2332

This is a solution if you don't mind modifying columns 37 to 97 (change the range to 32:97 to cover exactly the columns asked about in the question):

library(tidyverse)


# Recode columns 37:97 in place: 1 where the value is one of 1-4, 0 otherwise
# (NA also becomes 0, because %in% never returns NA). Then sum the recoded
# columns per row into `conts`.
# NOTE: the question asks about columns 32:97; use that range in both places
# below if columns 32-36 should be included as well.
data_frame %>%
  mutate(across(37:97, ~ if_else(.x %in% 1:4, 1, 0))) %>%
  mutate(conts = rowSums(select(., 37:97))) %>%
  # last_col() shows everything from column 97 to the end without
  # hard-coding a position that may lie past the last column.
  select(97:last_col())

  ntoov ntgsp update_stamp   conts
1     0     a            a     10
2     0  <NA>         <NA>      0

You could also do this if you do not want to modify the columns 32 through 97:

library(data.table)

# Count, per row, the cells in columns 32:97 whose value lies in the closed
# range [1, 4]; the source columns are left unchanged. For integer-coded
# columns this is the same as counting the values 1, 2, 3 and 4.
# na.rm = TRUE makes NA cells contribute 0 rather than turning the row total
# into NA.
data_frame$counts <- rowSums(
  data_frame[, 32:97] %between% c(1, 4),
  na.rm = TRUE
)

# Show column 90 through the last column (the new `counts`) without
# hard-coding a position that may lie past the end.
data_frame %>%
  select(90:last_col())

  ntrci ntrcm ntoog ntoop ntoof ntooa ntoow ntoov ntgsp update_stamp   counts
1     1     0     0     0     0     0     0     0     a            a     10
2    NA    NA    NA    NA    NA    NA    NA    NA  <NA>         <NA>     0

Upvotes: 1

Related Questions