Reputation: 143
I have a data frame with 97 variables. For variables 32 to 97, I need to count the number of instances of the values 1, 2, 3 and 4 appearing in each row (only for variables from 32 to 97).
Here is the structure of the data frame:
# dput() of a two-row sample of the data frame (99 columns). The list is
# closed before the attributes, and row.names is given in its compact form,
# so the expression parses and rebuilds a two-row data frame.
structure(list(subject_label = c("01", "01"),
event_code = c("b", "16"), site_id = c(5, 5), site_label = c("a",
"a"), event_label = c("Baseline",
NA), done = c(1L, NA), ndreason = c(NA_integer_, NA_integer_
), rater = c("OLD", NA), carerid = c(1L, NA), ntgph = c(4L,
NA), nt1ph = c(2L, NA), ntgmh = c(2L, NA), ntgse = c(0L,
NA), ntgxv = c(1L, NA), ntgxb = c(0L, NA), ntgxg = c(0L,
NA), ntgxh = c(0L, NA), ntgxd = c(0L, NA), ntgxa = c(0L,
NA), ntgxm = c(0L, NA), ntgxw = c(0L, NA), ntgxp = c(0L,
NA), ntgcd = c(0L, NA), ntgch = c(0L, NA), ntgcs = c(0L,
NA), ntgnr = c(0L, NA), ntgii = c(0L, NA), ntsem = c(0L,
NA), ntsei = c(0L, NA), ntsev = c(0L, NA), ntsey = c(NA_integer_,
NA_integer_), ntseo = c("Interpersonal conflict with co-workers",
NA), ntawb = c(0L, NA), ntahd = c(0L, NA), ntaid = c(0L,
NA), ntaui = c(0L, NA), ntahe = c(0L, NA), ntaub = c(0L,
NA), ntadi = c(1L, NA), ntatb = c(NA_integer_, NA_integer_
), ntcic = c(0L, NA), ntcfw = c(0L, NA), ntcfi = c(0L, NA
), ntclc = c(0L, NA), ntcnr = c(0L, NA), ntcnw = c(0L, NA
), ntses = c(0L, NA), ntsia = c(0L, NA), ntswf = c(1L, NA
), ntscn = c(0L, NA), ntssd = c(0L, NA), ntswn = c(0L, NA
), ntswe = c(0L, NA), ntssl = c(0L, NA), ntacw = c(0L, NA
), ntalb = c(0L, NA), ntacs = c(NA_integer_, NA_integer_),
ntafa = c(0L, NA), ntaaw = c(0L, NA), ntmrp = c(0L, NA),
ntmdr = c(0L, NA), ntmre = c(0L, NA), ntmfw = c(0L, NA),
ntmlt = c(4L, NA), ntmmo = c(0L, NA), ntmwp = c(0L, NA),
ntmpp = c(0L, NA), ntmpl = c(0L, NA), ntbaw = c(0L, NA),
ntbws = c(1L, NA), ntbwp = c(4L, NA), ntbla = c(0L, NA),
ntbow = c(0L, NA), ntbor = c(4L, NA), ntbhh = c(4L, NA),
ntbfo = c(0L, NA), ntbii = c(4L, NA), ntblc = c(0L, NA),
ntbaa = c(0L, NA), ntbad = c(0L, NA), ntbva = c(0L, NA),
ntbpa = c(0L, NA), ntbtt = c(0L, NA), ntbll = c(0L, NA),
ntbts = c(1L, NA), ntrca = c(0L, NA), ntrht = c(0L, NA),
ntrst = c(0L, NA), ntrct = c(0L, NA), ntrci = c(1L, NA),
ntrcm = c(0L, NA), ntoog = c(0L, NA), ntoop = c(0L, NA),
ntoof = c(0L, NA), ntooa = c(0L, NA), ntoow = c(0L, NA),
ntoov = c(0L, NA), ntgsp = c("a",
NA), update_stamp = c("a", NA)), row.names = c(NA, -2L), class = "data.frame")
What I was trying to do is this:
# which() scans the whole comparison matrix at once, so length() returns one
# total over all rows; assigning that single number recycles it into every row.
NTG$symptoms_morethanyear <- length(which(NTG[,c(32:97)] == 1))
However, this counts the matches across all rows and columns at once and writes that single total into every row of the new column, instead of counting separately for each row.
Upvotes: 0
Views: 41
Reputation: 2332
This is a solution if you don't mind modifying columns 37 to 97
library(tidyverse)

# Recode columns 37 to 97 in place: 1 if the value is one of 1:4, else 0
# (`%in%` never returns NA, so missing values become 0), then sum per row.
# NOTE(review): the question asks about columns 32 to 97; widen the range
# if columns 32 to 36 should be counted as well.
data_frame %>%
  mutate(across(37:97, ~ if_else(.x %in% 1:4, 1, 0))) %>%
  mutate(conts = rowSums(select(., 37:97))) %>%
  # Select the tail by name; a fixed 97:101 range runs past the last column.
  select(ntoov:conts)
ntoov ntgsp update_stamp conts
1 0 a a 10
2 0 <NA> <NA> 0
You could also do this if you do not want to modify the columns 32 through 97:
library(data.table)

# `%between%` is inclusive at both ends, so the comparison is TRUE for values
# in [1, 4]; rowSums() then counts the TRUEs per row and leaves columns 32 to
# 97 untouched. TRUE is spelled out because `T` is a reassignable variable.
data_frame$counts <- rowSums(
  data_frame[, 32:97] %between% c(1, 4),
  na.rm = TRUE
)
# Show the tail of the frame, from column 90 through the new `counts` column.
data_frame[, 90:ncol(data_frame)]
ntrci ntrcm ntoog ntoop ntoof ntooa ntoow ntoov ntgsp update_stamp counts
1 1 0 0 0 0 0 0 0 a a 10
2 NA NA NA NA NA NA NA NA <NA> <NA> 0
Upvotes: 1