Reputation: 79
I have a data frame :
str(HSCtc_all_maxcols)
'data.frame': 12308 obs. of 11 variables:
$ AGI : Factor w/ 27747 levels "AT1G01010","AT1G01020",..: 1 6 7 8 12 14 17 18 19 20 ...
$ k2.5m : num 0 0 0 0 0 0 0 0 0 0 ...
$ k5m : num 0 -0.38 0 0 0 0 0 0 0 0 ...
$ k10m : num 0 -0.6 0 0 0 -0.27 0 0 0 0 ...
$ k15m : num 0 -0.6 0 0 0 0 0 0 0 0 ...
$ k20m : num 0 0.33 0 -0.23 0 0.85 0 0.46 0 0 ...
$ k30m : num 0 0 -0.69 0 0.49 1 0 1.03 0 -1.17 ...
$ k1h : num 1.12 0 -1.46 -0.42 0.88 1.15 0 1.62 0 -1.3 ...
$ k3h : num 1.92 -0.61 -0.9 -1.55 1.67 -0.57 -0.6 1.08 -0.91 0 ...
$ k30h : num 0.62 0 0 0 0.26 0.2 -0.32 0 1.13 0 ...
$ maxcol: chr "k9h" "k9h" "k9h" "k2.5m" ...
head(HSCtc_all_maxcols)
AGI k2.5m k5m k10m k15m k20m k30m k1h k3h k30h maxcol
1 AT1G01010 0 0.00 0.00 0.0 0.00 0.00 1.12 1.92 0.62 k9h
2 AT1G01060 0 -0.38 -0.60 -0.6 0.33 0.00 0.00 -0.61 0.00 k9h
3 AT1G01070 0 0.00 0.00 0.0 0.00 -0.69 -1.46 -0.90 0.00 k9h
4 AT1G01080 0 0.00 0.00 0.0 -0.23 0.00 -0.42 -1.55 0.00 k2.5m
5 AT1G01120 0 0.00 0.00 0.0 0.00 0.49 0.88 1.67 0.26 k3h
6 AT1G01140 0 0.00 -0.27 0.0 0.85 1.00 1.15 -0.57 0.20 k1h
For each row in this data frame, I would like to identify the column name which has the first non-zero value (going from left to right). For example, in the above, for row 1 the column name with the first non-zero value is k1h. For row 2 it is k5m.
I have already written code to identify for each row the name of column with the highest value and this appears as the last value for each row in the column named maxcol. The code to do this was:
HSCtc_all_maxcols <- HSCtc_all %>% mutate(maxcol = names(HSCtc_all[2:18])[apply(HSCtc_all[,2:18],1,which.max)])
I would like to add another column to the data frame that identifies for each row the name of column with the first non-zero value.
For example:
head(HSCtc_all_maxcols)
AGI k2.5m k5m k10m k15m k20m k30m k1h k3h k30h maxcol fcol
1 AT1G01010 0 0.00 0.00 0.0 0.00 0.00 1.12 1.92 0.62 k9h k1h
2 AT1G01060 0 -0.38 -0.60 -0.6 0.33 0.00 0.00 -0.61 0.00 k9h k5m
3 AT1G01070 0 0.00 0.00 0.0 0.00 -0.69 -1.46 -0.90 0.00 k9h k30m
4 AT1G01080 0 0.00 0.00 0.0 -0.23 0.00 -0.42 -1.55 0.00 k2.5m k20m
5 AT1G01120 0 0.00 0.00 0.0 0.00 0.49 0.88 1.67 0.26 k3h k30m
6 AT1G01140 0 0.00 -0.27 0.0 0.85 1.00 1.15 -0.57 0.20 k1h k10m
Upvotes: 0
Views: 1008
Reputation: 389125
We could use apply
row-wise with which.max
to get index of first non-zero value in row and then use that index to subset the names
names(df[-1])[apply(df[-1] != 0, 1, which.max)]
#[1] "k1h" "k5m" "k30m" "k20m" "k30m" "k10m"
data
df <- structure(list(AGI = structure(1:6, .Label = c("AT1G01010", "AT1G01060",
"AT1G01070", "AT1G01080", "AT1G01120", "AT1G01140"), class = "factor"),
k2.5m = c(0L, 0L, 0L, 0L, 0L, 0L), k5m = c(0, -0.38, 0, 0,
0, 0), k10m = c(0, -0.6, 0, 0, 0, -0.27), k15m = c(0, -0.6,
0, 0, 0, 0), k20m = c(0, 0.33, 0, -0.23, 0, 0.85), k30m = c(0,
0, -0.69, 0, 0.49, 1), k1h = c(1.12, 0, -1.46, -0.42, 0.88,
1.15), k3h = c(1.92, -0.61, -0.9, -1.55, 1.67, -0.57), k30h = c(0.62,
0, 0, 0, 0.26, 0.2), maxcol = structure(c(4L, 4L, 4L, 2L,
3L, 1L), .Label = c("k1h", "k2.5m", "k3h", "k9h"), class = "factor")),
class = "data.frame", row.names = c("1", "2", "3", "4", "5", "6"))
Upvotes: 1
Reputation: 11150
Here's a way in base R using max.col()
-
names(HSCtc_all[2:18])[max.col(HSCtc_all[2:18] != 0, ties.method = "first")]
Upvotes: 2