Reputation: 463
I'm trying to create a factor column based on numeric values from another column. Here's a subset of my data:
> dput(sample)
# dput() output for the example data: a 21-row data.frame with columns
# ID (integer), Month (factor with levels Jun/Jul/Aug), Year (numeric) and
# Homerange (numeric, with many NA values).
# The "out.attrs" attribute (dim / dimnames keyed Var1, Var2, Var3) is extra
# metadata attached to the object; presumably a leftover from expand.grid()
# and not needed to reproduce the question -- TODO confirm.
structure(list(ID = c(1683L, 1684L, 1684L, 1684L, 1684L, 1685L,
1685L, 1685L, 1685L, 1686L, 1686L, 1686L, 1686L, 30759L, 30759L,
30759L, 30759L, 30760L, 30760L, 30760L, 30760L), Month = structure(c(2L,
2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L,
2L, 3L, 1L, 2L), .Label = c("Jun", "Jul", "Aug"), class = "factor"),
Year = c(2018, 2017, 2017, 2018, 2018, 2017, 2017, 2018,
2018, 2017, 2017, 2018, 2018, 2017, 2017, 2018, 2018, 2017,
2017, 2018, 2018), Homerange = c(NA, 27.2850594918174, NA,
NA, NA, NA, 30.52684873837, NA, NA, NA, 30.7069481409563,
10.625864752589, 29.2661529202662, 32.3278427642325, NA,
NA, NA, NA, 33.8586876862157, NA, NA)), out.attrs = list(
dim = c(58L, 4L, 2L), dimnames = list(Var1 = c("Var1= 1657",
"Var1= 1658", "Var1= 1659", "Var1= 1660", "Var1= 1661", "Var1= 1662",
"Var1= 1663", "Var1= 1664", "Var1= 1666", "Var1= 1667", "Var1= 1668",
"Var1= 1669", "Var1= 1670", "Var1= 1671", "Var1= 1672", "Var1= 1673",
"Var1= 1674", "Var1= 1675", "Var1= 1676", "Var1= 1678", "Var1= 1679",
"Var1= 1680", "Var1= 1681", "Var1= 1682", "Var1= 1683", "Var1= 1684",
"Var1= 1685", "Var1= 1686", "Var1=30759", "Var1=30760", "Var1=30761",
"Var1=30762", "Var1=30763", "Var1=30764", "Var1=30765", "Var1=30766",
"Var1=30767", "Var1=30768", "Var1=30769", "Var1=30770", "Var1=30771",
"Var1=30772", "Var1=30773", "Var1=30774", "Var1=30775", "Var1=30776",
"Var1=30777", "Var1=30778", "Var1=30779", "Var1=30780", "Var1=30781",
"Var1=30782", "Var1=30783", "Var1=30784", "Var1=30785", "Var1=30786",
"Var1=30787", "Var1=30788"), Var2 = c("Var2=Jun", "Var2=Jul",
"Var2=Aug", "Var2=Sep"), Var3 = c("Var3=2017", "Var3=2018"
))), row.names = c(315L, 84L, 142L, 258L, 316L, 85L, 143L,
259L, 317L, 86L, 144L, 260L, 318L, 87L, 145L, 261L, 319L, 88L,
146L, 262L, 320L), class = "data.frame")
The numeric column "ID" has values from 1659-1685 and 30759-30788. What I would like to do is create a factor column "Type" with 2 levels "V13" which corresponds to IDs 1659-1685, and "V16" which corresponds to IDs 30759-30788. I know I've done this before but for some reason I can't remember how. Thanks for the help!
Upvotes: 1
Views: 78
Reputation: 73212
A straightforward base R solution is to use nested ifelse() calls.
# Base R: label each row by the ID range it belongs to.
#   30759-30788 -> "V16", 1659-1685 -> "V13", anything else -> NA.
# The two ranges are disjoint, so the order of the membership tests does not
# affect the result. NA entries do not become a factor level.
sample <- transform(
  sample,
  Type = factor(
    ifelse(
      ID %in% 30759:30788, "V16",
      ifelse(ID %in% 1659:1685, "V13", NA)
    )
  )
)
Or, slightly more efficiently, with cut() (credit to @camille):
# Bin ID into three consecutive intervals and keep only the two named ranges:
#   [1659, 1685]    -> "V13"
#   (1685, 30758]   -> gap between the ranges (placeholder label, becomes NA)
#   (30758, 30788]  -> "V16"  (i.e. 30759-30788 for integer IDs)
# The third break is 30758 rather than 1686 so that only 30759-30788 is
# labelled "V16"; with a break at 1686 every ID from 1687 up to 30788 would be.
# Re-leveling with factor(levels = ...) maps the placeholder "gap" bin to NA
# and leaves exactly the two levels "V13" and "V16".
# The result is printed, not assigned back to `sample`.
transform(
  sample,
  Type2 = factor(
    cut(ID,
        breaks = c(1659, 1685, 30758, 30788),
        labels = c("V13", "gap", "V16"),
        include.lowest = TRUE),
    levels = c("V13", "V16")
  )
)
or with data.table::inrange
library(data.table)
# Label rows by ID range using data.table's %inrange% operator, which takes
# the bounds as c(lower, upper). IDs outside both ranges become NA.
# The two ranges are disjoint, so the order of the range tests does not
# affect the result.
sample <- transform(
  sample,
  Type = factor(
    ifelse(
      ID %inrange% c(30759, 30788), "V16",
      ifelse(ID %inrange% c(1659, 1685), "V13", NA)
    )
  )
)
# Inspect the column types and the levels of the new factor.
str(sample)
# 'data.frame': 21 obs. of 5 variables:
# $ ID : int 1683 1684 1684 1684 1684 1685 1685 1685 1685 1686 ...
# $ Month : Factor w/ 3 levels "Jun","Jul","Aug": 2 2 3 1 2 2 3 1 2 2 ...
# $ Year : num 2018 2017 2017 2018 2018 ...
# $ Homerange: num NA 27.3 NA NA NA ...
# $ Type : Factor w/ 2 levels "V13","V16": 1 1 1 1 1 1 1 1 1 NA ...
Upvotes: 3
Reputation: 940
Assuming that it is on purpose that ID 1686 is not covered by your ranges, you can try this:
library(dplyr)
library(forcats)
# Classify each row by ID range with case_when(); rows matching neither
# condition get NA. The character result is converted to a factor with
# forcats::as_factor() in the same mutate() step.
df %>%
  mutate(
    type = as_factor(
      case_when(
        between(ID, 1659, 1685) ~ "V13",
        between(ID, 30759, 30788) ~ "V16"
      )
    )
  )
# A tibble: 21 x 5
ID Month Year Homerange type
<int> <fct> <dbl> <dbl> <fct>
1 1683 Jul 2018 NA V13
2 1684 Jul 2017 27.3 V13
3 1684 Aug 2017 NA V13
4 1684 Jun 2018 NA V13
5 1684 Jul 2018 NA V13
6 1685 Jul 2017 NA V13
7 1685 Aug 2017 30.5 V13
8 1685 Jun 2018 NA V13
9 1685 Jul 2018 NA V13
10 1686 Jul 2017 NA NA
11 1686 Aug 2017 30.7 NA
12 1686 Jun 2018 10.6 NA
13 1686 Jul 2018 29.3 NA
14 30759 Jul 2017 32.3 V16
15 30759 Aug 2017 NA V16
16 30759 Jun 2018 NA V16
17 30759 Jul 2018 NA V16
18 30760 Jul 2017 NA V16
19 30760 Aug 2017 33.9 V16
20 30760 Jun 2018 NA V16
21 30760 Jul 2018 NA V16
Upvotes: 3