Reputation: 65
I have a data set which the values of "age" has different units (days, months, year). I want to convert the rows which their values are based on days and months to year. How I can do it in R? If there is no letter after the number, then the unit is years. If there is a ‘D’ after the number, then the unit is days (e.g. 10D means 10 days) If there is an ‘M’ after the number, then the unit is months (e.g. 5M means 5 months).
Age <- c("33","32","44","54M","67M","34D","33D","44","77","88M","49 D","55D","11M")
ID <- c(1,2,3,4,5,6,7,8,9,10,11,12,13)
Data <- data.frame(ID,Age)
> Data
ID Age
1 1 33
2 2 32
3 3 44
4 4 54M
5 5 67M
6 6 34D
7 7 33D
8 8 44
9 9 77
10 10 88M
11 11 49 D
12 12 55D
13 13 11M
Upvotes: 2
Views: 56
Reputation: 4929
Additionally, a further solution using data.table
:
> library(data.table)
> dt <- data.table(ID, Age)
> dt[, Unit := ifelse(grepl("D$", Age), "D", ifelse(grepl("M$", Age), "M", "Y"))][
, Age := as.integer(gsub("M|D", "", Age))]
> dt[, Age_in_years := ifelse(Unit == "Y", Age,
ifelse(Unit == "M", Age / 12, Age / 365.25))][]
ID Age Unit Age_in_years
1: 1 33 Y 33.00000000
2: 2 32 Y 32.00000000
3: 3 44 Y 44.00000000
4: 4 54 M 4.50000000
5: 5 67 M 5.58333333
6: 6 34 D 0.09308693
7: 7 33 D 0.09034908
8: 8 44 Y 44.00000000
9: 9 77 Y 77.00000000
10: 10 88 M 7.33333333
11: 11 49 D 0.13415469
12: 12 55 D 0.15058179
13: 13 11 M 0.91666667
Upvotes: 0
Reputation: 11480
#baseR
Age <-c("33","32","44","54M","67M","34D","33D","44","77","88M","49 D","55D","11M")
AgeNum<- as.numeric(sub("\\s*\\D$","",Age))
Age[grepl("M$",Age)] <- AgeNum[grepl("M$",Age)]/12
Age[grepl("D$",Age)] <- AgeNum[grepl("D$",Age)]/365
Age <- as.numeric(Age)
result:
> Age
[1] 33.00000000 32.00000000 44.00000000 4.50000000 5.58333333 0.09315068 0.09041096 44.00000000
[9] 77.00000000 7.33333333 0.13424658 0.15068493 0.91666667
>
Upvotes: 1
Reputation: 173577
And here's another option using tidyverse tools:
library(dplyr)
library(stringr)
Data %>%
mutate(Unit = str_extract(string = Age,pattern = "[DM]"),
Unit = if_else(is.na(Unit),'Y',Unit),
Age = as.numeric(gsub(pattern = "[MD]","",Age))) %>%
mutate(AgeYears = Age / c('Y' = 1,'M' = 12,'D' = 365)[Unit])
ID Age Unit AgeYears
1 1 33 Y 33.00000000
2 2 32 Y 32.00000000
3 3 44 Y 44.00000000
4 4 54 M 4.50000000
5 5 67 M 5.58333333
6 6 34 D 0.09315068
7 7 33 D 0.09041096
8 8 44 Y 44.00000000
9 9 77 Y 77.00000000
10 10 88 M 7.33333333
11 11 49 D 0.13424658
12 12 55 D 0.15068493
13 13 11 M 0.91666667
Upvotes: 4
Reputation: 145835
Here's a quick way in base R:
Data$units = ifelse(grepl("M", Data$Age), "month", ifelse(grepl("D", Data$Age), "day", "year"))
Data$value = as.numeric(gsub(pattern = "[^0-9]", replacement = "", Data$Age))
Data$result = with(Data,
ifelse(units == "year", value,
ifelse(units == "month", value / 12, value / 365.25)))
Data
# ID Age units value result
# 1 1 33 year 33 33.00000000
# 2 2 32 year 32 32.00000000
# 3 3 44 year 44 44.00000000
# 4 4 54M month 54 4.50000000
# 5 5 67M month 67 5.58333333
# 6 6 34D day 34 0.09308693
# 7 7 33D day 33 0.09034908
# 8 8 44 year 44 44.00000000
# 9 9 77 year 77 77.00000000
# 10 10 88M month 88 7.33333333
# 11 11 49 D day 49 0.13415469
# 12 12 55D day 55 0.15058179
# 13 13 11M month 11 0.91666667
Upvotes: 5