stat
stat

Reputation: 65

Having a subset of a data set based on a specific condition

I have a data set which the values of "age" has different units (days, months, year). I want to convert the rows which their values are based on days and months to year. How I can do it in R? If there is no letter after the number, then the unit is years. If there is a ‘D’ after the number, then the unit is days (e.g. 10D means 10 days) If there is an ‘M’ after the number, then the unit is months (e.g. 5M means 5 months).

Age <- c("33","32","44","54M","67M","34D","33D","44","77","88M","49 D","55D","11M")
ID <- c(1,2,3,4,5,6,7,8,9,10,11,12,13)
Data <- data.frame(ID,Age)

> Data
   ID  Age
1   1   33
2   2   32
3   3   44
4   4  54M
5   5  67M
6   6  34D
7   7  33D
8   8   44
9   9   77
10 10  88M
11 11 49 D
12 12  55D
13 13  11M

Upvotes: 2

Views: 56

Answers (4)

Additionally, a further solution using data.table:

> library(data.table)
> dt <- data.table(ID, Age)

> dt[, Unit := ifelse(grepl("D$", Age), "D", ifelse(grepl("M$", Age), "M", "Y"))][
, Age := as.integer(gsub("M|D", "", Age))]

> dt[, Age_in_years := ifelse(Unit == "Y", Age, 
ifelse(Unit == "M", Age / 12, Age / 365.25))][]

    ID Age Unit Age_in_years
 1:  1  33    Y  33.00000000
 2:  2  32    Y  32.00000000
 3:  3  44    Y  44.00000000
 4:  4  54    M   4.50000000
 5:  5  67    M   5.58333333
 6:  6  34    D   0.09308693
 7:  7  33    D   0.09034908
 8:  8  44    Y  44.00000000
 9:  9  77    Y  77.00000000
10: 10  88    M   7.33333333
11: 11  49    D   0.13415469
12: 12  55    D   0.15058179
13: 13  11    M   0.91666667

Upvotes: 0

Andre Elrico
Andre Elrico

Reputation: 11480

#baseR

Age   <-c("33","32","44","54M","67M","34D","33D","44","77","88M","49 D","55D","11M")
AgeNum<- as.numeric(sub("\\s*\\D$","",Age))

Age[grepl("M$",Age)] <- AgeNum[grepl("M$",Age)]/12
Age[grepl("D$",Age)] <- AgeNum[grepl("D$",Age)]/365

Age <- as.numeric(Age)

result:

> Age
 [1] 33.00000000 32.00000000 44.00000000  4.50000000  5.58333333  0.09315068  0.09041096 44.00000000
 [9] 77.00000000  7.33333333  0.13424658  0.15068493  0.91666667
> 

Upvotes: 1

joran
joran

Reputation: 173577

And here's another option using tidyverse tools:

library(dplyr)
library(stringr)

Data %>% 
    mutate(Unit = str_extract(string = Age,pattern = "[DM]"),
                 Unit = if_else(is.na(Unit),'Y',Unit),
                 Age = as.numeric(gsub(pattern = "[MD]","",Age))) %>%
    mutate(AgeYears = Age / c('Y' = 1,'M' = 12,'D' = 365)[Unit])

   ID Age Unit    AgeYears
1   1  33    Y 33.00000000
2   2  32    Y 32.00000000
3   3  44    Y 44.00000000
4   4  54    M  4.50000000
5   5  67    M  5.58333333
6   6  34    D  0.09315068
7   7  33    D  0.09041096
8   8  44    Y 44.00000000
9   9  77    Y 77.00000000
10 10  88    M  7.33333333
11 11  49    D  0.13424658
12 12  55    D  0.15068493
13 13  11    M  0.91666667

Upvotes: 4

Gregor Thomas
Gregor Thomas

Reputation: 145835

Here's a quick way in base R:

Data$units = ifelse(grepl("M", Data$Age), "month", ifelse(grepl("D", Data$Age), "day", "year"))
Data$value = as.numeric(gsub(pattern = "[^0-9]", replacement = "", Data$Age))
Data$result = with(Data,
    ifelse(units == "year", value, 
        ifelse(units == "month", value / 12, value / 365.25)))
Data
#    ID  Age units value      result
# 1   1   33  year    33 33.00000000
# 2   2   32  year    32 32.00000000
# 3   3   44  year    44 44.00000000
# 4   4  54M month    54  4.50000000
# 5   5  67M month    67  5.58333333
# 6   6  34D   day    34  0.09308693
# 7   7  33D   day    33  0.09034908
# 8   8   44  year    44 44.00000000
# 9   9   77  year    77 77.00000000
# 10 10  88M month    88  7.33333333
# 11 11 49 D   day    49  0.13415469
# 12 12  55D   day    55  0.15058179
# 13 13  11M month    11  0.91666667

Upvotes: 5

Related Questions