barnsm2
barnsm2

Reputation: 195

How to normalize data for one column based on population

I have the dataset below:

##      ID        DOB sector meters Oct   Res_FROM     Res_TO   Exp_FROM
## 1 20100 1979-08-24    H38   6400   W 1979-08-15 1991-05-15 1979-08-24
## 2 20101 1980-05-05    B01   1600  NW 1980-05-15 1991-04-15 1980-05-15
## 3 20102 1979-03-17    H04   1600  SW 1972-06-15 1979-08-15 1979-03-17
## 4 20103 1981-11-30    B09   3200  NE 1982-01-15 1984-01-15 1982-01-15
## 5 20103 1981-11-30    B37   8000   N 1984-01-15 1986-04-15 1984-01-15
## 6 20104 1978-09-01    B09   3200  NE 1982-01-15 1984-01-15 1982-01-15
##       Exp_TO Exps_Grp Yr1952 Yr1953 Yr1954 Yr1955 Yr1956 Yr1957 Yr1958 Yr1959
## 1 1988-12-31  fr51>88     NA     NA     NA     NA     NA     NA     NA     NA
## 2 1988-12-31  fr51>88     NA     NA     NA     NA     NA     NA     NA     NA
## 3 1979-08-15  between     NA     NA     NA     NA     NA     NA     NA     NA
## 4 1984-01-15  between     NA     NA     NA     NA     NA     NA     NA     NA
## 5 1986-04-15  between     NA     NA     NA     NA     NA     NA     NA     NA
## 6 1984-01-15  between     NA     NA     NA     NA     NA     NA     NA     NA
##   Yr1960 Yr1961 Yr1962 Yr1963 Yr1964 Yr1965 Yr1966 Yr1967 Yr1968 Yr1969 Yr1970
## 1     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
## 2     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
## 3     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
## 4     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
## 5     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
## 6     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
##   Yr1971 Yr1972 Yr1973 Yr1974 Yr1975 Yr1976 Yr1977 Yr1978    Yr1979   Yr1980
## 1     NA     NA     NA     NA     NA     NA     NA     NA  5.950991 4.340588
## 2     NA     NA     NA     NA     NA     NA     NA     NA        NA 2.927725
## 3     NA     NA     NA     NA     NA     NA     NA     NA 20.608986       NA
## 4     NA     NA     NA     NA     NA     NA     NA     NA        NA       NA
## 5     NA     NA     NA     NA     NA     NA     NA     NA        NA       NA
## 6     NA     NA     NA     NA     NA     NA     NA     NA        NA       NA
##     Yr1981    Yr1982    Yr1983    Yr1984   Yr1985    Yr1986   Yr1987   Yr1988
## 1 4.340588  4.340588  4.340588 4.3405881 4.340588 4.3405881 4.340588 1.083782
## 2 4.447229  4.447229  4.447229 4.4472289 4.447229 4.4472289 4.447229 1.110409
## 3       NA        NA        NA        NA       NA        NA       NA       NA
## 4       NA 15.365412 16.018407 0.6529943       NA        NA       NA       NA
## 5       NA        NA        NA 2.9414202 3.052618 0.6918076       NA       NA
## 6       NA 15.365412 16.018407 0.6529943       NA        NA       NA       NA
##     Yrs_Exp arth_mean    median cumulative  caldate Age Month_Res
## 1 9.3616438  4.175948  4.340588  41.759478 12/31/88   9       141
## 2 8.6356164  3.907637  4.447229  35.168736 12/31/88   9       131
## 3 0.4136986 20.608986 20.608986  20.608986 12/31/88   9        86
## 4 2.0000000 10.678938 15.365412  32.036813 12/31/88   9        24
## 5 2.2493151  2.228615  2.941420   6.685846 12/31/88   8        27
## 6 2.0000000 10.678938 15.365412  32.036813 12/31/88   9        24

I am needing to normalize the cumulative exposures for each individual (ID) by the population of my dataset and by the residence time of each person. How would I do this in R? I have provided a small data set, but in reality I have about 14,000 rows. I've included code for the data as well.

dat <- structure(list(ID = c(20100L, 20101L, 20102L, 20103L, 20103L,
20104L, 20104L, 20105L, 20105L, 20106L, 20106L), DOB = c("1979-08-24",
"1980-05-05", "1979-03-17", "1981-11-30", "1981-11-30", "1978-09-01",
"1978-09-01", "1980-12-03", "1980-12-03", "1978-04-25", "1978-04-25"
), sector = c("H38", "B01", "H04", "B09", "B37", "B09", "B37",
"B09", "B09", "B09", "B09"), meters = c(6400L, 1600L, 1600L,
3200L, 8000L, 3200L, 8000L, 3200L, 3200L, 3200L, 3200L), Oct = c("W",
"NW", "SW", "NE", "N", "NE", "N", "NE", "NE", "NE", "NE"), Res_FROM = c("1979-08-15",
"1980-05-15", "1972-06-15", "1982-01-15", "1984-01-15", "1982-01-15",
"1984-01-15", "1980-12-15", "1983-08-15", "1978-04-15", "1983-08-15"
), Res_TO = c("1991-05-15", "1991-04-15", "1979-08-15", "1984-01-15",
"1986-04-15", "1984-01-15", "1986-04-15", "1983-08-15", "1991-03-15",
"1983-08-15", "2000-01-15"), Exp_FROM = c("1979-08-24", "1980-05-15",
"1979-03-17", "1982-01-15", "1984-01-15", "1982-01-15", "1984-01-15",
"1980-12-15", "1983-08-15", "1978-04-25", "1983-08-15"), Exp_TO = c("1988-12-31",
"1988-12-31", "1979-08-15", "1984-01-15", "1986-04-15", "1984-01-15",
"1986-04-15", "1983-08-15", "1988-12-31", "1983-08-15", "1988-12-31"
), Exps_Grp = c("fr51>88", "fr51>88", "between", "between", "between",
"between", "between", "between", "fr51>88", "between", "fr51>88"
), Yr1952 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1953 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1954 = c(NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA), Yr1955 = c(NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA), Yr1956 = c(NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA), Yr1957 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA), Yr1958 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
), Yr1959 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1960 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1961 = c(NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA), Yr1962 = c(NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA), Yr1963 = c(NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA), Yr1964 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA), Yr1965 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
), Yr1966 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1967 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1968 = c(NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA), Yr1969 = c(NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA), Yr1970 = c(NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA), Yr1971 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA), Yr1972 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
), Yr1973 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1974 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1975 = c(NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA), Yr1976 = c(NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA), Yr1977 = c(NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA), Yr1978 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA,
79.39642441, NA), Yr1979 = c(5.950991161, NA, 20.60898553, NA,
NA, NA, NA, NA, NA, 59.94484924, NA), Yr1980 = c(4.340588078,
2.927724588, NA, NA, NA, NA, NA, 0.758267013, NA, 16.01840668,
NA), Yr1981 = c(4.340588078, 4.447228937, NA, NA, NA, NA, NA,
16.01840668, NA, 16.01840668, NA), Yr1982 = c(4.340588078, 4.447228937,
NA, 15.36541238, NA, 15.36541238, NA, 16.01840668, NA, 16.01840668,
NA), Yr1983 = c(4.340588078, 4.447228937, NA, 16.01840668, NA,
16.01840668, NA, 9.952203009, 6.066203667, 9.952203009, 6.066203667
), Yr1984 = c(4.340588078, 4.447228937, NA, 0.652994292, 2.941420153,
0.652994292, 2.941420153, NA, 16.01840668, NA, 16.01840668),
    Yr1985 = c(4.340588078, 4.447228937, NA, NA, 3.052618478,
    NA, 3.052618478, NA, 16.01840668, NA, 16.01840668), Yr1986 = c(4.340588078,
    4.447228937, NA, NA, 0.691807598, NA, 0.691807598, NA, 16.01840668,
    NA, 16.01840668), Yr1987 = c(4.340588078, 4.447228937, NA,
    NA, NA, NA, NA, NA, 16.01840668, NA, 16.01840668), Yr1988 = c(1.083782142,
    1.110408824, NA, NA, NA, NA, NA, NA, 3.999564755, NA, 3.999564755
    ), Yrs_Exp = c(9.361643836, 8.635616438, 0.41369863, 2, 2.249315068,
    2, 2.249315068, 2.665753425, 5.383561644, 5.309589041, 5.383561644
    ), arth_mean = c(4.175947792, 3.907637331, 20.60898553, 10.67893778,
    2.22861541, 10.67893778, 2.22861541, 10.68682084, 12.35656585,
    32.89144945, 12.35656585), median = c(4.340588078, 4.447228937,
    20.60898553, 15.36541238, 2.941420153, 15.36541238, 2.941420153,
    12.98530484, 16.01840668, 16.01840668, 16.01840668), cumulative = c(41.75947792,
    35.16873597, 20.60898553, 32.03681335, 6.685846229, 32.03681335,
    6.685846229, 42.74728337, 74.13939513, 197.3486967, 74.13939513
    ), caldate = c("12/31/88", "12/31/88", "12/31/88", "12/31/88",
    "12/31/88", "12/31/88", "12/31/88", "12/31/88", "12/31/88",
    "12/31/88", "12/31/88"), Age = c(9L, 9L, 9L, 9L, 8L, 9L,
    7L, 7L, 10L, 10L, 8L), Month_Res = c(141L, 131L, 86L, 24L,
    27L, 24L, 27L, 32L, 91L, 64L, 197L)), class = "data.frame", row.names = c(NA,
-11L))

Upvotes: 1

Views: 143

Answers (0)

Related Questions