Reputation: 1
I am trying to generate modelled smoking prevalence based on current demographic, mortality and numbers of people being referred to and treated in quit support services in Wales.
To do this I want to manipulate data across rows and columns and generate a loop starting with a baseline population of smokers (17.2% of the adult population) which each successive year gets smaller as a result of people quitting (spontaneously or through services), dying from smoking-related diseases, with a small uplift each year from teenagers and adults starting smoking.
The population of smokers in any year = (Existing smoking population (previous year) + teenage starters (in last year) + adult starters (in last year) - (quitters (in last year) + deaths from smoking (in last year))
I have only been able to manipulate data in Excel and import in. I think I should be able to use lead() or a radix as the starting population but am not sure how to generate a function to loop across multiple columns/variables.
#Extract head of data frame
structure(list(Year = 2018:2023, Popn_15 = c(32526L, 33238L, 34162L,
35419L, 35942L, 37310L), Popn_16_64 = c(1906559L, 1901378L,
1898159L, 1895177L, 1892121L, 1888770L), Popn_65_over = c(658616L,
669267L, 678630L, 688625L, 699857L, 711777L), Popn_16_over = c(2565175L,
2570645L, 2576789L, 2583802L, 2591978L, 2600547L), start_15 = c(2927.34,
2991.42, 3074.58, 3187.71, 3234.78, 3357.9), start_16 = c(38131.18,
38027.56, 37963.18, 37903.54, 37842.42, 37775.4)), .Names = c("Year",
"Popn_15", "Popn_16_64", "Popn_65_over", "Popn_16_over", "start_15",
"start_16"), row.names = c(NA, 6L), class = "data.frame")
I would like to produce a column of estimated smoking populations to then calculate prevalence proportions to visualise but cannot loop the data in R at present.
Many thanks in advance for any pointers or help.
Upvotes: 0
Views: 66
Reputation: 1
This is the output expected which I have created by manually iterating the data manipulation by row (first calculating quitters and smoking deaths from previous year's smoking population then calculating using these figures the new smoking population)
dput(smok_projections)
structure(list(Year = 2018:2039, Popn_15 = c(32526L, 33238L,
34162L, 35419L, 35942L, 37310L, 36706L, 37175L, 37900L, 37433L,
36190L, 35291L, 35161L, 35226L, 35349L, 35472L, 35634L, 35801L,
35955L, 36121L, 36190L, 36148L), Popn_16_64 = c(1906559L, 1901378L,
1898159L, 1895177L, 1892121L, 1888770L, 1886206L, 1882590L, 1878168L,
1873414L, 1868058L, 1860412L, 1852056L, 1844695L, 1837416L, 1831331L,
1825159L, 1820966L, 1816035L, 1813212L, 1812904L, 1814767L),
Popn_65_over = c(658616L, 669267L, 678630L, 688625L, 699857L,
711777L, 724086L, 736654L, 750282L, 764671L, 778915L, 793895L,
808374L, 821403L, 834068L, 845307L, 856400L, 865327L, 874819L,
881993L, 886488L, 888608L), Popn_16_over = c(2565175L, 2570645L,
2576789L, 2583802L, 2591978L, 2600547L, 2610292L, 2619244L,
2628450L, 2638085L, 2646973L, 2654307L, 2660430L, 2666098L,
2671484L, 2676638L, 2681559L, 2686293L, 2690854L, 2695205L,
2699392L, 2703375L), start_15 = c(2927.34, 2991.42, 3074.58,
3187.71, 3234.78, 3357.9, 3303.54, 3345.75, 3411, 3368.97,
3257.1, 3176.19, 3164.49, 3170.34, 3181.41, 3192.48, 3207.06,
3222.09, 3235.95, 3250.89, 3257.1, 3253.32), start_16 = c(38131.18,
38027.56, 37963.18, 37903.54, 37842.42, 37775.4, 37724.12,
37651.8, 37563.36, 37468.28, 37361.16, 37208.24, 37041.12,
36893.9, 36748.32, 36626.62, 36503.18, 36419.32, 36320.7,
36264.24, 36258.08, 36295.34), popn_smok = c(441210.1, 438922.3749356,
436707.501004443, 434617.775035249, 432676.649893459, 430802.904211835,
429060.975973056, 427276.931790465, 425531.406641874, 423828.370806947,
422050.52577905, 420124.404578419, 418051.062251724, 415901.410828628,
413721.712727943, 411522.988164282, 409332.175440519, 407151.360605936,
405020.66961321, 402920.133702152, 400890.97455639, 398968.459862344
), hmq_ref_growth = c(0.0343, 0.0032, 0.0032, 0.0032, 0.0032,
0.0032, 0.0032, 0.0032, 0.0032, 0.0032, 0.0032, 0.0032, 0.0032,
0.0032, 0.0032, 0.0032, 0.0032, 0.0032, 0.0032, 0.0032, 0.0032,
0.0032), hmq_prop_ref = c(0.0343, 0.0375, 0.0407, 0.0439,
0.0471, 0.0503, 0.0535, 0.0567, 0.0599, 0.0631, 0.0663, 0.0695,
0.0727, 0.0759, 0.0791, 0.0823, 0.0855, 0.0887, 0.0919, 0.0951,
0.0983, 0.1015), hmq_quit = c(1210.6805144, 1316.7671248068,
1421.91962327047, 1526.37762592379, 1630.32561679855, 1733.55088654842,
1836.38097716468, 1938.12816260155, 2039.14650062786, 2139.48561583347,
2238.55598873208, 2335.89168945601, 2431.38497805602, 2525.35336655143,
2618.03099814242, 2709.46735407363, 2799.83208001315, 2889.14605485973,
2977.71196299632, 3065.41637720597, 3152.60662391146, 3239.62389408223
), spont_quit = c(9044.80705, 8997.9086861798, 8952.50377059109,
8909.6643882226, 8869.8713228159, 8831.45953634261, 8795.75000744765,
8759.17710170452, 8723.39383615841, 8688.48160154241, 8652.03577847052,
8612.55029385758, 8570.04677616033, 8525.97892198687, 8481.29511092282,
8436.22125736777, 8391.30959653064, 8346.6028924217, 8302.9237270708,
8259.86274089412, 8218.264978406, 8178.85342717805), deaths_smok = c(33090.7575,
32919.17812017, 32753.0625753333, 32596.3331276436, 32450.7487420094,
32310.2178158876, 32179.5731979792, 32045.7698842848, 31914.8554981405,
31787.127810521, 31653.7894334287, 31509.3303433814, 31353.8296688793,
31192.6058121471, 31029.1284545957, 30864.2241123211, 30699.9131580389,
30536.3520454452, 30376.5502209907, 30219.0100276614, 30066.8230917293,
29922.6344896758)), .Names = c("Year", "Popn_15", "Popn_16_64",
"Popn_65_over", "Popn_16_over", "start_15", "start_16", "popn_smok",
"hmq_ref_growth", "hmq_prop_ref", "hmq_quit", "spont_quit", "deaths_smok"
), class = "data.frame", row.names = c(NA, -22L))
Upvotes: 0