Reputation: 49
I have dataset of people with multimorbidity. The diseases are developing at different time points.
ID time_ enrolled Baseline_age dis1 dis1_time dis2 dis2_time dis_3 dis3_time
1 2005.5 35 1 2010.7 1 2012.1 0 2015.3
2 2004.3 49 0 2011.3 1 2013.8 0 2017.9
...
I have summarized the proportion of people at each baseline age group 30-39, 40-49, etc, for having one, two or three diseases. Now I would like to count the number of people who e.g. at the age of 40-49 years had one, two or three diseases. And the proportion would be the proportion they represent of all individuals while at that age. First I thought to calculate the maximum age when they have developed their last disease, but then the categories of the younger age groups will be empty, also the older age groups will be empty, e.g. if the person has developed all the diseases before the age of 70. Does anyone have any thoughts how could I approach this problem? I am using SAS for programming.
Upvotes: 1
Views: 105
Reputation: 27508
There are a couple of considerations:
Approach
MEANS
to sum flags for each id age(standardized) to get count of concurrent diseases in an age rangeMEANS
again to count age and age*flag_countDATA
step to compute proportion of counts [age*flag_count]/[age]Sample code
* custom format and informat for standardizing age;
proc format;
value agegrp
20-29 = '20-29'
30-39 = '30-39'
40-49 = '40-49'
50-59 = '50-59'
60-69 = '60-69'
70-79 = '70-79'
80-89 = '80-89'
90-high = '90 + '
;
invalue agegrp
'20-29' = 20
'30-39' = 30
'40-49' = 40
'50-59' = 50
'60-69' = 60
'70-79' = 70
'80-89' = 80
'90 + ' = 90
;
run;
* generate some sample data;
data have;
call streaminit(123);
do id = 1 to 10000;
enrolled = '01jan2000'd + rand('integer', 1, 3650);
age = 20 + rand('integer', 59);
flag1 = rand('uniform') < 0.25;
date1 = enrolled + rand('integer',2500);
flag2 = rand('uniform') < 0.25;
date2 = date1 + rand('integer',2500);
flag3 = rand('uniform') < 0.25;
date3 = date2 + rand('integer',2500);
output;
end;
format enrolled date: yymmdd10. flag: 1.;
run;
* compute age at each disease flag time point;
data stage1;
set have;
age1 = age + intck('year', enrolled, date1);
age2 = age + intck('year', enrolled, date2);
age3 = age + intck('year', enrolled, date3);
run;
* parallel array based transposition of ages and flags;
data stage2;
set stage1;
* map age in range to first value in range;
* need for later when creating zero counts for intermediate age ranges not in data;
* counts of zero counts needed to compute proportion;
enroll_age = input(put(age,agegrp.),agegrp.);
age = input(put(age1,agegrp.),agegrp.); flag = flag1; output;
age = input(put(age2,agegrp.),agegrp.); flag = flag2; output;
age = input(put(age3,agegrp.),agegrp.); flag = flag3; output;
keep id enroll_age age flag;
format enroll_age age 4. flag 1.;
run;
ods listing;
* compute number of simultaneous diseases in age range;
proc means noprint nway data=stage2;
class id enroll_age age;
format enroll_age age agegrp.;
var flag;
output out=stage3(drop= _type_ _freq_) sum=flag_count;
run;
* data for cross join;
data all_ages;
do age = 20 to 90 by 10;
output;
end;
format age agegrp.;
run;
* combine all_ages with each id so as to get a zero count
* for a range not present between enroll_date and max date;
proc sql;
create table stage4 as
select ids.id, ages.age, coalesce(stage3.flag_count,0) as flag_count
from
( select distinct id from stage3 ) as ids
cross join
all_ages as ages
left join
stage3
on ids.id = stage3.id and ages.age = stage3.age
group by
ids.id
having
ages.age between min(stage3.enroll_age) and max(stage3.age)
order by
ids.id, age
;
quit;
* compute counts;
proc means noprint data=stage4;
class age flag_count;
output out=stage5 N(id)=;
types age age*flag_count;
run;
* compute proportions;
data want;
merge
stage5 (where=(_type_=2) rename=_freq_=age_freq)
stage5 (where=(_type_=3) rename=_freq_=flag_count_freq)
;
by age;
age_flag_count_proportion = flag_count_freq / age_freq;
format age_flag_count_proportion percent5.;
keep age flag_count age_freq flag_count_freq age_flag_count_proportion;
format flag_count 1.;
run;
Example output data
Upvotes: 1