Ken
Ken

Reputation: 893

Detect the difference b/w ages greater than some value using SAS

I am trying to detect groups which contain the difference between first age and second age are greater than 5. For example, if I have the following data, the difference between age in grp=1 is 39 so I want to output that group in a separate data set. Same goes for grp 4.

id  grp age sex
1   1   60  M
2   1   21  M
3   2   30  M
4   2   25  F
5   3   45  F
6   3   30  F
7   3   18  M
8   4   32  M
9   4   18  M
10  4   16  M

My initial idea was to sort them by grp and then get the absolute value between ages using something like if first.grp then do;. But I don't know how to get the absolute value between first age and second age by group or actually I don't know how should I start this.

Thanks in advance.

Upvotes: 0

Views: 314

Answers (5)

Todd
Todd

Reputation: 21

data have;
input id $ grp $ age sex $;
datalines;
1   1   60  M
2   1   21  M
3   2   30  M
4   2   25  F
5   3   45  F
6   3   30  F
7   3   18  M
8   4   32  M
9   4   18  M
10  4   16  M
;

data want;
  do i = 1 by 1 until(last.grp);
    set have;
by grp notsorted;
if first.grp then cnt = 0;
cnt + 1;
    if cnt = 1 then age1 = age;
if cnt = 2 then age2 = age;
diff = sum( age1, -age2 );
  end;
 do until(last.grp);
     set have;
 by grp;
 if diff > 5 then output;
 end;
 run;

Upvotes: 0

Robert Penridge
Robert Penridge

Reputation: 8513

Here's an SQL approach (using CarolinaJay's code to create the dataset):

data groups1;

 input id grp age sex $;
 datalines;
1   1   60  M
2   1   21  M
3   2   30  M
4   2   25  F
5   3   45  F
6   3   30  F
7   3   18  M
8   4   32  M
9   4   18  M
10  4   16  M
;
run;

proc sql noprint;
  create table xx as
  select a.*
  from groups1 a
  where grp in (select b.grp
                from groups1 b
                join groups1 c  on c.id = b.id+1
                               and c.grp = b.grp
                               and abs(c.age - b.age) > 5
                left join groups1 d  on d.id = b.id-1
                                    and d.grp = b.grp
                where d.id eq .
               )
  ;   
quit;

The join on C finds all occurrences where the subsequent record in the same group has an absolute value > 5. The join on D (and the where clause) makes sure we only consider the results from the C join if the record is the very first record in the group.

Upvotes: 0

Longfish
Longfish

Reputation: 7602

If you are after the difference between just the 1st and 2nd ages, then the following code is a fairly straightforward way of extracting these. It reads though the dataset to identify the groups, then uses the direct access method, POINT=, to extract the relevant records. I put in an extra condition, grp=lag(grp) just in case you have any groups with only 1 record.

data want;
set have;
by grp;
if first.grp then do;
    num_grp=0;
    outflag=0;
    end;
outflag+ifn(lag(first.grp)=1 and grp=lag(grp) and abs(dif(age))>5,1,0) /* set flag to determine if group meets criteria */;
if not first.grp then num_grp+1; /* count number of records in group */
if last.grp and outflag=1 then do i=_n_-num_grp to _n_;
    set have point=i; /* extract required group records */
    drop num_grp outflag;
    output;
    end;
run;

Upvotes: 0

DavB
DavB

Reputation: 1696

I would use PROC TRANSPOSE so the values in each group can easily be compared. For example:

data groups1;

 input id $ grp age sex $;
 datalines;
1   1   60  M
2   1   21  M
3   2   30  M
4   2   25  F
5   3   45  F
6   3   30  F
7   3   18  M
8   4   32  M
9   4   18  M
10  4   16  M
;
run;

proc sort data=groups1;
  by grp; /* This maintains age order */
run;

proc transpose data=groups1 out=groups2;
  by grp;
  var age;
run;

With the transposed data you can do whatever comparison you like (I can't tell from your question what exactly you want, so I just compare first two ages):

/* With all ages of a particular group in a single row, it is easy to compare */
data outgroups1(keep=grp);
  set groups2;
  if abs(col1-col2)>5 then output;
run;

In this instance this would be my preferred method for creating a separate data set for each group that satisfies whatever condition is applied (generate and include code dynamically):

/* A separate data set per GRP value in OUTGROUPS1 */
filename dynacode catalog "work.dynacode.mycode.source";
data _null_;
  set outgroups1;
  file dynacode;
  put "data grp" grp ";";
  put "  set groups1(where=(grp=" grp "));";
  put "run;" /;
run;

%inc dynacode;

Upvotes: 1

Jay Corbett
Jay Corbett

Reputation: 28421

Here's one way that I think works.

data have;
 input id $ grp $ age sex $;
 datalines;
1   1   60  M
2   1   21  M
3   2   30  M
4   2   25  F
5   3   45  F
6   3   30  F
7   3   18  M
8   4   32  M
9   4   18  M
10  4   16  M
;

proc sort data=have ;
 by grp descending age;
run;

data temp(keep=grp);
 retain old;
 set have;
 by grp descending age;
 if first.grp then old=age;
 if last.grp then do;
  diff=old-age;
  if diff>5 then output ;
 end;
run;

Data want;
 merge temp(in=a) have(in=b);
 by grp ;
 if a and b;
run;

Upvotes: 2

Related Questions