Bob
Bob

Reputation: 121

SAS: Looping and Outputing record if date criteria is met

I have to partition a SAS table with million of records, and output it to multiple SAS tables base on monthly date criteria. For example, if there a customer_id who was effective between the year-month (date format) 201308 and 201408, then there should be 12 tables created for this one record. Each table will have the column fields below, and a newly created column called "YearMonth" for the month it was active, as in the first table should have 201308, 201309, 201310, etc.

Below are tables to demonstrate the above point.

Original table with one sample record

Cust_ID     Eff_YM  Trm_YM   
NH000001    201308  201408

New table 201308

Cust_ID     Eff_YM  Trm_YM  YearMonth     
NH000001    201308  201408  201308

New Table 201309

Cust_ID     Eff_YM  Trm_YM  YearMonth     
NH000001    201308  201408  201309

New Table 201310

Cust_ID     Eff_YM  Trm_YM  YearMonth     
NH000001    201308  201408  201310

Upvotes: 2

Views: 220

Answers (3)

sushil
sushil

Reputation: 1576

The solution to your problem is simple. create a new dataset from the your old dataset and do loop from starting year-month to end year-month. Later create a macro list of unique year-month that are there in your dataset that was previously created and loop through that to create datasets.

data have;
 input cust_id $ eff_ym :yymmn6. trm_ym :yymmn6. ;
 format eff_ym trm_ym yymmdd10.;
datalines;
NH000001    201308  201408
NH000002    201301  201401
;
run;

data staging;
set have;
do i = intck('month',0,eff_ym) to intck('month',0,trm_ym);
    yearmonth=intnx('month',0,i);
    output;
end;

format yearmonth yymmdd10.;
drop i;
run;
%macro splitter;
proc sql noprint;
    select distinct yearmonth format=date9. into :yearmonth1-:yearmonth99999 
      from staging;
quit;

%do i = 1 %to &sqlobs;
    %let dsn=%sysfunc(putn(%sysfunc(inputn(&&yearmonth&i,date9.)),yymmn6.));

        proc append base=data_&dsn data=staging(where=(yearmonth="&&yearmonth&i"d));
        run;
%end;

%mend splitter;
options mprint;
%splitter

Upvotes: 1

Jay Corbett
Jay Corbett

Reputation: 28411

data HAVE;
    Length CUST_ID $8;
    Input Cust_ID $ Eff_YM Trm_YM;
datalines;
NH000001    201308  201408
NH000002    201301  201401
;
run;

Get Min and Max date to use in building all possible Data sets

proc sql noprint;
select min(Eff_YM), max(Trm_YM) into: min_Eff_YM, :max_Trm_YM
    From HAVE;
quit;
%Put min_EFF_YM= &min_EFF_YM;
%Put max_TRM_YM= &max_TRM_YM;

Build all possible data sets and create macro vars for looping

data DSNs(drop=start i);
    Start=input(put(&min_EFF_YM,6.),yymmn6.);
    Diff=intck('month',Start,input(put(&max_TRM_YM,6.),yymmn6.));
    Put DIFF=;

    Do i = 0 to diff;
        DSN=Cats("_",put(intnx('Month',Start,i,'b'),yymmn6.));
        Output;
    End;
run;

Proc sql noprint;
    Select count(dsn) into :cnt separated by "" from DSNs;
    Select dsn into :all1 - :all&cnt from DSNs;
Quit;
%Put CNT: &cnt;
%Put ALL1: &all1;
%Put ALL&cnt: &&all&cnt;

Create data sets and insert appropriate records

%Macro Create_Tables;
Data %do i = 1 %to &cnt; &&all&i %end;
    ;
    set HAVE;
    %do i=0 %to 12;
        YearMonth_dt=intnx('month',input(put(EFF_YM,6.),yymmn6.),&i);
        YearMonth=input(put(YearMonth_dt,yymmn6.),6.);
        YearMonth_dsn=cats("_",put(yearmonth_dt,yymmn6.));
        %do j = 1 %to &cnt;
            %Let DSN=&&all&j;
            if YearMonth_dsn="&dsn" then output &dsn;
        %end;
    %end;
    Keep CUST_ID EFF_YM TRM_YM YEARMONTH;
run;
%Mend;
%Create_Tables ;

Upvotes: 1

in_user
in_user

Reputation: 1958

Creating the sample dataset.

data test;
infile datalines;
input Cust_ID : $10.
      Eff_YM : 8.
      Trm_YM : 8.
      ;
datalines;
NH000001    201308  201408
NH000001    201308  201312
;
run;

Selecting minimum and maximum period from the dataset. There would be as many distinct datasets as many distinct intervals.

proc sql noprint;
select min(Eff_YM) into: min_Eff_YM from test;
select max(Trm_YM) into: max_Trm_YM from test;
quit;

As we need to specify the name of the datasets beforehand in the data statement, so creating the list of names here.

data dataset_names(keep=period dataset_name);
length dataset_name $20.;
format min_date date9. max_date date9.;
min_date=mdy((substr(compress(&min_Eff_YM.),5,2)),1,(substr(compress(&min_Eff_YM.),1,4)));
max_date=mdy((substr(compress(&max_Trm_YM.),5,2)),1,(substr(compress(&max_Trm_YM.),1,4)));
no_of_months=intck('month',min_date,max_date);
do i=0 to no_of_months;
period=put(intnx('month',min_date,i),yymmn6.);
dataset_name=compress(cat("dataset_",period));
output;
end;
run;


proc sql noprint;
select dataset_name into :all_datsets separated by " " from dataset_names;
select count(dataset_name) into :num_datasets from dataset_names;
select period into: all_periods separated by "," from dataset_names;
quit;

Creating the list of possible records by using the intervals between Eff_YM and Trm_YM

%macro chk(YYMM);
data test_all;
set test;
No_of_loop=intck('month',
                  mdy((substr(compress(Eff_YM),5,2)),1,(substr(compress(Eff_YM),1,4))),
                  mdy((substr(compress(Trm_YM),5,2)),1,(substr(compress(Trm_YM),1,4))));

do i=0 to No_of_loop;
YearMonth = put(intnx('month',mdy((substr(compress(Eff_YM),5,2)),1,(substr(compress(Eff_YM),1,4))),i),yymmn6.);
output;
end;
run;
%mend;
%chk;

Dividing the dataset into separate datasets as per the period name

%macro data_dates;

data &all_datsets.;
set test_all;
%do i=1 %to &num_datasets.;
if YearMonth=scan("&all_periods.",&i.,",") then do;
output dataset_%sysfunc(scan("&all_periods.",&i.,","));
end;
%end;
run;
%mend;
%data_dates;

Upvotes: 2

Related Questions