Reputation: 121
I have to partition a SAS table with million of records, and output it to multiple SAS tables base on monthly date criteria. For example, if there a customer_id who was effective between the year-month (date format) 201308 and 201408, then there should be 12 tables created for this one record. Each table will have the column fields below, and a newly created column called "YearMonth" for the month it was active, as in the first table should have 201308, 201309, 201310, etc.
Below are tables to demonstrate the above point.
Original table with one sample record
Cust_ID Eff_YM Trm_YM
NH000001 201308 201408
New table 201308
Cust_ID Eff_YM Trm_YM YearMonth
NH000001 201308 201408 201308
New Table 201309
Cust_ID Eff_YM Trm_YM YearMonth
NH000001 201308 201408 201309
New Table 201310
Cust_ID Eff_YM Trm_YM YearMonth
NH000001 201308 201408 201310
Upvotes: 2
Views: 220
Reputation: 1576
The solution to your problem is simple. create a new dataset from the your old dataset and do loop from starting year-month to end year-month. Later create a macro list of unique year-month that are there in your dataset that was previously created and loop through that to create datasets.
data have;
input cust_id $ eff_ym :yymmn6. trm_ym :yymmn6. ;
format eff_ym trm_ym yymmdd10.;
datalines;
NH000001 201308 201408
NH000002 201301 201401
;
run;
data staging;
set have;
do i = intck('month',0,eff_ym) to intck('month',0,trm_ym);
yearmonth=intnx('month',0,i);
output;
end;
format yearmonth yymmdd10.;
drop i;
run;
%macro splitter;
proc sql noprint;
select distinct yearmonth format=date9. into :yearmonth1-:yearmonth99999
from staging;
quit;
%do i = 1 %to &sqlobs;
%let dsn=%sysfunc(putn(%sysfunc(inputn(&&yearmonth&i,date9.)),yymmn6.));
proc append base=data_&dsn data=staging(where=(yearmonth="&&yearmonth&i"d));
run;
%end;
%mend splitter;
options mprint;
%splitter
Upvotes: 1
Reputation: 28411
data HAVE;
Length CUST_ID $8;
Input Cust_ID $ Eff_YM Trm_YM;
datalines;
NH000001 201308 201408
NH000002 201301 201401
;
run;
Get Min and Max date to use in building all possible Data sets
proc sql noprint;
select min(Eff_YM), max(Trm_YM) into: min_Eff_YM, :max_Trm_YM
From HAVE;
quit;
%Put min_EFF_YM= &min_EFF_YM;
%Put max_TRM_YM= &max_TRM_YM;
Build all possible data sets and create macro vars for looping
data DSNs(drop=start i);
Start=input(put(&min_EFF_YM,6.),yymmn6.);
Diff=intck('month',Start,input(put(&max_TRM_YM,6.),yymmn6.));
Put DIFF=;
Do i = 0 to diff;
DSN=Cats("_",put(intnx('Month',Start,i,'b'),yymmn6.));
Output;
End;
run;
Proc sql noprint;
Select count(dsn) into :cnt separated by "" from DSNs;
Select dsn into :all1 - :all&cnt from DSNs;
Quit;
%Put CNT: &cnt;
%Put ALL1: &all1;
%Put ALL&cnt: &&all&cnt;
Create data sets and insert appropriate records
%Macro Create_Tables;
Data %do i = 1 %to &cnt; &&all&i %end;
;
set HAVE;
%do i=0 %to 12;
YearMonth_dt=intnx('month',input(put(EFF_YM,6.),yymmn6.),&i);
YearMonth=input(put(YearMonth_dt,yymmn6.),6.);
YearMonth_dsn=cats("_",put(yearmonth_dt,yymmn6.));
%do j = 1 %to &cnt;
%Let DSN=&&all&j;
if YearMonth_dsn="&dsn" then output &dsn;
%end;
%end;
Keep CUST_ID EFF_YM TRM_YM YEARMONTH;
run;
%Mend;
%Create_Tables ;
Upvotes: 1
Reputation: 1958
Creating the sample dataset
.
data test;
infile datalines;
input Cust_ID : $10.
Eff_YM : 8.
Trm_YM : 8.
;
datalines;
NH000001 201308 201408
NH000001 201308 201312
;
run;
Selecting minimum
and maximum
period from the dataset
. There would be as many distinct datasets
as many distinct intervals.
proc sql noprint;
select min(Eff_YM) into: min_Eff_YM from test;
select max(Trm_YM) into: max_Trm_YM from test;
quit;
As we need to specify the name of the datasets
beforehand in the data
statement, so creating the list of names here.
data dataset_names(keep=period dataset_name);
length dataset_name $20.;
format min_date date9. max_date date9.;
min_date=mdy((substr(compress(&min_Eff_YM.),5,2)),1,(substr(compress(&min_Eff_YM.),1,4)));
max_date=mdy((substr(compress(&max_Trm_YM.),5,2)),1,(substr(compress(&max_Trm_YM.),1,4)));
no_of_months=intck('month',min_date,max_date);
do i=0 to no_of_months;
period=put(intnx('month',min_date,i),yymmn6.);
dataset_name=compress(cat("dataset_",period));
output;
end;
run;
proc sql noprint;
select dataset_name into :all_datsets separated by " " from dataset_names;
select count(dataset_name) into :num_datasets from dataset_names;
select period into: all_periods separated by "," from dataset_names;
quit;
Creating the list of possible records by using the intervals between Eff_YM
and Trm_YM
%macro chk(YYMM);
data test_all;
set test;
No_of_loop=intck('month',
mdy((substr(compress(Eff_YM),5,2)),1,(substr(compress(Eff_YM),1,4))),
mdy((substr(compress(Trm_YM),5,2)),1,(substr(compress(Trm_YM),1,4))));
do i=0 to No_of_loop;
YearMonth = put(intnx('month',mdy((substr(compress(Eff_YM),5,2)),1,(substr(compress(Eff_YM),1,4))),i),yymmn6.);
output;
end;
run;
%mend;
%chk;
Dividing the dataset into separate datasets as per the period name
%macro data_dates;
data &all_datsets.;
set test_all;
%do i=1 %to &num_datasets.;
if YearMonth=scan("&all_periods.",&i.,",") then do;
output dataset_%sysfunc(scan("&all_periods.",&i.,","));
end;
%end;
run;
%mend;
%data_dates;
Upvotes: 2