Reputation: 57
For each observation in Data1, if Flag=1, replace the Val with values in another dataset Data2. For example, if an observation in Data1 has Flag=1, suppose it has Visit=Week2 and Group=2, then I want to replace that Val with the "Week2_2" value in Data2. This process loop through each row in Data2 to have N new datasets.
My thought is to create a macro for the variable name thing and then loop through each obs in Data1, if satisfies the condition, replace the value, and then loop through each row in Data2. But I am really bad at macro and keep getting errors for days... Thank you for helping!
data Data1;
length ID $3 Visit $8 Group Flag Value 8;
do id_num=1 to 10;
ID=put(id_num,z3.);
Group=rand('Interger',1,3);
do visit_num=1 to 5;
Visit=catx(' ', 'Week', visit_num);
Flag=rand('Bernoulli', 0.4);
Value=rand('Uniform');
output;
end;
end;
drop id_num visit_num;
run;
data Data2;
input Week1_1 Week1_2 Week1_3 Week2_1 Week2_2 Week2_3
Week3_1 Week3_2 Week3_3 Week4_1 Week4_2 Week4_3
Week5_1 Week5_2 Week5_3;
datalines;
0.123 0.234 0.345 0.456 0.567 0.678 0.789 0.890 0.901 0.012 0.123 0.234 0.345 0.456 0.567
0.234 0.345 0.456 0.567 0.678 0.789 0.890 0.901 0.012 0.123 0.234 0.345 0.456 0.567 0.678
0.345 0.456 0.567 0.678 0.789 0.890 0.901 0.012 0.123 0.234 0.345 0.456 0.567 0.678 0.789
;
run;
Upvotes: 0
Views: 43
Reputation: 27508
The requirement to produce one output table per row of DATA2 necessitates writing a macro to generate code that will produces those tables.
It is often a best practice to write a solution for a fixed case and then abstract the parts that scale to the 'multiple' requirement.
Consider this example that reshapes the replacements with transpose and uses a hash to perform the lookup replacements, and the macro code generates a DATA statement with multiple output data sets listed.
data Data1;
length ID $3 Visit $8 Group Flag Value 8;
do id_num=1 to 10;
ID=put(id_num,z3.);
Group=rand('Interger',1,3);
do visit_num=1 to 5;
Visit=catx(' ', 'Week', visit_num);
Flag=rand('Bernoulli', 0.4);
Value=round(rand('Uniform'),0.001) ;
output;
end;
end;
drop id_num visit_num;
format value 5.3;
run;
data Data2;
input Week1_1 Week1_2 Week1_3 Week2_1 Week2_2 Week2_3
Week3_1 Week3_2 Week3_3 Week4_1 Week4_2 Week4_3
Week5_1 Week5_2 Week5_3;
datalines;
0.123 0.234 0.345 0.456 0.567 0.678 0.789 0.890 0.901 0.012 0.123 0.234 0.345 0.456 0.567
0.234 0.345 0.456 0.567 0.678 0.789 0.890 0.901 0.012 0.123 0.234 0.345 0.456 0.567 0.678
0.345 0.456 0.567 0.678 0.789 0.890 0.901 0.012 0.123 0.234 0.345 0.456 0.567 0.678 0.789
;
;
%macro dupWithReplacements (base=data1, data=data2, outprefix=RDUP) ;
%local i outn ;
proc sql noprint;
select count(*) into :outn trimmed from &data ;
quit ;
proc transpose data=&data out=lookups;
run ;
data %do i = 1 %to &outn ; &outprefix.&i. %end ; %str(;)
set &base ;
array col(&outn) ;
if _n_ = 1 then do ;
declare hash replacements(dataset:'lookups');
replacements.defineKey('_NAME_') ;
replacements.defineData(all:'yes') ;
replacements.defineDone() ;
end ;
if flag then do ;
_name_ = compress(Visit)||'_'||cats(group) ;
rc = replacements.find() ;
if rc ne 0 then put 'WARNING: Key not found ' _name_=;
orig_value = value ;
%do i = 1 %to &outn ;
if rc = 0 then do ;
put "NOTE: Replacing in output &i " _name_ orig_value= 'with ' col(&i)= ;
value = col(&i) ;
end ;
output &outprefix.&i. ;
%end ;
end ;
else do ;
* flag not set ;
%do i = 1 %to &outn ;
output &outprefix.&i. ;
%end ;
end ;
drop _name_ col: orig_value rc ;
run;
%mend dupWithReplacements ;
options mprint;
%dupWithReplacements(base=data1, data=data2)
will log
MPRINT(DUPWITHREPLACEMENTS): data RDUP1 RDUP2 RDUP3 ;
MPRINT(DUPWITHREPLACEMENTS): set data1 ;
MPRINT(DUPWITHREPLACEMENTS): array col(3) ;
MPRINT(DUPWITHREPLACEMENTS): if _n_ = 1 then do ;
MPRINT(DUPWITHREPLACEMENTS): declare hash replacements(dataset:'lookups');
MPRINT(DUPWITHREPLACEMENTS): replacements.defineKey('_NAME_') ;
MPRINT(DUPWITHREPLACEMENTS): replacements.defineData(all:'yes') ;
MPRINT(DUPWITHREPLACEMENTS): replacements.defineDone() ;
MPRINT(DUPWITHREPLACEMENTS): end ;
MPRINT(DUPWITHREPLACEMENTS): if flag then do ;
MPRINT(DUPWITHREPLACEMENTS): _name_ = compress(Visit)||'_'||cats(group) ;
MPRINT(DUPWITHREPLACEMENTS): rc = replacements.find() ;
MPRINT(DUPWITHREPLACEMENTS): if rc ne 0 then put 'WARNING: Key not found ' _name_=;
MPRINT(DUPWITHREPLACEMENTS): orig_value = value ;
MPRINT(DUPWITHREPLACEMENTS): if rc = 0 then do ;
MPRINT(DUPWITHREPLACEMENTS): put "NOTE: Replacing in output 1 " _name_ orig_value= 'with ' col(1)= ;
MPRINT(DUPWITHREPLACEMENTS): value = col(1) ;
MPRINT(DUPWITHREPLACEMENTS): end ;
MPRINT(DUPWITHREPLACEMENTS): output RDUP1 ;
MPRINT(DUPWITHREPLACEMENTS): if rc = 0 then do ;
MPRINT(DUPWITHREPLACEMENTS): put "NOTE: Replacing in output 2 " _name_ orig_value= 'with ' col(2)= ;
MPRINT(DUPWITHREPLACEMENTS): value = col(2) ;
MPRINT(DUPWITHREPLACEMENTS): end ;
MPRINT(DUPWITHREPLACEMENTS): output RDUP2 ;
MPRINT(DUPWITHREPLACEMENTS): if rc = 0 then do ;
MPRINT(DUPWITHREPLACEMENTS): put "NOTE: Replacing in output 3 " _name_ orig_value= 'with ' col(3)= ;
MPRINT(DUPWITHREPLACEMENTS): value = col(3) ;
MPRINT(DUPWITHREPLACEMENTS): end ;
MPRINT(DUPWITHREPLACEMENTS): output RDUP3 ;
MPRINT(DUPWITHREPLACEMENTS): end ;
MPRINT(DUPWITHREPLACEMENTS): else do ;
MPRINT(DUPWITHREPLACEMENTS): * flag not set ;
MPRINT(DUPWITHREPLACEMENTS): output RDUP1 ;
MPRINT(DUPWITHREPLACEMENTS): output RDUP2 ;
MPRINT(DUPWITHREPLACEMENTS): output RDUP3 ;
MPRINT(DUPWITHREPLACEMENTS): end ;
MPRINT(DUPWITHREPLACEMENTS): drop _name_ col: orig_value rc ;
MPRINT(DUPWITHREPLACEMENTS): run;
NOTE: There were 15 observations read from the data set WORK.LOOKUPS.
NOTE: Replacing in output 1 Week2_2 orig_value=0.887 with col1=0.567
NOTE: Replacing in output 2 Week2_2 orig_value=0.887 with col2=0.678
NOTE: Replacing in output 3 Week2_2 orig_value=0.887 with col3=0.789
NOTE: Replacing in output 1 Week3_2 orig_value=0.255 with col1=0.89
...
Upvotes: 2