Reputation: 7607
I am having a SAS dataset where I want to keep, let's say, the firt 2
columns and last 4
columns, so to speak. In other words, only columns from beginning and from end.
data test;
input a b c d e f g h i j;
cards;
1 2 3 4 5 6 7 8 9 10
;
What I want is the following -
Output desired:
I checked on the net, people are trying something with varnum
, as shown here
, but I can't figure out. I don't want to use keep/drop
, rather I want an automated way to solve this issue.
Upvotes: 0
Views: 1375
Reputation: 27536
%DOSUBL
can run code in a separate stream and be part of a code generation scheme at code submit (pre-run) time.
Suppose the requirement is to to slice the columns of a data set out based on meta data column position as indicated by varnum
(i.e. places), and the syntax for places is:
p:q
to select the range of columns whose varnum
position is between p
and q
) p
, can be specifiedkeep= drop=
All the complex logic for implementing the requirements could be done in pure macro code using only %sysfunc
and data functions such as open
, varnum
, varname
, etc... That code would be pretty unwieldy.
The selection of names from meta data can be cleaner using SAS features such as Proc CONTENTS
and Proc SQL
executed within DOSUBL
.
Example:
Macro logic is used to construct (or map) the filtering criteria statement based on varnum
. Metadata retrieval and processing done with Procs.
%macro columns_slice (data=, places=);
%local varlist temp index p token part1 part2 filter joiner;
%let temp = __&sysmacroname._%sysfunc(monotonic());
%do index = 1 %to %sysfunc(countw(&places,%str( )));
%let token = %scan(&places,&index,%str( ));
%if NOT %sysfunc(prxmatch(/^(-?\d+:)?-?\d+$/,&token)) %then %do;
%put ERROR: &sysmacname, invalid places=&places;
%return;
%end;
%let part1 = %scan (%superq(token),1,:);
%let part2 = %scan (%superq(token),2,:);
%if %qsubstr(&part1,1,1) = %str(-) %then
%let part1 = max(varnum) + 1 &part1;
%if %length(&part2) %then %do;
%if %qsubstr(&part2,1,1) = %str(-) %then
%let part2 = max(varnum) + 1 &part2;
%end;
%else
%let part2 = &part1;
%let filter=&filter &joiner (varnum between &part1. and &part2.) ;
%let joiner = OR;
%end;
%put NOTE: &=filter;
%if 0 eq %sysfunc(dosubl(%nrstr(
options nonotes;
proc contents noprint data=&data out=&temp(keep=name varnum);
proc sql noprint;
select name
into :varlist separated by ' '
from &temp
having &filter
order by varnum
;
drop table &temp;
quit;
)))
%then %do;&varlist.%end;
%else
%put ERROR: &sysmacname;
%mend;
Using the slicer
* create sample table for demonstration;
data lotsa_columns(label='A silly 1:1 merge');
if _n_ > 10 then stop;
merge
sashelp.class
sashelp.cars
;
run;
%put %columns_slice (data=lotsa_columns, places=1:3);
%put %columns_slice (data=lotsa_columns, places=-1:-5);
%put %columns_slice (data=lotsa_columns, places=2:4 -2:-4 6 7 8);
1848 %put %columns_slice (data=lotsa_columns, places=1:3);
NOTE: FILTER=(varnum between 1 and 3)
Name Sex Age
1849 %put %columns_slice (data=lotsa_columns, places=-1:-5);
NOTE: FILTER=(varnum between max(varnum) + 1 -1 and max(varnum) + 1 -5)
Horsepower MPG_City MPG_Highway Wheelbase Length
1850 %put %columns_slice (data=lotsa_columns, places=2:4 -2:-4 6 7 8);
NOTE: FILTER=(varnum between 2 and 4) OR (varnum between max(varnum) + 1 -2 and max(varnum) + 1
-4) OR (varnum between 6 and 6) OR (varnum between 7 and 7) OR (varnum between 8 and 8)
Sex Age Height Make Model Type MPG_City MPG_Highway Wheelbase
Honoring options
data have;
array x(100);
array y(100);
array z(100);
run;
%put %columns_slice (data=have(keep=x:), places=2:4 8:10 -2:-4 -25:-27 -42);
1858 %put %columns_slice (data=have(keep=x:), places=2:4 8:10 -2:-4 -25:-27 -42);
NOTE: FILTER=(varnum between 2 and 4) OR (varnum between 8 and 10) OR (varnum between max(varnum)
+ 1 -2 and max(varnum) + 1 -4) OR (varnum between max(varnum) + 1 -25 and max(varnum) + 1 -27) OR
(varnum between max(varnum) + 1 -42 and max(varnum) + 1 -42)
x2 x3 x4 x8 x9 x10 x59 x74 x75 x76 x97 x98 x99
Upvotes: 2
Reputation: 51621
If the names follow a pattern just generate the list using the pattern. So if the names look like month names you just need to know one month to generate the other.
%let last_month = '01JAN2019'd ;
%let first_var = %sysfunc(intnx(month,&last_month,-12),monyy7.);
%let last_var = %sysfunc(intnx(month,&last_month,-0),monyy7.);
data want;
set have(keep= id1 id2 &first_var -- &last_var);
run;
If you cannot find a SAS function or format that generates the names in the style your variables use then write your own logic.
data _null_;
array month_abbr [12] $3 _temporary_ ('JAN' 'FEB' 'MAR' 'APR' 'MAY' 'JUN' 'JUL' 'AUG' 'SEP' 'OKT' 'NOV' 'DEK' );
last_month=today();
first_month=intnx('month',last_month,-12);
call symputx('first_var',catx('_',month_abbr[month(first_month)],year(first_month)));
call symputx('last_var',catx('_',month_abbr[month(last_month)],year(last_month)));
run;
Upvotes: 1
Reputation: 1770
If you don't know number of variables, you can use this macro(you should specify num of first variables and num of last variables to keep in data set, libname and name of dataset):
%macro drop_vars(num_first_vars,num_end_vars,lib,dataset); %macro d;%mend d;
proc sql noprint;;
select sum(num_character,num_numeric) into:ncolumns
from dictionary.tables
where libname=upcase("&lib") and memname=upcase("&dataset");
select name into: vars_to_drop separated by ','
from dictionary.columns
where libname=upcase("&lib") and
memname=upcase("&dataset") and
varnum between %eval(&num_first_vars.+1) and %eval(&ncolumns-&num_end_vars);
alter table &lib..&dataset
drop &vars_to_drop;
quit;
%mend drop_vars;
%drop_vars(2,3,work,test);
Dataset before macro execution:
+---+---+---+---+---+---+---+---+---+----+
| a | b | c | d | e | f | g | h | i | j |
+---+---+---+---+---+---+---+---+---+----+
| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
+---+---+---+---+---+---+---+---+---+----+
Dataset after macro execution:
+---+---+---+---+----+
| a | b | h | i | j |
+---+---+---+---+----+
| 1 | 2 | 8 | 9 | 10 |
+---+---+---+---+----+
Upvotes: 1