Qimeng Zhang
Qimeng Zhang

Reputation: 31

how to create variables that names are concat with two array variable names

I have a HCC dataset DATA_HCC that with member ID and 79 binary variables:

Member_ID   HCC1    HCC2    HCC6    HCC8 ...    HCC189
XXXXXXX1    1       0       1       0    ...    0
XXXXXXX2    0       0       1       0    ...    0
XXXXXXX3    0       1       0       0    ...    1

I am trying to create a output dataset that could create new binary variables for all the combination of those 79 variables. Each new variable represents if a member had both of the variables as 1.

%LET hccList =  HCC1 HCC2 HCC6 HCC8 HCC9 HCC10 HCC11 HCC12 HCC17 HCC18 HCC19 HCC21 HCC22 HCC23 HCC27 
                HCC28 HCC29 HCC33 HCC34 HCC35 HCC39 HCC40 HCC46 HCC47 HCC48 HCC54 HCC55 HCC57 HCC58 
                HCC70 HCC71 HCC72 HCC73 HCC74 HCC75 HCC76 HCC77 HCC78 HCC79 HCC80 HCC82 HCC83 HCC84 
                HCC85 HCC86 HCC87 HCC88 HCC96 HCC99 HCC100 HCC103 HCC104 HCC106 HCC107 HCC108 HCC110 
                HCC111 HCC112 HCC114 HCC115 HCC122 HCC124 HCC134 HCC135 HCC136 HCC137 HCC157 HCC158 
                HCC161 HCC162 HCC166 HCC167 HCC169 HCC170 HCC173 HCC176 HCC186 HCC188 HCC189;

DATA COUNT_HCC; SET DATA_HCC;
    ARRAY HCC [*] &hccList.;
    DO i = 1 TO DIM(HCC);
        DO j = i+1 TO DIM(HCC);
            %LET HCC_COMBO = CATX('_', VARNAME(HCC[i]), VARNAME(HCC[j]));
            &HCC_COMBO. = MIN(HCC[i], HCC[j]);
        END;
    END;
RUN;

I tried to use CATX function to just concat the two variable names but it didn't work.

Here is the log error that I got:

ERROR: Undeclared array referenced: CATX.
ERROR: Variable CATX has not been declared as an array.
ERROR 71-185: The VARNAME function call does not have enough arguments.

And the results output sample would like this:

Member_ID   HCC1_HCC2    HCC1_HCC6    HCC1_HCC8   ...    HCC188_HCC189
XXXXXXX1    0               1           0         ...       0
XXXXXXX2    0               0           0         ...       0
XXXXXXX3    0               0           0         ...       1

Upvotes: 1

Views: 312

Answers (3)

Stu Sztukowski
Stu Sztukowski

Reputation: 12909

To achieve dynamic variable name generation, use a macro to create the variables that you need. The below code generates dynamic variable names and generates data step code to create the variables.

%macro get_hcc_combo_mins;
    %do i = 1 %to %sysfunc(countw(&hccList.));
        %do j = %eval(&i.+1) %to %sysfunc(countw(&hccList.));
            %let hcc1 = %scan(&hccList., &i.);
            %let hcc2 = %scan(&hccList., &j.);

            &hcc1._&hcc2. = min(&hcc1., &hcc2.);
        %end;
    %end;
%mend;

DATA COUNT_HCC; SET DATA_HCC;
    ARRAY HCC [*] &hccList.;
    %get_hcc_combo_mins;
RUN;

The macro %get_hcc_combo_mins generates this code in the data step:

HCC1_HCC2 = min(HCC1, HCC2);
HCC1_HCC6 = min(HCC1, HCC6);
HCC1_HCC8 = min(HCC1, HCC8);
...

There may be other ways to do this all within one data step that I'm not aware of, but macros can get the job done.

Upvotes: 2

Richard
Richard

Reputation: 27508

A DATA Step with LEXCOMB can generate variable name pairs. CALL EXECUTE submit a statement using those names.

Example:

Presume HCC: variable names, which specific ones not known apriori.

data have;
  call streaminit(1234);
  do id = 1 to 100;
    array hcc hcc1 hcc3 hcc5 hcc7 hcc10-hcc79 hcc150 hcc155 hcc180 hcc190-hcc191;
    do over hcc; 
      hcc = rand('uniform', dim(hcc)) < _i_;
    end;
    output;
  end;
run;

data _null_;
  set have;
  array hcc hcc:;

  do _n_ = 1 to dim(hcc);
    hcc(_n_) = _n_;
  end;

  call execute("data pairwise; set have;");

  do _n_ = 1 to comb(dim(hcc),2);
    call lexcomb(_n_, 2, of hcc(*));

    index1 = hcc(1);
    index2 = hcc(2);

    name1 = vname(hcc(index1));
    name2 = vname(hcc(index2));

    put name1=;

    call execute (cats(
      catx(    '_',name1,name2),
      '=',
      catx(' and ',name1,name2),
      ';'
    ));
  end;

  call execute('run;');

  stop;
run;

Upvotes: 1

PeterClemmensen
PeterClemmensen

Reputation: 4937

See if you can use this as a template.

/* Example data */
data have (drop = i j);
   array h {*} HCC1 HCC2 HCC6 HCC8 HCC9 HCC10 HCC11 HCC12 HCC17 HCC18 HCC19 HCC21 HCC22 HCC23 HCC27 
               HCC28 HCC29 HCC33 HCC34 HCC35 HCC39 HCC40 HCC46 HCC47 HCC48 HCC54 HCC55 HCC57 HCC58 
               HCC70 HCC71 HCC72 HCC73 HCC74 HCC75 HCC76 HCC77 HCC78 HCC79 HCC80 HCC82 HCC83 HCC84 
               HCC85 HCC86 HCC87 HCC88 HCC96 HCC99 HCC100 HCC103 HCC104 HCC106 HCC107 HCC108 HCC110 
               HCC111 HCC112 HCC114 HCC115 HCC122 HCC124 HCC134 HCC135 HCC136 HCC137 HCC157 HCC158 
               HCC161 HCC162 HCC166 HCC167 HCC169 HCC170 HCC173 HCC176 HCC186 HCC188 HCC189;

   do i = 1 to 10;
      do j = 1 to dim (h);
         h [j] = rand('uniform') > .5;
      end;
      output;
   end;
run;

/* Create long version of output data */
data temp (drop = i j);
   set have;
   array a {*} HC:;
   do i = 1 to dim (a)-1;
      do j = i+1 to dim (a);
         v = catx('_',  vname (a[i]), vname (a[j]));
         d = a [i] * a [j];
         n = _N_;
         output;
      end;
   end;
run;

/* Transpose to wide format */
proc transpose data=temp out=temp2 (drop=_: n);
    by n;
    id v;
    var d;
run;

/* Merge back with original data */
data want;
   merge have temp2;
run;

Upvotes: 0

Related Questions