PROC DS2 performance issues

I am trying to use PROC DS2's multithreading capability to get a performance increase over the normal DATA step.
fred.testdata is an SPDE dataset containing 5 million observations. My code is below:

proc ds2; 
   /* Thread program: each thread reads a share of fred.testdata and derives
      claimtypedet from claim. */
   thread home_claims_thread / overwrite = yes;
   /*declare char(10) producttype;
   declare char(12) wrknat_clmtype;
   declare char(7) claimtypedet;
   declare char(1) event_flag;*/
   /*declare date week_ending having format date9.;*/
   method run();
      /*declare char(7) _week_ending;*/
      set fred.testdata;
      if claim = 'X' then claimtypedet= 'ABC';
      else if claim = 'Y' then claimtypedet= 'DEF';
      /*_week_ending = COMPRESS(exposmth,'M');
    week_ending = to_date(substr(_week_ending,1,4) || '-' || substr(_week_ending,5,2) || '-01');*/
   end;       /* closes method run() */
   endthread; /* closes the thread program */
   run;       /* compiles and stores the thread program */

   /* Data program: collects the rows produced by 8 instances of the thread. */
   data home_claims / overwrite = yes;
   declare thread home_claims_thread t;  
   method run();
      set from t threads=8;
   end;       /* closes method run() */
   enddata;   /* closes the data program */
   run;
quit;

I have included only a few of the IF statements; the full set would take up several pages, but hopefully you get the idea. As it stands, the code runs considerably faster than the normal data step. However, significant performance issues arise when either of the following happens:

  1. I uncomment any of the declare statements
  2. I include any numeric variables in fred.testdata (even without performing any calculations on the numeric variables)

My questions are:

  1. Is there any way to include numeric variables in fred.testdata without a slowdown so large that DS2 becomes much slower than the normal data step? For this small table of 5 million rows, with one or more numeric columns included, the real time is about 1 minute 30 seconds for DS2 versus 20 seconds for the normal data step; DS2 WITHOUT the declare statements and numeric variables takes about 7 seconds. The actual full table is closer to 600 million rows. For example, I would like to be able to do the week_ending conversion without it introducing a 5x penalty in run time.
  2. Is there any way to compress the table in ds2 without having to do an additional data step to compress it?

Thank you

Stu Sztukowski
Two methods to try: using proc hpds2 to have SAS handle the parallel execution, or a more manual approach. Note that neither method is guaranteed to preserve the original row order.

Method 1: PROC HPDS2

HPDS2 is a way of performing massive parallel processing of data. In single-machine mode, it will make parallel runs per core, then put the data all back together. You only need to make a few slight modifications to your code in order to run it.

With hpds2, you declare your input and output datasets with the data= and out= options on the proc statement. Inside the procedure, your data and set statements will always use the following syntax:

    /* Skeleton of every HPDS2 program: DS2GTF.in and DS2GTF.out stand for the
       datasets named by data= and out= on the PROC HPDS2 statement. */
    data DS2GTF.out;
        method run();
            set DS2GTF.in;
            /* <your code here> */
        end;
    enddata;

Knowing that, we can modify your code to run on HPDS2:

/* HPDS2 version of the claim-type derivation: the PROC statement names the
   input and output tables, and the DS2 program refers to them as DS2GTF.in
   and DS2GTF.out. */
proc hpds2 data=fred.testdata
           out=home_claims;

   data DS2GTF.out;
   /*declare char(10) producttype;
   declare char(12) wrknat_clmtype;
   declare char(7) claimtypedet;
   declare char(1) event_flag;*/

   /*declare date week_ending having format date9.;*/
   method run();

      /*declare char(7) _week_ending;*/

      set DS2GTF.in;

      if claim = 'X' then claimtypedet= 'ABC';
      else if claim = 'Y' then claimtypedet= 'DEF';

      /*_week_ending = COMPRESS(exposmth,'M');
    week_ending = to_date(substr(_week_ending,1,4) || '-' || substr(_week_ending,5,2) || '-01');*/

   end;
   enddata;
run;



Method 2: Split the data using rsubmit and append

The code below uses rsubmit and direct observation access (firstobs= / obs=) to read the data in chunks, then appends all the chunks together at the end. This approach can work especially well if your data is set up for block I/O.

/* SAS/CONNECT setup for the parallel_execute macro.
     sascmd='!sascmd' : start each remote session with the same command that
                        started this session.
     autosignon       : let RSUBMIT sign on to a session that does not exist
                        yet (the macro never issues an explicit SIGNON).
     noconnectwait    : make RSUBMIT asynchronous by default so the sessions
                        run side by side.
   NOTE(review): autosignon/noconnectwait are added on the assumption that the
   macro relies on them -- confirm against your site defaults. */
options sascmd='!sascmd' autosignon noconnectwait;

/* Default number of workers: the CPU count reported by this SAS session. */
%let cpucount = %sysfunc(getoption(cpucount));

/*
 * parallel_execute
 *
 * Splits &data into &threads contiguous row ranges, processes each range in
 * its own SAS/CONNECT session, then appends the per-session results into &out.
 *
 * Parameters:
 *   data=    input dataset, one- or two-level name
 *   out=     output dataset that receives the appended chunks
 *   threads= number of remote sessions to start (defaults to &cpucount)
 *
 * Each session writes its chunk as _<outdsn>_<i> into this session's WORK
 * directory, which the remote side reaches through the libref WORKDIR.
 */
%macro parallel_execute(data=, out=, threads=&cpucount);

    /* Get total obs from data */
    %let dsid = %sysfunc(open(&data.));
    %let n    = %sysfunc(attrn(&dsid., nlobs));
    %let rc   = %sysfunc(close(&dsid.));

    /* Run &threads rsubmit sessions */
    %do i = 1 %to &threads;

        /* Determine the records that each worker will read:
           worker i gets rows floor(n*(i-1)/threads)+1 .. floor(n*i/threads) */
        %let firstobs = %sysevalf(&n.-(&n./&threads.)*(&threads.-&i+1)+1, floor);
        %let lastobs  = %sysevalf(&n.-(&n./&threads.)*(&threads.-&i.), floor);

        /* Check for an input libname */
        %if(%scan(&data., 2, .) NE) %then %do;
            %let inlib = %scan(&data., 1, .);
            %let indsn = %scan(&data., 2, .);
        %end;
        %else %do;
            %let inlib = workdir;
            %let indsn = &data.;
        %end;

        /* Check for an output libname */
        %if(%scan(&out., 2, .) NE) %then %do;
            %let outlib = %scan(&out., 1, .);
            %let outdsn = %scan(&out., 2, .);
        %end;
        %else %do;
            %let outlib = workdir;
            %let outdsn = &out.;
        %end;

        /* Work library location of this session to be inherited by the parallel session */
        %let workdir = %sysfunc(getoption(work));

        /* Send all user-made macro variables to remote session &i.
           Done once, after every variable above has its final value. */
        %syslput _USER_ / remote=worker&i.;

        /* Run code on remote session &i; wait=no returns control immediately
           so the next session can be started while this one is running */
        rsubmit wait=no remote=worker&i. inheritlib=(&inlib.);

             libname workdir "&workdir.";

             data workdir._&outdsn._&i.;
                 set &inlib..&indsn.(firstobs=&firstobs. obs=&lastobs.);
/*               <PUT CODE HERE>;*/
             run;

        endrsubmit;

    %end;

    /* Wait for everything to complete */
    waitfor _ALL_;

    /* Append all of the chunks together */
    proc datasets nolist;
        delete &out.;

        %do i = 1 %to &threads.;
            append base=&out. data=_&outdsn._&i.;
        %end;

/* Optional: remove all temporary data */
/*      delete _&outdsn._:;*/
    run;
    quit;

    libname workdir clear;

%mend parallel_execute;

You can test its functionality with the below code:

/* Copy a small sample table into WORK so the macro has something to split. */
data pricedata;
    set sashelp.pricedata;
run;

/* Process the copy in 3 parallel sessions and append the chunks into WORK.test. */
%parallel_execute(data=pricedata, out=test, threads=3);

If you look at the temporary files in your WORK directory, you'll see that it evenly split up the dataset among the 3 parallel processes, and that it adds up to the original total.

_test_1 = 340
_test_2 = 340
_test_3 = 340
TOTAL   = 1020

pricedata = 1020

