Reputation: 11
I would like to create one dataset within SAS by importing multiple excel files. The excel files have the same variables within and are saved in the same directory, the files all have consistent names:
excel_20150101 excel_20150201
How would I write this in SAS?
Upvotes: 1
Views: 4811
Reputation: 21264
So this is going to be interesting. When you import data from Excel the types are not controlled and you have little control. So I'm 99% certain you'll run into an issue combining data because the types will not align. I would recommend converting all your files to CSV (via batch script) and then importing all the CSV's at once.
Here's a macro I wrote that imports all:
%*Creates a list of all files in the DIR directory with the specified extension (EXT);
%macro list_files(dir,ext);
%local filrf rc did memcnt name i;
%let rc=%sysfunc(filename(filrf,&dir));
%let did=%sysfunc(dopen(&filrf));
%if &did eq 0 %then
%do;
%put Directory &dir cannot be open or does not exist;
%return;
%end;
%do i = 1 %to %sysfunc(dnum(&did));
%let name=%qsysfunc(dread(&did,&i));
%if %qupcase(%qscan(&name,-1,.)) = %upcase(&ext) %then
%do;
%put &dir\&name;
%let file_name = %qscan(&name,1,.);
%put &file_name;
data _tmp;
length dir $512 name $100;
dir=symget("dir");
name=symget("name");
path = catx('\',dir,name);
the_name = substr(name,1,find(name,'.')-1);
run;
proc append base=list data=_tmp force;
run;
quit;
proc sql;
drop table _tmp;
quit;
%end;
%else %if %qscan(&name,2,.) = %then
%do;
%list_files(&dir\&name,&ext)
%end;
%end;
%let rc=%sysfunc(dclose(&did));
%let rc=%sysfunc(filename(filrf));
%mend list_files;
%*Macro to import a single file, using the path, filename and an output dataset name must be specified;
%macro import_file(path, file_name, dataset_name );
proc import
datafile="&path.\&file_name."
dbms=xlsx
out=&dataset_name replace;
run;
%mend;
*Create the list of files, in this case all XLSX files;
%list_files(c:\_localData\temp, xlsx);
%*Call macro once for each entry in the list table created from the %list_files() macro;
data _null_;
set list;
string = catt('%import_file(', dir, ', ', name,', ', catt('test', put(_n_, z2.)), ');');
call execute (string);
run;
Instead of this however, I recommend converting all files to CSV and then importing those. https://gist.github.com/statgeek/878e585102c14e01581f55dbe972d27e
And then importing all the CSV's at once into a single file: https://blogs.sas.com/content/sasdummy/2018/10/09/read-multiple-text-files/
Upvotes: 0
Reputation: 370
here is how you could create a dataset with all the file name and their path
%let windowpath=/data/exports/Analytics/Users/psamson/Travel_project/PCA/;
data file; /*THIS PREPARE A TABLE OF TARGET FILE TO IMPORT*/
length file $200 name $28;
rc=filename("myfile","&windowpath");
DirId=dopen("myfile");
memcount=dnum(DirId);
do i = 1 to memcount;
fdir = "&windowpath./" || strip(dread(DirId,i));
rc2=filename("fdir",fdir);
fDirId=fopen("fdir");
if fDirId > 0 then do;
file = strip(dread(DirId,i));
name = substr(strip(dread(DirId,i)),1,25);
if index(file,'.xlsx') then output; /*THIS ENSURE WE ONLY IMPORT CSV file */
end;
rc2 = fclose(fDirId);
rc2= filename("fdir", "");
end;
rc = dclose(DirId);
rc = filename("myfile");
keep file name ; /*KEEP THE 2 VARIABLE TO VALIDATE AND USE FOR IMPORT*/
run;
From this list loop trough the file table to call your import like in the macro above!
Upvotes: 0
Reputation: 682
This macro will loop through all Excel files in a Directory and will import the contents in datasets named : DS1, DS2, DS3 …. DS400(If there are 400 Excel files). Pease make sure to keep only Excel(.xlsx) files in the specific directory.
options merror mlogic mprint symbolgen spool;
%macro drive(dir,ext);
%local filrf rc did memcnt name i;
/* Assigns a fileref to the directory and opens the directory */
%let rc=%sysfunc(filename(filrf,&dir));
%let did=%sysfunc(dopen(&filrf));
/* Loops through entire directory */
%do i = 1 %to %sysfunc(dnum(&did));
/* Retrieve name and import each Excel file */
%let name=%qsysfunc(dread(&did,&i));
proc import
out=DS&i
datafile= "Y:\Excel\&name"
dbms=XLSX replace;
getnames=yes;
run;
%end;
/* Closes the directory and clear the fileref */
%let rc=%sysfunc(dclose(&did));
%let rc=%sysfunc(filename(filrf));
%mend drive;
/* First parameter is the directory of where your files are stored. */
/* Second parameter is the extension you are looking for. */
%drive(Y:\Excel,xlsx);
Upvotes: 1