1
0
mirror of https://github.com/sasjs/core.git synced 2025-12-10 14:04:36 +00:00
Files
core/base/mp_csv2ds.sas

140 lines
3.2 KiB
SAS

/**
@file mp_csv2ds.sas
@brief Efficient import of arbitrary CSV using a dataset as template
@details Used to import relevant columns from a large CSV using
a dataset to provide the types and lengths. Assumes that a header
row is provided, and datarows start on line 2. Extra columns in
both the CSV and base dataset are ignored.
Usage:
filename mycsv temp;
data _null_;
file mycsv;
put 'name,age,nickname';
put 'John,48,Jonny';
put 'Jennifer,23,Jen';
run;
%mp_csv2ds(inref=mycsv,outds=myds,baseds=sashelp.class)
@param inref= (0) Fileref to the CSV
@param outds= (0) Output ds (lib.ds format)
@param view= (NO) Set to YES or NO to determine whether the output should be
a view or not. Default is NO (not a view).
@param baseds= (0) Template dataset on which to create the input statement.
Is used to determine types, lengths, and any informats.
@version 9.2
@author Allan Bowe
<h4> SAS Macros </h4>
@li mp_abort.sas
@li mf_existds.sas
**/
%macro mp_csv2ds(inref=0,outds=0,baseds=0,view=NO);
%mp_abort(iftrue=( &inref=0 )
,mac=&sysmacroname
,msg=%str(the INREF variable must be provided)
)
%mp_abort(iftrue=( %superq(outds)=0 )
,mac=&sysmacroname
,msg=%str(the OUTDS variable must be provided)
)
%mp_abort(iftrue=( &baseds=0 )
,mac=&sysmacroname
,msg=%str(the BASEDS variable must be provided)
)
%mp_abort(iftrue=( %mf_existds(&baseds)=0 )
,mac=&sysmacroname
,msg=%str(the BASEDS dataset (&baseds) needs to be assigned, and to exist)
)
/* count rows */
%local hasheader; %let hasheader=0;
data _null_;
if _N_ > 1 then do;
call symputx('hasheader',1,'l');
stop;
end;
infile &inref;
input;
run;
%mp_abort(iftrue=( &hasheader=0 )
,mac=&sysmacroname
,msg=%str(No header row in &inref)
)
/* get the variables in the CSV */
data _data_;
infile &inref;
input;
length name $32;
do i=1 to countc(_infile_,',')+1;
name=upcase(scan(_infile_,i,','));
output;
end;
stop;
run;
%local csv_vars;%let csv_vars=&syslast;
/* get the variables in the dataset */
proc contents noprint data=&baseds
out=_data_ (keep=name type length format: informat);
run;
%local base_vars; %let base_vars=&syslast;
proc sql undo_policy=none;
create table &csv_vars as
select a.*
,b.type
,b.length
,b.format
,b.formatd
,b.formatl
,b.informat
from &csv_vars a
left join &base_vars b
on a.name=upcase(b.name)
order by i;
/* prepare the input statement */
%local instat dropvars;
data _null_;
set &syslast end=last;
length in dropvars $32767;
retain in dropvars;
if missing(type) then do;
informat='$1.';
dropvars=catx(' ',dropvars,name);
end;
else if missing(informat) then do;
if type=1 then informat='best.';
else informat=cats('$',length,'.');
end;
else informat=cats(informat,'.');
in=catx(' ',in,name,':',informat);
if last then do;
call symputx('instat',in,'l');
call symputx('dropvars',dropvars,'l');
end;
run;
/* import the CSV */
data &outds
%if %upcase(&view)=YES %then %do;
/view=&outds
%end;
;
infile &inref dsd firstobs=2;
input &instat;
%if %length(&dropvars)>0 %then %do;
drop &dropvars;
%end;
run;
%mend mp_csv2ds;