mirror of
https://github.com/sasjs/core.git
synced 2025-12-11 06:24:35 +00:00
feat: mp_csv2ds macro for importing a CSV using a SAS table to provide a template (eg for lengths / types etc)
This commit is contained in:
107
all.sas
107
all.sas
@@ -1904,6 +1904,113 @@ Usage:
|
||||
|
||||
%mend;
|
||||
/**
|
||||
@file mp_csv2ds.sas
|
||||
@brief Efficient import of arbitrary CSV using a dataset as template
|
||||
@details Used to import relevant columns from a large CSV using
|
||||
a dataset to provide the types and lengths. Assumes that a header
|
||||
row is provided, and datarows start on line 2. Extra columns in
|
||||
both the CSV and base dataset are ignored.
|
||||
|
||||
Usage:
|
||||
|
||||
filename mycsv temp;
|
||||
data _null_;
|
||||
file mycsv;
|
||||
put 'name,age,nickname';
|
||||
put 'John,48,Jonny';
|
||||
put 'Jennifer,23,Jen';
|
||||
run;
|
||||
|
||||
%mp_csv2ds(inref=mycsv,outds=myds,baseds=sashelp.class)
|
||||
|
||||
|
||||
@param inref= fileref to the CSV
|
||||
@param outds= output ds. Could also be a view (eg `outds=myds/view=myds`)
|
||||
@param baseds= Template dataset on which to create the input statement.
|
||||
Is used to determine types, lengths, and any informats.
|
||||
|
||||
@version 9.2
|
||||
@author Allan Bowe
|
||||
**/
|
||||
|
||||
%macro mp_csv2ds(inref=0,outds=0,baseds=0);
  /*
    Parameter checks.  %superq() makes the comparison textual, so values
    that contain macro operators (eg outds=myds/view=myds, or dataset
    options on baseds) are not evaluated arithmetically by the implicit
    %eval of the %if statement.
  */
  %if %superq(inref)=0 %then %do;
    %put %str(ERR)OR: the INREF variable must be provided;
    %let syscc=4;
    /* NOTE(review): %abort here vs %return below - confirm this is intended */
    %abort;
  %end;
  %if %superq(outds)=0 %then %do;
    %put %str(ERR)OR: the OUTDS variable must be provided;
    %let syscc=4;
    %return;
  %end;
  %if %superq(baseds)=0 %then %do;
    %put %str(ERR)OR: the BASEDS variable must be provided;
    %let syscc=4;
    %return;
  %end;

  %local csv_vars base_vars instat dropvars;

  /* get the variables in the CSV - one row per header column, in order */
  data _data_;
    infile &inref;
    input;
    length name $32;
    do i=1 to countc(_infile_,',')+1;
      name=upcase(scan(_infile_,i,','));
      output;
    end;
    stop; /* only the header row is needed */
  run;
  %let csv_vars=&syslast;

  /* get the variables in the dataset */
  proc contents noprint data=&baseds
    out=_data_ (keep=name type length format: informat informl informd);
  run;
  %let base_vars=&syslast;

  /* attach template attributes to each CSV column, keeping CSV order */
  proc sql undo_policy=none;
  create table &csv_vars as
    select a.*
      ,b.type
      ,b.length
      ,b.format
      ,b.formatd
      ,b.formatl
      ,b.informat
      ,b.informl
      ,b.informd
    from &csv_vars a
    left join &base_vars b
    on a.name=upcase(b.name)
    order by i;
  quit;

  /* prepare the input statement */
  data _null_;
    set &csv_vars end=last;
    length instmt dropvars $32767;
    retain instmt dropvars;
    if missing(type) then do;
      /* column is not in the template: read it as $1 and drop it later */
      informat='$1.';
      dropvars=catx(' ',dropvars,name);
    end;
    else if missing(informat) then do;
      /* no informat on the template column: derive one from type / length */
      if type=1 then informat='best.';
      else informat=cats('$',length,'.');
    end;
    else do;
      /*
        proc contents holds the informat name, width and decimals in
        separate columns - rebuild the full specification (eg DATE9.) as
        a bare name without the period is not a valid informat
      */
      if informl>0 then informat=cats(informat,informl,'.');
      else informat=cats(informat,'.');
      if informd>0 then informat=cats(informat,informd);
    end;
    instmt=catx(' ',instmt,name,':',informat);
    if last then do;
      call symputx('instat',instmt,'l');
      call symputx('dropvars',dropvars,'l');
    end;
  run;

  /* remove the two scratch tables created above */
  proc sql;
  drop table &csv_vars, &base_vars;
  quit;

  /* nothing to import if the fileref had no header row */
  %if %length(&instat)=0 %then %do;
    %put %str(ERR)OR: no header row was found in &inref;
    %let syscc=4;
    %return;
  %end;

  /* import the CSV */
  data &outds;
    infile &inref dsd firstobs=2;
    input &instat;
    /* only generate the drop statement when there is something to drop */
    %if %length(&dropvars)>0 %then %do;
      drop &dropvars;
    %end;
  run;

%mend;
/**
|
||||
@file mp_deleteconstraints.sas
|
||||
  @brief Delete constraints
|
||||
@details Takes the output from mp_getconstraints.sas as input
|
||||
|
||||
109
base/mp_csv2ds.sas
Normal file
109
base/mp_csv2ds.sas
Normal file
@@ -0,0 +1,109 @@
|
||||
/**
|
||||
@file mp_csv2ds.sas
|
||||
@brief Efficient import of arbitrary CSV using a dataset as template
|
||||
@details Used to import relevant columns from a large CSV using
|
||||
a dataset to provide the types and lengths. Assumes that a header
|
||||
row is provided, and datarows start on line 2. Extra columns in
|
||||
both the CSV and base dataset are ignored.
|
||||
|
||||
Usage:
|
||||
|
||||
filename mycsv temp;
|
||||
data _null_;
|
||||
file mycsv;
|
||||
put 'name,age,nickname';
|
||||
put 'John,48,Jonny';
|
||||
put 'Jennifer,23,Jen';
|
||||
run;
|
||||
|
||||
%mp_csv2ds(inref=mycsv,outds=myds,baseds=sashelp.class)
|
||||
|
||||
|
||||
@param inref= fileref to the CSV
|
||||
@param outds= output ds. Could also be a view (eg `outds=myds/view=myds`)
|
||||
@param baseds= Template dataset on which to create the input statement.
|
||||
Is used to determine types, lengths, and any informats.
|
||||
|
||||
@version 9.2
|
||||
@author Allan Bowe
|
||||
**/
|
||||
|
||||
%macro mp_csv2ds(inref=0,outds=0,baseds=0);
  /*
    Parameter checks.  %superq() makes the comparison textual, so values
    that contain macro operators (eg outds=myds/view=myds, or dataset
    options on baseds) are not evaluated arithmetically by the implicit
    %eval of the %if statement.
  */
  %if %superq(inref)=0 %then %do;
    %put %str(ERR)OR: the INREF variable must be provided;
    %let syscc=4;
    /* NOTE(review): %abort here vs %return below - confirm this is intended */
    %abort;
  %end;
  %if %superq(outds)=0 %then %do;
    %put %str(ERR)OR: the OUTDS variable must be provided;
    %let syscc=4;
    %return;
  %end;
  %if %superq(baseds)=0 %then %do;
    %put %str(ERR)OR: the BASEDS variable must be provided;
    %let syscc=4;
    %return;
  %end;

  %local csv_vars base_vars instat dropvars;

  /* get the variables in the CSV - one row per header column, in order */
  data _data_;
    infile &inref;
    input;
    length name $32;
    do i=1 to countc(_infile_,',')+1;
      name=upcase(scan(_infile_,i,','));
      output;
    end;
    stop; /* only the header row is needed */
  run;
  %let csv_vars=&syslast;

  /* get the variables in the dataset */
  proc contents noprint data=&baseds
    out=_data_ (keep=name type length format: informat informl informd);
  run;
  %let base_vars=&syslast;

  /* attach template attributes to each CSV column, keeping CSV order */
  proc sql undo_policy=none;
  create table &csv_vars as
    select a.*
      ,b.type
      ,b.length
      ,b.format
      ,b.formatd
      ,b.formatl
      ,b.informat
      ,b.informl
      ,b.informd
    from &csv_vars a
    left join &base_vars b
    on a.name=upcase(b.name)
    order by i;
  quit;

  /* prepare the input statement */
  data _null_;
    set &csv_vars end=last;
    length instmt dropvars $32767;
    retain instmt dropvars;
    if missing(type) then do;
      /* column is not in the template: read it as $1 and drop it later */
      informat='$1.';
      dropvars=catx(' ',dropvars,name);
    end;
    else if missing(informat) then do;
      /* no informat on the template column: derive one from type / length */
      if type=1 then informat='best.';
      else informat=cats('$',length,'.');
    end;
    else do;
      /*
        proc contents holds the informat name, width and decimals in
        separate columns - rebuild the full specification (eg DATE9.) as
        a bare name without the period is not a valid informat
      */
      if informl>0 then informat=cats(informat,informl,'.');
      else informat=cats(informat,'.');
      if informd>0 then informat=cats(informat,informd);
    end;
    instmt=catx(' ',instmt,name,':',informat);
    if last then do;
      call symputx('instat',instmt,'l');
      call symputx('dropvars',dropvars,'l');
    end;
  run;

  /* remove the two scratch tables created above */
  proc sql;
  drop table &csv_vars, &base_vars;
  quit;

  /* nothing to import if the fileref had no header row */
  %if %length(&instat)=0 %then %do;
    %put %str(ERR)OR: no header row was found in &inref;
    %let syscc=4;
    %return;
  %end;

  /* import the CSV */
  data &outds;
    infile &inref dsd firstobs=2;
    input &instat;
    /* only generate the drop statement when there is something to drop */
    %if %length(&dropvars)>0 %then %do;
      drop &dropvars;
    %end;
  run;

%mend;
|
||||
Reference in New Issue
Block a user