mirror of
https://github.com/sasjs/core.git
synced 2025-12-11 06:24:35 +00:00
254 lines
7.1 KiB
SAS
254 lines
7.1 KiB
SAS
/**
|
|
@file
|
|
@brief Create a CARDS file from a SAS dataset.
|
|
@details Uses dataset attributes to convert all data into datalines.
|
|
Running the generated file will rebuild the original dataset.
|
|
Usage:
|
|
|
|
%mp_ds2cards(base_ds=sashelp.class
|
|
, cards_file= "C:\temp\class.sas"
|
|
, maxobs=5)
|
|
|
|
TODO:
|
|
- labelling the dataset
|
|
- explicity setting a unix LF
|
|
- constraints / indexes etc
|
|
|
|
@param [in] base_ds= Should be two level - eg work.blah. This is the table
|
|
that is converted to a cards file.
|
|
@param [in] tgt_ds= Table that the generated cards file would create.
|
|
Optional - if omitted, will be same as BASE_DS.
|
|
@param [out] cards_file= Location in which to write the (.sas) cards file
|
|
@param [in] maxobs= to limit output to the first <code>maxobs</code>
|
|
observations
|
|
@param [in] showlog= whether to show generated cards file in the SAS log
|
|
(YES/NO)
|
|
@param [in] outencoding= provide encoding value for file statement (eg utf-8)
|
|
@param [in] append= If NO then will rebuild the cards file if it already
|
|
exists, otherwise will append to it. Used by the mp_lib2cards.sas macro.
|
|
|
|
|
|
@version 9.2
|
|
@author Allan Bowe
|
|
**/
|
|
|
|
%macro mp_ds2cards(base_ds=, tgt_ds=
|
|
,cards_file="%sysfunc(pathname(work))/cardgen.sas"
|
|
,maxobs=max
|
|
,random_sample=NO
|
|
,showlog=YES
|
|
,outencoding=
|
|
,append=NO
|
|
)/*/STORE SOURCE*/;
|
|
%local i setds nvars;
|
|
|
|
%if not %sysfunc(exist(&base_ds)) %then %do;
|
|
%put %str(WARN)ING: &base_ds does not exist;
|
|
%return;
|
|
%end;
|
|
|
|
%if %index(&base_ds,.)=0 %then %let base_ds=WORK.&base_ds;
|
|
%if (&tgt_ds = ) %then %let tgt_ds=&base_ds;
|
|
%if %index(&tgt_ds,.)=0 %then %let tgt_ds=WORK.%scan(&base_ds,2,.);
|
|
%if ("&outencoding" ne "") %then %let outencoding=encoding="&outencoding";
|
|
%if ("&append" = "") %then %let append=;
|
|
%else %let append=mod;
|
|
|
|
/* get varcount */
|
|
%let nvars=0;
|
|
proc sql noprint;
|
|
select count(*) into: nvars from dictionary.columns
|
|
where upcase(libname)="%scan(%upcase(&base_ds),1)"
|
|
and upcase(memname)="%scan(%upcase(&base_ds),2)";
|
|
%if &nvars=0 %then %do;
|
|
%put %str(WARN)ING: Dataset &base_ds has no variables, will not be converted.;
|
|
%return;
|
|
%end;
|
|
|
|
/* get indexes */
|
|
proc sort
|
|
data=sashelp.vindex(
|
|
where=(upcase(libname)="%scan(%upcase(&base_ds),1)"
|
|
and upcase(memname)="%scan(%upcase(&base_ds),2)")
|
|
)
|
|
out=_data_;
|
|
by indxname indxpos;
|
|
run;
|
|
|
|
%local indexes;
|
|
data _null_;
|
|
set &syslast end=last;
|
|
if _n_=1 then call symputx('indexes','(index=(','l');
|
|
by indxname indxpos;
|
|
length vars $32767 nom uni $8;
|
|
retain vars;
|
|
if first.indxname then do;
|
|
idxcnt+1;
|
|
nom='';
|
|
uni='';
|
|
vars=name;
|
|
end;
|
|
else vars=catx(' ',vars,name);
|
|
if last.indxname then do;
|
|
if nomiss='yes' then nom='/nomiss';
|
|
if unique='yes' then uni='/unique';
|
|
call symputx('indexes'
|
|
,catx(' ',symget('indexes'),indxname,'=(',vars,')',nom,uni)
|
|
,'l');
|
|
end;
|
|
if last then call symputx('indexes',cats(symget('indexes'),'))'),'l');
|
|
run;
|
|
|
|
|
|
data;run;
|
|
%let setds=&syslast;
|
|
proc sql
|
|
%if %datatyp(&maxobs)=NUMERIC %then %do;
|
|
outobs=&maxobs;
|
|
%end;
|
|
;
|
|
create table &setds as select * from &base_ds
|
|
%if &random_sample=YES %then %do;
|
|
order by ranuni(42)
|
|
%end;
|
|
;
|
|
reset outobs=max;
|
|
create table datalines1 as
|
|
select name,type,length,varnum,format,label from dictionary.columns
|
|
where upcase(libname)="%upcase(%scan(&base_ds,1))"
|
|
and upcase(memname)="%upcase(%scan(&base_ds,2))";
|
|
|
|
/**
|
|
Due to long decimals cannot use best. format
|
|
So - use bestd. format and then use character functions to strip trailing
|
|
zeros, if NOT an integer!!
|
|
resolved code = ifc(int(VARIABLE)=VARIABLE
|
|
,put(VARIABLE,best32.)
|
|
,substrn(put(VARIABLE,bestd32.),1
|
|
,findc(put(VARIABLE,bestd32.),'0','TBK')));
|
|
**/
|
|
|
|
data datalines_2;
|
|
format dataline $32000.;
|
|
set datalines1 (where=(upcase(name) not in
|
|
('PROCESSED_DTTM','VALID_FROM_DTTM','VALID_TO_DTTM')));
|
|
if type='num' then dataline=
|
|
cats('ifc(int(',name,')=',name,'
|
|
,put(',name,',best32.-l)
|
|
,substrn(put(',name,',bestd32.-l),1
|
|
,findc(put(',name,',bestd32.-l),"0","TBK")))');
|
|
else dataline=name;
|
|
run;
|
|
|
|
proc sql noprint;
|
|
select dataline into: datalines separated by ',' from datalines_2;
|
|
|
|
%local
|
|
process_dttm_flg
|
|
valid_from_dttm_flg
|
|
valid_to_dttm_flg
|
|
;
|
|
%let process_dttm_flg = N;
|
|
%let valid_from_dttm_flg = N;
|
|
%let valid_to_dttm_flg = N;
|
|
data _null_;
|
|
set datalines1 ;
|
|
/* build attrib statement */
|
|
if type='char' then type2='$';
|
|
if strip(format) ne '' then format2=cats('format=',format);
|
|
if strip(label) ne '' then label2=cats('label=',quote(trim(label)));
|
|
str1=catx(' ',(put(name,$33.)||'length=')
|
|
,put(cats(type2,length),$7.)||format2,label2);
|
|
|
|
|
|
/* Build input statement */
|
|
if type='char' then type3=':$char.';
|
|
str2=put(name,$33.)||type3;
|
|
|
|
|
|
if(upcase(name) = "PROCESSED_DTTM") then
|
|
call symputx("process_dttm_flg", "Y", "L");
|
|
if(upcase(name) = "VALID_FROM_DTTM") then
|
|
call symputx("valid_from_dttm_flg", "Y", "L");
|
|
if(upcase(name) = "VALID_TO_DTTM") then
|
|
call symputx("valid_to_dttm_flg", "Y", "L");
|
|
|
|
|
|
call symputx(cats("attrib_stmt_", put(_N_, 8.)), str1, "L");
|
|
call symputx(cats("input_stmt_", put(_N_, 8.))
|
|
, ifc(upcase(name) not in
|
|
('PROCESSED_DTTM','VALID_FROM_DTTM','VALID_TO_DTTM'), str2, ""), "L");
|
|
run;
|
|
|
|
data _null_;
|
|
file &cards_file. &outencoding lrecl=32767 termstr=nl &append;
|
|
length __attrib $32767;
|
|
if _n_=1 then do;
|
|
put '/*******************************************************************';
|
|
put " Datalines for %upcase(%scan(&base_ds,2)) dataset ";
|
|
put " Generated by %nrstr(%%)mp_ds2cards()";
|
|
put " Available on github.com/sasjs/core";
|
|
put '********************************************************************/';
|
|
put "data &tgt_ds &indexes;";
|
|
put "attrib ";
|
|
%do i = 1 %to &nvars;
|
|
__attrib=symget("attrib_stmt_&i");
|
|
put __attrib;
|
|
%end;
|
|
put ";";
|
|
|
|
%if &process_dttm_flg. eq Y %then %do;
|
|
put 'retain PROCESSED_DTTM %sysfunc(datetime());';
|
|
%end;
|
|
%if &valid_from_dttm_flg. eq Y %then %do;
|
|
put 'retain VALID_FROM_DTTM &low_date;';
|
|
%end;
|
|
%if &valid_to_dttm_flg. eq Y %then %do;
|
|
put 'retain VALID_TO_DTTM &high_date;';
|
|
%end;
|
|
if __nobs=0 then do;
|
|
put 'call missing(of _all_);/* avoid uninitialised notes */';
|
|
put 'stop;';
|
|
put 'run;';
|
|
end;
|
|
else do;
|
|
put "infile cards dsd delimiter=',';";
|
|
put "input ";
|
|
%do i = 1 %to &nvars.;
|
|
%if(%length(&&input_stmt_&i..)) %then
|
|
put " &&input_stmt_&i..";
|
|
;
|
|
%end;
|
|
put ";";
|
|
put "datalines4;";
|
|
end;
|
|
end;
|
|
set &setds end=__lastobs nobs=__nobs;
|
|
/* remove all formats for write purposes - some have long underlying decimals */
|
|
format _numeric_ best30.29;
|
|
length __dataline $32767;
|
|
__dataline=catq('cqtmb',&datalines);
|
|
put __dataline;
|
|
if __lastobs then do;
|
|
put ';;;;';
|
|
put 'run;';
|
|
stop;
|
|
end;
|
|
run;
|
|
proc sql;
|
|
drop table &setds;
|
|
quit;
|
|
|
|
%if &showlog=YES %then %do;
|
|
data _null_;
|
|
infile &cards_file lrecl=32767;
|
|
input;
|
|
put _infile_;
|
|
run;
|
|
%end;
|
|
|
|
%put NOTE: CARDS FILE SAVED IN:;
|
|
%put NOTE-;%put NOTE-;
|
|
%put NOTE- %sysfunc(dequote(&cards_file.));
|
|
%put NOTE-;%put NOTE-;
|
|
%mend mp_ds2cards; |