1
0
mirror of https://github.com/sasjs/core.git synced 2025-12-11 06:24:35 +00:00

Merge pull request #168 from sasjs/loadformat

feat: new mp_loadformat macro
This commit is contained in:
Allan Bowe
2022-02-10 15:46:20 +02:00
committed by GitHub
10 changed files with 516 additions and 29 deletions

View File

@@ -22,6 +22,12 @@
)/*/STORE SOURCE*/;
%local a b c;
%if &switch.NONE=NONE %then %do;
%if %symexist(sasjsprocessmode) %then %do;
%if &sasjsprocessmode=Stored Program %then %do;
SASJS
%return;
%end;
%end;
%if %symexist(sysprocessmode) %then %do;
%if "&sysprocessmode"="SAS Object Server"
or "&sysprocessmode"= "SAS Compute Server" %then %do;

View File

@@ -98,7 +98,7 @@
create table _data_ as
select name,offset,value
from dictionary.macros
where scope="&scope" and name not in (%mf_getquotedstr(&ilist))
where scope="&scope" and upcase(name) not in (%mf_getquotedstr(&ilist))
order by name,offset;
%let ds=&syslast;

303
base/mp_loadformat.sas Normal file
View File

@@ -0,0 +1,303 @@
/**
@file
@brief Loads a format catalog from a staging dataset
@details When loading staged data, it is common to receive only the records
that have actually changed. However, when loading a format catalog, if
records are missing they are presumed to be no longer required.
This macro will augment a staging dataset with other records from the same
format, to prevent loss of data - UNLESS the input dataset contains a marker
column, specifying that a particular row needs to be deleted (`delete_col=`).
This macro can also be used to identify which records would be (or were)
considered new, modified or deleted (`loadtarget=`) by creating the following
tables:
@li work.outds_add
@li work.outds_del
@li work.outds_mod
For example usage, see mp_loadformat.test.sas
@param [in] libcat The format catalog to be loaded
@param [in] libds The staging table to load
@param [in] loadtarget= (NO) Set to YES to actually load the target catalog
@param [in] delete_col= (_____DELETE__THIS__RECORD_____) The column used to
mark a record for deletion. Values should be "Yes" or "No".
@param [out] auditlibds= (0) For change tracking, set to the libds of an audit
table as defined in mddl_dc_difftable.sas
@param [in] locklibds= (0) For multi-user (parallel) situations, set to the
libds of the DC lock table as defined in the mddl_dc_locktable.sas macro.
@param [out] outds_add= (0) Set a libds here to see the new records added
@param [out] outds_del= (0) Set a libds here to see the records deleted
@param [out] outds_mod= (0) Set a libds here to see the modified records
@param [in] mdebug= (0) Set to 1 to enable DEBUG messages and preserve outputs
<h4> SAS Macros </h4>
@li mddl_sas_cntlout.sas
@li mf_getuniquename.sas
@li mf_nobs.sas
@li mp_abort.sas
@li mp_lockanytable.sas
<h4> Related Macros </h4>
@li mddl_dc_difftable.sas
@li mddl_dc_locktable.sas
@li mp_loadformat.test.sas
@li mp_lockanytable.sas
@li mp_storediffs.sas
@li mp_stackdiffs.sas
@version 9.2
@author Allan Bowe
**/
%macro mp_loadformat(libcat,libds
,loadtarget=NO
,auditlibds=0
,locklibds=0
,delete_col=_____DELETE__THIS__RECORD_____
,outds_add=0
,outds_del=0
,outds_mod=0
,mdebug=0
);
/* set up local macro variables and temporary tables (with a prefix) */
%local err msg prefix dslist i var fmtlist ibufsize;
%let dslist=base base_fmts template inlibds ds1 stagedata storediffs;
%if &outds_add=0 %then %let dslist=&dslist outds_add;
%if &outds_del=0 %then %let dslist=&dslist outds_del;
%if &outds_mod=0 %then %let dslist=&dslist outds_mod;
%let prefix=%substr(%mf_getuniquename(),1,22);
%do i=1 %to %sysfunc(countw(&dslist));
%let var=%scan(&dslist,&i);
%local &var;
%let &var=%upcase(&prefix._&var);
%end;
/* perform input validations */
%let err=0;
%let msg=0;
data _null_;
if _n_=1 then putlog "&sysmacroname entry vars:";
set sashelp.vmacro;
where scope="&sysmacroname";
value=upcase(value);
if &mdebug=0 then put name '=' value;
if name=:'LOAD' and value not in ('YES','NO') then do;
call symputx('msg',"invalid value for "!!name!!":"!!value);
call symputx('err',1);
stop;
end;
else if name='LIBCAT' then do;
if exist(value,'CATALOG') le 0 then do;
call symputx('msg',"Unable to open catalog: "!!value);
call symputx('err',1);
stop;
end;
end;
else if name='LIBDS' then do;
if exist(value) le 0 then do;
call symputx('msg',"Unable to open staging table: "!!value);
call symputx('err',1);
stop;
end;
end;
else if (name=:'OUTDS' or name in ('DELETE_COL','LOCKLIBDS','AUDITLIBDS'))
and missing(value) then do;
call symputx('msg',"missing value in var: "!!name);
call symputx('err',1);
stop;
end;
run;
%mp_abort(
iftrue=(&err ne 0)
,mac=&sysmacroname
,msg=%str(&msg)
)
/**
* First, extract only relevant formats from the catalog
*/
proc sql noprint;
select distinct fmtname into: fmtlist separated by ' ' from &libds;
proc format lib=&libcat cntlout=&base;
select
/* send formats individually to avoid line truncation in the input stack */
%do i=1 %to %sysfunc(countw(&fmtlist));
%scan(&fmtlist,&i,%str( ))
%end;
;
run;
proc sort data=&base;
by fmtname start;
run;
/**
* Ensure input table and base_formats have consistent lengths and types
*/
%mddl_sas_cntlout(libds=&template)
data &inlibds;
if 0 then set &template;
set &libds;
if missing(type) then do;
if substr(fmtname,1,1)='$' then type='C';
else type='N';
end;
if type='N' then start=put(input(start,best.),best16.);
run;
data &base_fmts;
if 0 then set &template;
set &base;
run;
/*
format values can be up to 32767 wide. SQL joins on such a wide column can
cause buffer issues. Update ibufsize and reset at the end.
*/
%let ibufsize=%sysfunc(getoption(ibufsize));
options ibufsize=32767 ;
/**
* Identify new records
*/
proc sql;
create table &outds_add(drop=&delete_col) as
select a.*
from &inlibds a
left join &base_fmts b
on a.fmtname=b.fmtname
and a.start=b.start
where b.fmtname is null
and upcase(a.&delete_col) ne "YES"
order by fmtname, start;;
/**
* Identify deleted records
*/
create table &outds_del(drop=&delete_col) as
select a.*
from &inlibds a
inner join &base_fmts b
on a.fmtname=b.fmtname
and a.start=b.start
where upcase(a.&delete_col)="YES"
order by fmtname, start;
/**
* Identify modified records
*/
create table &outds_mod (drop=&delete_col) as
select a.*
from &inlibds a
inner join &base_fmts b
on a.fmtname=b.fmtname
and a.start=b.start
where upcase(a.&delete_col) ne "YES"
order by fmtname, start;
options ibufsize=&ibufsize;
%mp_abort(
iftrue=(&syscc ne 0)
,mac=&sysmacroname
,msg=%str(SYSCC=&syscc prior to load prep)
)
%if &loadtarget=YES %then %do;
data &ds1;
merge &base_fmts(in=base)
&outds_mod(in=mod)
&outds_add(in=add)
&outds_del(in=del);
if not del and not mod;
by fmtname start;
run;
data &stagedata;
set &ds1 &outds_mod;
run;
proc sort;
by fmtname start;
run;
%end;
/* mp abort needs to run outside of conditional blocks */
%mp_abort(
iftrue=(&syscc ne 0)
,mac=&sysmacroname
,msg=%str(SYSCC=&syscc prior to actual load)
)
%if &loadtarget=YES %then %do;
%if %mf_nobs(&stagedata)=0 %then %do;
%put There are no changes to load in &libcat!;
%return;
%end;
%if &locklibds ne 0 %then %do;
/* prevent parallel updates */
%mp_lockanytable(LOCK,
lib=%scan(&libcat,1,.)
,ds=%scan(&libcat,2,.)
,ref=MP_LOADFORMAT commencing format load
,ctl_ds=&locklibds
)
%end;
/* do the actual load */
proc format lib=&libcat cntlin=&stagedata;
run;
%if &locklibds ne 0 %then %do;
/* unlock the table */
%mp_lockanytable(UNLOCK
lib=%scan(&libcat,1,.)
,ds=%scan(&libcat,2,.)
,ref=MP_LOADFORMAT completed format load
,ctl_ds=&locklibds
)
%end;
/* track the changes */
%if &auditlibds ne 0 %then %do;
%if &locklibds ne 0 %then %do;
%mp_lockanytable(LOCK,
lib=%scan(&auditlibds,1,.)
,ds=%scan(&auditlibds,2,.)
,ref=MP_LOADFORMAT commencing audit table load
,ctl_ds=&locklibds
)
%end;
%mp_storediffs(&libcat
,&stageds
,FMTNAME START
,delds=&outds_del
,modds=&outds_mod
,appds=&outds_add
,outds=&storediffs
,mdebug=&mdebug
)
%if &locklibds ne 0 %then %do;
%mp_lockanytable(UNLOCK
lib=%scan(&auditlibds,1,.)
,ds=%scan(&auditlibds,2,.)
,ref=MP_LOADFORMAT commencing audit table load
,ctl_ds=&locklibds
)
%end;
%end;
%end;
%mp_abort(
iftrue=(&syscc ne 0)
,mac=&sysmacroname
,msg=%str(SYSCC=&syscc after load)
)
%if &mdebug=0 %then %do;
proc datasets lib=work;
delete &prefix:;
run;
%put &sysmacroname exit vars:;
%put _local_;
%end;
%mend mp_loadformat;

58
base/mp_md5.sas Normal file
View File

@@ -0,0 +1,58 @@
/**
@file
@brief Generates an md5 expression for hashing a set of variables
@details This is the same algorithm used to hash records in
[Data Controller for SAS](https://datacontroller.io) (free for up
to 5 users).
It is not designed to be efficient - it is designed to be effective,
given the range of edge cases (large floating points, special missing
numerics, thousands of columns, very wide columns).
It can be used only in data step, eg as follows:
data _null_;
set sashelp.class;
hashvar=%mp_md5(cvars=name sex, nvars=age height weight);
put hashvar=;
run;
Unfortunately it will not run in SQL - it fails with the following message:
> The width value for HEX is out of bounds. It should be between 1 and 16
The macro will also cause errors if the data contains (non-special) missings
and the (undocumented) `options dsoptions=nonote2err;` is in effect.
This can be avoided in two ways:
@li Global option: `options dsoptions=nonote2err;`
@li Data step option: `data YOURLIB.YOURDATASET /nonote2err;`
@param cvars= Space seperated list of character variables
@param nvars= Space seperated list of numeric variables
<h4> Related Programs </h4>
@li mp_init.sas
@version 9.2
@author Allan Bowe
**/
%macro mp_md5(cvars=,nvars=);
%local i var sep;
put(md5(
%do i=1 %to %sysfunc(countw(&cvars));
%let var=%scan(&cvars,&i,%str( ));
&sep put(md5(trim(&var)),$hex32.)
%let sep=!!;
%end;
%do i=1 %to %sysfunc(countw(&nvars));
%let var=%scan(&nvars,&i,%str( ));
/* multiply by 1 to strip precision errors (eg 0 != 0) */
/* but ONLY if not missing, else will lose any special missing values */
&sep put(md5(trim(put(ifn(missing(&var),&var,&var*1),binary64.))),$hex32.)
%let sep=!!;
%end;
),hex32.)
%mend mp_md5;

View File

@@ -55,7 +55,7 @@ data _null_;
run;
%if &contentype=CSV %then %do;
%if &platform=SASMETA and &streamweb=1 %then %do;
%if (&platform=SASMETA and &streamweb=1) or &platform=SASJS %then %do;
data _null_;
rc=stpsrv_header('Content-type','application/csv');
rc=stpsrv_header('Content-disposition',"attachment; filename=&outname");
@@ -69,7 +69,7 @@ run;
%end;
%else %if &contentype=EXCEL %then %do;
/* suitable for XLS format */
%if &platform=SASMETA and &streamweb=1 %then %do;
%if (&platform=SASMETA and &streamweb=1) or &platform=SASJS %then %do;
data _null_;
rc=stpsrv_header('Content-type','application/vnd.ms-excel');
rc=stpsrv_header('Content-disposition',"attachment; filename=&outname");
@@ -82,7 +82,7 @@ run;
%end;
%end;
%else %if &contentype=GIF or &contentype=JPEG or &contentype=PNG %then %do;
%if &platform=SASMETA and &streamweb=1 %then %do;
%if (&platform=SASMETA and &streamweb=1) or &platform=SASJS %then %do;
data _null_;
rc=stpsrv_header('Content-type',"image/%lowcase(&contenttype)");
run;
@@ -99,7 +99,7 @@ run;
%end;
%end;
%else %if &contentype=TEXT %then %do;
%if &platform=SASMETA and &streamweb=1 %then %do;
%if (&platform=SASMETA and &streamweb=1) or &platform=SASJS %then %do;
data _null_;
rc=stpsrv_header('Content-type','application/text');
rc=stpsrv_header('Content-disposition',"attachment; filename=&outname");
@@ -112,7 +112,7 @@ run;
%end;
%end;
%else %if &contentype=WOFF or &contentype=WOFF2 or &contentype=TTF %then %do;
%if &platform=SASMETA and &streamweb=1 %then %do;
%if (&platform=SASMETA and &streamweb=1) or &platform=SASJS %then %do;
data _null_;
rc=stpsrv_header('Content-type',"font/%lowcase(&contenttype)");
run;
@@ -123,7 +123,7 @@ run;
%end;
%end;
%else %if &contentype=XLSX %then %do;
%if &platform=SASMETA and &streamweb=1 %then %do;
%if (&platform=SASMETA and &streamweb=1) or &platform=SASJS %then %do;
data _null_;
rc=stpsrv_header('Content-type',
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet');
@@ -138,7 +138,7 @@ run;
%end;
%end;
%else %if &contentype=ZIP %then %do;
%if &platform=SASMETA and &streamweb=1 %then %do;
%if (&platform=SASMETA and &streamweb=1) or &platform=SASJS %then %do;
data _null_;
rc=stpsrv_header('Content-type','application/zip');
rc=stpsrv_header('Content-disposition',"attachment; filename=&outname");

View File

@@ -4,9 +4,7 @@
@details Loops with a `sleep()` command until a file arrives or the max wait
period expires.
@example
Wait 3 minutes OR for /tmp/flag.txt to appear
Example: Wait 3 minutes OR for /tmp/flag.txt to appear
%mp_wait4file(/tmp/flag.txt , maxwait=60*3)

View File

@@ -12,25 +12,30 @@
proc sql;
create table &libds(
FMTNAME char(32) label='Format name'
,START char(16) label='Starting value for format'
,END char(16) label='Ending value for format'
,LABEL char(256) label='Format value label'
FMTNAME char(32) label='Format name'
/*
to accomodate larger START values, mp_loadformat.sas will need the
SQL dependency removed (proc sql needs to accomodate 3 index values in
a 32767 ibufsize limit)
*/
,START char(10000) label='Starting value for format'
,END char(32767) label='Ending value for format'
,LABEL char(32767) label='Format value label'
,MIN num length=3 label='Minimum length'
,MAX num length=3 label='Maximum length'
,DEFAULT num length=3 label='Default length'
,LENGTH num length=3 label='Format length'
,FUZZ num label='Fuzz value'
,PREFIX char(2) label='Prefix characters'
,MULT num label='Multiplier'
,FILL char(1) label='Fill character'
,NOEDIT num length=3 label='Is picture string noedit?'
,TYPE char(1) label='Type of format'
,SEXCL char(1) label='Start exclusion'
,EEXCL char(1) label='End exclusion'
,HLO char(13) label='Additional information'
,DECSEP char(1) label='Decimal separator'
,DIG3SEP char(1) label='Three-digit separator'
,DEFAULT num length=3 label='Default length'
,LENGTH num length=3 label='Format length'
,FUZZ num label='Fuzz value'
,PREFIX char(2) label='Prefix characters'
,MULT num label='Multiplier'
,FILL char(1) label='Fill character'
,NOEDIT num length=3 label='Is picture string noedit?'
,TYPE char(1) label='Type of format'
,SEXCL char(1) label='Start exclusion'
,EEXCL char(1) label='End exclusion'
,HLO char(13) label='Additional information'
,DECSEP char(1) label='Decimal separator'
,DIG3SEP char(1) label='Three-digit separator'
,DATATYPE char(8) label='Date/time/datetime?'
,LANGUAGE char(8) label='Language for date strings'
);

View File

@@ -0,0 +1,76 @@
/**
@file
@brief Testing mp_loadformat.sas macro
<h4> SAS Macros </h4>
@li mp_loadformat.sas
@li mp_assert.sas
@li mp_assertscope.sas
**/
/* prep format catalog */
libname perm (work);
data work.loadfmts;
length fmtname $32;
eexcl='Y';
type='N';
do i=1 to 100;
fmtname=cats('SASJS_',i,'X');
do j=1 to 100;
start=cats(j);
end=cats(j+1);
label= cats('Dummy ',start);
output;
end;
end;
run;
proc format cntlin=work.loadfmts library=perm.testcat;
run;
/* make some test data */
data work.stagedata;
set work.loadfmts;
type='N';
eexcl='Y';
if _n_<150 then deleteme='Yes';
else if _n_<250 then label='mod'!!cats(_n_);
else if _n_<350 then do;
start=cats(_n_);
end=cats(_n_+1);
label='newval'!!cats(_N_);
end;
else stop;
run;
/* load the above */
%mp_assertscope(SNAPSHOT)
%mp_loadformat(perm.testcat
,work.stagedata
,loadtarget=YES
,auditlibds=0
,locklibds=0
,delete_col=deleteme
,outds_add=add_test1
,outds_del=del_test1
,outds_mod=mod_test1
,mdebug=1
)
%mp_assertscope(COMPARE)
%mp_assert(
iftrue=(%mf_nobs(del_test1)=149),
desc=Test 1 - delete obs,
outds=work.test_results
)
%mp_assert(
iftrue=(%mf_nobs(add_test1)=100),
desc=Test 1 - add obs,
outds=work.test_results
)
%mp_assert(
iftrue=(%mf_nobs(mod_test1)=100),
desc=Test 1 - mod obs,
outds=work.test_results
)

View File

@@ -0,0 +1,41 @@
/**
@file
@brief Testing mp_md5.sas macro
<h4> SAS Macros </h4>
@li mp_md5.sas
@li mp_assert.sas
@li mp_assertscope.sas
**/
%global hash1 hash2 hash3;
%mp_assertscope(SNAPSHOT)
data work.test1 /nonote2err;
c1='';
c2=repeat('x',32767);
c3=' f';
n1=.a;
n2=.;
n3=1.0000000001;
hash=%mp_md5(cvars=c1 c2 c3,nvars=n1 n2 n3);
call symputx('hash1',hash);
n1=.b;
hash=%mp_md5(cvars=c1 c2 c3,nvars=n1 n2 n3);
call symputx('hash2',hash);
c3='f';
hash=%mp_md5(cvars=c1 c2 c3,nvars=n1 n2 n3);
call symputx('hash3',hash);
run;
%mp_assertscope(COMPARE,ignorelist=HASH1 HASH2 HASH3)
%mp_assert(
iftrue=("&hash1" ne "&hash2"),
desc=Checking first hash diff,
outds=work.test_results
)
%mp_assert(
iftrue=("&hash2" ne "&hash3"),
desc=Checking first hash diff,
outds=work.test_results
)

View File

@@ -19,7 +19,7 @@
%macro loglevel();
%if "&_debug"="2477" or "&_debug"="fields,log,trace" %then %do;
%put debug mode activated;
options mprint;
options mprint mprintnest;
%end;
%mend loglevel;