From c9d8df0a4814f291eea94f28ad2e402d140c3daf Mon Sep 17 00:00:00 2001 From: munja Date: Wed, 15 Dec 2021 10:16:24 +0000 Subject: [PATCH] fix: updates from testing mp_storediffs (impact on some related macros) --- all.sas | 241 ++++++++++++++++++++++++++++++++++++++- base/mf_getvarlist.sas | 2 +- base/mp_include.sas | 1 + base/mp_lockanytable.sas | 2 +- base/mp_storediffs.sas | 45 ++++++-- 5 files changed, 279 insertions(+), 12 deletions(-) diff --git a/all.sas b/all.sas index b6be7ca..bc64229 100644 --- a/all.sas +++ b/all.sas @@ -1128,7 +1128,7 @@ https://github.com/yabwon/SAS_PACKAGES/blob/main/packages/baseplus.md#functionex %put &sysmacroname: SYSMSG= %sysfunc(sysmsg()); %let rc=%sysfunc(close(&dsid)); %end; - &outvar + %do;%unquote(&outvar)%end; %mend mf_getvarlist;/** @file @brief Returns the position of a variable in dataset (varnum attribute). @@ -6359,6 +6359,7 @@ run; /* prepare the errds */ data &errds; length msg mac $1000; + call missing(msg,mac); iftrue='1=0'; run; @@ -7004,7 +7005,7 @@ run; %else %do; data _null_; putlog 'NOTE-' / 'NOTE-'; - putlog "NOTE- &sysmacroname: Table &lib..&ds locked at "@ + putlog "NOTE- &sysmacroname: Table &lib..&ds locked at "@; putlog " %sysfunc(datetime(),datetime19.) (iteration &x)"@; putlog 'NOTE-' / 'NOTE-'; run; @@ -8085,6 +8086,242 @@ run; %mend mp_sortinplace;/** + @file + @brief Converts deletes/changes/appends into a single audit table. + @details When tracking changes to data over time, it can be helpful to have + a single base table to track ALL modifications - enabling audit trail, + data recovery, and change re-application. This macro is one of many + data management utilities used in [Data Controller for SAS]( + https:datacontroller.io) - a comprehensive data ingestion solution, which + works on any SAS platform (Viya, SAS 9, Foundation) and is free for up to 5 + users. + + NOTE - this macro does not validate the inputs. 
It is assumed that the + datasets containing the new / changed / deleted rows are CORRECT, contain + no additional (or missing columns), and that the originals dataset contains + all relevant base records (and no additionals). + + Usage: + + data work.orig work.deleted work.changed work.appended; + set sashelp.class; + if _n_=1 then do; + output work.orig work.deleted; + end; + else if _n_=2 then do; + output work.orig; + age=99; + output work.changed; + end; + else do; + name='Newbie'; + output work.appended; + stop; + end; + run; + + %mp_storediffs(sashelp.class,work.orig,NAME + ,delds=work.deleted + ,modds=work.changed + ,appds=work.appended + ,outds=work.final + ,mdebug=1 + ) + + @param [in] libds Target table against which the changes were applied + @param [in] origds Dataset with original (unchanged) records. Can be empty if + only appending. + @param [in] key Space seperated list of key variables + @param [in] delds= (0) Dataset with deleted records + @param [in] appds= (0) Dataset with appended records + @param [in] modds= (0) Dataset with modified records + @param [out] outds= (work.mp_storediffs) Output table containing stored data. + Has the following format: + + proc sql; + create table &outds( + load_ref char(36) label='unique load reference', + processed_dttm num format=E8601DT26.6 label='Processed at timestamp', + libref char(8) label='Library Reference (8 chars)', + dsn char(32) label='Dataset Name (32 chars)', + key_hash char(32) label= + 'MD5 Hash of primary key values (pipe seperated)', + move_type char(1) label='Either (A)ppended, (D)eleted or (M)odified', + is_pk num label='Is Primary Key Field? (1/0)', + is_diff num label= + 'Did value change? (1/0/-1). 
Always -1 for appends and deletes.', + tgtvar_type char(1) label='Either (C)haracter or (N)umeric', + tgtvar_nm char(32) label='Target variable name (32 chars)', + oldval_num num label='Old (numeric) value', + newval_num num label='New (numeric) value', + oldval_char char(32767) label='Old (character) value', + newval_char char(32767) label='New (character) value', + constraint pk_mpe_audit + primary key(load_ref,libref,dsn,key_hash,tgtvar_nm) + ); + + @param [in] processed_dttm= (0) Provide a datetime constant in relation to + the actual load time. If not provided, current timestamp is used. + @param [in] mdebug= set to 1 to enable DEBUG messages and preserve outputs + @param [out] loadref= (0) Provide a unique key to reference the load, + otherwise a UUID will be generated. + +

<h4> SAS Macros </h4>

+ @li mf_getquotedstr.sas + @li mf_getuniquename.sas + @li mf_getvarlist.sas + + @version 9.2 + @author Allan Bowe +**/ +/** @cond */ + +%macro mp_storediffs(libds + ,origds + ,key + ,delds=0 + ,appds=0 + ,modds=0 + ,outds=work.mp_storediffs + ,loadref=0 + ,processed_dttm=0 + ,mdebug=0 +)/*/STORE SOURCE*/; +%local dbg; +%if &mdebug=1 %then %do; + %put &sysmacroname entry vars:; + %put _local_; +%end; +%else %let dbg=*; + +/* set up unique and temporary vars */ +%local ds1 ds2 ds3 ds4 hashkey inds_auto inds_keep dslist; +%let ds1=%upcase(work.%mf_getuniquename(prefix=mpsd_ds1)); +%let ds2=%upcase(work.%mf_getuniquename(prefix=mpsd_ds2)); +%let ds3=%upcase(work.%mf_getuniquename(prefix=mpsd_ds3)); +%let ds4=%upcase(work.%mf_getuniquename(prefix=mpsd_ds4)); +%let hashkey=%upcase(%mf_getuniquename(prefix=mpsd_hashkey)); +%let inds_auto=%upcase(%mf_getuniquename(prefix=mpsd_inds_auto)); +%let inds_keep=%upcase(%mf_getuniquename(prefix=mpsd_inds_keep)); + +%let dslist=&origds; +%if &delds ne 0 %then %do; + %let delds=%upcase(&delds); + %if %scan(&delds,-1,.)=&delds %then %let delds=WORK.&delds; + %let dslist=&dslist &delds; +%end; +%if &appds ne 0 %then %do; + %let appds=%upcase(&appds); + %if %scan(&appds,-1,.)=&appds %then %let appds=WORK.&appds; + %let dslist=&dslist &appds; +%end; +%if &modds ne 0 %then %do; + %let modds=%upcase(&modds); + %if %scan(&modds,-1,.)=&modds %then %let modds=WORK.&modds; + %let dslist=&dslist &modds; +%end; + +%let origds=%upcase(&origds); +%if %scan(&origds,-1,.)=&origds %then %let origds=WORK.&origds; + +%let key=%upcase(&key); + +/* hash the key and append all the tables (marking the source) */ +data &ds1; + set &dslist indsname=&inds_auto; + &hashkey=put(md5(catx('|',%mf_getquotedstr(&key,quote=N))),$hex32.); + &inds_keep=&inds_auto; +proc sort; + by &inds_keep &hashkey; +run; + +/* transpose numeric & char vars */ +proc transpose data=&ds1 + out=&ds2(rename=(&hashkey=key_hash _name_=tgtvar_nm col1=newval_num)); + by &inds_keep 
&hashkey; + var _numeric_; +run; +proc transpose data=&ds1 + out=&ds3( + rename=(&hashkey=key_hash _name_=tgtvar_nm col1=newval_char) + where=(tgtvar_nm not in ("&hashkey","&inds_keep")) + ); + by &inds_keep &hashkey; + var _character_; +run; +data &ds4; + length &inds_keep $41 tgtvar_nm $32; + set &ds2 &ds3 indsname=&inds_auto; + + tgtvar_nm=upcase(tgtvar_nm); + if tgtvar_nm in (%upcase(%mf_getvarlist(&libds,dlm=%str(,),quote=DOUBLE))); + + if &inds_auto="&ds2" then tgtvar_type='N'; + else if &inds_auto="&ds3" then tgtvar_type='C'; + else do; + putlog "%str(ERR)OR: unidentified vartype input!" &inds_auto; + call symputx('syscc',98); + end; + + if &inds_keep="&appds" then move_type='A'; + else if &inds_keep="&delds" then move_type='D'; + else if &inds_keep="&modds" then move_type='M'; + else if &inds_keep="&origds" then move_type='O'; + else do; + putlog "%str(ERR)OR: unidentified movetype input!" &inds_keep; + call symputx('syscc',99); + end; + tgtvar_nm=upcase(tgtvar_nm); + if tgtvar_nm in (%mf_getquotedstr(&key)) then is_pk=1; + else is_pk=0; + drop &inds_keep; +run; + +%if "&loadref"="0" %then %let loadref=%sysfunc(uuidgen()); +%if &processed_dttm=0 %then %let processed_dttm=%sysfunc(datetime()); +%let libds=%upcase(&libds); + +/* join orig vals for modified & deleted */ +proc sql; +create table &outds as + select "&loadref" as load_ref length=36 + ,&processed_dttm as processed_dttm format=E8601DT26.6 + ,"%scan(&libds,1,.)" as libref length=8 + ,"%scan(&libds,2,.)" as dsn length=32 + ,b.key_hash length=32 + ,b.move_type length=1 + ,b.tgtvar_nm length=32 + ,b.is_pk + ,case when b.move_type ne 'M' then -1 + when a.newval_num=b.newval_num and a.newval_char=b.newval_char then 0 + else 1 + end as is_diff + ,b.tgtvar_type length=1 + ,case when b.move_type='D' then b.newval_num + else a.newval_num + end as oldval_num + ,case when b.move_type='D' then . 
+ else b.newval_num + end as newval_num + ,case when b.move_type='D' then b.newval_char + else a.newval_char + end as oldval_char length=32767 + ,case when b.move_type='D' then '' + else b.newval_char + end as newval_char length=32767 + from &ds4(where=(move_type='O')) as a + full join &ds4(where=(move_type ne 'O')) as b + on a.tgtvar_nm=b.tgtvar_nm + and a.key_hash=b.key_hash + order by move_type, key_hash,is_pk desc, tgtvar_nm; + +%if &mdebug=0 %then %do; + proc sql; + drop table &ds1, &ds2, &ds3, &ds4; +%end; + +%mend mp_storediffs; +/** @endcond *//** @file @brief Capture session start / finish times and request details @details For details, see diff --git a/base/mf_getvarlist.sas b/base/mf_getvarlist.sas index 731042e..b9625da 100755 --- a/base/mf_getvarlist.sas +++ b/base/mf_getvarlist.sas @@ -70,5 +70,5 @@ %put &sysmacroname: SYSMSG= %sysfunc(sysmsg()); %let rc=%sysfunc(close(&dsid)); %end; - &outvar + %do;%unquote(&outvar)%end; %mend mf_getvarlist; \ No newline at end of file diff --git a/base/mp_include.sas b/base/mp_include.sas index 678d9e8..4669ac4 100644 --- a/base/mp_include.sas +++ b/base/mp_include.sas @@ -88,6 +88,7 @@ run; /* prepare the errds */ data &errds; length msg mac $1000; + call missing(msg,mac); iftrue='1=0'; run; diff --git a/base/mp_lockanytable.sas b/base/mp_lockanytable.sas index ea91ca7..d25be87 100644 --- a/base/mp_lockanytable.sas +++ b/base/mp_lockanytable.sas @@ -180,7 +180,7 @@ run; %else %do; data _null_; putlog 'NOTE-' / 'NOTE-'; - putlog "NOTE- &sysmacroname: Table &lib..&ds locked at "@ + putlog "NOTE- &sysmacroname: Table &lib..&ds locked at "@; putlog " %sysfunc(datetime(),datetime19.) (iteration &x)"@; putlog 'NOTE-' / 'NOTE-'; run; diff --git a/base/mp_storediffs.sas b/base/mp_storediffs.sas index 1971c82..2920876 100644 --- a/base/mp_storediffs.sas +++ b/base/mp_storediffs.sas @@ -9,6 +9,11 @@ works on any SAS platform (Viya, SAS 9, Foundation) and is free for up to 5 users. 
+ NOTE - this macro does not validate the inputs. It is assumed that the + datasets containing the new / changed / deleted rows are CORRECT, contain + no additional (or missing columns), and that the originals dataset contains + all relevant base records (and no additionals). + Usage: data work.orig work.deleted work.changed work.appended; @@ -33,6 +38,7 @@ ,modds=work.changed ,appds=work.appended ,outds=work.final + ,mdebug=1 ) @param [in] libds Target table against which the changes were applied @@ -44,10 +50,11 @@ @param [in] modds= (0) Dataset with modified records @param [out] outds= (work.mp_storediffs) Output table containing stored data. Has the following format: + proc sql; create table &outds( load_ref char(36) label='unique load reference', - processed_dttm num format=E8601DT26.6, label='Processed at timestamp' + processed_dttm num format=E8601DT26.6 label='Processed at timestamp', libref char(8) label='Library Reference (8 chars)', dsn char(32) label='Dataset Name (32 chars)', key_hash char(32) label= @@ -60,17 +67,22 @@ tgtvar_nm char(32) label='Target variable name (32 chars)', oldval_num num label='Old (numeric) value', newval_num num label='New (numeric) value', - oldval_char char(32767) label='Old (character) value',, + oldval_char char(32767) label='Old (character) value', newval_char char(32767) label='New (character) value', + constraint pk_mpe_audit + primary key(load_ref,libref,dsn,key_hash,tgtvar_nm) ); + @param [in] processed_dttm= (0) Provide a datetime constant in relation to the actual load time. If not provided, current timestamp is used. + @param [in] mdebug= set to 1 to enable DEBUG messages and preserve outputs @param [out] loadref= (0) Provide a unique key to reference the load, otherwise a UUID will be generated.

<h4> SAS Macros </h4>

@li mf_getquotedstr.sas @li mf_getuniquename.sas + @li mf_getvarlist.sas @version 9.2 @author Allan Bowe @@ -86,7 +98,14 @@ ,outds=work.mp_storediffs ,loadref=0 ,processed_dttm=0 + ,mdebug=0 )/*/STORE SOURCE*/; +%local dbg; +%if &mdebug=1 %then %do; + %put &sysmacroname entry vars:; + %put _local_; +%end; +%else %let dbg=*; /* set up unique and temporary vars */ %local ds1 ds2 ds3 ds4 hashkey inds_auto inds_keep dslist; @@ -95,7 +114,7 @@ %let ds3=%upcase(work.%mf_getuniquename(prefix=mpsd_ds3)); %let ds4=%upcase(work.%mf_getuniquename(prefix=mpsd_ds4)); %let hashkey=%upcase(%mf_getuniquename(prefix=mpsd_hashkey)); -%let inds_auto=%upcase(%mf_getuniquename(prefix=mpsd_inds_auto); +%let inds_auto=%upcase(%mf_getuniquename(prefix=mpsd_inds_auto)); %let inds_keep=%upcase(%mf_getuniquename(prefix=mpsd_inds_keep)); %let dslist=&origds; @@ -123,7 +142,7 @@ /* hash the key and append all the tables (marking the source) */ data &ds1; set &dslist indsname=&inds_auto; - &hashkey=put(md5(cats(%mf_getquotedstr(&key,quote=N))),$hex32.); + &hashkey=put(md5(catx('|',%mf_getquotedstr(&key,quote=N))),$hex32.); &inds_keep=&inds_auto; proc sort; by &inds_keep &hashkey; @@ -146,6 +165,10 @@ run; data &ds4; length &inds_keep $41 tgtvar_nm $32; set &ds2 &ds3 indsname=&inds_auto; + + tgtvar_nm=upcase(tgtvar_nm); + if tgtvar_nm in (%upcase(%mf_getvarlist(&libds,dlm=%str(,),quote=DOUBLE))); + if &inds_auto="&ds2" then tgtvar_type='N'; else if &inds_auto="&ds3" then tgtvar_type='C'; else do; @@ -187,11 +210,15 @@ create table &outds as else 1 end as is_diff ,b.tgtvar_type length=1 - ,a.newval_num as oldval_num + ,case when b.move_type='D' then b.newval_num + else a.newval_num + end as oldval_num ,case when b.move_type='D' then . 
else b.newval_num end as newval_num - ,a.newval_char as oldval_char length=32767 + ,case when b.move_type='D' then b.newval_char + else a.newval_char + end as oldval_char length=32767 ,case when b.move_type='D' then '' else b.newval_char end as newval_char length=32767 @@ -201,8 +228,10 @@ create table &outds as and a.key_hash=b.key_hash order by move_type, key_hash,is_pk desc, tgtvar_nm; -%mp_dropmembers(&ds1 &ds2 &ds3 &ds4) +%if &mdebug=0 %then %do; + proc sql; + drop table &ds1, &ds2, &ds3, &ds4; +%end; %mend mp_storediffs; - /** @endcond */ \ No newline at end of file