1
0
mirror of https://github.com/sasjs/core.git synced 2025-12-10 14:04:36 +00:00

feat: mp_stripdiffs macro - closes #373

This commit is contained in:
^
2024-04-25 10:45:23 +01:00
parent e616bc940f
commit 556c7bdb28
6 changed files with 579 additions and 11 deletions

247
all.sas
View File

@@ -5541,13 +5541,21 @@ data _null_;
header = cats(coalescec(varlabel(dsid,i),varnm),dlm);
%end;
%else %if &headerformat=SASJS %then %do;
if vartype(dsid,i)='C' then header=cats(varnm,':$char',varlen(dsid,i),'.');
vlen=varlen(dsid,i);
if vartype(dsid,i)='C' then header=cats(varnm,':$char',vlen,'.');
else do;
vfmt=coalescec(varfmt(dsid,i),'0');
fmttype=mcf_getfmttype(vfmt);
if fmttype='DATE' then header=cats(varnm,':date9.');
else if fmttype='DATETIME' then header=cats(varnm,':E8601DT26.6');
else if fmttype='TIME' then header=cats(varnm,':TIME12.');
/**
* there is not much point importing a short length numeric like this,
* eg with best4., as the resulting variable will still be stored as
* length 8. We need a length or format statement to ensure variable
is created with the smaller length...
**/
else if vlen<8 then header=cats(varnm,':best',vlen,'.');
else header=cats(varnm,':best.');
end;
%end;
@@ -5574,6 +5582,7 @@ data _null_;
set &ds end=last;
%do i=1 %to &vcnt;
%let var=%scan(&varlist,&i);
%local vlen&i;
%if %mf_getvartype(&ds,&var)=C %then %do;
%let dsv1=%mf_getuniquename(prefix=csvcol1_);
%let dsv2=%mf_getuniquename(prefix=csvcol2_);
@@ -12466,6 +12475,7 @@ run;
@li mp_coretable.sas
@li mp_stackdiffs.test.sas
@li mp_storediffs.sas
@li mp_stripdiffs.sas
@todo The current approach assumes that a variable called KEY_HASH is not on
the base table. This part will need to be refactored (eg using
@@ -12924,6 +12934,7 @@ select distinct tgtvar_nm into: missvars separated by ' '
<h4> Related Macros </h4>
@li mp_stackdiffs.sas
@li mp_storediffs.test.sas
@li mp_stripdiffs.sas
@version 9.2
@author Allan Bowe
@@ -13043,7 +13054,7 @@ data &ds4;
run;
%if "&loadref"="0" %then %let loadref=%sysfunc(uuidgen());
%if &processed_dttm=0 %then %let processed_dttm=%sysfunc(datetime());
%if &processed_dttm=0 %then %let processed_dttm=%sysfunc(datetime(),8.6);
%let libds=%upcase(&libds);
/* join orig vals for modified & deleted */
@@ -13373,6 +13384,229 @@ run;
%mend mp_streamfile;
/**
  @file
  @brief Generates a stage dataset to revert diffs tracked in an audit table
  @details A big benefit of tracking data changes in an audit table is that
  those changes can be subsequently reverted if necessary!

  This macro prepares a staging dataset containing those differences - eg for:

  @li deleted rows - these are re-inserted
  @li changed rows - differences are reverted
  @li added rows - these are marked with `_____DELETE_THIS_RECORD_____="Yes"`

  These changes are NOT applied to the base table - a staging dataset is
  simply prepared for an ETL process to action.  In Data Controller, this
  dataset is used directly as an input to the APPROVE process (so that the
  reversion diffs can be reviewed prior to being applied).

  @param [in] libds Base library.dataset (will not be modified).  The library
    must be assigned.
  @param [in] loadref Unique identifier for the version to be reverted.  This
    change, plus ALL SUBSEQUENT CHANGES, will be reverted in the output table.
  @param [in] difftable The dataset containing the diffs.  Definition available
    in mddl_dc_difftable.sas
  @param [out] outds= (work.mp_stripdiffs) Output table containing the diffs.
    Has the same format as the base dataset, plus a
    `_____DELETE_THIS_RECORD_____` variable.
  @param [in] mdebug= (0) Set to 1 to enable DEBUG messages and preserve
    outputs

  <h4> SAS Macros </h4>
  @li mf_getuniquefileref.sas
  @li mf_getuniquename.sas
  @li mf_islibds.sas
  @li mp_abort.sas
  @li mp_ds2squeeze.sas

  <h4> Related Macros </h4>
  @li mddl_dc_difftable.sas
  @li mp_stackdiffs.sas
  @li mp_storediffs.sas
  @li mp_stripdiffs.test.sas

  @version 9.2
  @author Allan Bowe
**/
/** @cond */

%macro mp_stripdiffs(libds
  ,loadref
  ,difftable
  ,outds=work.mp_stripdiffs
  ,mdebug=0
)/*/STORE SOURCE*/;
%local dbg;
%if &mdebug=1 %then %do;
  %put &sysmacroname entry vars:;
  %put _local_;
%end;
%else %let dbg=*;

%let libds=%upcase(&libds);

/* safety checks */
%mp_abort(iftrue= (&syscc ne 0)
  ,mac=&sysmacroname
  ,msg=%str(SYSCC=&syscc on entry. Clean session required!)
)
%mp_abort(iftrue= (%mf_islibds(&libds)=0)
  ,mac=&sysmacroname
  ,msg=%str(Invalid library.dataset reference - %superq(libds))
)

/* set up unique and temporary vars */
/* x is the loop index used when building the join condition - declared */
/* here so that a variable of the same name in a calling scope is untouched */
%local ds1 ds2 ds3 ds4 ds5 fref1 x;
%let fref1=%mf_getuniquefileref();

/* get timestamp of the diff to be reverted */
%local ts;
proc sql noprint;
select put(processed_dttm,datetime19.6) into: ts
  from &difftable where load_ref="&loadref";

%mp_abort(iftrue= (&sqlobs=0)
  ,mac=&sysmacroname
  ,msg=%str(Load ref %superq(loadref) not found!)
)

/* extract diffs for this base table from this timestamp onwards */
/* (same PROC SQL session as above - it stays open until the next step) */
%let ds1=%upcase(work.%mf_getuniquename(prefix=mpsd_diffs));
create table &ds1 (drop=libref dsn) as
  select * from &difftable
  where upcase(cats(libref))="%scan(&libds,1,.)"
    and upcase(cats(dsn))="%scan(&libds,2,.)"
    and processed_dttm ge "&ts"dt
  order by processed_dttm desc, key_hash, is_pk;

/* extract key values only */
%let ds2=%upcase(work.%mf_getuniquename(prefix=mpsd_pks));
create table &ds2 as
  select key_hash,
    tgtvar_nm,
    tgtvar_type,
    coalescec(oldval_char,newval_char) as charval,
    coalesce(oldval_num, newval_num) as numval,
    processed_dttm
  from &ds1
  where is_pk=1
  order by key_hash, processed_dttm;

/* grab pk values */
/* the key column names are collected from the first key_hash group only */
%local pk;
data _null_;
  set &ds2;
  by key_hash;
  call symputx('pk',catx(' ',symget('pk'),tgtvar_nm),'l');
  if last.key_hash then stop;
run;

/* one row per key_hash, one column per character key variable */
%let ds3=%upcase(work.%mf_getuniquename(prefix=mpsd_keychar));
proc transpose data=&ds2(where=(tgtvar_type='C'))
    out=&ds3(drop=_name_);
  by KEY_HASH;
  id TGTVAR_NM;
  var charval;
run;

/* one row per key_hash, one column per numeric key variable */
%let ds4=%upcase(work.%mf_getuniquename(prefix=mpsd_keynum));
proc transpose data=&ds2(where=(tgtvar_type='N'))
    out=&ds4(drop=_name_);
  by KEY_HASH;
  id TGTVAR_NM;
  var numval;
run;

/* shorten the lengths */
%mp_ds2squeeze(&ds3,outds=&ds3)
%mp_ds2squeeze(&ds4,outds=&ds4)

/* combine character and numeric key columns into one row per key_hash */
%let ds5=%upcase(work.%mf_getuniquename(prefix=mpsd_merged));
data &ds5;
  merge &ds3 &ds4;
  by key_hash;
  if not missing(key_hash);
run;

/* join to base table for preliminary stage DS */
/* the ON clause gets one equality condition per key variable in pk */
proc sql;
create table &outds as select "No " as _____DELETE_THIS_RECORD_____,
    b.*
  from &ds5 a
  inner join &libds b
  on 1=1
%do x=1 %to %sysfunc(countw(&pk,%str( )));
  and a.%scan(&pk,&x,%str( ))=b.%scan(&pk,&x,%str( ))
%end;
;

/* create SAS code to apply to stage_ds */
/* input is sorted newest change first, and within each key_hash the */
/* non-key rows (is_pk=0) come before the key rows (is_pk=1) */
data _null_;
  set &ds1;
  file &fref1;
  if _n_=1 then put 'proc sql noprint;';
  by descending processed_dttm key_hash is_pk;
  if move_type='M' then do;
    /* modified record: set the old values, located via the key values */
    if first.key_hash then do;
      put "update &outds set " @@;
    end;
    if IS_PK=0 then do;
      put " " tgtvar_nm '=' @@;
      charval=quote(cats(oldval_char));
      if tgtvar_type='C' then put charval @@;
      else put oldval_num @@;
      if not last.is_pk then put ',';
    end;
    else do;
      if first.is_pk then put " where 1=1 " @@;
      put " and " tgtvar_nm '=' @@;
      charval=quote(cats(oldval_char));
      if tgtvar_type='C' then put charval @@;
      else put oldval_num @@;
    end;
  end;
  else if move_type='A' then do;
    /* added record: flag the matching staged row for deletion */
    if first.key_hash then do;
      put "update &outds set _____DELETE_THIS_RECORD_____='Yes' where 1=1 " @@;
    end;
    /* gating if - as only need PK now */
    if is_pk=1;
    put ' AND ' tgtvar_nm '=' @@;
    charval=quote(cats(newval_char));
    if tgtvar_type='C' then put charval @@;
    else put newval_num @@;
  end;
  else if move_type='D' then do;
    /* deleted record: re-insert it using the old values */
    if first.key_hash then do;
      put "insert into &outds set _____DELETE_THIS_RECORD_____='No' " @@;
    end;
    put " ," tgtvar_nm '=' @@;
    charval=quote(cats(oldval_char));
    if tgtvar_type='C' then put charval @@;
    else put oldval_num @@;
  end;
  /* terminate the generated statement at the end of each key_hash group */
  if last.key_hash then put ';';
run;

/* apply the modification statements */
%inc &fref1/source2;

%if &mdebug=0 %then %do;
  proc sql;
  drop table &ds1, &ds2, &ds3, &ds4, &ds5;
  quit;
  /* FILENAME is a global statement - FILE is only valid in a data step */
  filename &fref1 clear;
%end;
%else %do;
  /* debug mode: keep the work tables and echo the generated SQL to the log */
  data _null_;
    infile &fref1;
    input;
    if _n_=1 then putlog "Contents of SQL adjustments";
    putlog _infile_;
  run;
%end;

%mend mp_stripdiffs;
/** @endcond *//**
@file
@brief Runs arbitrary code for a specified amount of time
@details Executes a series of procs and data steps to enable performance
@@ -17964,10 +18198,11 @@ run;
@param [in] user= the metadata user to return groups for. Leave blank for all
groups.
@param [in] repo= the metadata repository that contains the user/group
information
@param [in] mDebug= set to 1 to show debug messages in the log
@param [out] outds= the dataset to create that contains the list of groups
@param [in] repo= (foundation) the metadata repository that contains the
user/group information
@param [in] mDebug= (0) set to 1 to show debug messages in the log
@param [out] outds= (work.mm_getgroups) The dataset to create that contains
the list of groups
@returns outds dataset containing all groups in a column named "metagroup"
- groupuri

View File

@@ -197,6 +197,7 @@
@li mp_coretable.sas
@li mp_stackdiffs.test.sas
@li mp_storediffs.sas
@li mp_stripdiffs.sas
@todo The current approach assumes that a variable called KEY_HASH is not on
the base table. This part will need to be refactored (eg using

View File

@@ -64,6 +64,7 @@
<h4> Related Macros </h4>
@li mp_stackdiffs.sas
@li mp_storediffs.test.sas
@li mp_stripdiffs.sas
@version 9.2
@author Allan Bowe
@@ -183,7 +184,7 @@ data &ds4;
run;
%if "&loadref"="0" %then %let loadref=%sysfunc(uuidgen());
%if &processed_dttm=0 %then %let processed_dttm=%sysfunc(datetime());
%if &processed_dttm=0 %then %let processed_dttm=%sysfunc(datetime(),8.6);
%let libds=%upcase(&libds);
/* join orig vals for modified & deleted */

224
base/mp_stripdiffs.sas Normal file
View File

@@ -0,0 +1,224 @@
/**
  @file
  @brief Generates a stage dataset to revert diffs tracked in an audit table
  @details A big benefit of tracking data changes in an audit table is that
  those changes can be subsequently reverted if necessary!

  This macro prepares a staging dataset containing those differences - eg for:

  @li deleted rows - these are re-inserted
  @li changed rows - differences are reverted
  @li added rows - these are marked with `_____DELETE_THIS_RECORD_____="Yes"`

  These changes are NOT applied to the base table - a staging dataset is
  simply prepared for an ETL process to action.  In Data Controller, this
  dataset is used directly as an input to the APPROVE process (so that the
  reversion diffs can be reviewed prior to being applied).

  @param [in] libds Base library.dataset (will not be modified).  The library
    must be assigned.
  @param [in] loadref Unique identifier for the version to be reverted.  This
    change, plus ALL SUBSEQUENT CHANGES, will be reverted in the output table.
  @param [in] difftable The dataset containing the diffs.  Definition available
    in mddl_dc_difftable.sas
  @param [out] outds= (work.mp_stripdiffs) Output table containing the diffs.
    Has the same format as the base dataset, plus a
    `_____DELETE_THIS_RECORD_____` variable.
  @param [in] mdebug= (0) Set to 1 to enable DEBUG messages and preserve
    outputs

  <h4> SAS Macros </h4>
  @li mf_getuniquefileref.sas
  @li mf_getuniquename.sas
  @li mf_islibds.sas
  @li mp_abort.sas
  @li mp_ds2squeeze.sas

  <h4> Related Macros </h4>
  @li mddl_dc_difftable.sas
  @li mp_stackdiffs.sas
  @li mp_storediffs.sas
  @li mp_stripdiffs.test.sas

  @version 9.2
  @author Allan Bowe
**/
/** @cond */

%macro mp_stripdiffs(libds
  ,loadref
  ,difftable
  ,outds=work.mp_stripdiffs
  ,mdebug=0
)/*/STORE SOURCE*/;
%local dbg;
%if &mdebug=1 %then %do;
  %put &sysmacroname entry vars:;
  %put _local_;
%end;
%else %let dbg=*;

%let libds=%upcase(&libds);

/* safety checks */
%mp_abort(iftrue= (&syscc ne 0)
  ,mac=&sysmacroname
  ,msg=%str(SYSCC=&syscc on entry. Clean session required!)
)
%mp_abort(iftrue= (%mf_islibds(&libds)=0)
  ,mac=&sysmacroname
  ,msg=%str(Invalid library.dataset reference - %superq(libds))
)

/* set up unique and temporary vars */
/* x is the loop index used when building the join condition - declared */
/* here so that a variable of the same name in a calling scope is untouched */
%local ds1 ds2 ds3 ds4 ds5 fref1 x;
%let fref1=%mf_getuniquefileref();

/* get timestamp of the diff to be reverted */
%local ts;
proc sql noprint;
select put(processed_dttm,datetime19.6) into: ts
  from &difftable where load_ref="&loadref";

%mp_abort(iftrue= (&sqlobs=0)
  ,mac=&sysmacroname
  ,msg=%str(Load ref %superq(loadref) not found!)
)

/* extract diffs for this base table from this timestamp onwards */
/* (same PROC SQL session as above - it stays open until the next step) */
%let ds1=%upcase(work.%mf_getuniquename(prefix=mpsd_diffs));
create table &ds1 (drop=libref dsn) as
  select * from &difftable
  where upcase(cats(libref))="%scan(&libds,1,.)"
    and upcase(cats(dsn))="%scan(&libds,2,.)"
    and processed_dttm ge "&ts"dt
  order by processed_dttm desc, key_hash, is_pk;

/* extract key values only */
%let ds2=%upcase(work.%mf_getuniquename(prefix=mpsd_pks));
create table &ds2 as
  select key_hash,
    tgtvar_nm,
    tgtvar_type,
    coalescec(oldval_char,newval_char) as charval,
    coalesce(oldval_num, newval_num) as numval,
    processed_dttm
  from &ds1
  where is_pk=1
  order by key_hash, processed_dttm;

/* grab pk values */
/* the key column names are collected from the first key_hash group only */
%local pk;
data _null_;
  set &ds2;
  by key_hash;
  call symputx('pk',catx(' ',symget('pk'),tgtvar_nm),'l');
  if last.key_hash then stop;
run;

/* one row per key_hash, one column per character key variable */
%let ds3=%upcase(work.%mf_getuniquename(prefix=mpsd_keychar));
proc transpose data=&ds2(where=(tgtvar_type='C'))
    out=&ds3(drop=_name_);
  by KEY_HASH;
  id TGTVAR_NM;
  var charval;
run;

/* one row per key_hash, one column per numeric key variable */
%let ds4=%upcase(work.%mf_getuniquename(prefix=mpsd_keynum));
proc transpose data=&ds2(where=(tgtvar_type='N'))
    out=&ds4(drop=_name_);
  by KEY_HASH;
  id TGTVAR_NM;
  var numval;
run;

/* shorten the lengths */
%mp_ds2squeeze(&ds3,outds=&ds3)
%mp_ds2squeeze(&ds4,outds=&ds4)

/* combine character and numeric key columns into one row per key_hash */
%let ds5=%upcase(work.%mf_getuniquename(prefix=mpsd_merged));
data &ds5;
  merge &ds3 &ds4;
  by key_hash;
  if not missing(key_hash);
run;

/* join to base table for preliminary stage DS */
/* the ON clause gets one equality condition per key variable in pk */
proc sql;
create table &outds as select "No " as _____DELETE_THIS_RECORD_____,
    b.*
  from &ds5 a
  inner join &libds b
  on 1=1
%do x=1 %to %sysfunc(countw(&pk,%str( )));
  and a.%scan(&pk,&x,%str( ))=b.%scan(&pk,&x,%str( ))
%end;
;

/* create SAS code to apply to stage_ds */
/* input is sorted newest change first, and within each key_hash the */
/* non-key rows (is_pk=0) come before the key rows (is_pk=1) */
data _null_;
  set &ds1;
  file &fref1;
  if _n_=1 then put 'proc sql noprint;';
  by descending processed_dttm key_hash is_pk;
  if move_type='M' then do;
    /* modified record: set the old values, located via the key values */
    if first.key_hash then do;
      put "update &outds set " @@;
    end;
    if IS_PK=0 then do;
      put " " tgtvar_nm '=' @@;
      charval=quote(cats(oldval_char));
      if tgtvar_type='C' then put charval @@;
      else put oldval_num @@;
      if not last.is_pk then put ',';
    end;
    else do;
      if first.is_pk then put " where 1=1 " @@;
      put " and " tgtvar_nm '=' @@;
      charval=quote(cats(oldval_char));
      if tgtvar_type='C' then put charval @@;
      else put oldval_num @@;
    end;
  end;
  else if move_type='A' then do;
    /* added record: flag the matching staged row for deletion */
    if first.key_hash then do;
      put "update &outds set _____DELETE_THIS_RECORD_____='Yes' where 1=1 " @@;
    end;
    /* gating if - as only need PK now */
    if is_pk=1;
    put ' AND ' tgtvar_nm '=' @@;
    charval=quote(cats(newval_char));
    if tgtvar_type='C' then put charval @@;
    else put newval_num @@;
  end;
  else if move_type='D' then do;
    /* deleted record: re-insert it using the old values */
    if first.key_hash then do;
      put "insert into &outds set _____DELETE_THIS_RECORD_____='No' " @@;
    end;
    put " ," tgtvar_nm '=' @@;
    charval=quote(cats(oldval_char));
    if tgtvar_type='C' then put charval @@;
    else put oldval_num @@;
  end;
  /* terminate the generated statement at the end of each key_hash group */
  if last.key_hash then put ';';
run;

/* apply the modification statements */
%inc &fref1/source2;

%if &mdebug=0 %then %do;
  proc sql;
  drop table &ds1, &ds2, &ds3, &ds4, &ds5;
  quit;
  /* FILENAME is a global statement - FILE is only valid in a data step */
  filename &fref1 clear;
%end;
%else %do;
  /* debug mode: keep the work tables and echo the generated SQL to the log */
  data _null_;
    infile &fref1;
    input;
    if _n_=1 then putlog "Contents of SQL adjustments";
    putlog _infile_;
  run;
%end;

%mend mp_stripdiffs;
/** @endcond */

View File

@@ -11,10 +11,11 @@
@param [in] user= the metadata user to return groups for. Leave blank for all
groups.
@param [in] repo= the metadata repository that contains the user/group
information
@param [in] mDebug= set to 1 to show debug messages in the log
@param [out] outds= the dataset to create that contains the list of groups
@param [in] repo= (foundation) the metadata repository that contains the
user/group information
@param [in] mDebug= (0) set to 1 to show debug messages in the log
@param [out] outds= (work.mm_getgroups) The dataset to create that contains
the list of groups
@returns outds dataset containing all groups in a column named "metagroup"
- groupuri

View File

@@ -0,0 +1,106 @@
/**
  @file
  @brief Testing mp_stripdiffs.sas macro
  @details Stores a delete, a modification and an append against a copy of
  sashelp.class, applies them to the base table, then checks that the staging
  dataset produced by mp_stripdiffs would revert each of the three changes.

  <h4> SAS Macros </h4>
  @li mp_assert.sas
  @li mp_assertscope.sas
  @li mp_ds2md.sas
  @li mp_storediffs.sas
  @li mp_stripdiffs.sas

**/

/* make an adjustable base dataset */
/* use a composite key also (name weight) */
libname libby (work);
data libby.class;
  set sashelp.class;
run;

/* first, store some diffs */
/* row 1 is deleted, row 2 has AGE changed, row 3 is re-keyed as a new row */
data work.orig work.deleted work.changed work.appended;
  set libby.class;
  if _n_=1 then do;
    call symputx('delname',name);
    output work.orig work.deleted;
  end;
  else if _n_=2 then do;
    output work.orig;
    /* remember the key and the original value so the revert can be checked */
    call symputx('modname',name);
    call symputx('modval',age);
    age=99;
    output work.changed;
  end;
  else do;
    name='Newbie';
    output work.appended;
    stop;
  end;
run;

%mp_storediffs(libby.class,work.orig,NAME WEIGHT
  ,delds=work.deleted
  ,modds=work.changed
  ,appds=work.appended
  ,outds=work.audit
  ,loadref=UPLOAD1
  ,mdebug=0
)

%mp_ds2md(work.audit)

%mp_assert(
  iftrue=(&syscc=0),
  desc=Checking preparation case,
  outds=work.test_results
)

/* apply the changes */
proc sql;
delete from libby.class where name in ("&delname","&modname");
proc append base=libby.class data=work.appended;
proc append base=libby.class data=work.changed;
run;

/* now, prepare the revert dataset */
%mp_assertscope(SNAPSHOT)
%mp_stripdiffs(libby.class
  ,UPLOAD1
  ,work.audit
  ,outds=work.mp_stripdiffs
  ,mdebug=1
)
%mp_ds2md(work.mp_stripdiffs)
%mp_assertscope(COMPARE)

%mp_assert(
  iftrue=(&syscc=0),
  desc=Checking error condition,
  outds=work.test_results
)

/* each flag is set to 1 only if the expected row is found in the output */
%let delpass=0;
%let modpass=0;
%let addpass=0;
data _null_;
  set work.mp_stripdiffs;
  if upcase(_____DELETE_THIS_RECORD_____)='NO' and name="&delname"
    then call symputx('delpass',1);
  if name="&modname" and age=&modval then call symputx('modpass',1);
  if upcase(_____DELETE_THIS_RECORD_____)='YES' and name="Newbie"
    then call symputx('addpass',1);
run;

%mp_assert(
  iftrue=(&delpass=1),
  desc=Ensuring deleted record is back in the dataset,
  outds=work.test_results
)
%mp_assert(
  iftrue=(&modpass=1),
  desc=Ensuring modified record now has old value,
  outds=work.test_results
)
%mp_assert(
  iftrue=(&addpass=1),
  desc=Ensuring added record is now marked for deletion,
  outds=work.test_results
)