From bdd22abc5523972cdedabed917f393a4c6ecb60f Mon Sep 17 00:00:00 2001 From: munja Date: Wed, 19 Jan 2022 22:05:56 +0100 Subject: [PATCH] feat: adding delete capability (and tests) for mp_stackdiffs --- base/mp_guesspk.sas | 2 - base/mp_stackdiffs.sas | 319 +++++++++++++++++---- sasjs/sasjsconfig.json | 1 + tests/crossplatform/mp_stackdiffs.test.sas | 53 +++- 4 files changed, 311 insertions(+), 64 deletions(-) diff --git a/base/mp_guesspk.sas b/base/mp_guesspk.sas index c20ab0e..bdbdd3b 100644 --- a/base/mp_guesspk.sas +++ b/base/mp_guesspk.sas @@ -59,9 +59,7 @@ /* declare local vars */ %local var vars vcnt i j k l tmpvar tmpds rows posspks ppkcnt; %let vars=%upcase(%mf_getvarlist(&baseds)); -%put &=vars; %let vars=%mf_wordsInStr1ButNotStr2(str1=&vars,str2=%upcase(&ignore_cols)); -%put &=vars; %let vcnt=%sysfunc(countw(&vars)); %if &vcnt=0 %then %do; diff --git a/base/mp_stackdiffs.sas b/base/mp_stackdiffs.sas index a6da2be..f7d0d2c 100644 --- a/base/mp_stackdiffs.sas +++ b/base/mp_stackdiffs.sas @@ -23,23 +23,23 @@ Essentially then, what this macro does, is turn a table like this: - |LIBREF:$8.|DSN:$32.|MOVE_TYPE:$1.|TGTVAR_NM:$32.|IS_PK:best.|IS_DIFF:best.|TGTVAR_TYPE:$1.|OLDVAL_NUM:best32.|NEWVAL_NUM:best32.|OLDVAL_CHAR:$32765.|NEWVAL_CHAR:$32765.| - |---|---|---|---|---|---|---|---|---|---|---| - |`SASHELP `|`CLASS `|`A `|`NAME `|`1 `|`-1 `|`C `|`. `|`. `|` `|`Newbie `| - |`SASHELP `|`CLASS `|`A `|`AGE `|`0 `|`-1 `|`N `|`. `|`13 `|` `|` `| - |`SASHELP `|`CLASS `|`A `|`HEIGHT `|`0 `|`-1 `|`N `|`. `|`65.3 `|` `|` `| - |`SASHELP `|`CLASS `|`A `|`SEX `|`0 `|`-1 `|`C `|`. `|`. `|` `|`F `| - |`SASHELP `|`CLASS `|`A `|`WEIGHT `|`0 `|`-1 `|`N `|`. `|`98 `|` `|` `| - |`SASHELP `|`CLASS `|`D `|`NAME `|`1 `|`-1 `|`C `|`. `|`. `|`Alfred `|` `| - |`SASHELP `|`CLASS `|`D `|`AGE `|`0 `|`-1 `|`N `|`14 `|`. `|` `|` `| - |`SASHELP `|`CLASS `|`D `|`HEIGHT `|`0 `|`-1 `|`N `|`69 `|`. `|` `|` `| - |`SASHELP `|`CLASS `|`D `|`SEX `|`0 `|`-1 `|`C `|`. `|`. 
`|`M `|` `| - |`SASHELP `|`CLASS `|`D `|`WEIGHT `|`0 `|`-1 `|`N `|`112.5 `|`. `|` `|` `| - |`SASHELP `|`CLASS `|`M `|`NAME `|`1 `|`0 `|`C `|`. `|`. `|`Alice `|`Alice `| - |`SASHELP `|`CLASS `|`M `|`AGE `|`0 `|`1 `|`N `|`13 `|`99 `|` `|` `| - |`SASHELP `|`CLASS `|`M `|`HEIGHT `|`0 `|`0 `|`N `|`56.5 `|`56.5 `|` `|` `| - |`SASHELP `|`CLASS `|`M `|`SEX `|`0 `|`0 `|`C `|`. `|`. `|`F `|`F `| - |`SASHELP `|`CLASS `|`M `|`WEIGHT `|`0 `|`0 `|`N `|`84 `|`84 `|` `|` `| + |MOVE_TYPE:$1.|TGTVAR_NM:$32.|IS_PK:best.|IS_DIFF:best.|TGTVAR_TYPE:$1.|OLDVAL_NUM:best32.|NEWVAL_NUM:best32.|OLDVAL_CHAR:$32765.|NEWVAL_CHAR:$32765.| + |---|---|---|---|---|---|---|---|---| + |`A `|`NAME `|`1 `|`-1 `|`C `|`. `|`. `|` `|`Newbie `| + |`A `|`AGE `|`0 `|`-1 `|`N `|`. `|`13 `|` `|` `| + |`A `|`HEIGHT `|`0 `|`-1 `|`N `|`. `|`65.3 `|` `|` `| + |`A `|`SEX `|`0 `|`-1 `|`C `|`. `|`. `|` `|`F `| + |`A `|`WEIGHT `|`0 `|`-1 `|`N `|`. `|`98 `|` `|` `| + |`D `|`NAME `|`1 `|`-1 `|`C `|`. `|`. `|`Alfred `|` `| + |`D `|`AGE `|`0 `|`-1 `|`N `|`14 `|`. `|` `|` `| + |`D `|`HEIGHT `|`0 `|`-1 `|`N `|`69 `|`. `|` `|` `| + |`D `|`SEX `|`0 `|`-1 `|`C `|`. `|`. `|`M `|` `| + |`D `|`WEIGHT `|`0 `|`-1 `|`N `|`112.5 `|`. `|` `|` `| + |`M `|`NAME `|`1 `|`0 `|`C `|`. `|`. `|`Alice `|`Alice `| + |`M `|`AGE `|`0 `|`1 `|`N `|`13 `|`99 `|` `|` `| + |`M `|`HEIGHT `|`0 `|`0 `|`N `|`56.5 `|`56.5 `|` `|` `| + |`M `|`SEX `|`0 `|`0 `|`C `|`. `|`. `|`F `|`F `| + |`M `|`WEIGHT `|`0 `|`0 `|`N `|`84 `|`84 `|` `|` `| Into three tables like this: @@ -64,23 +64,34 @@ base table contains a PROCESSED_DTTM column (or similar), this can be ignored by declaring it in the `processed_dttm_var` parameter. - If the structure of the Base Table has changed, the following rules apply: + The macro is also flexible where columns have been added or removed from + the base table UNLESS there is a change to the primary key. - @li New Columns - Irrelevant for deletes. For inserts, the new column - values are set to missing. 
For updates, the base table values are used. - @li Deleted Columns - These will also be absent in the output tables. - @li Change in Primary Key - This is not (well, cannot be) supported!! + Changes to the primary key are NOT supported, and are likely to cause + unexpected results. + + The following pre-flight checks are made: + + @li All primary key columns exist on the base table + @li There is no change in variable TYPE for any of the columns + @li There is no reduction in variable LENGTH below the max-length of the + supplied values Rules for stacking changes are as follows: - + + + + @@ -88,7 +99,13 @@ + @@ -102,36 +119,47 @@ If the row no longer exists on the base table, then the row is added to the `errDS` table instead. +
Transaction TypeBehaviour
Transaction TypeKey BehaviourColumn Behaviour
Deletes - For previously deleted rows, the PK is added to the `outDEL` table
- If the row no longer exists in the base table, the row is added to the - `errDS` table instead. + The row is added to `&outDEL.` UNLESS it no longer exists + in the base table, in which case it is added to `&errDS.` instead. +
+ Deletes are unaffected by the addition or removal of non Primary-Key + columns.
Previously newly added rows are added to the `outADD` table UNLESS they are present in the Base table.
In this case they are added to the - `errDS` table instead. + `&errDS.` table instead. +
+ Inserts are unaffected by the addition of columns in the Base Table + (they are padded with blanks). Deleted columns are only a problem if + they appear on the previous insert - in which case the record is added + to `&errDS.`.
+ Updates are unaffected by the addition of columns in the Base Table - + the new cells are simply populated with Base Table values. Deleted + columns are only a problem if they relate to a modified cell + (`is_diff=1`) - in which case the record is added to `&errDS.`. +
To illustrate the above with a diagram: - @dot - digraph { - rankdir="TB" + @dot + digraph { + rankdir="TB" + start[label="Transaction Type?" shape=Mdiamond] + del[label="Does Base Row exist?" shape=rectangle] + add [label="Does Base Row exist?" shape=rectangle] + mod [label="Does Base Row exist?" shape=rectangle] + chkmod [label="Do all modified\n(is_diff=1) cells exist?" shape=rectangle] + chkadd [label="Do all inserted cells exist?" shape=rectangle] + outmod [label="outMOD\nTable" shape=Msquare style=filled] + outadd [label="outADD\nTable" shape=Msquare style=filled] + outdel [label="outDEL\nTable" shape=Msquare style=filled] + outerr [label="ErrDS Table" shape=Msquare fillcolor=Orange style=filled] + start -> del [label="Delete"] + start -> add [label="Insert"] + start -> mod [label="Update"] - start[label="Transaction Type?" shape=Mdiamond] - del[label="Base Row Exists?" shape=rectangle] - add [label="Base Row Exists?" shape=rectangle] - mod [label="Base Row Exists?" shape=rectangle] - outmod [label="outMOD Table" shape=box3d] - outadd [label="outADD Table" shape=box3d] - outdel [label="outDEL Table" shape=box3d] - outerr [label="ErrDS Table" shape=box3d] - start -> del [label="Delete"] - start -> add [label="Insert"] - start -> mod [label="Update"] + del -> outdel [label="Yes"] + del -> outerr [label="No" color="Red" fontcolor="Red"] + add -> chkadd [label="No"] + add -> outerr [label="Yes" color="Red" fontcolor="Red"] + mod -> outerr [label="No" color="Red" fontcolor="Red"] + mod -> chkmod [label="Yes"] + chkmod -> outerr [label="No" color="Red" fontcolor="Red"] + chkmod -> outmod [label="Yes"] + chkadd -> outerr [label="No" color="Red" fontcolor="Red"] + chkadd -> outadd [label="Yes"] - del -> outdel [label="Yes"] - del -> outerr [label="No" color="Red" fontcolor="Red"] - add -> outadd [label="Yes"] - add -> outerr [label="No" color="Red" fontcolor="Red"] - mod -> outerr [label="Yes" color="Red" fontcolor="Red"] - mod -> outmod [label="No"] - - } - @enddot 
+ } + @enddot For examples of usage, check out the mp_stackdiffs.test.sas program. @@ -158,13 +186,17 @@

SAS Macros

+ @li mf_existvarlist.sas + @li mf_getquotedstr.sas @li mf_getuniquename.sas @li mf_islibds.sas + @li mf_nobs.sas @li mp_abort.sas

Related Macros

@li mp_coretable.sas + @li mp_stackdiffs.test.sas @li mp_storediffs.sas @version 9.2 @@ -202,22 +234,189 @@ ,mac=&sysmacroname ,msg=%str(Missing key variables!) ) +%mp_abort(iftrue= ( + %mf_existVarList(&auditlibds,LIBREF DSN MOVE_TYPE KEY_HASH TGTVAR_NM IS_PK + IS_DIFF TGTVAR_TYPE OLDVAL_NUM NEWVAL_NUM OLDVAL_CHAR NEWVAL_CHAR)=0 + ) + ,mac=&sysmacroname + ,msg=%str(Input &auditlibds is missing required columns!) +) + /* set up unique and temporary vars */ -%local ds1 ds2 ds3 ds4 hashkey inds_auto inds_keep dslist; -%let ds1=%upcase(work.%mf_getuniquename(prefix=mpsd_ds1)); -%let ds2=%upcase(work.%mf_getuniquename(prefix=mpsd_ds2)); -%let ds3=%upcase(work.%mf_getuniquename(prefix=mpsd_ds3)); -%let ds4=%upcase(work.%mf_getuniquename(prefix=mpsd_ds4)); -%let hashkey=%upcase(%mf_getuniquename(prefix=mpsd_hashkey)); -%let inds_auto=%upcase(%mf_getuniquename(prefix=mpsd_inds_auto)); -%let inds_keep=%upcase(%mf_getuniquename(prefix=mpsd_inds_keep)); +%local prefix dslist x var keyjoin commakey keepvars; +%let prefix=%substr(%mf_getuniquename(),1,25); +%let dslist=ds1d ds2d ds3d ds1a ds2a ds3a ds1m ds2m ds3m pks dups base + delrec delerr; +%do x=1 %to %sysfunc(countw(&dslist)); + %let var=%scan(&dslist,&x); + %local &var; + %let &var=%upcase(&prefix._&var); +%end; + +%let keyjoin=1=1; +%do x=1 %to %sysfunc(countw(&key)); + %let var=%scan(&key,&x); + %let keyjoin=&keyjoin and a.&var=b.&var; +%end; + +%let commakey=%mf_getquotedstr(&key,quote=N); + +data &errds; + length pk_vars $256 pk_vals $4098 err_msg $512; + call missing (of _all_); + stop; +run; + +/** + * Prepare DELETE table + * Records are in the OLDVAL_xxx columns + */ +%let keepvars=MOVE_TYPE KEY_HASH TGTVAR_NM TGTVAR_TYPE IS_PK + OLDVAL_NUM OLDVAL_CHAR + NEWVAL_NUM NEWVAL_CHAR; +proc sort data=&auditlibds(where=(move_type='D') keep=&keepvars) + out=&ds1d(drop=move_type); +by KEY_HASH TGTVAR_NM; +run; +proc transpose data=&ds1d(where=(tgtvar_type='N')) + out=&ds2d(drop=_name_); + by KEY_HASH; + id TGTVAR_NM; + 
var OLDVAL_NUM; +run; +proc transpose data=&ds1d(where=(tgtvar_type='C')) + out=&ds3d(drop=_name_); + by KEY_HASH; + id TGTVAR_NM; + var OLDVAL_CHAR; +run; +data &outdel; + set &ds2d; + set &ds3d; +run; +proc sort; + by &key; +run; + +/** + * Prepare APPEND table + * Records are in the NEWVAL_xxx columns + */ +proc sort data=&auditlibds(where=(move_type='A') keep=&keepvars) + out=&ds1a(drop=move_type); + by KEY_HASH TGTVAR_NM; +run; +proc transpose data=&ds1a(where=(tgtvar_type='N')) + out=&ds2a(drop=_name_); + by KEY_HASH; + id TGTVAR_NM; + var NEWVAL_NUM; +run; +proc transpose data=&ds1a(where=(tgtvar_type='C')) + out=&ds3a(drop=_name_); + by KEY_HASH; + id TGTVAR_NM; + var NEWVAL_CHAR; +run; +data &outadd; + set &ds2a; + set &ds3a; +run; + +/** + * Prepare MODIFY table + * Keep only primary key - will add modified values later + */ +proc sort data=&auditlibds( + where=(move_type='M' and is_pk=1) keep=&keepvars + ) out=&ds1m(drop=move_type); + by KEY_HASH TGTVAR_NM; +run; +proc transpose data=&ds1m(where=(tgtvar_type='N')) + out=&ds2m(drop=_name_); + by KEY_HASH ; + id TGTVAR_NM; + var NEWVAL_NUM; +run; +proc transpose data=&ds1m(where=(tgtvar_type='C')) + out=&ds3m(drop=_name_); + by KEY_HASH; + id TGTVAR_NM; + var NEWVAL_CHAR; +run; +data &outmod; + set &ds2m; + set &ds3m; +run; + +/** + * Extract matching records from the base table + * Do this in one join for efficiency. + * At a later date, this should be optimised for large database tables by using + * passthrough and a temporary table. + */ +data &pks; + set &outadd &outmod &outdel; + keep &key; +run; + +proc sort noduprec dupout=&dups; +by &key; +run; +%mp_abort(iftrue= (%mf_nobs(&dups) ne 0) + ,mac=&sysmacroname + ,msg=%str(duplicates (%mf_nobs(&dups)) found on &auditlibds!) 
+) + +proc sql; +create table &base as + select a.* + from &baselibds a, &pks b + where &keyjoin; + +/** + * delete check - an audit row with no match on the base table is an error + * This is straightforward as it relates to records only + */ +proc sql; +create table &delrec as + select a.* + from &outdel a + left join &base b + on &keyjoin + where b.%scan(&key,1) is null + order by &commakey; + +data &delerr; + if 0 then set &errds; + set &delrec; + PK_VARS="&key"; + PK_VALS=catx('/',&commakey); + ERR_MSG="Rows cannot be deleted as they do not exist on the Base dataset"; + keep PK_VARS PK_VALS ERR_MSG; +run; + +proc append base=&errds data=&delerr; +run; + +data &outdel; + merge &outdel (in=a) &delrec (in=b); + by &key; + if not b; +run; + +/* +LIBREF DSN MOVE_TYPE TGTVAR_NM IS_PK IS_DIFF + TGTVAR_TYPE OLDVAL_NUM NEWVAL_NUM OLDVAL_CHAR NEWVAL_CHAR +*/ %let key=%upcase(&key); %if &mdebug=0 %then %do; - proc sql; - drop table &ds1, &ds2, &ds3, &ds4; + proc datasets lib=work; + delete &prefix:; + run; %end; %mend mp_stackdiffs; diff --git a/sasjs/sasjsconfig.json b/sasjs/sasjsconfig.json index ccc76ce..9840f6d 100644 --- a/sasjs/sasjsconfig.json +++ b/sasjs/sasjsconfig.json @@ -40,6 +40,7 @@ "tests/viyaonly" ], "programFolders": [], + "binaryFolders": [], "deployConfig": { "deployServicePack": true, "deployScripts": [] diff --git a/tests/crossplatform/mp_stackdiffs.test.sas b/tests/crossplatform/mp_stackdiffs.test.sas index 37bbdbe..32022a8 100644 --- a/tests/crossplatform/mp_stackdiffs.test.sas +++ b/tests/crossplatform/mp_stackdiffs.test.sas @@ -6,6 +6,7 @@ @li mp_assert.sas @li mp_assertcolvals.sas @li mp_assertdsobs.sas + @li mp_assertscope.sas @li mp_stackdiffs.sas @li mp_storediffs.sas @@ -16,7 +17,7 @@ data work.orig work.deleted work.changed work.appended; set sashelp.electric; if _n_ le 10 then do; - output work.orig work.deleted; + output work.deleted; end; else if _n_ le 20 then do; output work.orig; @@ -38,4 +39,52 @@ run; ,mdebug=1 ) -/* now, stack it back */ \ No newline at end of file 
+%mp_assertscope(SNAPSHOT) + +/** + * Deletions test - where record does not exist on the base table (errors) + */ +%mp_stackdiffs(work.orig + ,work.final + ,CUSTOMER YEAR + ,mdebug=1 + ,errds=work.errds1 + ,outmod=work.mod1 + ,outadd=work.add1 + ,outdel=work.del1 +) +%mp_assertdsobs(work.errds1, + desc=Delete1 - has errors, + test=EQUALS 10 +) +%mp_assertdsobs(work.del1, + desc=Delete1 - records not populated, + test=EQUALS 0 +) +/** + * Deletions test - where record DOES exist on the base table (no errors) + */ +data work.orig2; + set sashelp.electric; + if _n_ le 10; +run; +%mp_stackdiffs(work.orig2 + ,work.final + ,CUSTOMER YEAR + ,mdebug=1 + ,errds=work.errds2 + ,outmod=work.mod2 + ,outadd=work.add2 + ,outdel=work.del2 +) +%mp_assertdsobs(work.errds2, + desc=Delete2 - no errors, + test=EQUALS 0 +) +%mp_assertdsobs(work.del2, + desc=Delete2 - records populated, + test=EQUALS 10 +) + + +%mp_assertscope(COMPARE,Desc=MacVar Scope Check) \ No newline at end of file