1
0
mirror of https://github.com/sasjs/core.git synced 2025-12-10 22:14:35 +00:00

feat: adding delete capability (and tests) for mp_stackdiffs

This commit is contained in:
munja
2022-01-19 22:05:56 +01:00
parent 75f712a305
commit bdd22abc55
4 changed files with 311 additions and 64 deletions

View File

@@ -59,9 +59,7 @@
/* declare local vars */
%local var vars vcnt i j k l tmpvar tmpds rows posspks ppkcnt;
%let vars=%upcase(%mf_getvarlist(&baseds));
%put &=vars;
%let vars=%mf_wordsInStr1ButNotStr2(str1=&vars,str2=%upcase(&ignore_cols));
%put &=vars;
%let vcnt=%sysfunc(countw(&vars));
%if &vcnt=0 %then %do;

View File

@@ -23,23 +23,23 @@
Essentially, this macro turns a table like this:
|LIBREF:$8.|DSN:$32.|MOVE_TYPE:$1.|TGTVAR_NM:$32.|IS_PK:best.|IS_DIFF:best.|TGTVAR_TYPE:$1.|OLDVAL_NUM:best32.|NEWVAL_NUM:best32.|OLDVAL_CHAR:$32765.|NEWVAL_CHAR:$32765.|
|---|---|---|---|---|---|---|---|---|---|---|
|`SASHELP `|`CLASS `|`A `|`NAME `|`1 `|`-1 `|`C `|`. `|`. `|` `|`Newbie `|
|`SASHELP `|`CLASS `|`A `|`AGE `|`0 `|`-1 `|`N `|`. `|`13 `|` `|` `|
|`SASHELP `|`CLASS `|`A `|`HEIGHT `|`0 `|`-1 `|`N `|`. `|`65.3 `|` `|` `|
|`SASHELP `|`CLASS `|`A `|`SEX `|`0 `|`-1 `|`C `|`. `|`. `|` `|`F `|
|`SASHELP `|`CLASS `|`A `|`WEIGHT `|`0 `|`-1 `|`N `|`. `|`98 `|` `|` `|
|`SASHELP `|`CLASS `|`D `|`NAME `|`1 `|`-1 `|`C `|`. `|`. `|`Alfred `|` `|
|`SASHELP `|`CLASS `|`D `|`AGE `|`0 `|`-1 `|`N `|`14 `|`. `|` `|` `|
|`SASHELP `|`CLASS `|`D `|`HEIGHT `|`0 `|`-1 `|`N `|`69 `|`. `|` `|` `|
|`SASHELP `|`CLASS `|`D `|`SEX `|`0 `|`-1 `|`C `|`. `|`. `|`M `|` `|
|`SASHELP `|`CLASS `|`D `|`WEIGHT `|`0 `|`-1 `|`N `|`112.5 `|`. `|` `|` `|
|`SASHELP `|`CLASS `|`M `|`NAME `|`1 `|`0 `|`C `|`. `|`. `|`Alice `|`Alice `|
|`SASHELP `|`CLASS `|`M `|`AGE `|`0 `|`1 `|`N `|`13 `|`99 `|` `|` `|
|`SASHELP `|`CLASS `|`M `|`HEIGHT `|`0 `|`0 `|`N `|`56.5 `|`56.5 `|` `|` `|
|`SASHELP `|`CLASS `|`M `|`SEX `|`0 `|`0 `|`C `|`. `|`. `|`F `|`F `|
|`SASHELP `|`CLASS `|`M `|`WEIGHT `|`0 `|`0 `|`N `|`84 `|`84 `|` `|` `|
|MOVE_TYPE:$1.|TGTVAR_NM:$32.|IS_PK:best.|IS_DIFF:best.|TGTVAR_TYPE:$1.|OLDVAL_NUM:best32.|NEWVAL_NUM:best32.|OLDVAL_CHAR:$32765.|NEWVAL_CHAR:$32765.|
|---|---|---|---|---|---|---|---|---|
|`A `|`NAME `|`1 `|`-1 `|`C `|`. `|`. `|` `|`Newbie `|
|`A `|`AGE `|`0 `|`-1 `|`N `|`. `|`13 `|` `|` `|
|`A `|`HEIGHT `|`0 `|`-1 `|`N `|`. `|`65.3 `|` `|` `|
|`A `|`SEX `|`0 `|`-1 `|`C `|`. `|`. `|` `|`F `|
|`A `|`WEIGHT `|`0 `|`-1 `|`N `|`. `|`98 `|` `|` `|
|`D `|`NAME `|`1 `|`-1 `|`C `|`. `|`. `|`Alfred `|` `|
|`D `|`AGE `|`0 `|`-1 `|`N `|`14 `|`. `|` `|` `|
|`D `|`HEIGHT `|`0 `|`-1 `|`N `|`69 `|`. `|` `|` `|
|`D `|`SEX `|`0 `|`-1 `|`C `|`. `|`. `|`M `|` `|
|`D `|`WEIGHT `|`0 `|`-1 `|`N `|`112.5 `|`. `|` `|` `|
|`M `|`NAME `|`1 `|`0 `|`C `|`. `|`. `|`Alice `|`Alice `|
|`M `|`AGE `|`0 `|`1 `|`N `|`13 `|`99 `|` `|` `|
|`M `|`HEIGHT `|`0 `|`0 `|`N `|`56.5 `|`56.5 `|` `|` `|
|`M `|`SEX `|`0 `|`0 `|`C `|`. `|`. `|`F `|`F `|
|`M `|`WEIGHT `|`0 `|`0 `|`N `|`84 `|`84 `|` `|` `|
Into three tables like this:
@@ -64,23 +64,34 @@
base table contains a PROCESSED_DTTM column (or similar), this can be
ignored by declaring it in the `processed_dttm_var` parameter.
If the structure of the Base Table has changed, the following rules apply:
The macro is also flexible where columns have been added or removed from
the base table UNLESS there is a change to the primary key.
@li New Columns - Irrelevant for deletes. For inserts, the new column
values are set to missing. For updates, the base table values are used.
@li Deleted Columns - These will also be absent in the output tables.
@li Change in Primary Key - This is not (well, cannot be) supported!!
Changes to the primary key are NOT supported, and are likely to cause
unexpected results.
The following pre-flight checks are made:
@li All primary key columns exist on the base table
@li There is no change in variable TYPE for any of the columns
@li There is no reduction in variable LENGTH below the max-length of the
supplied values
Rules for stacking changes are as follows:
<table>
<tr><th>Transaction Type</th><th>Behaviour</th></tr>
<tr>
<th>Transaction Type</th><th>Key Behaviour</th><th>Column Behaviour</th>
</tr>
<tr>
<td>Deletes</td>
<td>
For previously deleted rows, the PK is added to the `outDEL` table<br>
If the row no longer exists in the base table, the row is added to the
`errDS` table instead.
The row is added to `&outDEL.` UNLESS it no longer exists
in the base table, in which case it is added to `&errDS.` instead.
</td>
<td>
Deletes are unaffected by the addition or removal of non Primary-Key
columns.
</td>
</tr>
<tr>
@@ -88,7 +99,13 @@
<td>
Rows that were previously inserted are added to the `outADD` table UNLESS
they are already present in the Base table.<br>In this case they are added to the
`errDS` table instead.
`&errDS.` table instead.
</td>
<td>
Inserts are unaffected by the addition of columns in the Base Table
(they are padded with blanks). Deleted columns are only a problem if
they appear on the previous insert - in which case the record is added
to `&errDS.`.
</td>
</tr>
<tr>
@@ -102,36 +119,47 @@
If the row no longer exists on the base table, then the row is added to
the `errDS` table instead.
</td>
<td>
Updates are unaffected by the addition of columns in the Base Table -
the new cells are simply populated with Base Table values. Deleted
columns are only a problem if they relate to a modified cell
(`is_diff=1`) - in which case the record is added to `&errDS.`.
</td>
</tr>
</table>
To illustrate the above with a diagram:
@dot
digraph {
rankdir="TB"
@dot
digraph {
rankdir="TB"
start[label="Transaction Type?" shape=Mdiamond]
del[label="Does Base Row exist?" shape=rectangle]
add [label="Does Base Row exist?" shape=rectangle]
mod [label="Does Base Row exist?" shape=rectangle]
chkmod [label="Do all modified\n(is_diff=1) cells exist?" shape=rectangle]
chkadd [label="Do all inserted cells exist?" shape=rectangle]
outmod [label="outMOD\nTable" shape=Msquare style=filled]
outadd [label="outADD\nTable" shape=Msquare style=filled]
outdel [label="outDEL\nTable" shape=Msquare style=filled]
outerr [label="ErrDS Table" shape=Msquare fillcolor=Orange style=filled]
start -> del [label="Delete"]
start -> add [label="Insert"]
start -> mod [label="Update"]
start[label="Transaction Type?" shape=Mdiamond]
del[label="Base Row Exists?" shape=rectangle]
add [label="Base Row Exists?" shape=rectangle]
mod [label="Base Row Exists?" shape=rectangle]
outmod [label="outMOD Table" shape=box3d]
outadd [label="outADD Table" shape=box3d]
outdel [label="outDEL Table" shape=box3d]
outerr [label="ErrDS Table" shape=box3d]
start -> del [label="Delete"]
start -> add [label="Insert"]
start -> mod [label="Update"]
del -> outdel [label="Yes"]
del -> outerr [label="No" color="Red" fontcolor="Red"]
add -> chkadd [label="No"]
add -> outerr [label="Yes" color="Red" fontcolor="Red"]
mod -> outerr [label="No" color="Red" fontcolor="Red"]
mod -> chkmod [label="Yes"]
chkmod -> outerr [label="No" color="Red" fontcolor="Red"]
chkmod -> outmod [label="Yes"]
chkadd -> outerr [label="No" color="Red" fontcolor="Red"]
chkadd -> outadd [label="Yes"]
del -> outdel [label="Yes"]
del -> outerr [label="No" color="Red" fontcolor="Red"]
add -> outadd [label="Yes"]
add -> outerr [label="No" color="Red" fontcolor="Red"]
mod -> outerr [label="Yes" color="Red" fontcolor="Red"]
mod -> outmod [label="No"]
}
@enddot
}
@enddot
For examples of usage, check out the mp_stackdiffs.test.sas program.
@@ -158,13 +186,17 @@
<h4> SAS Macros </h4>
@li mf_existvarlist.sas
@li mf_getquotedstr.sas
@li mf_getuniquename.sas
@li mf_islibds.sas
@li mf_nobs.sas
@li mp_abort.sas
<h4> Related Macros </h4>
@li mp_coretable.sas
@li mp_stackdiffs.test.sas
@li mp_storediffs.sas
@version 9.2
@@ -202,22 +234,189 @@
,mac=&sysmacroname
,msg=%str(Missing key variables!)
)
/* Pre-flight check: abort when the audit table does not carry every column
   this macro reads.  Presumably mf_existVarList resolves to 0 when at least
   one listed variable is absent - confirm against mf_existvarlist.sas */
%mp_abort(iftrue= (
%mf_existVarList(&auditlibds,LIBREF DSN MOVE_TYPE KEY_HASH TGTVAR_NM IS_PK
IS_DIFF TGTVAR_TYPE OLDVAL_NUM NEWVAL_NUM OLDVAL_CHAR NEWVAL_CHAR)=0
)
,mac=&sysmacroname
,msg=%str(Input &auditlibds is missing required columns!)
)
/* set up unique and temporary vars */
%local ds1 ds2 ds3 ds4 hashkey inds_auto inds_keep dslist;
%let ds1=%upcase(work.%mf_getuniquename(prefix=mpsd_ds1));
%let ds2=%upcase(work.%mf_getuniquename(prefix=mpsd_ds2));
%let ds3=%upcase(work.%mf_getuniquename(prefix=mpsd_ds3));
%let ds4=%upcase(work.%mf_getuniquename(prefix=mpsd_ds4));
%let hashkey=%upcase(%mf_getuniquename(prefix=mpsd_hashkey));
%let inds_auto=%upcase(%mf_getuniquename(prefix=mpsd_inds_auto));
%let inds_keep=%upcase(%mf_getuniquename(prefix=mpsd_inds_keep));
/* Work variables: a shared prefix for temporary tables, the list of temp
   table nicknames, loop helpers and the derived key expressions */
%local prefix dslist x var keyjoin commakey keepvars;
/* first 25 characters of a generated name, used as the common prefix of every
   temporary table so they can be removed together with a "prefix:" wildcard */
%let prefix=%substr(%mf_getuniquename(),1,25);
%let dslist=ds1d ds2d ds3d ds1a ds2a ds3a ds1m ds2m ds3m pks dups base
delrec delerr;
/* for each nickname, declare a local macro variable of that name holding the
   uppercased table name <prefix>_<nickname> */
%do x=1 %to %sysfunc(countw(&dslist));
%let var=%scan(&dslist,&x);
%local &var;
%let &var=%upcase(&prefix._&var);
%end;
/* build the join condition "1=1 and a.K1=b.K1 and a.K2=b.K2 ..." over all key
   variables; the 1=1 seed lets every term be appended with a leading "and".
   Any query using &keyjoin must alias its two tables as a and b */
%let keyjoin=1=1;
%do x=1 %to %sysfunc(countw(&key));
%let var=%scan(&key,&x);
%let keyjoin=&keyjoin and a.&var=b.&var;
%end;
/* key variables as a list usable in ORDER BY and CATX() - presumably comma
   separated and unquoted given quote=N; confirm against mf_getquotedstr.sas */
%let commakey=%mf_getquotedstr(&key,quote=N);
/* Create the error table with its three character columns and zero rows.
   STOP ends the step before any observation is written; CALL MISSING
   initialises the variables (presumably to avoid "uninitialized" notes) */
data &errds;
length pk_vars $256 pk_vals $4098 err_msg $512;
call missing (of _all_);
stop;
run;
/**
* Prepare DELETE table
* Records are in the OLDVAL_xxx columns
*
* The audit table holds one row per record (KEY_HASH) per column (TGTVAR_NM).
* The steps below pivot the rows flagged MOVE_TYPE='D' back into one row per
* record, handling numeric and character columns separately.
*/
/* columns read from the audit table; reused by the APPEND and MODIFY extracts */
%let keepvars=MOVE_TYPE KEY_HASH TGTVAR_NM TGTVAR_TYPE IS_PK
OLDVAL_NUM OLDVAL_CHAR
NEWVAL_NUM NEWVAL_CHAR;
/* deleted rows only, ordered for the BY-group transposes that follow */
proc sort data=&auditlibds(where=(move_type='D') keep=&keepvars)
out=&ds1d(drop=move_type);
by KEY_HASH TGTVAR_NM;
run;
/* numeric columns: one output row per KEY_HASH, one variable per TGTVAR_NM */
proc transpose data=&ds1d(where=(tgtvar_type='N'))
out=&ds2d(drop=_name_);
by KEY_HASH;
id TGTVAR_NM;
var OLDVAL_NUM;
run;
/* character columns, same layout */
proc transpose data=&ds1d(where=(tgtvar_type='C'))
out=&ds3d(drop=_name_);
by KEY_HASH;
id TGTVAR_NM;
var OLDVAL_CHAR;
run;
/* Combine the numeric and character halves side by side.
   NOTE(review): two SET statements pair row N of each input and the step ends
   as soon as either input is exhausted.  This relies on both transposes
   yielding the same KEY_HASH rows in the same order - confirm the behaviour
   for tables that have only numeric or only character columns */
data &outdel;
set &ds2d;
set &ds3d;
run;
/* no DATA= option, so the most recently created table (&outdel) is sorted in
   place - the key order is needed for the later BY-merge on &key */
proc sort;
by &key;
run;
/**
* Prepare APPEND table
* Records are in the NEWVAL_xxx columns
*
* Same pivot as for deletes, applied to rows flagged MOVE_TYPE='A' and reading
* the new values rather than the old ones.
*/
/* inserted rows only, ordered for the BY-group transposes that follow */
proc sort data=&auditlibds(where=(move_type='A') keep=&keepvars)
out=&ds1a(drop=move_type);
by KEY_HASH TGTVAR_NM;
run;
/* numeric columns: one output row per KEY_HASH, one variable per TGTVAR_NM */
proc transpose data=&ds1a(where=(tgtvar_type='N'))
out=&ds2a(drop=_name_);
by KEY_HASH;
id TGTVAR_NM;
var NEWVAL_NUM;
run;
/* character columns, same layout */
proc transpose data=&ds1a(where=(tgtvar_type='C'))
out=&ds3a(drop=_name_);
by KEY_HASH;
id TGTVAR_NM;
var NEWVAL_CHAR;
run;
/* side-by-side combine of both halves.
   NOTE(review): as two SET statements stop at the shorter input, this assumes
   both transposes return the same records in the same order - confirm */
data &outadd;
set &ds2a;
set &ds3a;
run;
/**
* Prepare MODIFY table
* Keep only primary key - will add modified values later
*
* Only audit rows with IS_PK=1 are read, so the resulting table holds just the
* key columns of each modified record (taken from the NEWVAL_xxx columns).
*/
/* primary key cells of modified rows, ordered for the transposes below */
proc sort data=&auditlibds(
where=(move_type='M' and is_pk=1) keep=&keepvars
) out=&ds1m(drop=move_type);
by KEY_HASH TGTVAR_NM;
run;
/* numeric key columns: one row per KEY_HASH */
proc transpose data=&ds1m(where=(tgtvar_type='N'))
out=&ds2m(drop=_name_);
by KEY_HASH ;
id TGTVAR_NM;
var NEWVAL_NUM;
run;
/* character key columns: one row per KEY_HASH */
proc transpose data=&ds1m(where=(tgtvar_type='C'))
out=&ds3m(drop=_name_);
by KEY_HASH;
id TGTVAR_NM;
var NEWVAL_CHAR;
run;
/* side-by-side combine of both halves.
   NOTE(review): two SET statements stop at the shorter input, so a key made
   up of only numeric or only character columns needs confirming here */
data &outmod;
set &ds2m;
set &ds3m;
run;
/**
* Extract matching records from the base table
* Do this in one join for efficiency.
* At a later date, this should be optimised for large database tables by using
* passthrough and a temporary table.
*/
/* stack the key columns of every staged insert, update and delete */
data &pks;
set &outadd &outmod &outdel;
keep &key;
run;
/* sorts &pks (the last table created) by key; NODUPREC drops repeated records
   and DUPOUT= captures them in &dups for the check below */
proc sort noduprec dupout=&dups;
by &key;
run;
/* a key appearing more than once in the staged changes is treated as fatal */
%mp_abort(iftrue= (%mf_nobs(&dups) ne 0)
,mac=&sysmacroname
,msg=%str(duplicates (%mf_nobs(&dups)) found on &auditlibds!)
)
/* inner join: keep only the base table rows whose key matches a staged change.
   &keyjoin expects the aliases a (base table) and b (key list) */
proc sql;
create table &base as
select a.*
from &baselibds a, &pks b
where &keyjoin;
/**
* delete check
* This is straightforward as it relates to records only
*
* A delete can only be applied when the row still exists on the base table.
* The LEFT JOIN keeps every staged delete (alias a).  Where no base row
* matched, all b-side columns are null, so the "no match" test has to be made
* on the b side.  The a-side key is always populated, so testing it would never
* select anything.  &delrec therefore holds the deletes that cannot be applied.
*/
proc sql;
create table &delrec as
select a.*
from &outdel a
left join &base b
on &keyjoin
where b.%scan(&key,1) is null
order by &commakey;
/* Turn each row of &delrec into an error record.  "if 0 then set" never
   executes but makes the step inherit the column definitions of &errds, so
   the result can be appended to it */
data &delerr;
if 0 then set &errds;
set &delrec;
PK_VARS="&key";
/* key values joined with a slash, in the same order as PK_VARS */
PK_VALS=catx('/',&commakey);
ERR_MSG="Rows cannot be deleted as they do not exist on the Base dataset";
keep PK_VARS PK_VALS ERR_MSG;
run;
proc append base=&errds data=&delerr;
run;
/* remove the rejected rows from the delete output: keep only the &outdel rows
   whose key has no counterpart in &delrec (both inputs are in &key order) */
data &outdel;
merge &outdel (in=a) &delrec (in=b);
by &key;
if not b;
run;
/*
LIBREF DSN MOVE_TYPE TGTVAR_NM IS_PK IS_DIFF
TGTVAR_TYPE OLDVAL_NUM NEWVAL_NUM OLDVAL_CHAR NEWVAL_CHAR
*/
%let key=%upcase(&key);
%if &mdebug=0 %then %do;
proc sql;
drop table &ds1, &ds2, &ds3, &ds4;
proc datasets lib=work;
delete &prefix:;
run;
%end;
%mend mp_stackdiffs;

View File

@@ -40,6 +40,7 @@
"tests/viyaonly"
],
"programFolders": [],
"binaryFolders": [],
"deployConfig": {
"deployServicePack": true,
"deployScripts": []

View File

@@ -6,6 +6,7 @@
@li mp_assert.sas
@li mp_assertcolvals.sas
@li mp_assertdsobs.sas
@li mp_assertscope.sas
@li mp_stackdiffs.sas
@li mp_storediffs.sas
@@ -16,7 +17,7 @@
data work.orig work.deleted work.changed work.appended;
set sashelp.electric;
if _n_ le 10 then do;
output work.orig work.deleted;
output work.deleted;
end;
else if _n_ le 20 then do;
output work.orig;
@@ -38,4 +39,52 @@ run;
,mdebug=1
)
/* now, stack it back */
%mp_assertscope(SNAPSHOT)
/**
* Deletions test - where record does not exist
*
* Runs mp_stackdiffs with work.orig as the base table and work.final as the
* audit table, keyed on CUSTOMER YEAR, writing to the *1 output tables.
* The two assertions then check the row counts of the error table (0) and of
* the delete table (10).
*/
%mp_stackdiffs(work.orig
,work.final
,CUSTOMER YEAR
,mdebug=1
,errds=work.errds1
,outmod=work.mod1
,outadd=work.add1
,outdel=work.del1
)
%mp_assertdsobs(work.errds1,
desc=Delete1 - no errors,
test=EQUALS 0
)
%mp_assertdsobs(work.del1,
desc=Delete1 - records populated,
test=EQUALS 10
)
/**
* Deletions test - where record DOES exist
*
* work.orig2 holds the first 10 rows of sashelp.electric and is used as the
* base table for a second mp_stackdiffs run, writing to the *2 output tables.
* Both assertions must inspect the outputs of THIS run (errds2 / del2).
*
* NOTE(review): the expected counts (10 errors and 0 deletes when the rows
* are present on the base table) look like the reverse of the delete rule in
* the mp_stackdiffs header, which sends a delete to the error table only when
* the row is missing from the base table.  Confirm the intended expectations.
*/
data work.orig2;
set sashelp.electric;
if _n_ le 10;
run;
%mp_stackdiffs(work.orig2
,work.final
,CUSTOMER YEAR
,mdebug=1
,errds=work.errds2
,outmod=work.mod2
,outadd=work.add2
,outdel=work.del2
)
%mp_assertdsobs(work.errds2,
desc=Delete2 - has errors,
test=EQUALS 10
)
%mp_assertdsobs(work.del2,
desc=Delete2 - records not populated,
test=EQUALS 0
)
%mp_assertscope(COMPARE,Desc=MacVar Scope Check)