1
0
mirror of https://github.com/sasjs/core.git synced 2026-01-06 17:10:05 +00:00

feat: adding delete capability (and tests) for mp_stackdiffs

This commit is contained in:
munja
2022-01-19 22:05:56 +01:00
parent 75f712a305
commit bdd22abc55
4 changed files with 311 additions and 64 deletions

View File

@@ -59,9 +59,7 @@
/* declare local vars */ /* declare local vars */
%local var vars vcnt i j k l tmpvar tmpds rows posspks ppkcnt; %local var vars vcnt i j k l tmpvar tmpds rows posspks ppkcnt;
%let vars=%upcase(%mf_getvarlist(&baseds)); %let vars=%upcase(%mf_getvarlist(&baseds));
%put &=vars;
%let vars=%mf_wordsInStr1ButNotStr2(str1=&vars,str2=%upcase(&ignore_cols)); %let vars=%mf_wordsInStr1ButNotStr2(str1=&vars,str2=%upcase(&ignore_cols));
%put &=vars;
%let vcnt=%sysfunc(countw(&vars)); %let vcnt=%sysfunc(countw(&vars));
%if &vcnt=0 %then %do; %if &vcnt=0 %then %do;

View File

@@ -23,23 +23,23 @@
Essentially then, what this macro does, is turn a table like this: Essentially then, what this macro does, is turn a table like this:
|LIBREF:$8.|DSN:$32.|MOVE_TYPE:$1.|TGTVAR_NM:$32.|IS_PK:best.|IS_DIFF:best.|TGTVAR_TYPE:$1.|OLDVAL_NUM:best32.|NEWVAL_NUM:best32.|OLDVAL_CHAR:$32765.|NEWVAL_CHAR:$32765.| |MOVE_TYPE:$1.|TGTVAR_NM:$32.|IS_PK:best.|IS_DIFF:best.|TGTVAR_TYPE:$1.|OLDVAL_NUM:best32.|NEWVAL_NUM:best32.|OLDVAL_CHAR:$32765.|NEWVAL_CHAR:$32765.|
|---|---|---|---|---|---|---|---|---|---|---| |---|---|---|---|---|---|---|---|---|
|`SASHELP `|`CLASS `|`A `|`NAME `|`1 `|`-1 `|`C `|`. `|`. `|` `|`Newbie `| |`A `|`NAME `|`1 `|`-1 `|`C `|`. `|`. `|` `|`Newbie `|
|`SASHELP `|`CLASS `|`A `|`AGE `|`0 `|`-1 `|`N `|`. `|`13 `|` `|` `| |`A `|`AGE `|`0 `|`-1 `|`N `|`. `|`13 `|` `|` `|
|`SASHELP `|`CLASS `|`A `|`HEIGHT `|`0 `|`-1 `|`N `|`. `|`65.3 `|` `|` `| |`A `|`HEIGHT `|`0 `|`-1 `|`N `|`. `|`65.3 `|` `|` `|
|`SASHELP `|`CLASS `|`A `|`SEX `|`0 `|`-1 `|`C `|`. `|`. `|` `|`F `| |`A `|`SEX `|`0 `|`-1 `|`C `|`. `|`. `|` `|`F `|
|`SASHELP `|`CLASS `|`A `|`WEIGHT `|`0 `|`-1 `|`N `|`. `|`98 `|` `|` `| |`A `|`WEIGHT `|`0 `|`-1 `|`N `|`. `|`98 `|` `|` `|
|`SASHELP `|`CLASS `|`D `|`NAME `|`1 `|`-1 `|`C `|`. `|`. `|`Alfred `|` `| |`D `|`NAME `|`1 `|`-1 `|`C `|`. `|`. `|`Alfred `|` `|
|`SASHELP `|`CLASS `|`D `|`AGE `|`0 `|`-1 `|`N `|`14 `|`. `|` `|` `| |`D `|`AGE `|`0 `|`-1 `|`N `|`14 `|`. `|` `|` `|
|`SASHELP `|`CLASS `|`D `|`HEIGHT `|`0 `|`-1 `|`N `|`69 `|`. `|` `|` `| |`D `|`HEIGHT `|`0 `|`-1 `|`N `|`69 `|`. `|` `|` `|
|`SASHELP `|`CLASS `|`D `|`SEX `|`0 `|`-1 `|`C `|`. `|`. `|`M `|` `| |`D `|`SEX `|`0 `|`-1 `|`C `|`. `|`. `|`M `|` `|
|`SASHELP `|`CLASS `|`D `|`WEIGHT `|`0 `|`-1 `|`N `|`112.5 `|`. `|` `|` `| |`D `|`WEIGHT `|`0 `|`-1 `|`N `|`112.5 `|`. `|` `|` `|
|`SASHELP `|`CLASS `|`M `|`NAME `|`1 `|`0 `|`C `|`. `|`. `|`Alice `|`Alice `| |`M `|`NAME `|`1 `|`0 `|`C `|`. `|`. `|`Alice `|`Alice `|
|`SASHELP `|`CLASS `|`M `|`AGE `|`0 `|`1 `|`N `|`13 `|`99 `|` `|` `| |`M `|`AGE `|`0 `|`1 `|`N `|`13 `|`99 `|` `|` `|
|`SASHELP `|`CLASS `|`M `|`HEIGHT `|`0 `|`0 `|`N `|`56.5 `|`56.5 `|` `|` `| |`M `|`HEIGHT `|`0 `|`0 `|`N `|`56.5 `|`56.5 `|` `|` `|
|`SASHELP `|`CLASS `|`M `|`SEX `|`0 `|`0 `|`C `|`. `|`. `|`F `|`F `| |`M `|`SEX `|`0 `|`0 `|`C `|`. `|`. `|`F `|`F `|
|`SASHELP `|`CLASS `|`M `|`WEIGHT `|`0 `|`0 `|`N `|`84 `|`84 `|` `|` `| |`M `|`WEIGHT `|`0 `|`0 `|`N `|`84 `|`84 `|` `|` `|
Into three tables like this: Into three tables like this:
@@ -64,23 +64,34 @@
base table contains a PROCESSED_DTTM column (or similar), this can be base table contains a PROCESSED_DTTM column (or similar), this can be
ignored by declaring it in the `processed_dttm_var` parameter. ignored by declaring it in the `processed_dttm_var` parameter.
If the structure of the Base Table has changed, the following rules apply: The macro is also flexible where columns have been added or removed from
the base table UNLESS there is a change to the primary key.
@li New Columns - Irrelevant for deletes. For inserts, the new column Changes to the primary key are NOT supported, and are likely to cause
values are set to missing. For updates, the base table values are used. unexpected results.
@li Deleted Columns - These will also be absent in the output tables.
@li Change in Primary Key - This is not (well, cannot be) supported!! The following pre-flight checks are made:
@li All primary key columns exist on the base table
@li There is no change in variable TYPE for any of the columns
@li There is no reduction in variable LENGTH below the max-length of the
supplied values
Rules for stacking changes are as follows: Rules for stacking changes are as follows:
<table> <table>
<tr><th>Transaction Type</th><th>Behaviour</th></tr> <tr>
<th>Transaction Type</th><th>Key Behaviour</th><th>Column Behaviour</th>
</tr>
<tr> <tr>
<td>Deletes</td> <td>Deletes</td>
<td> <td>
For previously deleted rows, the PK is added to the `outDEL` table<br> The row is added to `&outDEL.` UNLESS it no longer exists
If the row no longer exists in the base table, the row is added to the in the base table, in which case it is added to `&errDS.` instead.
`errDS` table instead. </td>
<td>
Deletes are unaffected by the addition or removal of non-primary-key
columns.
</td> </td>
</tr> </tr>
<tr> <tr>
@@ -88,7 +99,13 @@
<td> <td>
Previously newly added rows are added to the `outADD` table UNLESS they Previously newly added rows are added to the `outADD` table UNLESS they
are present in the Base table.<br>In this case they are added to the are present in the Base table.<br>In this case they are added to the
`errDS` table instead. `&errDS.` table instead.
</td>
<td>
Inserts are unaffected by the addition of columns in the Base Table
(they are padded with blanks). Deleted columns are only a problem if
they appear on the previous insert - in which case the record is added
to `&errDS.`.
</td> </td>
</tr> </tr>
<tr> <tr>
@@ -102,36 +119,47 @@
If the row no longer exists on the base table, then the row is added to If the row no longer exists on the base table, then the row is added to
the `errDS` table instead. the `errDS` table instead.
</td> </td>
<td>
Updates are unaffected by the addition of columns in the Base Table -
the new cells are simply populated with Base Table values. Deleted
columns are only a problem if they relate to a modified cell
(`is_diff=1`) - in which case the record is added to `&errDS.`.
</td>
</tr> </tr>
</table> </table>
To illustrate the above with a diagram: To illustrate the above with a diagram:
@dot @dot
digraph { digraph {
rankdir="TB" rankdir="TB"
start[label="Transaction Type?" shape=Mdiamond]
del[label="Does Base Row exist?" shape=rectangle]
add [label="Does Base Row exist?" shape=rectangle]
mod [label="Does Base Row exist?" shape=rectangle]
chkmod [label="Do all modified\n(is_diff=1) cells exist?" shape=rectangle]
chkadd [label="Do all inserted cells exist?" shape=rectangle]
outmod [label="outMOD\nTable" shape=Msquare style=filled]
outadd [label="outADD\nTable" shape=Msquare style=filled]
outdel [label="outDEL\nTable" shape=Msquare style=filled]
outerr [label="ErrDS Table" shape=Msquare fillcolor=Orange style=filled]
start -> del [label="Delete"]
start -> add [label="Insert"]
start -> mod [label="Update"]
start[label="Transaction Type?" shape=Mdiamond] del -> outdel [label="Yes"]
del[label="Base Row Exists?" shape=rectangle] del -> outerr [label="No" color="Red" fontcolor="Red"]
add [label="Base Row Exists?" shape=rectangle] add -> chkadd [label="No"]
mod [label="Base Row Exists?" shape=rectangle] add -> outerr [label="Yes" color="Red" fontcolor="Red"]
outmod [label="outMOD Table" shape=box3d] mod -> outerr [label="No" color="Red" fontcolor="Red"]
outadd [label="outADD Table" shape=box3d] mod -> chkmod [label="Yes"]
outdel [label="outDEL Table" shape=box3d] chkmod -> outerr [label="No" color="Red" fontcolor="Red"]
outerr [label="ErrDS Table" shape=box3d] chkmod -> outmod [label="Yes"]
start -> del [label="Delete"] chkadd -> outerr [label="No" color="Red" fontcolor="Red"]
start -> add [label="Insert"] chkadd -> outadd [label="Yes"]
start -> mod [label="Update"]
del -> outdel [label="Yes"] }
del -> outerr [label="No" color="Red" fontcolor="Red"] @enddot
add -> outadd [label="Yes"]
add -> outerr [label="No" color="Red" fontcolor="Red"]
mod -> outerr [label="Yes" color="Red" fontcolor="Red"]
mod -> outmod [label="No"]
}
@enddot
For examples of usage, check out the mp_stackdiffs.test.sas program. For examples of usage, check out the mp_stackdiffs.test.sas program.
@@ -158,13 +186,17 @@
<h4> SAS Macros </h4> <h4> SAS Macros </h4>
@li mf_existvarlist.sas
@li mf_getquotedstr.sas
@li mf_getuniquename.sas @li mf_getuniquename.sas
@li mf_islibds.sas @li mf_islibds.sas
@li mf_nobs.sas
@li mp_abort.sas @li mp_abort.sas
<h4> Related Macros </h4> <h4> Related Macros </h4>
@li mp_coretable.sas @li mp_coretable.sas
@li mp_stackdiffs.test.sas
@li mp_storediffs.sas @li mp_storediffs.sas
@version 9.2 @version 9.2
@@ -202,22 +234,189 @@
,mac=&sysmacroname ,mac=&sysmacroname
,msg=%str(Missing key variables!) ,msg=%str(Missing key variables!)
) )
%mp_abort(iftrue= (
%mf_existVarList(&auditlibds,LIBREF DSN MOVE_TYPE KEY_HASH TGTVAR_NM IS_PK
IS_DIFF TGTVAR_TYPE OLDVAL_NUM NEWVAL_NUM OLDVAL_CHAR NEWVAL_CHAR)=0
)
,mac=&sysmacroname
,msg=%str(Input &auditlibds is missing required columns!)
)
/* set up unique and temporary vars */ /* set up unique and temporary vars */
%local ds1 ds2 ds3 ds4 hashkey inds_auto inds_keep dslist; %local prefix dslist x var keyjoin commakey keepvars;
%let ds1=%upcase(work.%mf_getuniquename(prefix=mpsd_ds1)); %let prefix=%substr(%mf_getuniquename(),1,25);
%let ds2=%upcase(work.%mf_getuniquename(prefix=mpsd_ds2)); %let dslist=ds1d ds2d ds3d ds1a ds2a ds3a ds1m ds2m ds3m pks dups base
%let ds3=%upcase(work.%mf_getuniquename(prefix=mpsd_ds3)); delrec delerr;
%let ds4=%upcase(work.%mf_getuniquename(prefix=mpsd_ds4)); %do x=1 %to %sysfunc(countw(&dslist));
%let hashkey=%upcase(%mf_getuniquename(prefix=mpsd_hashkey)); %let var=%scan(&dslist,&x);
%let inds_auto=%upcase(%mf_getuniquename(prefix=mpsd_inds_auto)); %local &var;
%let inds_keep=%upcase(%mf_getuniquename(prefix=mpsd_inds_keep)); %let &var=%upcase(&prefix._&var);
%end;
%let keyjoin=1=1;
%do x=1 %to %sysfunc(countw(&key));
%let var=%scan(&key,&x);
%let keyjoin=&keyjoin and a.&var=b.&var;
%end;
%let commakey=%mf_getquotedstr(&key,quote=N);
data &errds;
length pk_vars $256 pk_vals $4098 err_msg $512;
call missing (of _all_);
stop;
run;
/**
* Prepare DELETE table
* Records are in the OLDVAL_xxx columns
*/
%let keepvars=MOVE_TYPE KEY_HASH TGTVAR_NM TGTVAR_TYPE IS_PK
OLDVAL_NUM OLDVAL_CHAR
NEWVAL_NUM NEWVAL_CHAR;
proc sort data=&auditlibds(where=(move_type='D') keep=&keepvars)
out=&ds1d(drop=move_type);
by KEY_HASH TGTVAR_NM;
run;
proc transpose data=&ds1d(where=(tgtvar_type='N'))
out=&ds2d(drop=_name_);
by KEY_HASH;
id TGTVAR_NM;
var OLDVAL_NUM;
run;
proc transpose data=&ds1d(where=(tgtvar_type='C'))
out=&ds3d(drop=_name_);
by KEY_HASH;
id TGTVAR_NM;
var OLDVAL_CHAR;
run;
data &outdel;
set &ds2d;
set &ds3d;
run;
proc sort;
by &key;
run;
/**
* Prepare APPEND table
* Records are in the NEWVAL_xxx columns
*/
proc sort data=&auditlibds(where=(move_type='A') keep=&keepvars)
out=&ds1a(drop=move_type);
by KEY_HASH TGTVAR_NM;
run;
proc transpose data=&ds1a(where=(tgtvar_type='N'))
out=&ds2a(drop=_name_);
by KEY_HASH;
id TGTVAR_NM;
var NEWVAL_NUM;
run;
proc transpose data=&ds1a(where=(tgtvar_type='C'))
out=&ds3a(drop=_name_);
by KEY_HASH;
id TGTVAR_NM;
var NEWVAL_CHAR;
run;
data &outadd;
set &ds2a;
set &ds3a;
run;
/**
* Prepare MODIFY table
* Keep only primary key - will add modified values later
*/
proc sort data=&auditlibds(
where=(move_type='M' and is_pk=1) keep=&keepvars
) out=&ds1m(drop=move_type);
by KEY_HASH TGTVAR_NM;
run;
proc transpose data=&ds1m(where=(tgtvar_type='N'))
out=&ds2m(drop=_name_);
by KEY_HASH ;
id TGTVAR_NM;
var NEWVAL_NUM;
run;
proc transpose data=&ds1m(where=(tgtvar_type='C'))
out=&ds3m(drop=_name_);
by KEY_HASH;
id TGTVAR_NM;
var NEWVAL_CHAR;
run;
data &outmod;
set &ds2m;
set &ds3m;
run;
/**
* Extract matching records from the base table
* Do this in one join for efficiency.
* At a later date, this should be optimised for large database tables by using
* passthrough and a temporary table.
*/
data &pks;
set &outadd &outmod &outdel;
keep &key;
run;
proc sort noduprec dupout=&dups;
by &key;
run;
%mp_abort(iftrue= (%mf_nobs(&dups) ne 0)
,mac=&sysmacroname
,msg=%str(duplicates (%mf_nobs(&dups)) found on &auditlibds!)
)
proc sql;
create table &base as
select a.*
from &baselibds a, &pks b
where &keyjoin;
/**
* delete check
* This is straightforward as it relates to records only
*/
proc sql;
create table &delrec as
select a.*
from &outdel a
left join &base b
on &keyjoin
where a.%scan(&key,1) is null
order by &commakey;
data &delerr;
if 0 then set &errds;
set &delrec;
PK_VARS="&key";
PK_VALS=catx('/',&commakey);
ERR_MSG="Rows cannot be deleted as they do not exist on the Base dataset";
keep PK_VARS PK_VALS ERR_MSG;
run;
proc append base=&errds data=&delerr;
run;
data &outdel;
merge &outdel (in=a) &delrec (in=b);
by &key;
if not b;
run;
/*
LIBREF DSN MOVE_TYPE TGTVAR_NM IS_PK IS_DIFF
TGTVAR_TYPE OLDVAL_NUM NEWVAL_NUM OLDVAL_CHAR NEWVAL_CHAR
*/
%let key=%upcase(&key); %let key=%upcase(&key);
%if &mdebug=0 %then %do; %if &mdebug=0 %then %do;
proc sql; proc datasets lib=work;
drop table &ds1, &ds2, &ds3, &ds4; delete &prefix:;
run;
%end; %end;
%mend mp_stackdiffs; %mend mp_stackdiffs;

View File

@@ -40,6 +40,7 @@
"tests/viyaonly" "tests/viyaonly"
], ],
"programFolders": [], "programFolders": [],
"binaryFolders": [],
"deployConfig": { "deployConfig": {
"deployServicePack": true, "deployServicePack": true,
"deployScripts": [] "deployScripts": []

View File

@@ -6,6 +6,7 @@
@li mp_assert.sas @li mp_assert.sas
@li mp_assertcolvals.sas @li mp_assertcolvals.sas
@li mp_assertdsobs.sas @li mp_assertdsobs.sas
@li mp_assertscope.sas
@li mp_stackdiffs.sas @li mp_stackdiffs.sas
@li mp_storediffs.sas @li mp_storediffs.sas
@@ -16,7 +17,7 @@
data work.orig work.deleted work.changed work.appended; data work.orig work.deleted work.changed work.appended;
set sashelp.electric; set sashelp.electric;
if _n_ le 10 then do; if _n_ le 10 then do;
output work.orig work.deleted; output work.deleted;
end; end;
else if _n_ le 20 then do; else if _n_ le 20 then do;
output work.orig; output work.orig;
@@ -38,4 +39,52 @@ run;
,mdebug=1 ,mdebug=1
) )
/* now, stack it back */ %mp_assertscope(SNAPSHOT)
/**
  * Deletions test - where record does not exist
  *
  * Runs mp_stackdiffs with work.orig and work.final, keyed on CUSTOMER YEAR,
  * and writes the outputs to the "1"-suffixed tables. The assertions expect
  * an empty error table and 10 rows in the delete table.
  */
%mp_stackdiffs(work.orig
  ,work.final
  ,CUSTOMER YEAR
  ,mdebug=1
  ,errds=work.errds1
  ,outmod=work.mod1
  ,outadd=work.add1
  ,outdel=work.del1
)
%mp_assertdsobs(work.errds1,
  desc=Delete1 - no errors,
  test=EQUALS 0
)
%mp_assertdsobs(work.del1,
  desc=Delete1 - records populated,
  test=EQUALS 10
)
/**
  * Deletions test - where record DOES exist
  *
  * Same call as above, but against work.orig2 (the first 10 rows of
  * sashelp.electric), with outputs written to the "2"-suffixed tables.
  * The assertions expect 10 rows in the error table and an empty delete
  * table.
  *
  * NOTE(review): the mp_stackdiffs header states that a delete is sent to
  * errds when the row no longer exists in the base table - confirm that the
  * expected counts in these two scenarios match that rule.
  */
data work.orig2;
  set sashelp.electric;
  if _n_ le 10;
run;
%mp_stackdiffs(work.orig2
  ,work.final
  ,CUSTOMER YEAR
  ,mdebug=1
  ,errds=work.errds2
  ,outmod=work.mod2
  ,outadd=work.add2
  ,outdel=work.del2
)
%mp_assertdsobs(work.errds2,
  desc=Delete2 - has errors,
  test=EQUALS 10
)
/* check the delete table produced by THIS run (del2); work.del1 belongs to */
/* the first scenario and was already asserted to contain 10 rows           */
%mp_assertdsobs(work.del2,
  desc=Delete2 - records not populated,
  test=EQUALS 0
)
/* compare macro variable scope against the snapshot taken before the tests */
%mp_assertscope(COMPARE,Desc=MacVar Scope Check)