From 0fd1e470e8a03f2dd581ebe3d0df2c27f10959f1 Mon Sep 17 00:00:00 2001 From: munja Date: Fri, 14 Jan 2022 20:32:03 +0100 Subject: [PATCH] feat: initial header for mp_stackdiffs. Introduces a dependency on DOT (graphviz) for doc generation. --- base/mp_coretable.sas | 28 +++++- base/mp_stackdiffs.sas | 224 +++++++++++++++++++++++++++++++++++++++++ sasjs/doxy/Doxyfile | 1 + 3 files changed, 252 insertions(+), 1 deletion(-) create mode 100644 base/mp_stackdiffs.sas diff --git a/base/mp_coretable.sas b/base/mp_coretable.sas index d0eca56..1de4c43 100644 --- a/base/mp_coretable.sas +++ b/base/mp_coretable.sas @@ -10,6 +10,8 @@ %mp_coretable(LOCKTABLE,libds=work.locktable) @param [in] table_ref The type of table to create. Example values: + @li DIFFTABLE - Used to store changes to tables. Used by mp_storediffs.sas + and mp_stackdiffs.sas @li FILTER_DETAIL - For storing detailed filter values. Used by mp_filterstore.sas. @li FILTER_SUMMARY - For storing summary filter values. Used by @@ -25,6 +27,8 @@ @li mp_filterstore.sas @li mp_lockanytable.sas @li mp_retainedkey.sas + @li mp_storediffs.sas + @li mp_stackdiffs.sas @version 9.2 @author Allan Bowe @@ -36,7 +40,29 @@ %local outds ; %let outds=%sysfunc(ifc(&libds=0,_data_,&libds)); proc sql; -%if &table_ref=LOCKTABLE %then %do; +%if &table_ref=DIFFTABLE %then %do; + create table &outds( + load_ref char(36) label='unique load reference', + processed_dttm num format=E8601DT26.6 label='Processed at timestamp', + libref char(8) label='Library Reference (8 chars)', + dsn char(32) label='Dataset Name (32 chars)', + key_hash char(32) label= + 'MD5 Hash of primary key values (pipe seperated)', + move_type char(1) label='Either (A)ppended, (D)eleted or (M)odified', + is_pk num label='Is Primary Key Field? (1/0)', + is_diff num label= + 'Did value change? (1/0/-1). 
Always -1 for appends and deletes.', + tgtvar_type char(1) label='Either (C)haracter or (N)umeric', + tgtvar_nm char(32) label='Target variable name (32 chars)', + oldval_num num format=best32. label='Old (numeric) value', + newval_num num format=best32. label='New (numeric) value', + oldval_char char(32765) label='Old (character) value', + newval_char char(32765) label='New (character) value', + constraint pk_mpe_audit + primary key(load_ref,libref,dsn,key_hash,tgtvar_nm) + ); +%end; +%else %if &table_ref=LOCKTABLE %then %do; create table &outds( lock_lib char(8), lock_ds char(32), diff --git a/base/mp_stackdiffs.sas b/base/mp_stackdiffs.sas new file mode 100644 index 0000000..a6da2be --- /dev/null +++ b/base/mp_stackdiffs.sas @@ -0,0 +1,224 @@ +/** + @file + @brief Prepares an audit table for stacking (re-applying) the changes. + @details WORK IN PROGRESS!! + + When the underlying data from a Base Table is refreshed, it can be helpful + to have any previously-applied changes, re-applied. + + Such a situation might arise if you are applying those changes using a tool + like [Data Controller for SAS®](https://datacontroller.io) - which records + all such changes in an audit table. + It may also apply if you are preparing a series of specific cell-level + transactions, that you would like to apply to multiple sets of (similarly + structured) Base Tables. + + In both cases, it is necessary that the transactions are stored using + the mp_storediffs.sas macro, or at least that the underlying table is + structured as per the definition in mp_coretable.sas (DIFFTABLE entry) + + This macro is used to convert the stored changes (tall format) into + staged changes (wide format), with base table values incorporated (in the + case of modified rows), ready for the subsequent load process.
+ + Essentially then, what this macro does, is turn a table like this: + + |LIBREF:$8.|DSN:$32.|MOVE_TYPE:$1.|TGTVAR_NM:$32.|IS_PK:best.|IS_DIFF:best.|TGTVAR_TYPE:$1.|OLDVAL_NUM:best32.|NEWVAL_NUM:best32.|OLDVAL_CHAR:$32765.|NEWVAL_CHAR:$32765.| + |---|---|---|---|---|---|---|---|---|---|---| + |`SASHELP `|`CLASS `|`A `|`NAME `|`1 `|`-1 `|`C `|`. `|`. `|` `|`Newbie `| + |`SASHELP `|`CLASS `|`A `|`AGE `|`0 `|`-1 `|`N `|`. `|`13 `|` `|` `| + |`SASHELP `|`CLASS `|`A `|`HEIGHT `|`0 `|`-1 `|`N `|`. `|`65.3 `|` `|` `| + |`SASHELP `|`CLASS `|`A `|`SEX `|`0 `|`-1 `|`C `|`. `|`. `|` `|`F `| + |`SASHELP `|`CLASS `|`A `|`WEIGHT `|`0 `|`-1 `|`N `|`. `|`98 `|` `|` `| + |`SASHELP `|`CLASS `|`D `|`NAME `|`1 `|`-1 `|`C `|`. `|`. `|`Alfred `|` `| + |`SASHELP `|`CLASS `|`D `|`AGE `|`0 `|`-1 `|`N `|`14 `|`. `|` `|` `| + |`SASHELP `|`CLASS `|`D `|`HEIGHT `|`0 `|`-1 `|`N `|`69 `|`. `|` `|` `| + |`SASHELP `|`CLASS `|`D `|`SEX `|`0 `|`-1 `|`C `|`. `|`. `|`M `|` `| + |`SASHELP `|`CLASS `|`D `|`WEIGHT `|`0 `|`-1 `|`N `|`112.5 `|`. `|` `|` `| + |`SASHELP `|`CLASS `|`M `|`NAME `|`1 `|`0 `|`C `|`. `|`. `|`Alice `|`Alice `| + |`SASHELP `|`CLASS `|`M `|`AGE `|`0 `|`1 `|`N `|`13 `|`99 `|` `|` `| + |`SASHELP `|`CLASS `|`M `|`HEIGHT `|`0 `|`0 `|`N `|`56.5 `|`56.5 `|` `|` `| + |`SASHELP `|`CLASS `|`M `|`SEX `|`0 `|`0 `|`C `|`. `|`. `|`F `|`F `| + |`SASHELP `|`CLASS `|`M `|`WEIGHT `|`0 `|`0 `|`N `|`84 `|`84 `|` `|` `| + + Into three tables like this: + + `work.outmod`: + |NAME:$8.|SEX:$1.|AGE:best.|HEIGHT:best.|WEIGHT:best.| + |---|---|---|---|---| + |`Alice `|`F `|`99 `|`56.5 `|`84 `| + + `work.outadd`: + |NAME:$8.|SEX:$1.|AGE:best.|HEIGHT:best.|WEIGHT:best.| + |---|---|---|---|---| + |`Newbie `|`F `|`13 `|`65.3 `|`98 `| + + `work.outdel`: + |NAME:$8.|SEX:$1.|AGE:best.|HEIGHT:best.|WEIGHT:best.| + |---|---|---|---|---| + |`Alfred `|`M `|`14 `|`69 `|`112.5 `| + + As you might expect, there are a bunch of extra features and checks. + + The macro supports both SCD2 (TXTEMPORAL) and UPDATE loadtypes. 
If the + base table contains a PROCESSED_DTTM column (or similar), this can be + ignored by declaring it in the `processed_dttm_var` parameter. + + If the structure of the Base Table has changed, the following rules apply: + + @li New Columns - Irrelevant for deletes. For inserts, the new column + values are set to missing. For updates, the base table values are used. + @li Deleted Columns - These will also be absent in the output tables. + @li Change in Primary Key - This is not (well, cannot be) supported!! + + Rules for stacking changes are as follows: + + + + + + + + + + + + + + + +
Transaction TypeBehaviour
Deletes + For previously deleted rows, the PK is added to the `outDEL` table
+ If the row no longer exists in the base table, the row is added to the + `errDS` table instead. +
Inserts + Previously newly added rows are added to the `outADD` table UNLESS they + are present in the Base table.
In this case they are added to the + `errDS` table instead. +
Updates + Previously modified rows are merged with base table values such that + only the individual cells that were _previously_ changed are re-applied. + Where the row contains cells that were not marked as having changed in + the prior transaction, the 'blanks' are filled with base table values in + the `outMOD` table.
+ If the row no longer exists on the base table, then the row is added to + the `errDS` table instead. +
+ + To illustrate the above with a diagram: + + @dot + digraph { + rankdir="TB" + + start[label="Transaction Type?" shape=Mdiamond] + del[label="Base Row Exists?" shape=rectangle] + add [label="Base Row Exists?" shape=rectangle] + mod [label="Base Row Exists?" shape=rectangle] + outmod [label="outMOD Table" shape=box3d] + outadd [label="outADD Table" shape=box3d] + outdel [label="outDEL Table" shape=box3d] + outerr [label="ErrDS Table" shape=box3d] + start -> del [label="Delete"] + start -> add [label="Insert"] + start -> mod [label="Update"] + + del -> outdel [label="Yes"] + del -> outerr [label="No" color="Red" fontcolor="Red"] + add -> outadd [label="No"] + add -> outerr [label="Yes" color="Red" fontcolor="Red"] + mod -> outerr [label="No" color="Red" fontcolor="Red"] + mod -> outmod [label="Yes"] + + } + @enddot + + For examples of usage, check out the mp_stackdiffs.test.sas program. + + + @param [in] baselibds Base Table against which the changes will be applied, + in libref.dataset format. + @param [in] auditlibds Dataset with previously applied transactions, to be + re-applied. Use libref.dataset format. + DDL as follows: %mp_coretable(DIFFTABLE) + @param [in] key Space separated list of key variables + @param [in] mdebug= (0) Set to 1 to enable DEBUG messages and preserve outputs + @param [in] processed_dttm_var= (0) If a variable is being used to mark + the processed datetime, put the name of the variable here. It will NOT + be included in the staged dataset (the load process is expected to + provide this) + @param [out] errds= (work.errds) Output table containing problematic records.
+ The columns of this table are: + @li PK_VARS - Space separated list of primary key variable names + @li PK_VALS - Slash separated list of PK variable values + @li ERR_MSG - Explanation of why this record is problematic + @param [out] outmod= (work.outmod) Output table containing modified records + @param [out] outadd= (work.outadd) Output table containing additional records + @param [out] outdel= (work.outdel) Output table containing deleted records + + +
<h4> SAS Macros </h4>
+ @li mf_getuniquename.sas + @li mf_islibds.sas + @li mp_abort.sas + + +
<h4> Related Macros </h4>
+ @li mp_coretable.sas + @li mp_storediffs.sas + + @version 9.2 + @author Allan Bowe +**/ +/** @cond */ + +%macro mp_stackdiffs(baselibds + ,auditlibds + ,key + ,mdebug=0 + ,processed_dttm_var=0 + ,errds=work.errds + ,outmod=work.outmod + ,outadd=work.outadd + ,outdel=work.outdel +)/*/STORE SOURCE*/; +/* dbg is set to * unless mdebug=1 (it is not referenced again in this WIP body) */ +%local dbg; +%if &mdebug=1 %then %do; + %put &sysmacroname entry vars:; + %put _local_; +%end; +%else %let dbg=*; + +/* input parameter validations: abort unless both inputs pass mf_islibds and key is non-empty */ +%mp_abort(iftrue= (%mf_islibds(&baselibds) ne 1) + ,mac=&sysmacroname + ,msg=%str(Invalid baselibds: &baselibds) +) +%mp_abort(iftrue= (%mf_islibds(&auditlibds) ne 1) + ,mac=&sysmacroname + ,msg=%str(Invalid auditlibds: &auditlibds) +) +%mp_abort(iftrue= (%length(&key)=0) + ,mac=&sysmacroname + ,msg=%str(Missing key variables!) +) + +/* set up unique and temporary vars (all names are upcased) */ +%local ds1 ds2 ds3 ds4 hashkey inds_auto inds_keep dslist; +%let ds1=%upcase(work.%mf_getuniquename(prefix=mpsd_ds1)); +%let ds2=%upcase(work.%mf_getuniquename(prefix=mpsd_ds2)); +%let ds3=%upcase(work.%mf_getuniquename(prefix=mpsd_ds3)); +%let ds4=%upcase(work.%mf_getuniquename(prefix=mpsd_ds4)); +%let hashkey=%upcase(%mf_getuniquename(prefix=mpsd_hashkey)); +%let inds_auto=%upcase(%mf_getuniquename(prefix=mpsd_inds_auto)); +%let inds_keep=%upcase(%mf_getuniquename(prefix=mpsd_inds_keep)); + +%let key=%upcase(&key); + +/* tidy up temporary tables - only performed when mdebug=0 */ +%if &mdebug=0 %then %do; + proc sql; + /* nothing in this WIP body creates the temp tables yet, so drop only those that exist */ + %if %sysfunc(exist(&ds1)) %then %do; drop table &ds1; %end; + %if %sysfunc(exist(&ds2)) %then %do; drop table &ds2; %end; + %if %sysfunc(exist(&ds3)) %then %do; drop table &ds3; %end; + %if %sysfunc(exist(&ds4)) %then %do; drop table &ds4; %end; + /* end the SQL step so it does not remain open after the macro returns */ + quit; +%end; + +%mend mp_stackdiffs; +/** @endcond */ \ No newline at end of file diff --git a/sasjs/doxy/Doxyfile b/sasjs/doxy/Doxyfile index fcc2bd9..8e5b066 100644 --- a/sasjs/doxy/Doxyfile +++ b/sasjs/doxy/Doxyfile @@ -8,6 +8,7 @@ FILE_PATTERNS = *.sas \ *.dox GENERATE_LATEX = NO GENERATE_TREEVIEW = YES +HAVE_DOT = YES HIDE_FRIEND_COMPOUNDS = YES HIDE_IN_BODY_DOCS = YES HIDE_SCOPE_NAMES = YES