diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index ec24915..3465960 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -12,7 +12,7 @@ This repository makes use of the [SASjs](https://sasjs.io) framework for code or * [VSCode](https://sasjs.io/windows/#vscode) - feature packed IDE for code editing (warning - highly effective!) * [GIT](https://sasjs.io/windows/#git) - a safety net you cannot (and should not) do without. -For generating the documentation (`sasjs doc`) it is also necessary to install [doxygen](https://www.doxygen.nl/manual/install.html). +For generating the documentation (`sasjs doc`) it is also necessary to install [doxygen](https://www.doxygen.nl/manual/install.html) and GraphViz (`sudo port install graphviz` on mac, or `sudo apt-get install graphviz` on Ubuntu). To get configured: diff --git a/.gitpod.dockerfile b/.gitpod.dockerfile index eb685b9..6b680a1 100644 --- a/.gitpod.dockerfile +++ b/.gitpod.dockerfile @@ -1,6 +1,6 @@ FROM gitpod/workspace-full RUN sudo apt-get update \ - && sudo apt-get install -y \ - doxygen \ + && sudo apt-get install -y doxygen \ + && sudo apt-get install -y graphviz \ && sudo rm -rf /var/lib/apt/lists/* diff --git a/.gitpod.yml b/.gitpod.yml index e6933a4..47c1990 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -1,5 +1,7 @@ tasks: - - init: nvm install --lts && npm i -g @sasjs/cli + - init: | + nvm install --lts + npm i -g @sasjs/cli image: file: .gitpod.dockerfile diff --git a/all.sas b/all.sas index dfc509b..4e9f2d3 100644 --- a/all.sas +++ b/all.sas @@ -67,7 +67,6 @@ options noquotelenmax;

Related Macros

@li mf_trimstr.sas - @li mf_wordsinstr1butnotstr2.sas @version 9.2 @author Allan Bowe @@ -243,11 +242,11 @@ https://github.com/yabwon/SAS_PACKAGES/blob/main/packages/baseplus.md#functionex /** @endcond *//** @file @brief Checks if a variable exists in a data set. - @details Returns 0 if the variable does NOT exist, and return the position of - the var if it does. - Usage: + @details Returns 0 if the variable does NOT exist, and the position of the var + if it does. + Usage: - %put %mf_existvar(work.someds, somevar) + %put %mf_existvar(work.someds, somevar) @param [in] libds 2 part dataset or view reference @param [in] var variable name @@ -534,18 +533,19 @@ https://github.com/yabwon/SAS_PACKAGES/blob/main/packages/baseplus.md#functionex %put %mf_getfilesize(fpath=C:\temp\myfile.txt); - or + or, provide a libds value as follows: data x;do x=1 to 100000;y=x;output;end;run; %put %mf_getfilesize(libds=work.x,format=yes); - gives: + Which gives: - 2mb + > 2mb + + @param [in] fpath= Full path and filename. Provide this OR the libds value. + @param [in] libds= (0) Library.dataset value (assumes library is BASE engine) + @param [in] format= (NO) Set to yes to apply sizekmg. format - @param fpath= full path and filename. Provide this OR the libds value. - @param libds= library.dataset value (assumes library is BASE engine) - @param format= set to yes to apply sizekmg. 
format @returns bytes @version 9.2 @@ -555,16 +555,32 @@ https://github.com/yabwon/SAS_PACKAGES/blob/main/packages/baseplus.md#functionex %macro mf_getfilesize(fpath=,libds=0,format=NO )/*/STORE SOURCE*/; - %if &libds ne 0 %then %do; - %let fpath=%sysfunc(pathname(%scan(&libds,1,.)))/%scan(&libds,2,.).sas7bdat; - %end; + %local rc fid fref bytes dsid lib vnum; - %local rc fid fref bytes; - %let rc=%sysfunc(filename(fref,&fpath)); - %let fid=%sysfunc(fopen(&fref)); - %let bytes=%sysfunc(finfo(&fid,File Size (bytes))); - %let rc=%sysfunc(fclose(&fid)); - %let rc=%sysfunc(filename(fref)); + %if &libds ne 0 %then %do; + %let libds=%upcase(&libds); + %if %index(&libds,.)=0 %then %let lib=WORK; + %else %let lib=%scan(&libds,1,.); + %let dsid=%sysfunc(open( + sashelp.vtable(where=(libname="&lib" and memname="%scan(&libds,-1,.)") + keep=libname memname filesize + ) + )); + %if (&dsid ^= 0) %then %do; + %let vnum=%sysfunc(varnum(&dsid,FILESIZE)); + %let rc=%sysfunc(fetch(&dsid)); + %let bytes=%sysfunc(getvarn(&dsid,&vnum)); + %let rc= %sysfunc(close(&dsid)); + %end; + %else %put &sysmacroname: &libds could not be opened! %sysfunc(sysmsg()); + %end; + %else %do; + %let rc=%sysfunc(filename(fref,&fpath)); + %let fid=%sysfunc(fopen(&fref)); + %let bytes=%sysfunc(finfo(&fid,File Size (bytes))); + %let rc=%sysfunc(fclose(&fid)); + %let rc=%sysfunc(filename(fref)); + %end; %if &format=NO %then %do; &bytes @@ -1881,7 +1897,6 @@ Usage: %local count_base count_extr i i2 extr_word base_word match outvar; %if %length(&str1)=0 or %length(&str2)=0 %then %do; - %put %str(WARN)ING: empty string provided!; %put base string (str1)= &str1; %put compare string (str2) = &str2; %return; @@ -1908,6 +1923,9 @@ Usage: @brief Returns words that are in string 1 but not in string 2 @details Compares two space separated strings and returns the words that are in the first but not in the second. + + Note - case sensitive! 
+ Usage: %let x= %mf_wordsInStr1ButNotStr2( @@ -1918,10 +1936,8 @@ Usage: returns: > sss bram boo - @param str1= string containing words to extract - @param str2= used to compare with the extract string - - @warning CASE SENSITIVE! + @param [in] str1= string containing words to extract + @param [in] str2= used to compare with the extract string @version 9.2 @author Allan Bowe @@ -1935,7 +1951,6 @@ Usage: %local count_base count_extr i i2 extr_word base_word match outvar; %if %length(&str1)=0 or %length(&str2)=0 %then %do; - %put %str(WARN)ING: empty string provided!; %put base string (str1)= &str1; %put compare string (str2) = &str2; %return; @@ -2898,10 +2913,6 @@ run; %mp_assertdsobs(sashelp.class,test=ATMOST 20) %* pass if <21 obs present; -

SAS Macros

- @li mf_nobs.sas - @li mp_abort.sas - @param [in] inds input dataset to test for presence of observations @param [in] desc= (Testing observations) The user provided test description @@ -2919,6 +2930,11 @@ run; |---|---|---| |User Provided description|PASS|Dataset &inds has XX obs| +

SAS Macros

+ @li mf_getuniquename.sas + @li mf_nobs.sas + @li mp_abort.sas +

Related Macros

@li mp_assertcolvals.sas @li mp_assert.sas @@ -2935,9 +2951,10 @@ run; outds=work.test_results )/*/STORE SOURCE*/; - %local nobs; + %local nobs ds; %let nobs=%mf_nobs(&inds); %let test=%upcase(&test); + %let ds=%mf_getuniquename(prefix=mp_assertdsobs); %if %substr(&test.xxxxx,1,6)=EQUALS %then %do; %let val=%scan(&test,2,%str( )); @@ -2970,7 +2987,7 @@ run; ) %end; - data; + data &ds; length test_description $256 test_result $4 test_comments $256; test_description=symget('desc'); test_result='FAIL'; @@ -2996,9 +3013,6 @@ run; %end; run; - %local ds; - %let ds=&syslast; - proc append base=&outds data=&ds; run; @@ -3008,18 +3022,25 @@ run; %mend mp_assertdsobs;/** @file @brief Used to capture scope leakage of macro variables - @details A common 'difficult to detect' bug in macros is where a nested - macro over-writes variables in a higher level macro. + @details - This assertion takes a snapshot of the macro variables before and after - a macro invocation. This makes it easy to detect whether any macro - variables were modified or changed. + A common 'difficult to detect' bug in macros is where a nested macro + over-writes variables in a higher level macro. - Currently, the macro only checks for global scope variables. In the future - it may be extended to work at multiple levels of nesting. + This assertion takes a snapshot of the macro variables before and after + a macro invocation. Differences are captured in the `&outds` table. This + makes it easy to detect whether any macro variables were modified or + changed. - If you would like this feature, feel free to contribute / raise an issue / - engage the SASjs team directly. + The following variables are NOT tested (as they are known, global variables + used in SASjs): + + @li &sasjs_prefix._FUNCTIONS + + Global variables are initialised in mp_init.sas - which will also trigger + "strict mode" in your SAS session. 
Whilst this is a default in SASjs + produced apps, if you prefer not to use this mode, simply instantiate the + following variable to prevent the macro from running: `SASJS_PREFIX` Example usage: @@ -3031,12 +3052,17 @@ run; desc=Checking macro variables against previous snapshot ) + This macro is designed to work alongside `sasjs test` - for more information + about this facility, visit [cli.sasjs.io/test](https://cli.sasjs.io/test). + @param [in] action (SNAPSHOT) The action to take. Valid values: @li SNAPSHOT - take a copy of the current macro variables @li COMPARE - compare the current macro variables against previous values @param [in] scope= (GLOBAL) The scope of the variables to be checked. This corresponds to the values in the SCOPE column in `sashelp.vmacro`. @param [in] desc= (Testing scope leakage) The user provided test description + @param [in] ignorelist= Provide a list of macro variable names to ignore from + the comparison @param [in,out] scopeds= (work.mp_assertscope) The dataset to contain the scope snapshot @param [out] outds= (work.test_results) The output dataset to contain the @@ -3045,6 +3071,10 @@ run; |---|---|---| |User Provided description|PASS|No out of scope variables created or modified| +

SAS Macros

+ @li mf_getquotedstr.sas + @li mp_init.sas +

Related Macros

@li mp_assert.sas @li mp_assertcols.sas @@ -3061,9 +3091,18 @@ run; desc=Testing Scope Leakage, scope=GLOBAL, scopeds=work.mp_assertscope, + ignorelist=, outds=work.test_results )/*/STORE SOURCE*/; -%local ds test_result test_comments del add mod; +%local ds test_result test_comments del add mod ilist; +%let ilist=%upcase(&sasjs_prefix._FUNCTIONS &ignorelist); + +/** + * this sets up the global vars, it will also enter STRICT mode. If this + * behaviour is not desired, simply initiate the following global macro + * variable to prevent the macro from running: SASJS_PREFIX + */ +%mp_init() /* get current variables */ %if &action=SNAPSHOT %then %do; @@ -3071,7 +3110,7 @@ run; create table &scopeds as select name,offset,value from dictionary.macros - where scope="&scope" + where scope="&scope" and name not in (%mf_getquotedstr(&ilist)) order by name,offset; %end; %else %if &action=COMPARE %then %do; @@ -3080,7 +3119,7 @@ run; create table _data_ as select name,offset,value from dictionary.macros - where scope="&scope" + where scope="&scope" and name not in (%mf_getquotedstr(&ilist)) order by name,offset; %let ds=&syslast; @@ -3484,6 +3523,8 @@ run; %mp_coretable(LOCKTABLE,libds=work.locktable) @param [in] table_ref The type of table to create. Example values: + @li DIFFTABLE - Used to store changes to tables. Used by mp_storediffs.sas + and mp_stackdiffs.sas @li FILTER_DETAIL - For storing detailed filter values. Used by mp_filterstore.sas. @li FILTER_SUMMARY - For storing summary filter values. 
Used by @@ -3499,6 +3540,8 @@ run; @li mp_filterstore.sas @li mp_lockanytable.sas @li mp_retainedkey.sas + @li mp_storediffs.sas + @li mp_stackdiffs.sas @version 9.2 @author Allan Bowe @@ -3510,7 +3553,29 @@ run; %local outds ; %let outds=%sysfunc(ifc(&libds=0,_data_,&libds)); proc sql; -%if &table_ref=LOCKTABLE %then %do; +%if &table_ref=DIFFTABLE %then %do; + create table &outds( + load_ref char(36) label='unique load reference', + processed_dttm num format=E8601DT26.6 label='Processed at timestamp', + libref char(8) label='Library Reference (8 chars)', + dsn char(32) label='Dataset Name (32 chars)', + key_hash char(32) label= + 'MD5 Hash of primary key values (pipe seperated)', + move_type char(1) label='Either (A)ppended, (D)eleted or (M)odified', + is_pk num label='Is Primary Key Field? (1/0)', + is_diff num label= + 'Did value change? (1/0/-1). Always -1 for appends and deletes.', + tgtvar_type char(1) label='Either (C)haracter or (N)umeric', + tgtvar_nm char(32) label='Target variable name (32 chars)', + oldval_num num format=best32. label='Old (numeric) value', + newval_num num format=best32. 
label='New (numeric) value', + oldval_char char(32765) label='Old (character) value', + newval_char char(32765) label='New (character) value', + constraint pk_mpe_audit + primary key(load_ref,libref,dsn,key_hash,tgtvar_nm) + ); +%end; +%else %if &table_ref=LOCKTABLE %then %do; create table &outds( lock_lib char(8), lock_ds char(32), @@ -3763,10 +3828,6 @@ Usage: ,mac=&sysmacroname ,msg=%str(the BASEDS variable must be provided) ) -%mp_abort(iftrue=( &baseds=0 ) - ,mac=&sysmacroname - ,msg=%str(the BASEDS variable must be provided) -) %mp_abort(iftrue=( %mf_existds(&baseds)=0 ) ,mac=&sysmacroname ,msg=%str(the BASEDS dataset (&baseds) needs to be assigned, and to exist) @@ -5078,7 +5139,7 @@ run; %macro mp_ds2squeeze( libds, - outds=work.work.mp_ds2squeeze, + outds=work.mp_ds2squeeze, mdebug=0 )/*/STORE SOURCE*/; %local dbg source; @@ -5091,9 +5152,10 @@ run; %let source=/source2; %end; -%local optval ds fref; +%local optval ds fref startsize; %let ds=%mf_getuniquename(); %let fref=%mf_getuniquefileref(); +%let startsize=%mf_getfilesize(libds=&libds,format=yes); %mp_getmaxvarlengths(&libds,outds=&ds) @@ -5142,7 +5204,7 @@ options varlenchk=&optval; filename &fref clear; %end; -%put &sysmacroname: &libds was %mf_getfilesize(libds=&libds,format=yes); +%put &sysmacroname: &libds was &startsize; %put &sysmacroname: &outds is %mf_getfilesize(libds=&outds,format=yes); %mend mp_ds2squeeze;/** @@ -7255,16 +7317,21 @@ create table &outds as %inc mc; %mp_guesspk(sashelp.class,outds=classpks) - @param baseds The dataset to analyse - @param outds= The output dataset to contain the possible PKs - @param max_guesses= (3) The total number of possible primary keys to generate. - A table may have multiple unlikely PKs, so no need to list them all. - @param min_rows= (5) The minimum number of rows a table should have in order - to try and guess the PK. 
+ @param [in] baseds The dataset to analyse + @param [out] outds= The output dataset to contain the possible PKs + @param [in] max_guesses= (3) The total number of possible primary keys to + generate. A table may have multiple (unlikely) PKs, so no need to list them + all. + @param [in] min_rows= (5) The minimum number of rows a table should have in + order to try and guess the PK. + @param [in] ignore_cols= (0) Space separated list of columns which you are + sure are not part of the primary key (helps to avoid false positives) + @param [in] mdebug= (0) Set to 1 to enable DEBUG messages and preserve outputs

SAS Macros

@li mf_getvarlist.sas @li mf_getuniquename.sas + @li mf_wordsInstr1butnotstr2.sas @li mf_nobs.sas

Related Macros

@@ -7276,179 +7343,226 @@ create table &outds as **/ %macro mp_guesspk(baseds - ,outds=mp_guesspk - ,max_guesses=3 - ,min_rows=5 + ,outds=mp_guesspk + ,max_guesses=3 + ,min_rows=5 + ,ignore_cols=0 + ,mdebug=0 )/*/STORE SOURCE*/; +%local dbg; +%if &mdebug=1 %then %do; + %put &sysmacroname entry vars:; + %put _local_; +%end; +%else %let dbg=*; - /* declare local vars */ - %local var vars vcnt i j k l tmpvar tmpds rows posspks ppkcnt; - %let vars=%mf_getvarlist(&baseds); - %let vcnt=%sysfunc(countw(&vars)); +/* declare local vars */ +%local var vars vcnt i j k l tmpvar tmpds rows posspks ppkcnt; +%let vars=%upcase(%mf_getvarlist(&baseds)); +%let vars=%mf_wordsInStr1ButNotStr2(str1=&vars,str2=%upcase(&ignore_cols)); +%let vcnt=%sysfunc(countw(&vars)); - %if &vcnt=0 %then %do; - %put &sysmacroname: &baseds has no variables! Exiting.; - %return; +%if &vcnt=0 %then %do; + %put &sysmacroname: &baseds has no variables! Exiting.; + %return; +%end; + +/* get null count and row count */ +%let tmpvar=%mf_getuniquename(); +proc sql noprint; +create table _data_ as select + count(*) as &tmpvar +%do i=1 %to &vcnt; + %let var=%scan(&vars,&i); + ,sum(case when &var is missing then 1 else 0 end) as &var +%end; + from &baseds; + +/* transpose table and scan for not null cols */ +proc transpose; +data _null_; + set &syslast end=last; + length vars $32767; + retain vars ; + if _name_="&tmpvar" then call symputx('rows',col1,'l'); + else if col1=0 then vars=catx(' ',vars,_name_); + if last then call symputx('posspks',vars,'l'); +run; + +%let ppkcnt=%sysfunc(countw(&posspks)); +%if &ppkcnt=0 %then %do; + %put &sysmacroname: &baseds has no non-missing variables! Exiting.; + %return; +%end; + +proc sort data=&baseds(keep=&posspks) out=_data_ noduprec; + by _all_; +run; +%local pkds; %let pkds=&syslast; + +%if &rows > %mf_nobs(&pkds) %then %do; + %put &sysmacroname: &baseds has no combination of unique records! 
Exiting.; + %return; +%end; + +/* now check cardinality */ +proc sql noprint; +create table _data_ as select +%do i=1 %to &ppkcnt; + %let var=%scan(&posspks,&i); + count(distinct &var) as &var + %if &i<&ppkcnt %then ,; +%end; + from &pkds; + +/* transpose and sort by cardinality */ +proc transpose; +proc sort; by descending col1; +run; + +/* create initial PK list and re-order posspks list */ +data &outds(keep=pkguesses); + length pkguesses $5000 vars $5000; + set &syslast end=last; + retain vars ; + vars=catx(' ',vars,_name_); + if col1=&rows then do; + pkguesses=_name_; + output; + end; + if last then call symputx('posspks',vars,'l'); +run; + +%if %mf_nobs(&outds) ge &max_guesses %then %do; + %put &sysmacroname: %mf_nobs(&outds) possible primary key values found; + %return; +%end; + +%if &ppkcnt=1 %then %do; + %put &sysmacroname: No more PK guess possible; + %return; +%end; + +/* begin scanning for uniques on pairs of PKs */ +%let tmpds=%mf_getuniquename(); +%local lev1 lev2; +%do i=1 %to &ppkcnt; + %let lev1=%scan(&posspks,&i); + %do j=2 %to &ppkcnt; + %let lev2=%scan(&posspks,&j); + %if &lev1 ne &lev2 %then %do; + /* check for two level uniqueness */ + proc sort data=&pkds(keep=&lev1 &lev2) out=&tmpds noduprec; + by _all_; + run; + %if %mf_nobs(&tmpds)=&rows %then %do; + proc sql; + insert into &outds values("&lev1 &lev2"); + %if %mf_nobs(&outds) ge &max_guesses %then %do; + %put &sysmacroname: Max PKs reached at Level 2 for &baseds; + %goto exit; + %end; + %end; + %end; %end; +%end; - /* get null count and row count */ - %let tmpvar=%mf_getuniquename(); - proc sql noprint; - create table _data_ as select - count(*) as &tmpvar - %do i=1 %to &vcnt; - %let var=%scan(&vars,&i); - ,sum(case when &var is missing then 1 else 0 end) as &var - %end; - from &baseds; +%if &ppkcnt=2 %then %do; + %put &sysmacroname: No more PK guess possible; + %goto exit; +%end; - /* transpose table and scan for not null cols */ - proc transpose; - data _null_; - set &syslast end=last; - 
length vars $32767; - retain vars ; - if _name_="&tmpvar" then call symputx('rows',col1,'l'); - else if col1=0 then vars=catx(' ',vars,_name_); - if last then call symputx('posspks',vars,'l'); - run; - - %let ppkcnt=%sysfunc(countw(&posspks)); - %if &ppkcnt=0 %then %do; - %put &sysmacroname: &baseds has no non-missing variables! Exiting.; - %return; - %end; - - proc sort data=&baseds(keep=&posspks) out=_data_ noduprec; - by _all_; - run; - %local pkds; %let pkds=&syslast; - - %if &rows > %mf_nobs(&pkds) %then %do; - %put &sysmacroname: &baseds has no combination of unique records! Exiting.; - %return; - %end; - - /* now check cardinality */ - proc sql noprint; - create table _data_ as select - %do i=1 %to &ppkcnt; - %let var=%scan(&posspks,&i); - count(distinct &var) as &var - %if &i<&ppkcnt %then ,; - %end; - from &pkds; - - /* transpose and sort by cardinality */ - proc transpose; - proc sort; by descending col1; - run; - - /* create initial PK list and re-order posspks list */ - data &outds(keep=pkguesses); - length pkguesses $5000 vars $5000; - set &syslast end=last; - retain vars ; - vars=catx(' ',vars,_name_); - if col1=&rows then do; - pkguesses=_name_; - output; - end; - if last then call symputx('posspks',vars,'l'); - run; - - %if %mf_nobs(&outds) ge &max_guesses %then %do; - %put &sysmacroname: %mf_nobs(&outds) possible primary key values found; - %return; - %end; - - %if &ppkcnt=1 %then %do; - %put &sysmacroname: No more PK guess possible; - %return; - %end; - - /* begin scanning for uniques on pairs of PKs */ - %let tmpds=%mf_getuniquename(); - %local lev1 lev2; - %do i=1 %to &ppkcnt; - %let lev1=%scan(&posspks,&i); - %do j=2 %to &ppkcnt; - %let lev2=%scan(&posspks,&j); - %if &lev1 ne &lev2 %then %do; - /* check for two level uniqueness */ - proc sort data=&pkds(keep=&lev1 &lev2) out=&tmpds noduprec; +/* begin scanning for uniques on PK triplets */ +%local lev3; +%do i=1 %to &ppkcnt; + %let lev1=%scan(&posspks,&i); + %do j=2 %to &ppkcnt; + %let 
lev2=%scan(&posspks,&j); + %if &lev1 ne &lev2 %then %do k=3 %to &ppkcnt; + %let lev3=%scan(&posspks,&k); + %if &lev1 ne &lev3 and &lev2 ne &lev3 %then %do; + /* check for three level uniqueness */ + proc sort data=&pkds(keep=&lev1 &lev2 &lev3) out=&tmpds noduprec; by _all_; run; %if %mf_nobs(&tmpds)=&rows %then %do; proc sql; - insert into &outds values("&lev1 &lev2"); + insert into &outds values("&lev1 &lev2 &lev3"); %if %mf_nobs(&outds) ge &max_guesses %then %do; - %put &sysmacroname: Max PKs reached at Level 2 for &baseds; - %return; + %put &sysmacroname: Max PKs reached at Level 3 for &baseds; + %goto exit; %end; %end; %end; %end; %end; +%end; - %if &ppkcnt=2 %then %do; - %put &sysmacroname: No more PK guess possible; - %return; - %end; +%if &ppkcnt=3 %then %do; + %put &sysmacroname: No more PK guess possible; + %goto exit; +%end; - /* begin scanning for uniques on PK triplets */ - %local lev3; - %do i=1 %to &ppkcnt; - %let lev1=%scan(&posspks,&i); - %do j=2 %to &ppkcnt; - %let lev2=%scan(&posspks,&j); - %if &lev1 ne &lev2 %then %do k=3 %to &ppkcnt; - %let lev3=%scan(&posspks,&k); - %if &lev1 ne &lev3 and &lev2 ne &lev3 %then %do; - /* check for three level uniqueness */ - proc sort data=&pkds(keep=&lev1 &lev2 &lev3) out=&tmpds noduprec; +/* scan for uniques on up to 4 PK fields */ +%local lev4; +%do i=1 %to &ppkcnt; + %let lev1=%scan(&posspks,&i); + %do j=2 %to &ppkcnt; + %let lev2=%scan(&posspks,&j); + %if &lev1 ne &lev2 %then %do k=3 %to &ppkcnt; + %let lev3=%scan(&posspks,&k); + %if &lev1 ne &lev3 and &lev2 ne &lev3 %then %do l=4 %to &ppkcnt; + %let lev4=%scan(&posspks,&l); + %if &lev1 ne &lev4 and &lev2 ne &lev4 and &lev3 ne &lev4 %then %do; + /* check for four level uniqueness */ + proc sort data=&pkds(keep=&lev1 &lev2 &lev3 &lev4) + out=&tmpds noduprec; by _all_; run; %if %mf_nobs(&tmpds)=&rows %then %do; proc sql; - insert into &outds values("&lev1 &lev2 &lev3"); + insert into &outds values("&lev1 &lev2 &lev3 &lev4"); %if %mf_nobs(&outds) ge 
&max_guesses %then %do; - %put &sysmacroname: Max PKs reached at Level 3 for &baseds; - %return; + %put &sysmacroname: Max PKs reached at Level 4 for &baseds; + %goto exit; %end; %end; %end; %end; %end; %end; +%end; - %if &ppkcnt=3 %then %do; - %put &sysmacroname: No more PK guess possible; - %return; - %end; +%if &ppkcnt=4 %then %do; + %put &sysmacroname: No more PK guess possible; + %goto exit; +%end; - /* scan for uniques on up to 4 PK fields */ - %local lev4; - %do i=1 %to &ppkcnt; - %let lev1=%scan(&posspks,&i); - %do j=2 %to &ppkcnt; - %let lev2=%scan(&posspks,&j); - %if &lev1 ne &lev2 %then %do k=3 %to &ppkcnt; - %let lev3=%scan(&posspks,&k); - %if &lev1 ne &lev3 and &lev2 ne &lev3 %then %do l=4 %to &ppkcnt; - %let lev4=%scan(&posspks,&l); - %if &lev1 ne &lev4 and &lev2 ne &lev4 and &lev3 ne &lev4 %then %do; +/* scan for uniques on up to 4 PK fields */ +%local lev5 m; +%do i=1 %to &ppkcnt; + %let lev1=%scan(&posspks,&i); + %do j=2 %to &ppkcnt; + %let lev2=%scan(&posspks,&j); + %if &lev1 ne &lev2 %then %do k=3 %to &ppkcnt; + %let lev3=%scan(&posspks,&k); + %if &lev1 ne &lev3 and &lev2 ne &lev3 %then %do l=4 %to &ppkcnt; + %let lev4=%scan(&posspks,&l); + %if &lev1 ne &lev4 and &lev2 ne &lev4 and &lev3 ne &lev4 %then + %do m=5 %to &ppkcnt; + %let lev5=%scan(&posspks,&m); + %if &lev1 ne &lev5 & &lev2 ne &lev5 & &lev3 ne &lev5 & &lev4 ne &lev5 %then %do; /* check for four level uniqueness */ - proc sort data=&pkds(keep=&lev1 &lev2 &lev3 &lev4) + proc sort data=&pkds(keep=&lev1 &lev2 &lev3 &lev4 &lev5) out=&tmpds noduprec; by _all_; run; %if %mf_nobs(&tmpds)=&rows %then %do; proc sql; - insert into &outds values("&lev1 &lev2 &lev3 &lev4"); + insert into &outds values("&lev1 &lev2 &lev3 &lev4 &lev5"); %if %mf_nobs(&outds) ge &max_guesses %then %do; - %put &sysmacroname: Max PKs reached at Level 4 for &baseds; - %return; + %put &sysmacroname: Max PKs reached at Level 5 for &baseds; + %goto exit; %end; %end; %end; @@ -7456,37 +7570,44 @@ create table &outds as %end; 
%end; %end; +%end; - %if &ppkcnt=4 %then %do; - %put &sysmacroname: No more PK guess possible; - %return; - %end; +%if &ppkcnt=5 %then %do; + %put &sysmacroname: No more PK guess possible; + %goto exit; +%end; - /* scan for uniques on up to 4 PK fields */ - %local lev5 m; - %do i=1 %to &ppkcnt; - %let lev1=%scan(&posspks,&i); - %do j=2 %to &ppkcnt; - %let lev2=%scan(&posspks,&j); - %if &lev1 ne &lev2 %then %do k=3 %to &ppkcnt; - %let lev3=%scan(&posspks,&k); - %if &lev1 ne &lev3 and &lev2 ne &lev3 %then %do l=4 %to &ppkcnt; - %let lev4=%scan(&posspks,&l); - %if &lev1 ne &lev4 and &lev2 ne &lev4 and &lev3 ne &lev4 %then - %do m=5 %to &ppkcnt; - %let lev5=%scan(&posspks,&m); - %if &lev1 ne &lev5 & &lev2 ne &lev5 & &lev3 ne &lev5 & &lev4 ne &lev5 %then %do; +/* scan for uniques on up to 4 PK fields */ +%local lev6 n; +%do i=1 %to &ppkcnt; + %let lev1=%scan(&posspks,&i); + %do j=2 %to &ppkcnt; + %let lev2=%scan(&posspks,&j); + %if &lev1 ne &lev2 %then %do k=3 %to &ppkcnt; + %let lev3=%scan(&posspks,&k); + %if &lev1 ne &lev3 and &lev2 ne &lev3 %then %do l=4 %to &ppkcnt; + %let lev4=%scan(&posspks,&l); + %if &lev1 ne &lev4 and &lev2 ne &lev4 and &lev3 ne &lev4 %then + %do m=5 %to &ppkcnt; + %let lev5=%scan(&posspks,&m); + %if &lev1 ne &lev5 & &lev2 ne &lev5 & &lev3 ne &lev5 & &lev4 ne &lev5 + %then %do n=6 %to &ppkcnt; + %let lev6=%scan(&posspks,&n); + %if &lev1 ne &lev6 & &lev2 ne &lev6 & &lev3 ne &lev6 + & &lev4 ne &lev6 & &lev5 ne &lev6 %then + %do; /* check for four level uniqueness */ - proc sort data=&pkds(keep=&lev1 &lev2 &lev3 &lev4 &lev5) - out=&tmpds noduprec; + proc sort data=&pkds(keep=&lev1 &lev2 &lev3 &lev4 &lev5 &lev6) + out=&tmpds noduprec; by _all_; run; %if %mf_nobs(&tmpds)=&rows %then %do; proc sql; - insert into &outds values("&lev1 &lev2 &lev3 &lev4 &lev5"); + insert into &outds + values("&lev1 &lev2 &lev3 &lev4 &lev5 &lev6"); %if %mf_nobs(&outds) ge &max_guesses %then %do; - %put &sysmacroname: Max PKs reached at Level 5 for &baseds; - %return; + 
%put &sysmacroname: Max PKs reached at Level 6 for &baseds; + %goto exit; %end; %end; %end; @@ -7495,57 +7616,18 @@ create table &outds as %end; %end; %end; +%end; - %if &ppkcnt=5 %then %do; - %put &sysmacroname: No more PK guess possible; - %return; - %end; +%if &ppkcnt=6 %then %do; + %put &sysmacroname: No more PK guess possible; + %goto exit; +%end; - /* scan for uniques on up to 4 PK fields */ - %local lev6 n; - %do i=1 %to &ppkcnt; - %let lev1=%scan(&posspks,&i); - %do j=2 %to &ppkcnt; - %let lev2=%scan(&posspks,&j); - %if &lev1 ne &lev2 %then %do k=3 %to &ppkcnt; - %let lev3=%scan(&posspks,&k); - %if &lev1 ne &lev3 and &lev2 ne &lev3 %then %do l=4 %to &ppkcnt; - %let lev4=%scan(&posspks,&l); - %if &lev1 ne &lev4 and &lev2 ne &lev4 and &lev3 ne &lev4 %then - %do m=5 %to &ppkcnt; - %let lev5=%scan(&posspks,&m); - %if &lev1 ne &lev5 & &lev2 ne &lev5 & &lev3 ne &lev5 & &lev4 ne &lev5 %then - %do n=6 %to &ppkcnt; - %let lev6=%scan(&posspks,&n); - %if &lev1 ne &lev6 & &lev2 ne &lev6 & &lev3 ne &lev6 - & &lev4 ne &lev6 & &lev5 ne &lev6 %then - %do; - /* check for four level uniqueness */ - proc sort data=&pkds(keep=&lev1 &lev2 &lev3 &lev4 &lev5 &lev6) - out=&tmpds noduprec; - by _all_; - run; - %if %mf_nobs(&tmpds)=&rows %then %do; - proc sql; - insert into &outds - values("&lev1 &lev2 &lev3 &lev4 &lev5 &lev6"); - %if %mf_nobs(&outds) ge &max_guesses %then %do; - %put &sysmacroname: Max PKs reached at Level 6 for &baseds; - %return; - %end; - %end; - %end; - %end; - %end; - %end; - %end; - %end; - %end; - - %if &ppkcnt=6 %then %do; - %put &sysmacroname: No more PK guess possible; - %return; - %end; +%exit: +%if &mdebug=0 %then %do; + proc sql; + drop table &tmpds; +%end; %mend mp_guesspk;/** @file @@ -7771,6 +7853,7 @@ filename &tempref clear; %global SASJS_PREFIX /* the ONLY hard-coded global macro variable in SASjs */ + &prefix._FUNCTIONS /* used in mcf_init() to track core function compilation */ &prefix._INIT_NUM /* initialisation time as numeric */ 
&prefix._INIT_DTTM /* initialisation time in E8601DT26.6 format */ &prefix.WORK /* avoid typing %sysfunc(pathname(work)) every time */ @@ -9649,6 +9732,601 @@ run; %mend mp_sortinplace;/** + @file + @brief Prepares an audit table for stacking (re-applying) the changes. + @details WORK IN PROGRESS!! + + When the underlying data from a Base Table is refreshed, it can be helpful + to have any previously-applied changes, re-applied. + + Such a situation might arise if you are applying those changes using a tool + like [Data Controller for SAS®](https://datacontroller.io) - which records + all such changes in an audit table. + It may also apply if you are preparing a series of specific cell-level + transactions, that you would like to apply to multiple sets of (similarly + structured) Base Tables. + + In both cases, it is necessary that the transactions are stored using + the mp_storediffs.sas macro, or at least that the underlying table is + structured as per the definition in mp_coretable.sas (DIFFTABLE entry) + + This macro is used to convert the stored changes (tall format) into + staged changes (wide format), with base table values incorporated (in the + case of modified rows), ready for the subsequent load process. + + Essentially then, what this macro does, is turn a table like this: + + |MOVE_TYPE:$1.|TGTVAR_NM:$32.|IS_PK:best.|IS_DIFF:best.|TGTVAR_TYPE:$1.|OLDVAL_NUM:best32.|NEWVAL_NUM:best32.|OLDVAL_CHAR:$32765.|NEWVAL_CHAR:$32765.| + |---|---|---|---|---|---|---|---|---| + |`A `|`NAME `|`1 `|`-1 `|`C `|`. `|`. `|` `|`Newbie `| + |`A `|`AGE `|`0 `|`-1 `|`N `|`. `|`13 `|` `|` `| + |`A `|`HEIGHT `|`0 `|`-1 `|`N `|`. `|`65.3 `|` `|` `| + |`A `|`SEX `|`0 `|`-1 `|`C `|`. `|`. `|` `|`F `| + |`A `|`WEIGHT `|`0 `|`-1 `|`N `|`. `|`98 `|` `|` `| + |`D `|`NAME `|`1 `|`-1 `|`C `|`. `|`. `|`Alfred `|` `| + |`D `|`AGE `|`0 `|`-1 `|`N `|`14 `|`. `|` `|` `| + |`D `|`HEIGHT `|`0 `|`-1 `|`N `|`69 `|`. `|` `|` `| + |`D `|`SEX `|`0 `|`-1 `|`C `|`. `|`.
`|`M `|` `| + |`D `|`WEIGHT `|`0 `|`-1 `|`N `|`112.5 `|`. `|` `|` `| + |`M `|`NAME `|`1 `|`0 `|`C `|`. `|`. `|`Alice `|`Alice `| + |`M `|`AGE `|`0 `|`1 `|`N `|`13 `|`99 `|` `|` `| + |`M `|`HEIGHT `|`0 `|`0 `|`N `|`56.5 `|`56.5 `|` `|` `| + |`M `|`SEX `|`0 `|`0 `|`C `|`. `|`. `|`F `|`F `| + |`M `|`WEIGHT `|`0 `|`0 `|`N `|`84 `|`84 `|` `|` `| + + Into three tables like this: + + `work.outmod`: + |NAME:$8.|SEX:$1.|AGE:best.|HEIGHT:best.|WEIGHT:best.| + |---|---|---|---|---| + |`Alice `|`F `|`99 `|`56.5 `|`84 `| + + `work.outadd`: + |NAME:$8.|SEX:$1.|AGE:best.|HEIGHT:best.|WEIGHT:best.| + |---|---|---|---|---| + |`Newbie `|`F `|`13 `|`65.3 `|`98 `| + + `work.outdel`: + |NAME:$8.|SEX:$1.|AGE:best.|HEIGHT:best.|WEIGHT:best.| + |---|---|---|---|---| + |`Alfred `|`M `|`14 `|`69 `|`112.5 `| + + As you might expect, there are a bunch of extra features and checks. + + The macro supports both SCD2 (TXTEMPORAL) and UPDATE loadtypes. If the + base table contains a PROCESSED_DTTM column (or similar), this can be + ignored by declaring it in the `processed_dttm_var` parameter. + + The macro is also flexible where columns have been added or removed from + the base table UNLESS there is a change to the primary key. + + Changes to the primary key are NOT supported, and are likely to cause + unexpected results. + + The following pre-flight checks are made: + + @li All primary key columns exist on the base table + @li There is no change in variable TYPE for any of the columns + @li There is no reduction in variable LENGTH below the max-length of the + supplied values + + Rules for stacking changes are as follows: + + + + + + + + + + + + + + + + + + + + +
Transaction TypeKey BehaviourColumn Behaviour
Deletes + The row is added to `&outDEL.` UNLESS it no longer exists + in the base table, in which case it is added to `&errDS.` instead. + + Deletes are unaffected by the addition or removal of non Primary-Key + columns. +
Inserts + Previously newly added rows are added to the `outADD` table UNLESS they + are present in the Base table.
In this case they are added to the + `&errDS.` table instead. +
+ Inserts are unaffected by the addition of columns in the Base Table + (they are padded with blanks). Deleted columns are only a problem if + they appear on the previous insert - in which case the record is added + to `&errDS.`. +
Updates + Previously modified rows are merged with base table values such that + only the individual cells that were _previously_ changed are re-applied. + Where the row contains cells that were not marked as having changed in + the prior transaction, the 'blanks' are filled with base table values in + the `outMOD` table.
+ If the row no longer exists on the base table, then the row is added to + the `errDS` table instead. +
+ Updates are unaffected by the addition of columns in the Base Table - + the new cells are simply populated with Base Table values. Deleted + columns are only a problem if they relate to a modified cell + (`is_diff=1`) - in which case the record is added to `&errDS.`. +
+ + To illustrate the above with a diagram: + + @dot + digraph { + rankdir="TB" + start[label="Transaction Type?" shape=Mdiamond] + del[label="Does Base Row exist?" shape=rectangle] + add [label="Does Base Row exist?" shape=rectangle] + mod [label="Does Base Row exist?" shape=rectangle] + chkmod [label="Do all modified\n(is_diff=1) cells exist?" shape=rectangle] + chkadd [label="Do all inserted cells exist?" shape=rectangle] + outmod [label="outMOD\nTable" shape=Msquare style=filled] + outadd [label="outADD\nTable" shape=Msquare style=filled] + outdel [label="outDEL\nTable" shape=Msquare style=filled] + outerr [label="ErrDS Table" shape=Msquare fillcolor=Orange style=filled] + start -> del [label="Delete"] + start -> add [label="Insert"] + start -> mod [label="Update"] + + del -> outdel [label="Yes"] + del -> outerr [label="No" color="Red" fontcolor="Red"] + add -> chkadd [label="No"] + add -> outerr [label="Yes" color="Red" fontcolor="Red"] + mod -> outerr [label="No" color="Red" fontcolor="Red"] + mod -> chkmod [label="Yes"] + chkmod -> outerr [label="No" color="Red" fontcolor="Red"] + chkmod -> outmod [label="Yes"] + chkadd -> outerr [label="No" color="Red" fontcolor="Red"] + chkadd -> outadd [label="Yes"] + + } + @enddot + + For examples of usage, check out the mp_stackdiffs.test.sas program. + + + @param [in] baselibds Base Table against which the changes will be applied, + in libref.dataset format. + @param [in] auditlibds Dataset with previously applied transactions, to be + re-applied. Use libref.dataset format. + DDL as follows: %mp_coretable(DIFFTABLE) + @param [in] key Space separated list of key variables + @param [in] mdebug= Set to 1 to enable DEBUG messages and preserve outputs + @param [in] processed_dttm_var= (0) If a variable is being used to mark + the processed datetime, put the name of the variable here.
It will NOT + be included in the staged dataset (the load process is expected to + provide this) + @param [out] errds= (work.errds) Output table containing problematic records. + The columns of this table are: + @li PK_VARS - Space separated list of primary key variable names + @li PK_VALS - Slash separated list of PK variable values + @li ERR_MSG - Explanation of why this record is problematic + @param [out] outmod= (work.outmod) Output table containing modified records + @param [out] outadd= (work.outadd) Output table containing additional records + @param [out] outdel= (work.outdel) Output table containing deleted records + + +

SAS Macros

+ @li mf_existvarlist.sas + @li mf_getquotedstr.sas + @li mf_getuniquefileref.sas + @li mf_getuniquename.sas + @li mf_islibds.sas + @li mf_nobs.sas + @li mf_wordsinstr1butnotstr2.sas + @li mp_abort.sas + @li mp_ds2squeeze.sas + + +

Related Macros

+ @li mp_coretable.sas + @li mp_stackdiffs.test.sas + @li mp_storediffs.sas + + @todo The current approach assumes that a variable called KEY_HASH is not on + the base table. This part will need to be refactored (eg using + mf_getuniquename.sas) when such a use case arises. + + @version 9.2 + @author Allan Bowe +**/ +/** @cond */ + +%macro mp_stackdiffs(baselibds + ,auditlibds + ,key + ,mdebug=0 + ,processed_dttm_var=0 + ,errds=work.errds + ,outmod=work.outmod + ,outadd=work.outadd + ,outdel=work.outdel +)/*/STORE SOURCE*/; +%local dbg; +%if &mdebug=1 %then %do; + %put &sysmacroname entry vars:; + %put _local_; +%end; +%else %let dbg=*; + +/* input parameter validations - abort if any check fails */ +%mp_abort(iftrue= (%mf_islibds(&baselibds) ne 1) + ,mac=&sysmacroname + ,msg=%str(Invalid baselibds: &baselibds) +) +%mp_abort(iftrue= (%mf_islibds(&auditlibds) ne 1) + ,mac=&sysmacroname + ,msg=%str(Invalid auditlibds: &auditlibds) +) +%mp_abort(iftrue= (%length(&key)=0) + ,mac=&sysmacroname + ,msg=%str(Missing key variables!) +) +%mp_abort(iftrue= ( + %mf_existVarList(&auditlibds,LIBREF DSN MOVE_TYPE KEY_HASH TGTVAR_NM IS_PK + IS_DIFF TGTVAR_TYPE OLDVAL_NUM NEWVAL_NUM OLDVAL_CHAR NEWVAL_CHAR)=0 + ) + ,mac=&sysmacroname + ,msg=%str(Input &auditlibds is missing required columns!)
+) + + +/* set up macro vars - each temp table name gets a shared unique prefix */ +%local prefix dslist x var keyjoin commakey keepvars missvars fref; +%let prefix=%substr(%mf_getuniquename(),1,25); +%let dslist=ds1d ds2d ds3d ds1a ds2a ds3a ds1m ds2m ds3m pks dups base + delrec delerr addrec adderr modrec moderr; +%do x=1 %to %sysfunc(countw(&dslist)); + %let var=%scan(&dslist,&x); + %local &var; + %let &var=%upcase(&prefix._&var); +%end; + +%let key=%upcase(&key); +%let commakey=%mf_getquotedstr(&key,quote=N); + +%let keyjoin=1=1; +%do x=1 %to %sysfunc(countw(&key)); + %let var=%scan(&key,&x); + %let keyjoin=&keyjoin and a.&var=b.&var; +%end; + +data &errds; + length pk_vars $256 pk_vals $4098 err_msg $512; + call missing (of _all_); + stop; +run; + +/** + * Prepare raw DELETE table + * Records are in the OLDVAL_xxx columns + */ +%let keepvars=MOVE_TYPE KEY_HASH TGTVAR_NM TGTVAR_TYPE IS_PK + OLDVAL_NUM OLDVAL_CHAR + NEWVAL_NUM NEWVAL_CHAR; +proc sort data=&auditlibds(where=(move_type='D') keep=&keepvars) + out=&ds1d(drop=move_type); +by KEY_HASH TGTVAR_NM; +run; +proc transpose data=&ds1d(where=(tgtvar_type='N')) + out=&ds2d(drop=_name_); + by KEY_HASH; + id TGTVAR_NM; + var OLDVAL_NUM; +run; +proc transpose data=&ds1d(where=(tgtvar_type='C')) + out=&ds3d(drop=_name_); + by KEY_HASH; + id TGTVAR_NM; + var OLDVAL_CHAR; +run; +%mp_ds2squeeze(&ds2d,outds=&ds2d) +%mp_ds2squeeze(&ds3d,outds=&ds3d) +data &outdel; + if 0 then set &baselibds; + set &ds2d; + set &ds3d; + drop key_hash; + if not missing(%scan(&key,1)); +run; +proc sort; + by &key; +run; + +/** + * Prepare raw APPEND table + * Records are in the NEWVAL_xxx columns + */ +proc sort data=&auditlibds(where=(move_type='A') keep=&keepvars) + out=&ds1a(drop=move_type); + by KEY_HASH TGTVAR_NM; +run; +proc transpose data=&ds1a(where=(tgtvar_type='N')) + out=&ds2a(drop=_name_); + by KEY_HASH; + id TGTVAR_NM; + var NEWVAL_NUM; +run; +proc transpose data=&ds1a(where=(tgtvar_type='C')) + out=&ds3a(drop=_name_); + by KEY_HASH; + id TGTVAR_NM; + var NEWVAL_CHAR; +run;
+%mp_ds2squeeze(&ds2a,outds=&ds2a) +%mp_ds2squeeze(&ds3a,outds=&ds3a) +data &outadd; + if 0 then set &baselibds; + set &ds2a; + set &ds3a; + drop key_hash; + if not missing(%scan(&key,1)); +run; +proc sort; + by &key; +run; + +/** + * Prepare raw MODIFY table + * Keep only primary key - will add modified values later + */ +proc sort data=&auditlibds( + where=(move_type='M' and is_pk=1) keep=&keepvars + ) out=&ds1m(drop=move_type); + by KEY_HASH TGTVAR_NM; +run; +proc transpose data=&ds1m(where=(tgtvar_type='N')) + out=&ds2m(drop=_name_); + by KEY_HASH ; + id TGTVAR_NM; + var NEWVAL_NUM; +run; +proc transpose data=&ds1m(where=(tgtvar_type='C')) + out=&ds3m(drop=_name_); + by KEY_HASH; + id TGTVAR_NM; + var NEWVAL_CHAR; +run; +%mp_ds2squeeze(&ds2m,outds=&ds2m) +%mp_ds2squeeze(&ds3m,outds=&ds3m) +data &outmod; + if 0 then set &baselibds; + set &ds2m; + set &ds3m; + if not missing(%scan(&key,1)); +run; +proc sort; + by &key; +run; + +/** + * Extract matching records from the base table + * Do this in one join for efficiency. + * At a later date, this should be optimised for large database tables by using + * passthrough and a temporary table. + */ +data &pks; + if 0 then set &baselibds; + set &outadd &outmod &outdel; + keep &key; +run; + +proc sort noduprec dupout=&dups; +by &key; +run; +data _null_; + set &dups; + putlog (_all_)(=); +run; +%mp_abort(iftrue= (%mf_nobs(&dups) ne 0) + ,mac=&sysmacroname + ,msg=%str(duplicates (%mf_nobs(&dups)) found on &auditlibds!)
+) + +proc sql; +create table &base as + select a.* + from &baselibds a, &pks b + where &keyjoin; + +/** + * delete check + * This is straightforward as it relates to records only + */ +proc sql; +create table &delrec as + select a.* + from &outdel a + left join &base b + on &keyjoin + where b.%scan(&key,1) is null + order by &commakey; + +data &delerr; + if 0 then set &errds; + set &delrec; + PK_VARS="&key"; + PK_VALS=catx('/',&commakey); + ERR_MSG="Rows cannot be deleted as they do not exist on the Base dataset"; + keep PK_VARS PK_VALS ERR_MSG; +run; +proc append base=&errds data=&delerr; +run; + +data &outdel; + merge &outdel (in=a) &delrec (in=b); + by &key; + if not b; +run; + +/** + * add check + * Problems - where record already exists, or base table has columns missing + */ +%let missvars=%mf_wordsinstr1butnotstr2( + Str1=%upcase(%mf_getvarlist(&outadd)), + Str2=%upcase(%mf_getvarlist(&baselibds)) +); +%if %length(&missvars)>0 %then %do; + /* add every pending insert to the err table */ + data &adderr; + if 0 then set &errds; + set &outadd; + PK_VARS="&key"; + PK_VALS=catx('/',&commakey); + ERR_MSG="Rows cannot be added due to missing base vars: &missvars"; + keep PK_VARS PK_VALS ERR_MSG; + run; + proc append base=&errds data=&adderr; + run; + proc sql; + delete * from &outadd; +%end; +%else %do; + proc sql; + /* find records that already exist on base table */ + create table &addrec as + select a.* + from &outadd a + inner join &base b + on &keyjoin + order by &commakey; + + /* add them to the err table */ + data &adderr; + if 0 then set &errds; + set &addrec; + PK_VARS="&key"; + PK_VALS=catx('/',&commakey); + ERR_MSG="Rows cannot be added as they already exist on the Base dataset"; + keep PK_VARS PK_VALS ERR_MSG; + run; + proc append base=&errds data=&adderr; + run; + + /* remove invalid rows from the outadd table */ + data &outadd; + merge &outadd (in=a) &addrec (in=b); + by &key; + if not b; + run; +%end; + +/** + * mod check + * Problems - where record does not exist
or baseds has modified cols missing + NOTE(review): missvars may keep the add-check value if SELECT INTO finds no rows - confirm */ +proc sql noprint; +select distinct tgtvar_nm into: missvars separated by ' ' + from &auditlibds + where move_type='M' and is_diff=1; +%let missvars=%mf_wordsinstr1butnotstr2( + Str1=&missvars, + Str2=%upcase(%mf_getvarlist(&baselibds)) +); +%if %length(&missvars)>0 %then %do; + /* add every pending update to the err table */ + data &moderr; + if 0 then set &errds; + set &outmod; + PK_VARS="&key"; + PK_VALS=catx('/',&commakey); + ERR_MSG="Rows cannot be modified due to missing base vars: &missvars"; + keep PK_VARS PK_VALS ERR_MSG; + run; + proc append base=&errds data=&moderr; + run; + proc sql; + delete * from &outmod; +%end; +%else %do; + /* now check for records that do not exist (therefore cannot be modified) */ + proc sql; + create table &modrec as + select a.* + from &outmod a + left join &base b + on &keyjoin + where b.%scan(&key,1) is null + order by &commakey; + data &moderr; + if 0 then set &errds; + set &modrec; + PK_VARS="&key"; + PK_VALS=catx('/',&commakey); + ERR_MSG="Rows cannot be modified as they do not exist on the Base dataset"; + keep PK_VARS PK_VALS ERR_MSG; + run; + proc append base=&errds data=&moderr; + run; + /* delete the above records from the outmod table */ + data &outmod; + merge &outmod (in=a) &modrec (in=b); + by &key; + if not b; + run; + /* now - we can prepare the final MOD table (which is currently PK only) */ + proc sql undo_policy=none; + create table &outmod as + select a.key_hash + ,b.* + from &outmod a + inner join &base b + on &keyjoin + order by &commakey; + /* now - to update outmod with modified (is_diff=1) values - NOTE(review): the BY below assumes the audit table is sorted by key_hash, confirm */ + %let fref=%mf_getuniquefileref(); + data _null_; + file &fref; + set &auditlibds(where=(move_type='M')) end=lastobs; + by key_hash; + retain comma 'N'; + if _n_=1 then put 'proc sql;'; + if first.key_hash then do; + comma='N'; + put "update &outmod set " @@; + end; + if is_diff=1 then do; + if comma='N' then do; + put ' '@@; + comma='Y'; + end; + else put ' ,'@@; + if
tgtvar_type='C' then do; + length qstr $32767; + qstr=quote(trim(NEWVAL_CHAR)); + put tgtvar_nm '=' qstr; + end; + else put tgtvar_nm '=' newval_num; + if comma=' ' then comma=' ,'; + end; + if last.key_hash then put ' where key_hash=trim("' key_hash '");'; + if lastobs then put "alter table &outmod drop key_hash;"; + run; + %inc &fref/source2; +%end; + +%if &mdebug=0 %then %do; + proc datasets lib=work; + delete &prefix:; + run; + %put &sysmacroname exit vars:; + %put _local_; +%end; +%mend mp_stackdiffs; +/** @endcond *//** @file @brief Converts deletes/changes/appends into a single audit table. @details When tracking changes to data over time, it can be helpful to have @@ -9699,41 +10377,23 @@ run; @param [in] appds= (0) Dataset with appended records @param [in] modds= (0) Dataset with modified records @param [out] outds= (work.mp_storediffs) Output table containing stored data. - Has the following format: + DDL as follows: %mp_coretable(DIFFTABLE) - proc sql; - create table &outds( - load_ref char(36) label='unique load reference', - processed_dttm num format=E8601DT26.6 label='Processed at timestamp', - libref char(8) label='Library Reference (8 chars)', - dsn char(32) label='Dataset Name (32 chars)', - key_hash char(32) label= - 'MD5 Hash of primary key values (pipe seperated)', - move_type char(1) label='Either (A)ppended, (D)eleted or (M)odified', - is_pk num label='Is Primary Key Field? (1/0)', - is_diff num label= - 'Did value change? (1/0/-1). Always -1 for appends and deletes.', - tgtvar_type char(1) label='Either (C)haracter or (N)umeric', - tgtvar_nm char(32) label='Target variable name (32 chars)', - oldval_num num format=best32. label='Old (numeric) value', - newval_num num format=best32.
label='New (numeric) value', - oldval_char char(32765) label='Old (character) value', - newval_char char(32765) label='New (character) value', - constraint pk_mpe_audit - primary key(load_ref,libref,dsn,key_hash,tgtvar_nm) - ); - - @param [in] processed_dttm= (0) Provide a datetime constant in relation to - the actual load time. If not provided, current timestamp is used. - @param [in] mdebug= set to 1 to enable DEBUG messages and preserve outputs - @param [out] loadref= (0) Provide a unique key to reference the load, - otherwise a UUID will be generated. + @param [in] processed_dttm= (0) Provide a datetime constant in relation to + the actual load time. If not provided, current timestamp is used. + @param [in] mdebug= set to 1 to enable DEBUG messages and preserve outputs + @param [out] loadref= (0) Provide a unique key to reference the load, + otherwise a UUID will be generated.

SAS Macros

@li mf_getquotedstr.sas @li mf_getuniquename.sas @li mf_getvarlist.sas +

Related Macros

+ @li mp_stackdiffs.sas + @li mp_storediffs.test.sas + @version 9.2 @author Allan Bowe **/ @@ -14280,21 +14940,28 @@ run; /** @file @brief Creates dataset with all members of a metadata group - @details + @details This macro will query SAS metadata and return all the members + of a particular group. - usage: + Usage: - %mm_getgroupmembers(someGroupName - ,outds=work.mm_getgroupmembers - ,emails=YES) + %mm_getgroupmembers(someGroupName + ,outds=work.mm_getgroupmembers + ,emails=YES + ) @param group metadata group for which to bring back members - @param outds= the dataset to create that contains the list of members - @param emails= set to YES to bring back email addresses - @param id= set to yes if passing an ID rather than a group name + @param outds= (work.mm_getgroupmembers) The dataset to create that contains + the list of members + @param emails= (NO) Set to YES to bring back email addresses + @param id= (NO) Set to yes if passing an ID rather than a group name @returns outds dataset containing all members of the metadata group +

Related Macros

+ @li mm_getgroups.sas + @li mm_adduser2group.sas + @version 9.2 @author Allan Bowe @@ -22738,6 +23405,50 @@ run; %inc "%sysfunc(pathname(work))/ml_json.lua" /source2; %mend ml_json; +/** + @file + @brief Sets up the mcf_xx functions + @details + There is no (efficient) way to determine if an mcf_xx macro has already been + invoked. So, we make use of a global macro variable list to keep track. + + Usage: + + %mcf_init(MCF_LENGTH) + + Returns: + + > 1 (if already initialised) else 0 + + @param [in] func The function to be initialised + +

Related Macros

+ @li mcf_init.test.sas + +**/ + +%macro mcf_init(func +)/*/STORE SOURCE*/; + +%if not (%symexist(SASJS_PREFIX)) %then %do; + %global SASJS_PREFIX; + %let SASJS_PREFIX=SASJS; +%end; + +%let func=%upcase(&func); + +/* the / character is just a separator between listed function names */ +%global &sasjs_prefix._FUNCTIONS; +%if %index(&&&sasjs_prefix._FUNCTIONS,&func/)>0 %then %do; + 1 + %return; +%end; +%else %do; + %let &sasjs_prefix._FUNCTIONS=&&&sasjs_prefix._FUNCTIONS &func/; + 0 +%end; + +%mend mcf_init; /** @file @brief Returns the length of a numeric value @@ -22780,10 +23491,11 @@ run; Uses a 3 part format: libref.catalog.package

SAS Macros

- @li mf_existfunction.sas + @li mcf_init.sas -

Related Macros

+

Related Programs

@li mcf_length.test.sas + @li mp_init.sas **/ @@ -22794,7 +23506,7 @@ run; ,pkg=UTILS )/*/STORE SOURCE*/; -%if %mf_existfunction(mcf_length)=1 %then %return; +%if %mcf_init(mcf_length)=1 %then %return; %if &wrap=YES %then %do; proc fcmp outlib=&lib..&cat..&pkg; @@ -22876,7 +23588,11 @@ endsub; Uses a 3 part format: libref.catalog.package

SAS Macros

- @li mf_existfunction.sas + @li mcf_init.sas + +

Related Programs

+ @li mcf_stpsrv_header.test.sas + @li mp_init.sas **/ @@ -22887,7 +23603,7 @@ endsub; ,pkg=UTILS )/*/STORE SOURCE*/; -%if %mf_existfunction(stpsrv_header)=1 %then %return; +%if %mcf_init(stpsrv_header)=1 %then %return; %if &wrap=YES %then %do; proc fcmp outlib=&lib..&cat..&pkg; @@ -22959,7 +23675,11 @@ endsub; Uses a 3 part format: libref.catalog.package

SAS Macros

- @li mf_existfunction.sas + @li mcf_init.sas + +

Related Programs

+ @li mcf_string2file.test.sas + @li mp_init.sas **/ @@ -22970,7 +23690,7 @@ endsub; ,pkg=UTILS )/*/STORE SOURCE*/; -%if %mf_existfunction(mcf_string2file)=1 %then %return; +%if %mcf_init(mcf_string2file)=1 %then %return; %if &wrap=YES %then %do; proc fcmp outlib=&lib..&cat..&pkg; diff --git a/base/mf_dedup.sas b/base/mf_dedup.sas index d34555e..0fd5ea6 100644 --- a/base/mf_dedup.sas +++ b/base/mf_dedup.sas @@ -23,7 +23,6 @@

Related Macros

@li mf_trimstr.sas - @li mf_wordsinstr1butnotstr2.sas @version 9.2 @author Allan Bowe diff --git a/base/mf_existvar.sas b/base/mf_existvar.sas index 4ca518d..368cd97 100755 --- a/base/mf_existvar.sas +++ b/base/mf_existvar.sas @@ -1,11 +1,11 @@ /** @file @brief Checks if a variable exists in a data set. - @details Returns 0 if the variable does NOT exist, and return the position of - the var if it does. - Usage: + @details Returns 0 if the variable does NOT exist, and the position of the var + if it does. + Usage: - %put %mf_existvar(work.someds, somevar) + %put %mf_existvar(work.someds, somevar) @param [in] libds 2 part dataset or view reference @param [in] var variable name diff --git a/base/mf_getfilesize.sas b/base/mf_getfilesize.sas index 0d4b433..551f9a2 100644 --- a/base/mf_getfilesize.sas +++ b/base/mf_getfilesize.sas @@ -5,18 +5,19 @@ %put %mf_getfilesize(fpath=C:\temp\myfile.txt); - or + or, provide a libds value as follows: data x;do x=1 to 100000;y=x;output;end;run; %put %mf_getfilesize(libds=work.x,format=yes); - gives: + Which gives: - 2mb + > 2mb + + @param [in] fpath= Full path and filename. Provide this OR the libds value. + @param [in] libds= (0) Library.dataset value (assumes library is BASE engine) + @param [in] format= (NO) Set to yes to apply sizekmg. format - @param fpath= full path and filename. Provide this OR the libds value. - @param libds= library.dataset value (assumes library is BASE engine) - @param format= set to yes to apply sizekmg. 
format @returns bytes @version 9.2 @@ -26,16 +27,32 @@ %macro mf_getfilesize(fpath=,libds=0,format=NO )/*/STORE SOURCE*/; - %if &libds ne 0 %then %do; - %let fpath=%sysfunc(pathname(%scan(&libds,1,.)))/%scan(&libds,2,.).sas7bdat; - %end; + %local rc fid fref bytes dsid lib vnum; - %local rc fid fref bytes; - %let rc=%sysfunc(filename(fref,&fpath)); - %let fid=%sysfunc(fopen(&fref)); - %let bytes=%sysfunc(finfo(&fid,File Size (bytes))); - %let rc=%sysfunc(fclose(&fid)); - %let rc=%sysfunc(filename(fref)); + %if &libds ne 0 %then %do; + %let libds=%upcase(&libds); + %if %index(&libds,.)=0 %then %let lib=WORK; + %else %let lib=%scan(&libds,1,.); + %let dsid=%sysfunc(open( + sashelp.vtable(where=(libname="&lib" and memname="%scan(&libds,-1,.)") + keep=libname memname filesize + ) + )); + %if (&dsid ^= 0) %then %do; + %let vnum=%sysfunc(varnum(&dsid,FILESIZE)); + %let rc=%sysfunc(fetch(&dsid)); + %let bytes=%sysfunc(getvarn(&dsid,&vnum)); + %let rc= %sysfunc(close(&dsid)); + %end; + %else %put &sysmacroname: &libds could not be opened! 
%sysfunc(sysmsg()); + %end; + %else %do; + %let rc=%sysfunc(filename(fref,&fpath)); + %let fid=%sysfunc(fopen(&fref)); + %let bytes=%sysfunc(finfo(&fid,File Size (bytes))); + %let rc=%sysfunc(fclose(&fid)); + %let rc=%sysfunc(filename(fref)); + %end; %if &format=NO %then %do; &bytes diff --git a/base/mf_wordsinstr1andstr2.sas b/base/mf_wordsinstr1andstr2.sas index 1abadbd..bfa2338 100644 --- a/base/mf_wordsinstr1andstr2.sas +++ b/base/mf_wordsinstr1andstr2.sas @@ -30,7 +30,6 @@ %local count_base count_extr i i2 extr_word base_word match outvar; %if %length(&str1)=0 or %length(&str2)=0 %then %do; - %put %str(WARN)ING: empty string provided!; %put base string (str1)= &str1; %put compare string (str2) = &str2; %return; diff --git a/base/mf_wordsinstr1butnotstr2.sas b/base/mf_wordsinstr1butnotstr2.sas index 0485bd4..40b0219 100755 --- a/base/mf_wordsinstr1butnotstr2.sas +++ b/base/mf_wordsinstr1butnotstr2.sas @@ -3,6 +3,9 @@ @brief Returns words that are in string 1 but not in string 2 @details Compares two space separated strings and returns the words that are in the first but not in the second. + + Note - case sensitive! + Usage: %let x= %mf_wordsInStr1ButNotStr2( @@ -13,10 +16,8 @@ returns: > sss bram boo - @param str1= string containing words to extract - @param str2= used to compare with the extract string - - @warning CASE SENSITIVE! 
+ @param [in] str1= string containing words to extract + @param [in] str2= used to compare with the extract string @version 9.2 @author Allan Bowe @@ -30,7 +31,6 @@ %local count_base count_extr i i2 extr_word base_word match outvar; %if %length(&str1)=0 or %length(&str2)=0 %then %do; - %put %str(WARN)ING: empty string provided!; %put base string (str1)= &str1; %put compare string (str2) = &str2; %return; diff --git a/base/mp_assertdsobs.sas b/base/mp_assertdsobs.sas index 6682da9..4abe423 100644 --- a/base/mp_assertdsobs.sas +++ b/base/mp_assertdsobs.sas @@ -12,10 +12,6 @@ %mp_assertdsobs(sashelp.class,test=ATMOST 20) %* pass if <21 obs present; -

SAS Macros

- @li mf_nobs.sas - @li mp_abort.sas - @param [in] inds input dataset to test for presence of observations @param [in] desc= (Testing observations) The user provided test description @@ -33,6 +29,11 @@ |---|---|---| |User Provided description|PASS|Dataset &inds has XX obs| +

SAS Macros

+ @li mf_getuniquename.sas + @li mf_nobs.sas + @li mp_abort.sas +

Related Macros

@li mp_assertcolvals.sas @li mp_assert.sas @@ -49,9 +50,10 @@ outds=work.test_results )/*/STORE SOURCE*/; - %local nobs; + %local nobs ds; %let nobs=%mf_nobs(&inds); %let test=%upcase(&test); + %let ds=%mf_getuniquename(prefix=mp_assertdsobs); %if %substr(&test.xxxxx,1,6)=EQUALS %then %do; %let val=%scan(&test,2,%str( )); @@ -84,7 +86,7 @@ ) %end; - data; + data &ds; length test_description $256 test_result $4 test_comments $256; test_description=symget('desc'); test_result='FAIL'; @@ -110,9 +112,6 @@ %end; run; - %local ds; - %let ds=&syslast; - proc append base=&outds data=&ds; run; diff --git a/base/mp_assertscope.sas b/base/mp_assertscope.sas index b1a9bd3..e1dd54c 100644 --- a/base/mp_assertscope.sas +++ b/base/mp_assertscope.sas @@ -1,18 +1,25 @@ /** @file @brief Used to capture scope leakage of macro variables - @details A common 'difficult to detect' bug in macros is where a nested - macro over-writes variables in a higher level macro. + @details - This assertion takes a snapshot of the macro variables before and after - a macro invocation. This makes it easy to detect whether any macro - variables were modified or changed. + A common 'difficult to detect' bug in macros is where a nested macro + over-writes variables in a higher level macro. - Currently, the macro only checks for global scope variables. In the future - it may be extended to work at multiple levels of nesting. + This assertion takes a snapshot of the macro variables before and after + a macro invocation. Differences are captured in the `&outds` table. This + makes it easy to detect whether any macro variables were modified or + changed. - If you would like this feature, feel free to contribute / raise an issue / - engage the SASjs team directly. + The following variables are NOT tested (as they are known, global variables + used in SASjs): + + @li &sasjs_prefix._FUNCTIONS + + Global variables are initialised in mp_init.sas - which will also trigger + "strict mode" in your SAS session. 
Whilst this is a default in SASjs + produced apps, if you prefer not to use this mode, simply instantiate the + following variable to prevent the macro from running: `SASJS_PREFIX` Example usage: @@ -24,12 +31,17 @@ desc=Checking macro variables against previous snapshot ) + This macro is designed to work alongside `sasjs test` - for more information + about this facility, visit [cli.sasjs.io/test](https://cli.sasjs.io/test). + @param [in] action (SNAPSHOT) The action to take. Valid values: @li SNAPSHOT - take a copy of the current macro variables @li COMPARE - compare the current macro variables against previous values @param [in] scope= (GLOBAL) The scope of the variables to be checked. This corresponds to the values in the SCOPE column in `sashelp.vmacro`. @param [in] desc= (Testing scope leakage) The user provided test description + @param [in] ignorelist= Provide a list of macro variable names to ignore from + the comparison @param [in,out] scopeds= (work.mp_assertscope) The dataset to contain the scope snapshot @param [out] outds= (work.test_results) The output dataset to contain the @@ -38,6 +50,10 @@ |---|---|---| |User Provided description|PASS|No out of scope variables created or modified| +

SAS Macros

+ @li mf_getquotedstr.sas + @li mp_init.sas +

Related Macros

@li mp_assert.sas @li mp_assertcols.sas @@ -54,9 +70,18 @@ desc=Testing Scope Leakage, scope=GLOBAL, scopeds=work.mp_assertscope, + ignorelist=, outds=work.test_results )/*/STORE SOURCE*/; -%local ds test_result test_comments del add mod; +%local ds test_result test_comments del add mod ilist; +%let ilist=%upcase(&sasjs_prefix._FUNCTIONS &ignorelist); + +/** + * this sets up the global vars, it will also enter STRICT mode. If this + * behaviour is not desired, simply initiate the following global macro + * variable to prevent the macro from running: SASJS_PREFIX + */ +%mp_init() /* get current variables */ %if &action=SNAPSHOT %then %do; @@ -64,7 +89,7 @@ create table &scopeds as select name,offset,value from dictionary.macros - where scope="&scope" + where scope="&scope" and name not in (%mf_getquotedstr(&ilist)) order by name,offset; %end; %else %if &action=COMPARE %then %do; @@ -73,7 +98,7 @@ create table _data_ as select name,offset,value from dictionary.macros - where scope="&scope" + where scope="&scope" and name not in (%mf_getquotedstr(&ilist)) order by name,offset; %let ds=&syslast; diff --git a/base/mp_coretable.sas b/base/mp_coretable.sas index d0eca56..1de4c43 100644 --- a/base/mp_coretable.sas +++ b/base/mp_coretable.sas @@ -10,6 +10,8 @@ %mp_coretable(LOCKTABLE,libds=work.locktable) @param [in] table_ref The type of table to create. Example values: + @li DIFFTABLE - Used to store changes to tables. Used by mp_storediffs.sas + and mp_stackdiffs.sas @li FILTER_DETAIL - For storing detailed filter values. Used by mp_filterstore.sas. @li FILTER_SUMMARY - For storing summary filter values. 
Used by @@ -25,6 +27,8 @@ @li mp_filterstore.sas @li mp_lockanytable.sas @li mp_retainedkey.sas + @li mp_storediffs.sas + @li mp_stackdiffs.sas @version 9.2 @author Allan Bowe @@ -36,7 +40,29 @@ %local outds ; %let outds=%sysfunc(ifc(&libds=0,_data_,&libds)); proc sql; -%if &table_ref=LOCKTABLE %then %do; +%if &table_ref=DIFFTABLE %then %do; + create table &outds( + load_ref char(36) label='unique load reference', + processed_dttm num format=E8601DT26.6 label='Processed at timestamp', + libref char(8) label='Library Reference (8 chars)', + dsn char(32) label='Dataset Name (32 chars)', + key_hash char(32) label= + 'MD5 Hash of primary key values (pipe seperated)', + move_type char(1) label='Either (A)ppended, (D)eleted or (M)odified', + is_pk num label='Is Primary Key Field? (1/0)', + is_diff num label= + 'Did value change? (1/0/-1). Always -1 for appends and deletes.', + tgtvar_type char(1) label='Either (C)haracter or (N)umeric', + tgtvar_nm char(32) label='Target variable name (32 chars)', + oldval_num num format=best32. label='Old (numeric) value', + newval_num num format=best32. 
label='New (numeric) value', + oldval_char char(32765) label='Old (character) value', + newval_char char(32765) label='New (character) value', + constraint pk_mpe_audit + primary key(load_ref,libref,dsn,key_hash,tgtvar_nm) + ); +%end; +%else %if &table_ref=LOCKTABLE %then %do; create table &outds( lock_lib char(8), lock_ds char(32), diff --git a/base/mp_csv2ds.sas b/base/mp_csv2ds.sas index 5cac84f..b23c9cd 100644 --- a/base/mp_csv2ds.sas +++ b/base/mp_csv2ds.sas @@ -49,10 +49,6 @@ ,mac=&sysmacroname ,msg=%str(the BASEDS variable must be provided) ) -%mp_abort(iftrue=( &baseds=0 ) - ,mac=&sysmacroname - ,msg=%str(the BASEDS variable must be provided) -) %mp_abort(iftrue=( %mf_existds(&baseds)=0 ) ,mac=&sysmacroname ,msg=%str(the BASEDS dataset (&baseds) needs to be assigned, and to exist) diff --git a/base/mp_ds2squeeze.sas b/base/mp_ds2squeeze.sas index a9c1e16..21e3957 100644 --- a/base/mp_ds2squeeze.sas +++ b/base/mp_ds2squeeze.sas @@ -49,7 +49,7 @@ %macro mp_ds2squeeze( libds, - outds=work.work.mp_ds2squeeze, + outds=work.mp_ds2squeeze, mdebug=0 )/*/STORE SOURCE*/; %local dbg source; @@ -62,9 +62,10 @@ %let source=/source2; %end; -%local optval ds fref; +%local optval ds fref startsize; %let ds=%mf_getuniquename(); %let fref=%mf_getuniquefileref(); +%let startsize=%mf_getfilesize(libds=&libds,format=yes); %mp_getmaxvarlengths(&libds,outds=&ds) @@ -113,7 +114,7 @@ options varlenchk=&optval; filename &fref clear; %end; -%put &sysmacroname: &libds was %mf_getfilesize(libds=&libds,format=yes); +%put &sysmacroname: &libds was &startsize; %put &sysmacroname: &outds is %mf_getfilesize(libds=&outds,format=yes); %mend mp_ds2squeeze; \ No newline at end of file diff --git a/base/mp_guesspk.sas b/base/mp_guesspk.sas index 2ef1049..bdbdd3b 100644 --- a/base/mp_guesspk.sas +++ b/base/mp_guesspk.sas @@ -17,16 +17,21 @@ %inc mc; %mp_guesspk(sashelp.class,outds=classpks) - @param baseds The dataset to analyse - @param outds= The output dataset to contain the possible PKs - 
@param max_guesses= (3) The total number of possible primary keys to generate. - A table may have multiple unlikely PKs, so no need to list them all. - @param min_rows= (5) The minimum number of rows a table should have in order - to try and guess the PK. + @param [in] baseds The dataset to analyse + @param [out] outds= The output dataset to contain the possible PKs + @param [in] max_guesses= (3) The total number of possible primary keys to + generate. A table may have multiple (unlikely) PKs, so no need to list them + all. + @param [in] min_rows= (5) The minimum number of rows a table should have in + order to try and guess the PK. + @param [in] ignore_cols= (0) Space separated list of columns which you are + sure are not part of the primary key (helps to avoid false positives) + @param [in] mdebug= Set to 1 to enable DEBUG messages and preserve outputs

SAS Macros

@li mf_getvarlist.sas @li mf_getuniquename.sas + @li mf_wordsInstr1butnotstr2.sas @li mf_nobs.sas

Related Macros

@@ -38,179 +43,226 @@ **/ %macro mp_guesspk(baseds - ,outds=mp_guesspk - ,max_guesses=3 - ,min_rows=5 + ,outds=mp_guesspk + ,max_guesses=3 + ,min_rows=5 + ,ignore_cols=0 + ,mdebug=0 )/*/STORE SOURCE*/; +%local dbg; +%if &mdebug=1 %then %do; + %put &sysmacroname entry vars:; + %put _local_; +%end; +%else %let dbg=*; - /* declare local vars */ - %local var vars vcnt i j k l tmpvar tmpds rows posspks ppkcnt; - %let vars=%mf_getvarlist(&baseds); - %let vcnt=%sysfunc(countw(&vars)); +/* declare local vars */ +%local var vars vcnt i j k l tmpvar tmpds rows posspks ppkcnt; +%let vars=%upcase(%mf_getvarlist(&baseds)); +%let vars=%mf_wordsInStr1ButNotStr2(str1=&vars,str2=%upcase(&ignore_cols)); +%let vcnt=%sysfunc(countw(&vars)); - %if &vcnt=0 %then %do; - %put &sysmacroname: &baseds has no variables! Exiting.; - %return; +%if &vcnt=0 %then %do; + %put &sysmacroname: &baseds has no variables! Exiting.; + %return; +%end; + +/* get null count and row count */ +%let tmpvar=%mf_getuniquename(); +proc sql noprint; +create table _data_ as select + count(*) as &tmpvar +%do i=1 %to &vcnt; + %let var=%scan(&vars,&i); + ,sum(case when &var is missing then 1 else 0 end) as &var +%end; + from &baseds; + +/* transpose table and scan for not null cols */ +proc transpose; +data _null_; + set &syslast end=last; + length vars $32767; + retain vars ; + if _name_="&tmpvar" then call symputx('rows',col1,'l'); + else if col1=0 then vars=catx(' ',vars,_name_); + if last then call symputx('posspks',vars,'l'); +run; + +%let ppkcnt=%sysfunc(countw(&posspks)); +%if &ppkcnt=0 %then %do; + %put &sysmacroname: &baseds has no non-missing variables! Exiting.; + %return; +%end; + +proc sort data=&baseds(keep=&posspks) out=_data_ noduprec; + by _all_; +run; +%local pkds; %let pkds=&syslast; + +%if &rows > %mf_nobs(&pkds) %then %do; + %put &sysmacroname: &baseds has no combination of unique records! 
Exiting.; + %return; +%end; + +/* now check cardinality */ +proc sql noprint; +create table _data_ as select +%do i=1 %to &ppkcnt; + %let var=%scan(&posspks,&i); + count(distinct &var) as &var + %if &i<&ppkcnt %then ,; +%end; + from &pkds; + +/* transpose and sort by cardinality */ +proc transpose; +proc sort; by descending col1; +run; + +/* create initial PK list and re-order posspks list */ +data &outds(keep=pkguesses); + length pkguesses $5000 vars $5000; + set &syslast end=last; + retain vars ; + vars=catx(' ',vars,_name_); + if col1=&rows then do; + pkguesses=_name_; + output; + end; + if last then call symputx('posspks',vars,'l'); +run; + +%if %mf_nobs(&outds) ge &max_guesses %then %do; + %put &sysmacroname: %mf_nobs(&outds) possible primary key values found; + %return; +%end; + +%if &ppkcnt=1 %then %do; + %put &sysmacroname: No more PK guess possible; + %return; +%end; + +/* begin scanning for uniques on pairs of PKs */ +%let tmpds=%mf_getuniquename(); +%local lev1 lev2; +%do i=1 %to &ppkcnt; + %let lev1=%scan(&posspks,&i); + %do j=2 %to &ppkcnt; + %let lev2=%scan(&posspks,&j); + %if &lev1 ne &lev2 %then %do; + /* check for two level uniqueness */ + proc sort data=&pkds(keep=&lev1 &lev2) out=&tmpds noduprec; + by _all_; + run; + %if %mf_nobs(&tmpds)=&rows %then %do; + proc sql; + insert into &outds values("&lev1 &lev2"); + %if %mf_nobs(&outds) ge &max_guesses %then %do; + %put &sysmacroname: Max PKs reached at Level 2 for &baseds; + %goto exit; + %end; + %end; + %end; %end; +%end; - /* get null count and row count */ - %let tmpvar=%mf_getuniquename(); - proc sql noprint; - create table _data_ as select - count(*) as &tmpvar - %do i=1 %to &vcnt; - %let var=%scan(&vars,&i); - ,sum(case when &var is missing then 1 else 0 end) as &var - %end; - from &baseds; +%if &ppkcnt=2 %then %do; + %put &sysmacroname: No more PK guess possible; + %goto exit; +%end; - /* transpose table and scan for not null cols */ - proc transpose; - data _null_; - set &syslast end=last; - 
length vars $32767; - retain vars ; - if _name_="&tmpvar" then call symputx('rows',col1,'l'); - else if col1=0 then vars=catx(' ',vars,_name_); - if last then call symputx('posspks',vars,'l'); - run; - - %let ppkcnt=%sysfunc(countw(&posspks)); - %if &ppkcnt=0 %then %do; - %put &sysmacroname: &baseds has no non-missing variables! Exiting.; - %return; - %end; - - proc sort data=&baseds(keep=&posspks) out=_data_ noduprec; - by _all_; - run; - %local pkds; %let pkds=&syslast; - - %if &rows > %mf_nobs(&pkds) %then %do; - %put &sysmacroname: &baseds has no combination of unique records! Exiting.; - %return; - %end; - - /* now check cardinality */ - proc sql noprint; - create table _data_ as select - %do i=1 %to &ppkcnt; - %let var=%scan(&posspks,&i); - count(distinct &var) as &var - %if &i<&ppkcnt %then ,; - %end; - from &pkds; - - /* transpose and sort by cardinality */ - proc transpose; - proc sort; by descending col1; - run; - - /* create initial PK list and re-order posspks list */ - data &outds(keep=pkguesses); - length pkguesses $5000 vars $5000; - set &syslast end=last; - retain vars ; - vars=catx(' ',vars,_name_); - if col1=&rows then do; - pkguesses=_name_; - output; - end; - if last then call symputx('posspks',vars,'l'); - run; - - %if %mf_nobs(&outds) ge &max_guesses %then %do; - %put &sysmacroname: %mf_nobs(&outds) possible primary key values found; - %return; - %end; - - %if &ppkcnt=1 %then %do; - %put &sysmacroname: No more PK guess possible; - %return; - %end; - - /* begin scanning for uniques on pairs of PKs */ - %let tmpds=%mf_getuniquename(); - %local lev1 lev2; - %do i=1 %to &ppkcnt; - %let lev1=%scan(&posspks,&i); - %do j=2 %to &ppkcnt; - %let lev2=%scan(&posspks,&j); - %if &lev1 ne &lev2 %then %do; - /* check for two level uniqueness */ - proc sort data=&pkds(keep=&lev1 &lev2) out=&tmpds noduprec; +/* begin scanning for uniques on PK triplets */ +%local lev3; +%do i=1 %to &ppkcnt; + %let lev1=%scan(&posspks,&i); + %do j=2 %to &ppkcnt; + %let 
lev2=%scan(&posspks,&j); + %if &lev1 ne &lev2 %then %do k=3 %to &ppkcnt; + %let lev3=%scan(&posspks,&k); + %if &lev1 ne &lev3 and &lev2 ne &lev3 %then %do; + /* check for three level uniqueness */ + proc sort data=&pkds(keep=&lev1 &lev2 &lev3) out=&tmpds noduprec; by _all_; run; %if %mf_nobs(&tmpds)=&rows %then %do; proc sql; - insert into &outds values("&lev1 &lev2"); + insert into &outds values("&lev1 &lev2 &lev3"); %if %mf_nobs(&outds) ge &max_guesses %then %do; - %put &sysmacroname: Max PKs reached at Level 2 for &baseds; - %return; + %put &sysmacroname: Max PKs reached at Level 3 for &baseds; + %goto exit; %end; %end; %end; %end; %end; +%end; - %if &ppkcnt=2 %then %do; - %put &sysmacroname: No more PK guess possible; - %return; - %end; +%if &ppkcnt=3 %then %do; + %put &sysmacroname: No more PK guess possible; + %goto exit; +%end; - /* begin scanning for uniques on PK triplets */ - %local lev3; - %do i=1 %to &ppkcnt; - %let lev1=%scan(&posspks,&i); - %do j=2 %to &ppkcnt; - %let lev2=%scan(&posspks,&j); - %if &lev1 ne &lev2 %then %do k=3 %to &ppkcnt; - %let lev3=%scan(&posspks,&k); - %if &lev1 ne &lev3 and &lev2 ne &lev3 %then %do; - /* check for three level uniqueness */ - proc sort data=&pkds(keep=&lev1 &lev2 &lev3) out=&tmpds noduprec; +/* scan for uniques on up to 4 PK fields */ +%local lev4; +%do i=1 %to &ppkcnt; + %let lev1=%scan(&posspks,&i); + %do j=2 %to &ppkcnt; + %let lev2=%scan(&posspks,&j); + %if &lev1 ne &lev2 %then %do k=3 %to &ppkcnt; + %let lev3=%scan(&posspks,&k); + %if &lev1 ne &lev3 and &lev2 ne &lev3 %then %do l=4 %to &ppkcnt; + %let lev4=%scan(&posspks,&l); + %if &lev1 ne &lev4 and &lev2 ne &lev4 and &lev3 ne &lev4 %then %do; + /* check for four level uniqueness */ + proc sort data=&pkds(keep=&lev1 &lev2 &lev3 &lev4) + out=&tmpds noduprec; by _all_; run; %if %mf_nobs(&tmpds)=&rows %then %do; proc sql; - insert into &outds values("&lev1 &lev2 &lev3"); + insert into &outds values("&lev1 &lev2 &lev3 &lev4"); %if %mf_nobs(&outds) ge 
&max_guesses %then %do; - %put &sysmacroname: Max PKs reached at Level 3 for &baseds; - %return; + %put &sysmacroname: Max PKs reached at Level 4 for &baseds; + %goto exit; %end; %end; %end; %end; %end; %end; +%end; - %if &ppkcnt=3 %then %do; - %put &sysmacroname: No more PK guess possible; - %return; - %end; +%if &ppkcnt=4 %then %do; + %put &sysmacroname: No more PK guess possible; + %goto exit; +%end; - /* scan for uniques on up to 4 PK fields */ - %local lev4; - %do i=1 %to &ppkcnt; - %let lev1=%scan(&posspks,&i); - %do j=2 %to &ppkcnt; - %let lev2=%scan(&posspks,&j); - %if &lev1 ne &lev2 %then %do k=3 %to &ppkcnt; - %let lev3=%scan(&posspks,&k); - %if &lev1 ne &lev3 and &lev2 ne &lev3 %then %do l=4 %to &ppkcnt; - %let lev4=%scan(&posspks,&l); - %if &lev1 ne &lev4 and &lev2 ne &lev4 and &lev3 ne &lev4 %then %do; +/* scan for uniques on up to 4 PK fields */ +%local lev5 m; +%do i=1 %to &ppkcnt; + %let lev1=%scan(&posspks,&i); + %do j=2 %to &ppkcnt; + %let lev2=%scan(&posspks,&j); + %if &lev1 ne &lev2 %then %do k=3 %to &ppkcnt; + %let lev3=%scan(&posspks,&k); + %if &lev1 ne &lev3 and &lev2 ne &lev3 %then %do l=4 %to &ppkcnt; + %let lev4=%scan(&posspks,&l); + %if &lev1 ne &lev4 and &lev2 ne &lev4 and &lev3 ne &lev4 %then + %do m=5 %to &ppkcnt; + %let lev5=%scan(&posspks,&m); + %if &lev1 ne &lev5 & &lev2 ne &lev5 & &lev3 ne &lev5 & &lev4 ne &lev5 %then %do; /* check for four level uniqueness */ - proc sort data=&pkds(keep=&lev1 &lev2 &lev3 &lev4) + proc sort data=&pkds(keep=&lev1 &lev2 &lev3 &lev4 &lev5) out=&tmpds noduprec; by _all_; run; %if %mf_nobs(&tmpds)=&rows %then %do; proc sql; - insert into &outds values("&lev1 &lev2 &lev3 &lev4"); + insert into &outds values("&lev1 &lev2 &lev3 &lev4 &lev5"); %if %mf_nobs(&outds) ge &max_guesses %then %do; - %put &sysmacroname: Max PKs reached at Level 4 for &baseds; - %return; + %put &sysmacroname: Max PKs reached at Level 5 for &baseds; + %goto exit; %end; %end; %end; @@ -218,37 +270,44 @@ %end; %end; %end; +%end; - %if 
&ppkcnt=4 %then %do; - %put &sysmacroname: No more PK guess possible; - %return; - %end; +%if &ppkcnt=5 %then %do; + %put &sysmacroname: No more PK guess possible; + %goto exit; +%end; - /* scan for uniques on up to 4 PK fields */ - %local lev5 m; - %do i=1 %to &ppkcnt; - %let lev1=%scan(&posspks,&i); - %do j=2 %to &ppkcnt; - %let lev2=%scan(&posspks,&j); - %if &lev1 ne &lev2 %then %do k=3 %to &ppkcnt; - %let lev3=%scan(&posspks,&k); - %if &lev1 ne &lev3 and &lev2 ne &lev3 %then %do l=4 %to &ppkcnt; - %let lev4=%scan(&posspks,&l); - %if &lev1 ne &lev4 and &lev2 ne &lev4 and &lev3 ne &lev4 %then - %do m=5 %to &ppkcnt; - %let lev5=%scan(&posspks,&m); - %if &lev1 ne &lev5 & &lev2 ne &lev5 & &lev3 ne &lev5 & &lev4 ne &lev5 %then %do; +/* scan for uniques on up to 4 PK fields */ +%local lev6 n; +%do i=1 %to &ppkcnt; + %let lev1=%scan(&posspks,&i); + %do j=2 %to &ppkcnt; + %let lev2=%scan(&posspks,&j); + %if &lev1 ne &lev2 %then %do k=3 %to &ppkcnt; + %let lev3=%scan(&posspks,&k); + %if &lev1 ne &lev3 and &lev2 ne &lev3 %then %do l=4 %to &ppkcnt; + %let lev4=%scan(&posspks,&l); + %if &lev1 ne &lev4 and &lev2 ne &lev4 and &lev3 ne &lev4 %then + %do m=5 %to &ppkcnt; + %let lev5=%scan(&posspks,&m); + %if &lev1 ne &lev5 & &lev2 ne &lev5 & &lev3 ne &lev5 & &lev4 ne &lev5 + %then %do n=6 %to &ppkcnt; + %let lev6=%scan(&posspks,&n); + %if &lev1 ne &lev6 & &lev2 ne &lev6 & &lev3 ne &lev6 + & &lev4 ne &lev6 & &lev5 ne &lev6 %then + %do; /* check for four level uniqueness */ - proc sort data=&pkds(keep=&lev1 &lev2 &lev3 &lev4 &lev5) - out=&tmpds noduprec; + proc sort data=&pkds(keep=&lev1 &lev2 &lev3 &lev4 &lev5 &lev6) + out=&tmpds noduprec; by _all_; run; %if %mf_nobs(&tmpds)=&rows %then %do; proc sql; - insert into &outds values("&lev1 &lev2 &lev3 &lev4 &lev5"); + insert into &outds + values("&lev1 &lev2 &lev3 &lev4 &lev5 &lev6"); %if %mf_nobs(&outds) ge &max_guesses %then %do; - %put &sysmacroname: Max PKs reached at Level 5 for &baseds; - %return; + %put &sysmacroname: Max PKs 
reached at Level 6 for &baseds; + %goto exit; %end; %end; %end; @@ -257,56 +316,17 @@ %end; %end; %end; +%end; - %if &ppkcnt=5 %then %do; - %put &sysmacroname: No more PK guess possible; - %return; - %end; +%if &ppkcnt=6 %then %do; + %put &sysmacroname: No more PK guess possible; + %goto exit; +%end; - /* scan for uniques on up to 4 PK fields */ - %local lev6 n; - %do i=1 %to &ppkcnt; - %let lev1=%scan(&posspks,&i); - %do j=2 %to &ppkcnt; - %let lev2=%scan(&posspks,&j); - %if &lev1 ne &lev2 %then %do k=3 %to &ppkcnt; - %let lev3=%scan(&posspks,&k); - %if &lev1 ne &lev3 and &lev2 ne &lev3 %then %do l=4 %to &ppkcnt; - %let lev4=%scan(&posspks,&l); - %if &lev1 ne &lev4 and &lev2 ne &lev4 and &lev3 ne &lev4 %then - %do m=5 %to &ppkcnt; - %let lev5=%scan(&posspks,&m); - %if &lev1 ne &lev5 & &lev2 ne &lev5 & &lev3 ne &lev5 & &lev4 ne &lev5 %then - %do n=6 %to &ppkcnt; - %let lev6=%scan(&posspks,&n); - %if &lev1 ne &lev6 & &lev2 ne &lev6 & &lev3 ne &lev6 - & &lev4 ne &lev6 & &lev5 ne &lev6 %then - %do; - /* check for four level uniqueness */ - proc sort data=&pkds(keep=&lev1 &lev2 &lev3 &lev4 &lev5 &lev6) - out=&tmpds noduprec; - by _all_; - run; - %if %mf_nobs(&tmpds)=&rows %then %do; - proc sql; - insert into &outds - values("&lev1 &lev2 &lev3 &lev4 &lev5 &lev6"); - %if %mf_nobs(&outds) ge &max_guesses %then %do; - %put &sysmacroname: Max PKs reached at Level 6 for &baseds; - %return; - %end; - %end; - %end; - %end; - %end; - %end; - %end; - %end; - %end; - - %if &ppkcnt=6 %then %do; - %put &sysmacroname: No more PK guess possible; - %return; - %end; +%exit: +%if &mdebug=0 %then %do; + proc sql; + drop table &tmpds; +%end; %mend mp_guesspk; \ No newline at end of file diff --git a/base/mp_init.sas b/base/mp_init.sas index 734f555..1e191ac 100644 --- a/base/mp_init.sas +++ b/base/mp_init.sas @@ -37,6 +37,7 @@ %global SASJS_PREFIX /* the ONLY hard-coded global macro variable in SASjs */ + &prefix._FUNCTIONS /* used in mcf_init() to track core function compilation */ 
&prefix._INIT_NUM /* initialisation time as numeric */ &prefix._INIT_DTTM /* initialisation time in E8601DT26.6 format */ &prefix.WORK /* avoid typing %sysfunc(pathname(work)) every time */ diff --git a/base/mp_stackdiffs.sas b/base/mp_stackdiffs.sas new file mode 100644 index 0000000..e7fbe56 --- /dev/null +++ b/base/mp_stackdiffs.sas @@ -0,0 +1,596 @@ +/** + @file + @brief Prepares an audit table for stacking (re-applying) the changes. + @details WORK IN PROGRESS!! + + When the underlying data from a Base Table is refreshed, it can be helpful + to have any previously-applied changes, re-applied. + + Such situation might arise if you are applying those changes using a tool + like [Data Controller for SASĀ®](https://datacontroller.io) - which records + all such changes in an audit table. + It may also apply if you are preparing a series of specific cell-level + transactions, that you would like to apply to multiple sets of (similarly + structured) Base Tables. + + In both cases, it is necessary that the transactions are stored using + the mp_storediffs.sas macro, or at least that the underlying table is + structured as per the definition in mp_coretable.sas (DIFFTABLE entry) + + This macro is used to convert the stored changes (tall format) into + staged changes (wide format), with base table values incorporated (in the + case of modified rows), ready for the subsequent load process. + + Essentially then, what this macro does, is turn a table like this: + + |MOVE_TYPE:$1.|TGTVAR_NM:$32.|IS_PK:best.|IS_DIFF:best.|TGTVAR_TYPE:$1.|OLDVAL_NUM:best32.|NEWVAL_NUM:best32.|OLDVAL_CHAR:$32765.|NEWVAL_CHAR:$32765.| + |---|---|---|---|---|---|---|---|---| + |`A `|`NAME `|`1 `|`-1 `|`C `|`. `|`. `|` `|`Newbie `| + |`A `|`AGE `|`0 `|`-1 `|`N `|`. `|`13 `|` `|` `| + |`A `|`HEIGHT `|`0 `|`-1 `|`N `|`. `|`65.3 `|` `|` `| + |`A `|`SEX `|`0 `|`-1 `|`C `|`. `|`. `|` `|`F `| + |`A `|`WEIGHT `|`0 `|`-1 `|`N `|`. `|`98 `|` `|` `| + |`D `|`NAME `|`1 `|`-1 `|`C `|`. `|`. 
`|`Alfred `|` `| + |`D `|`AGE `|`0 `|`-1 `|`N `|`14 `|`. `|` `|` `| + |`D `|`HEIGHT `|`0 `|`-1 `|`N `|`69 `|`. `|` `|` `| + |`D `|`SEX `|`0 `|`-1 `|`C `|`. `|`. `|`M `|` `| + |`D `|`WEIGHT `|`0 `|`-1 `|`N `|`112.5 `|`. `|` `|` `| + |`M `|`NAME `|`1 `|`0 `|`C `|`. `|`. `|`Alice `|`Alice `| + |`M `|`AGE `|`0 `|`1 `|`N `|`13 `|`99 `|` `|` `| + |`M `|`HEIGHT `|`0 `|`0 `|`N `|`56.5 `|`56.5 `|` `|` `| + |`M `|`SEX `|`0 `|`0 `|`C `|`. `|`. `|`F `|`F `| + |`M `|`WEIGHT `|`0 `|`0 `|`N `|`84 `|`84 `|` `|` `| + + Into three tables like this: + + `work.outmod`: + |NAME:$8.|SEX:$1.|AGE:best.|HEIGHT:best.|WEIGHT:best.| + |---|---|---|---|---| + |`Alice `|`F `|`99 `|`56.5 `|`84 `| + + `work.outadd`: + |NAME:$8.|SEX:$1.|AGE:best.|HEIGHT:best.|WEIGHT:best.| + |---|---|---|---|---| + |`Newbie `|`F `|`13 `|`65.3 `|`98 `| + + `work.outdel`: + |NAME:$8.|SEX:$1.|AGE:best.|HEIGHT:best.|WEIGHT:best.| + |---|---|---|---|---| + |`Alfred `|`M `|`14 `|`69 `|`112.5 `| + + As you might expect, there are a bunch of extra features and checks. + + The macro supports both SCD2 (TXTEMPORAL) and UPDATE loadtypes. If the + base table contains a PROCESSED_DTTM column (or similar), this can be + ignored by declaring it in the `processed_dttm_var` parameter. + + The macro is also flexible where columns have been added or removed from + the base table UNLESS there is a change to the primary key. + + Changes to the primary key are NOT supported, and are likely to cause + unexpected results. + + The following pre-flight checks are made: + + @li All primary key columns exist on the base table + @li There is no change in variable TYPE for any of the columns + @li There is no reduction in variable LENGTH below the max-length of the + supplied values + + Rules for stacking changes are as follows: + + + + + + + + + + + + + + + + + + + + +
Transaction TypeKey BehaviourColumn Behaviour
Deletes + The row is added to `&outDEL.` UNLESS it no longer exists + in the base table, in which case it is added to `&errDS.` instead. + + Deletes are unaffected by the addition or removal of non Primary-Key + columns. +
Inserts + Previously inserted rows are added to the `outADD` table UNLESS they + are already present in the Base table.
In this case they are added to the + `&errDS.` table instead. +
+ Inserts are unaffected by the addition of columns in the Base Table + (they are padded with blanks). Deleted columns are only a problem if + they appear on the previous insert - in which case the record is added + to `&errDS.`. +
Updates + Previously modified rows are merged with base table values such that + only the individual cells that were _previously_ changed are re-applied. + Where the row contains cells that were not marked as having changed in + the prior transaction, the 'blanks' are filled with base table values in + the `outMOD` table.
+ If the row no longer exists on the base table, then the row is added to + the `errDS` table instead. +
+ Updates are unaffected by the addition of columns in the Base Table - + the new cells are simply populated with Base Table values. Deleted + columns are only a problem if they relate to a modified cell + (`is_diff=1`) - in which case the record is added to `&errDS.`. +
+ + To illustrate the above with a diagram: + + @dot + digraph { + rankdir="TB" + start[label="Transaction Type?" shape=Mdiamond] + del[label="Does Base Row exist?" shape=rectangle] + add [label="Does Base Row exist?" shape=rectangle] + mod [label="Does Base Row exist?" shape=rectangle] + chkmod [label="Do all modified\n(is_diff=1) cells exist?" shape=rectangle] + chkadd [label="Do all inserted cells exist?" shape=rectangle] + outmod [label="outMOD\nTable" shape=Msquare style=filled] + outadd [label="outADD\nTable" shape=Msquare style=filled] + outdel [label="outDEL\nTable" shape=Msquare style=filled] + outerr [label="ErrDS Table" shape=Msquare fillcolor=Orange style=filled] + start -> del [label="Delete"] + start -> add [label="Insert"] + start -> mod [label="Update"] + + del -> outdel [label="Yes"] + del -> outerr [label="No" color="Red" fontcolor="Red"] + add -> chkadd [label="No"] + add -> outerr [label="Yes" color="Red" fontcolor="Red"] + mod -> outerr [label="No" color="Red" fontcolor="Red"] + mod -> chkmod [label="Yes"] + chkmod -> outerr [label="No" color="Red" fontcolor="Red"] + chkmod -> outmod [label="Yes"] + chkadd -> outerr [label="No" color="Red" fontcolor="Red"] + chkadd -> outadd [label="Yes"] + + } + @enddot + + For examples of usage, check out the mp_stackdiffs.test.sas program. + + + @param [in] baselibds Base Table against which the changes will be applied, + in libref.dataset format. + @param [in] auditlibds Dataset with previously applied transactions, to be + re-applied. Use libref.dataset format. + DDL as follows: %mp_coretable(DIFFTABLE) + @param [in] key Space seperated list of key variables + @param [in] mdebug= Set to 1 to enable DEBUG messages and preserve outputs + @param [in] processed_dttm_var= (0) If a variable is being used to mark + the processed datetime, put the name of the variable here. 
It will NOT + be included in the staged dataset (the load process is expected to + provide this) + @param [out] errds= (work.errds) Output table containing problematic records. + The columns of this table are: + @li PK_VARS - Space separated list of primary key variable names + @li PK_VALS - Slash separated list of PK variable values + @li ERR_MSG - Explanation of why this record is problematic + @param [out] outmod= (work.outmod) Output table containing modified records + @param [out] outadd= (work.outadd) Output table containing additional records + @param [out] outdel= (work.outdel) Output table containing deleted records + + 

SAS Macros

+ @li mf_existvarlist.sas + @li mf_getquotedstr.sas + @li mf_getuniquefileref.sas + @li mf_getuniquename.sas + @li mf_islibds.sas + @li mf_nobs.sas + @li mf_wordsinstr1butnotstr2.sas + @li mp_abort.sas + @li mp_ds2squeeze.sas + + +

Related Macros

+ @li mp_coretable.sas + @li mp_stackdiffs.test.sas + @li mp_storediffs.sas + + @todo The current approach assumes that a variable called KEY_HASH is not on + the base table. This part will need to be refactored (eg using + mf_getuniquename.sas) when such a use case arises. + + @version 9.2 + @author Allan Bowe +**/ +/** @cond */ + +%macro mp_stackdiffs(baselibds + ,auditlibds + ,key + ,mdebug=0 + ,processed_dttm_var=0 + ,errds=work.errds + ,outmod=work.outmod + ,outadd=work.outadd + ,outdel=work.outdel +)/*/STORE SOURCE*/; +%local dbg; +%if &mdebug=1 %then %do; + %put &sysmacroname entry vars:; + %put _local_; +%end; +%else %let dbg=*; + +/* input parameter validations */ +%mp_abort(iftrue= (%mf_islibds(&baselibds) ne 1) + ,mac=&sysmacroname + ,msg=%str(Invalid baselibds: &baselibds) +) +%mp_abort(iftrue= (%mf_islibds(&auditlibds) ne 1) + ,mac=&sysmacroname + ,msg=%str(Invalid auditlibds: &auditlibds) +) +%mp_abort(iftrue= (%length(&key)=0) + ,mac=&sysmacroname + ,msg=%str(Missing key variables!) +) +%mp_abort(iftrue= ( + %mf_existVarList(&auditlibds,LIBREF DSN MOVE_TYPE KEY_HASH TGTVAR_NM IS_PK + IS_DIFF TGTVAR_TYPE OLDVAL_NUM NEWVAL_NUM OLDVAL_CHAR NEWVAL_CHAR)=0 + ) + ,mac=&sysmacroname + ,msg=%str(Input &auditlibds is missing required columns!) 
+) + + +/* set up macro vars */ +%local prefix dslist x var keyjoin commakey keepvars missvars fref; +%let prefix=%substr(%mf_getuniquename(),1,25); +%let dslist=ds1d ds2d ds3d ds1a ds2a ds3a ds1m ds2m ds3m pks dups base + delrec delerr addrec adderr modrec moderr; +%do x=1 %to %sysfunc(countw(&dslist)); + %let var=%scan(&dslist,&x); + %local &var; + %let &var=%upcase(&prefix._&var); +%end; + +%let key=%upcase(&key); +%let commakey=%mf_getquotedstr(&key,quote=N); + +%let keyjoin=1=1; +%do x=1 %to %sysfunc(countw(&key)); + %let var=%scan(&key,&x); + %let keyjoin=&keyjoin and a.&var=b.&var; +%end; + +data &errds; + length pk_vars $256 pk_vals $4098 err_msg $512; + call missing (of _all_); + stop; +run; + +/** + * Prepare raw DELETE table + * Records are in the OLDVAL_xxx columns + */ +%let keepvars=MOVE_TYPE KEY_HASH TGTVAR_NM TGTVAR_TYPE IS_PK + OLDVAL_NUM OLDVAL_CHAR + NEWVAL_NUM NEWVAL_CHAR; +proc sort data=&auditlibds(where=(move_type='D') keep=&keepvars) + out=&ds1d(drop=move_type); +by KEY_HASH TGTVAR_NM; +run; +proc transpose data=&ds1d(where=(tgtvar_type='N')) + out=&ds2d(drop=_name_); + by KEY_HASH; + id TGTVAR_NM; + var OLDVAL_NUM; +run; +proc transpose data=&ds1d(where=(tgtvar_type='C')) + out=&ds3d(drop=_name_); + by KEY_HASH; + id TGTVAR_NM; + var OLDVAL_CHAR; +run; +%mp_ds2squeeze(&ds2d,outds=&ds2d) +%mp_ds2squeeze(&ds3d,outds=&ds3d) +data &outdel; + if 0 then set &baselibds; + set &ds2d; + set &ds3d; + drop key_hash; + if not missing(%scan(&key,1)); +run; +proc sort; + by &key; +run; + +/** + * Prepare raw APPEND table + * Records are in the NEWVAL_xxx columns + */ +proc sort data=&auditlibds(where=(move_type='A') keep=&keepvars) + out=&ds1a(drop=move_type); + by KEY_HASH TGTVAR_NM; +run; +proc transpose data=&ds1a(where=(tgtvar_type='N')) + out=&ds2a(drop=_name_); + by KEY_HASH; + id TGTVAR_NM; + var NEWVAL_NUM; +run; +proc transpose data=&ds1a(where=(tgtvar_type='C')) + out=&ds3a(drop=_name_); + by KEY_HASH; + id TGTVAR_NM; + var NEWVAL_CHAR; +run; 
+%mp_ds2squeeze(&ds2a,outds=&ds2a) +%mp_ds2squeeze(&ds3a,outds=&ds3a) +data &outadd; + if 0 then set &baselibds; + set &ds2a; + set &ds3a; + drop key_hash; + if not missing(%scan(&key,1)); +run; +proc sort; + by &key; +run; + +/** + * Prepare raw MODIFY table + * Keep only primary key - will add modified values later + */ +proc sort data=&auditlibds( + where=(move_type='M' and is_pk=1) keep=&keepvars + ) out=&ds1m(drop=move_type); + by KEY_HASH TGTVAR_NM; +run; +proc transpose data=&ds1m(where=(tgtvar_type='N')) + out=&ds2m(drop=_name_); + by KEY_HASH ; + id TGTVAR_NM; + var NEWVAL_NUM; +run; +proc transpose data=&ds1m(where=(tgtvar_type='C')) + out=&ds3m(drop=_name_); + by KEY_HASH; + id TGTVAR_NM; + var NEWVAL_CHAR; +run; +%mp_ds2squeeze(&ds2m,outds=&ds2m) +%mp_ds2squeeze(&ds3m,outds=&ds3m) +data &outmod; + if 0 then set &baselibds; + set &ds2m; + set &ds3m; + if not missing(%scan(&key,1)); +run; +proc sort; + by &key; +run; + +/** + * Extract matching records from the base table + * Do this in one join for efficiency. + * At a later date, this should be optimised for large database tables by using + * passthrough and a temporary table. + */ +data &pks; + if 0 then set &baselibds; + set &outadd &outmod &outdel; + keep &key; +run; + +proc sort noduprec dupout=&dups; +by &key; +run; +data _null_; + set &dups; + putlog (_all_)(=); +run; +%mp_abort(iftrue= (%mf_nobs(&dups) ne 0) + ,mac=&sysmacroname + ,msg=%str(duplicates (%mf_nobs(&dups)) found on &auditlibds!) 
+) + +proc sql; +create table &base as + select a.* + from &baselibds a, &pks b + where &keyjoin; + +/** + * delete check + * This is straightforward as it relates to records only + */ +proc sql; +create table &delrec as + select a.* + from &outdel a + left join &base b + on &keyjoin + where b.%scan(&key,1) is null + order by &commakey; + +data &delerr; + if 0 then set &errds; + set &delrec; + PK_VARS="&key"; + PK_VALS=catx('/',&commakey); + ERR_MSG="Rows cannot be deleted as they do not exist on the Base dataset"; + keep PK_VARS PK_VALS ERR_MSG; +run; +proc append base=&errds data=&delerr; +run; + +data &outdel; + merge &outdel (in=a) &delrec (in=b); + by &key; + if not b; +run; + +/** + * add check + * Problems - where record already exists, or base table has columns missing + */ +%let missvars=%mf_wordsinstr1butnotstr2( + Str1=%upcase(%mf_getvarlist(&outadd)), + Str2=%upcase(%mf_getvarlist(&baselibds)) +); +%if %length(&missvars)>0 %then %do; + /* add them to the err table */ + data &adderr; + if 0 then set &errds; + set &outadd; + PK_VARS="&key"; + PK_VALS=catx('/',&commakey); + ERR_MSG="Rows cannot be added due to missing base vars: &missvars"; + keep PK_VARS PK_VALS ERR_MSG; + run; + proc append base=&errds data=&adderr; + run; + proc sql; + delete * from &outadd; +%end; +%else %do; + proc sql; + /* find records that already exist on base table */ + create table &addrec as + select a.* + from &outadd a + inner join &base b + on &keyjoin + order by &commakey; + + /* add them to the err table */ + data &adderr; + if 0 then set &errds; + set &addrec; + PK_VARS="&key"; + PK_VALS=catx('/',&commakey); + ERR_MSG="Rows cannot be added as they already exist on the Base dataset"; + keep PK_VARS PK_VALS ERR_MSG; + run; + proc append base=&errds data=&adderr; + run; + + /* remove invalid rows from the outadd table */ + data &outadd; + merge &outadd (in=a) &addrec (in=b); + by &key; + if not b; + run; +%end; + +/** + * mod check + * Problems - where record does not exist 
or baseds has modified cols missing + */ +proc sql noprint; +select distinct tgtvar_nm into: missvars separated by ' ' + from &auditlibds + where move_type='M' and is_diff=1; +%let missvars=%mf_wordsinstr1butnotstr2( + Str1=&missvars, + Str2=%upcase(%mf_getvarlist(&baselibds)) +); +%if %length(&missvars)>0 %then %do; + /* add them to the err table */ + data &moderr; + if 0 then set &errds; + set &outmod; + PK_VARS="&key"; + PK_VALS=catx('/',&commakey); + ERR_MSG="Rows cannot be modified due to missing base vars: &missvars"; + keep PK_VARS PK_VALS ERR_MSG; + run; + proc append base=&errds data=&moderr; + run; + proc sql; + delete * from &outmod; +%end; +%else %do; + /* now check for records that do not exist (therefore cannot be modified) */ + proc sql; + create table &modrec as + select a.* + from &outmod a + left join &base b + on &keyjoin + where b.%scan(&key,1) is null + order by &commakey; + data &moderr; + if 0 then set &errds; + set &modrec; + PK_VARS="&key"; + PK_VALS=catx('/',&commakey); + ERR_MSG="Rows cannot be modified as they do not exist on the Base dataset"; + keep PK_VARS PK_VALS ERR_MSG; + run; + proc append base=&errds data=&moderr; + run; + /* delete the above records from the outmod table */ + data &outmod; + merge &outmod (in=a) &modrec (in=b); + by &key; + if not b; + run; + /* now - we can prepare the final MOD table (which is currently PK only) */ + proc sql undo_policy=none; + create table &outmod as + select a.key_hash + ,b.* + from &outmod a + inner join &base b + on &keyjoin + order by &commakey; + /* now - to update outmod with modified (is_diff=1) values */ + %let fref=%mf_getuniquefileref(); + data _null_; + file &fref; + set &auditlibds(where=(move_type='M')) end=lastobs; + by key_hash; + retain comma 'N'; + if _n_=1 then put 'proc sql;'; + if first.key_hash then do; + comma='N'; + put "update &outmod set " @@; + end; + if is_diff=1 then do; + if comma='N' then do; + put ' '@@; + comma='Y'; + end; + else put ' ,'@@; + if 
tgtvar_type='C' then do; + length qstr $32767; + qstr=quote(trim(NEWVAL_CHAR)); + put tgtvar_nm '=' qstr; + end; + else put tgtvar_nm '=' newval_num; + if comma=' ' then comma=' ,'; + end; + if last.key_hash then put ' where key_hash=trim("' key_hash '");'; + if lastobs then put "alter table &outmod drop key_hash;"; + run; + %inc &fref/source2; +%end; + +%if &mdebug=0 %then %do; + proc datasets lib=work; + delete &prefix:; + run; + %put &sysmacroname exit vars:; + %put _local_; +%end; +%mend mp_stackdiffs; +/** @endcond */ \ No newline at end of file diff --git a/base/mp_storediffs.sas b/base/mp_storediffs.sas index 1251b6b..f5ddd4d 100644 --- a/base/mp_storediffs.sas +++ b/base/mp_storediffs.sas @@ -49,41 +49,23 @@ @param [in] appds= (0) Dataset with appended records @param [in] modds= (0) Dataset with modified records @param [out] outds= (work.mp_storediffs) Output table containing stored data. - Has the following format: + DDL as follows: %mp_coretable(DIFFTABLE) - proc sql; - create table &outds( - load_ref char(36) label='unique load reference', - processed_dttm num format=E8601DT26.6 label='Processed at timestamp', - libref char(8) label='Library Reference (8 chars)', - dsn char(32) label='Dataset Name (32 chars)', - key_hash char(32) label= - 'MD5 Hash of primary key values (pipe seperated)', - move_type char(1) label='Either (A)ppended, (D)eleted or (M)odified', - is_pk num label='Is Primary Key Field? (1/0)', - is_diff num label= - 'Did value change? (1/0/-1). Always -1 for appends and deletes.', - tgtvar_type char(1) label='Either (C)haracter or (N)umeric', - tgtvar_nm char(32) label='Target variable name (32 chars)', - oldval_num num format=best32. label='Old (numeric) value', - newval_num num format=best32. 
label='New (numeric) value', - oldval_char char(32765) label='Old (character) value', - newval_char char(32765) label='New (character) value', - constraint pk_mpe_audit - primary key(load_ref,libref,dsn,key_hash,tgtvar_nm) - ); - - @param [in] processed_dttm= (0) Provide a datetime constant in relation to - the actual load time. If not provided, current timestamp is used. - @param [in] mdebug= set to 1 to enable DEBUG messages and preserve outputs - @param [out] loadref= (0) Provide a unique key to reference the load, - otherwise a UUID will be generated. + @param [in] processed_dttm= (0) Provide a datetime constant in relation to + the actual load time. If not provided, current timestamp is used. + @param [in] mdebug= set to 1 to enable DEBUG messages and preserve outputs + @param [out] loadref= (0) Provide a unique key to reference the load, + otherwise a UUID will be generated.

SAS Macros

@li mf_getquotedstr.sas @li mf_getuniquename.sas @li mf_getvarlist.sas +

Related Macros

+ @li mp_stackdiffs.sas + @li mp_storediffs.test.sas + @version 9.2 @author Allan Bowe **/ diff --git a/fcmp/mcf_init.sas b/fcmp/mcf_init.sas new file mode 100644 index 0000000..84b5735 --- /dev/null +++ b/fcmp/mcf_init.sas @@ -0,0 +1,44 @@ +/** + @file + @brief Sets up the mcf_xx functions + @details + There is no (efficient) way to determine if an mcf_xx macro has already been + invoked. So, we make use of a global macro variable list to keep track. + + Usage: + + %mcf_init(MCF_LENGTH) + + Returns: + + > 1 (if already initialised) else 0 + + @param [in] func The function to be initialised + +

Related Macros

+ @li mcf_init.test.sas + +**/ + +%macro mcf_init(func +)/*/STORE SOURCE*/; + +%if not (%symexist(SASJS_PREFIX)) %then %do; + %global SASJS_PREFIX; + %let SASJS_PREFIX=SASJS; +%end; + +%let func=%upcase(&func); + +/* the / character is just a separator */ +%global &sasjs_prefix._FUNCTIONS; +%if %index(&&&sasjs_prefix._FUNCTIONS,&func/)>0 %then %do; + 1 + %return; +%end; +%else %do; + %let &sasjs_prefix._FUNCTIONS=&&&sasjs_prefix._FUNCTIONS &func/; + 0 +%end; + +%mend mcf_init; diff --git a/fcmp/mcf_length.sas b/fcmp/mcf_length.sas index 43b2bdb..3e8a6cf 100644 --- a/fcmp/mcf_length.sas +++ b/fcmp/mcf_length.sas @@ -40,10 +40,11 @@ Uses a 3 part format: libref.catalog.package

SAS Macros

- @li mf_existfunction.sas + @li mcf_init.sas -

Related Macros

+

Related Programs

@li mcf_length.test.sas + @li mp_init.sas **/ @@ -54,7 +55,7 @@ ,pkg=UTILS )/*/STORE SOURCE*/; -%if %mf_existfunction(mcf_length)=1 %then %return; +%if %mcf_init(mcf_length)=1 %then %return; %if &wrap=YES %then %do; proc fcmp outlib=&lib..&cat..&pkg; diff --git a/fcmp/mcf_stpsrv_header.sas b/fcmp/mcf_stpsrv_header.sas index e6ba2f0..b9f39f1 100644 --- a/fcmp/mcf_stpsrv_header.sas +++ b/fcmp/mcf_stpsrv_header.sas @@ -55,7 +55,11 @@ Uses a 3 part format: libref.catalog.package

SAS Macros

- @li mf_existfunction.sas + @li mcf_init.sas + +

Related Programs

+ @li mcf_stpsrv_header.test.sas + @li mp_init.sas **/ @@ -66,7 +70,7 @@ ,pkg=UTILS )/*/STORE SOURCE*/; -%if %mf_existfunction(stpsrv_header)=1 %then %return; +%if %mcf_init(stpsrv_header)=1 %then %return; %if &wrap=YES %then %do; proc fcmp outlib=&lib..&cat..&pkg; diff --git a/fcmp/mcf_string2file.sas b/fcmp/mcf_string2file.sas index f41b30c..dce768f 100644 --- a/fcmp/mcf_string2file.sas +++ b/fcmp/mcf_string2file.sas @@ -40,7 +40,11 @@ Uses a 3 part format: libref.catalog.package

SAS Macros

- @li mf_existfunction.sas + @li mcf_init.sas + +

Related Programs

+ @li mcf_string2file.test.sas + @li mp_init.sas **/ @@ -51,7 +55,7 @@ ,pkg=UTILS )/*/STORE SOURCE*/; -%if %mf_existfunction(mcf_string2file)=1 %then %return; +%if %mcf_init(mcf_string2file)=1 %then %return; %if &wrap=YES %then %do; proc fcmp outlib=&lib..&cat..&pkg; diff --git a/meta/mm_getgroupmembers.sas b/meta/mm_getgroupmembers.sas index c1853dd..05ef4c7 100755 --- a/meta/mm_getgroupmembers.sas +++ b/meta/mm_getgroupmembers.sas @@ -1,21 +1,28 @@ /** @file @brief Creates dataset with all members of a metadata group - @details + @details This macro will query SAS metadata and return all the members + of a particular group. - usage: + Usage: - %mm_getgroupmembers(someGroupName - ,outds=work.mm_getgroupmembers - ,emails=YES) + %mm_getgroupmembers(someGroupName + ,outds=work.mm_getgroupmembers + ,emails=YES + ) @param group metadata group for which to bring back members - @param outds= the dataset to create that contains the list of members - @param emails= set to YES to bring back email addresses - @param id= set to yes if passing an ID rather than a group name + @param outds= (work.mm_getgroupmembers) The dataset to create that contains + the list of members + @param emails= (NO) Set to YES to bring back email addresses + @param id= (NO) Set to yes if passing an ID rather than a group name @returns outds dataset containing all members of the metadata group +

Related Macros

+ @li mm_getgroups.sas + @li mm_adduser2group.sas + @version 9.2 @author Allan Bowe diff --git a/sasjs/doxy/Doxyfile b/sasjs/doxy/Doxyfile index fcc2bd9..8e5b066 100644 --- a/sasjs/doxy/Doxyfile +++ b/sasjs/doxy/Doxyfile @@ -8,6 +8,7 @@ FILE_PATTERNS = *.sas \ *.dox GENERATE_LATEX = NO GENERATE_TREEVIEW = YES +HAVE_DOT = YES HIDE_FRIEND_COMPOUNDS = YES HIDE_IN_BODY_DOCS = YES HIDE_SCOPE_NAMES = YES diff --git a/sasjs/doxy/new_header.html b/sasjs/doxy/new_header.html index 1e53548..1627715 100644 --- a/sasjs/doxy/new_header.html +++ b/sasjs/doxy/new_header.html @@ -17,9 +17,7 @@ - $title - diff --git a/sasjs/sasjsconfig.json b/sasjs/sasjsconfig.json index c1e59cd..ad14bbd 100644 --- a/sasjs/sasjsconfig.json +++ b/sasjs/sasjsconfig.json @@ -14,7 +14,7 @@ "displayMacroCore": false, "enableLineage": false, "doxyContent": { - "favIcon": "runningman.jpg", + "favIcon": "favicon.ico", "logo": "Macro_core_website_1.png", "readMe": "../../README.md" } @@ -40,6 +40,7 @@ "tests/viyaonly" ], "programFolders": [], + "binaryFolders": [], "deployConfig": { "deployServicePack": true, "deployScripts": [] @@ -67,7 +68,7 @@ }, { "name": "server", - "serverUrl": "https://sas.analytium.co.uk:5001", + "serverUrl": "https://sas.analytium.co.uk:5000", "serverType": "SASJS", "appLoc": "/Shared Data/temp/macrocore", "macroFolders": [ diff --git a/tests/crossplatform/mcf_init.test.sas b/tests/crossplatform/mcf_init.test.sas new file mode 100644 index 0000000..ecf1df2 --- /dev/null +++ b/tests/crossplatform/mcf_init.test.sas @@ -0,0 +1,46 @@ +/** + @file + @brief Testing mcf_init.sas macro + +

SAS Macros

+ @li mcf_init.sas + @li mp_assert.sas + +**/ + +%mp_assert( + iftrue=(%mcf_init(test)=0), + desc=Check if new func returns 0 +) +%mp_assert( + iftrue=(&syscc=0), + desc=No errs on basic invocation +) +%mp_assert( + iftrue=(%mcf_init(test)=1), + desc=Check if second invocation returns 1 +) +%mp_assert( + iftrue=(&syscc=0), + desc=No errs on second invocation +) +%mp_assert( + iftrue=(%mcf_init(test2)=0), + desc=Check if new invocation returns 0 +) +%mp_assert( + iftrue=(%mcf_init(test2)=1), + desc=Check if second new invocation returns 1 +) +%mp_assert( + iftrue=(%mcf_init(test)=1), + desc=Check original returns 1 +) +%mp_assert( + iftrue=(%mcf_init(t)=1), + desc=Check subset returns 1 +) +%mp_assert( + iftrue=(&syscc=0), + desc=No errs at end +) \ No newline at end of file diff --git a/tests/crossplatform/mf_existvar.test.sas b/tests/crossplatform/mf_existvar.test.sas index efb6ca9..18e5dff 100644 --- a/tests/crossplatform/mf_existvar.test.sas +++ b/tests/crossplatform/mf_existvar.test.sas @@ -10,13 +10,11 @@ %mp_assert( - iftrue=(%mf_existvar(sashelp.class,age)=1), - desc=Checking existing var exists, - outds=work.test_results + iftrue=(%mf_existvar(sashelp.class,age)>0), + desc=Checking existing var exists ) %mp_assert( iftrue=(%mf_existvar(sashelp.class,isjustanumber)=0), - desc=Checking non existing var does not exist, - outds=work.test_results + desc=Checking non existing var does not exist ) \ No newline at end of file diff --git a/tests/crossplatform/mf_getfilesize.test.sas b/tests/crossplatform/mf_getfilesize.test.sas new file mode 100644 index 0000000..fa3dd0a --- /dev/null +++ b/tests/crossplatform/mf_getfilesize.test.sas @@ -0,0 +1,30 @@ +/** + @file + @brief Testing mf_getfilesize macro + +

SAS Macros

+ @li mf_getfilesize.sas + @li mp_assert.sas + @li mp_assertscope.sas + +**/ + +data test; + x=1; +run; + +%mp_assertscope(SNAPSHOT) +%put %mf_getfilesize(libds=work.test) +%mp_assertscope(COMPARE) + +%mp_assert( + iftrue=(&syscc=0), + desc=Checking syscc +) + +%put %mf_getfilesize(libds=test); + +%mp_assert( + iftrue=(&syscc=0), + desc=Checking syscc with one level name +) \ No newline at end of file diff --git a/tests/crossplatform/mp_applyformats.test.sas b/tests/crossplatform/mp_applyformats.test.sas index d6a5fdf..154f3f8 100644 --- a/tests/crossplatform/mp_applyformats.test.sas +++ b/tests/crossplatform/mp_applyformats.test.sas @@ -34,7 +34,7 @@ run; %mp_applyformats(work.cols2) %mp_assert( - iftrue=("&orig_fmt"=""), + iftrue=("&origfmt"=""), desc=Check that formats were cleared, outds=work.test_results ) diff --git a/tests/crossplatform/mp_filterstore.test.sas b/tests/crossplatform/mp_filterstore.test.sas index 370f824..d8a867c 100644 --- a/tests/crossplatform/mp_filterstore.test.sas +++ b/tests/crossplatform/mp_filterstore.test.sas @@ -41,7 +41,7 @@ run; outquery=work.query, mdebug=1 ) -%mp_assert(iftrue=(&syscc>0), +%mp_assert(iftrue=(&syscc=0), desc=Ensure macro runs without errors, outds=work.test_results ) diff --git a/tests/crossplatform/mp_getpk.test.sas b/tests/crossplatform/mp_getpk.test.sas index 2459597..0d284e3 100644 --- a/tests/crossplatform/mp_getpk.test.sas +++ b/tests/crossplatform/mp_getpk.test.sas @@ -52,14 +52,13 @@ create table work.example2( %mp_getpk(work,ds=example2,outds=test2) data _null_; - set work.test1; + set work.test2; call symputx('test2',pk_fields); run; %mp_assert( iftrue=("&test2"="TX_FROM DD_TYPE"), - desc=mp_getpk gets unique constraint with NOT NULL in correct order, - outds=work.test_results + desc=mp_getpk gets unique constraint with NOT NULL in correct order ) /* unique key without NOT NULL NOT captured */ @@ -71,13 +70,17 @@ create table work.example3( DD_SHORTDESC char(256), constraint unq1 unique(tx_from, 
dd_type), constraint unq2 unique(tx_from, dd_type, dd_source), - constraint nnn not null(tx_from), - constraint nnnn not null(dd_type) + constraint nnn not null(tx_from) ); %mp_getpk(work,ds=example3,outds=test3) +data _null_; + set work.test3; + call symputx('test3',pk_fields); +run; + %mp_assert( - iftrue=(%mf_nobs(work.test3)=0), + iftrue=("&test3 "=" "), desc=mp_getpk does not capture unique constraint without NOT NULL, outds=work.test_results ) diff --git a/tests/crossplatform/mp_stackdiffs.test.sas b/tests/crossplatform/mp_stackdiffs.test.sas new file mode 100644 index 0000000..fd9a32e --- /dev/null +++ b/tests/crossplatform/mp_stackdiffs.test.sas @@ -0,0 +1,301 @@ +/** + @file + @brief Testing mp_stackdiffs macro + +

SAS Macros

+ @li mp_assert.sas + @li mp_assertcolvals.sas + @li mp_assertdsobs.sas + @li mp_assertscope.sas + @li mp_stackdiffs.sas + @li mp_storediffs.sas + +**/ + +/* first, make some data */ + +data work.orig work.deleted work.changed work.appended; + set sashelp.electric; + if _n_ le 10 then do; + output work.deleted; + end; + else if _n_ le 20 then do; + output work.orig; + coal=-1; + coaltip='modified'; + output work.changed; + end; + else if _n_ le 30 then do; + year=_n_; + output work.appended; + end; + else stop; +run; + +%mp_storediffs(sashelp.electric + ,work.orig + ,CUSTOMER YEAR + ,delds=work.deleted + ,modds=work.changed + ,appds=work.appended + ,outds=work.final + ,mdebug=1 +) + +%mp_assertscope(SNAPSHOT) + +/** + * Deletions test - where record does exist + */ +data work.orig1; + set sashelp.electric; + if _n_ le 10; +run; +data work.final1; + set work.final; + where move_type='D'; +run; +%mp_stackdiffs(work.orig1 + ,work.final1 + ,CUSTOMER YEAR + ,mdebug=1 + ,errds=work.errds1 + ,outmod=work.mod1 + ,outadd=work.add1 + ,outdel=work.del1 +) +%mp_assertdsobs(work.errds1, + desc=Delete1 - no errs, + test=EQUALS 0 +) +%mp_assertdsobs(work.del1, + desc=Delete1 - records populated, + test=EQUALS 10 +) +/** + * Deletions test - where record does NOT exist + */ +data work.orig2; + set work.orig; + stop; /* empty table */ +run; +data work.final2; + set work.final; + where move_type='D'; +run; +%mp_stackdiffs(work.orig2 + ,work.final2 + ,CUSTOMER YEAR + ,mdebug=1 + ,errds=work.errds2 + ,outmod=work.mod2 + ,outadd=work.add2 + ,outdel=work.del2 +) +%mp_assertdsobs(work.errds2, + desc=Delete2 - has errs, + test=EQUALS 10 +) +%mp_assertdsobs(work.del2, + desc=Delete2 - records not populated, + test=EQUALS 0 +) + +/** + * Additions test - where record does not exist + */ +data work.orig3; + set work.orig; + stop; +run; +data work.final3; + set work.final; + where move_type='A'; +run; +%mp_stackdiffs(work.orig3 + ,work.final3 + ,CUSTOMER YEAR + ,mdebug=1 + ,errds=work.errds3 
+ ,outmod=work.mod3 + ,outadd=work.add3 + ,outdel=work.del3 +) +%mp_assertdsobs(work.errds3, + desc=Add3 - no errs, + test=EQUALS 0 +) +%mp_assertdsobs(work.add3, + desc=Add3 - records populated, + test=EQUALS 10 +) + +/** + * Additions test - where record does exist + */ +data work.orig4; + set sashelp.electric; + if _n_ ge 20; + year=_n_; + if _n_>25 then stop; +run; +data work.final4; + set work.final; + where move_type='A'; +run; +%mp_stackdiffs(work.orig4 + ,work.final4 + ,CUSTOMER YEAR + ,mdebug=1 + ,errds=work.errds4 + ,outmod=work.mod4 + ,outadd=work.add4 + ,outdel=work.del4 +) +%mp_assertdsobs(work.errds4, + desc=Add4 - 5 errs, + test=EQUALS 5 +) +%mp_assertdsobs(work.add4, + desc=Add4 - records populated, + test=EQUALS 5 +) + +/** + * Additions test - where base table has missing vars + */ +data work.orig5; + set work.orig; + drop Coal; +run; +data work.final5; + set work.final; + where move_type='A'; +run; +%mp_stackdiffs(work.orig5 + ,work.final5 + ,CUSTOMER YEAR + ,mdebug=1 + ,errds=work.errds5 + ,outmod=work.mod5 + ,outadd=work.add5 + ,outdel=work.del5 +) +%mp_assertdsobs(work.errds5, + desc=Add5 - 10 errs, + test=EQUALS 10 +) +%mp_assertdsobs(work.add5, + desc=Add5 - 0 records populated due to structure change, + test=EQUALS 0 +) + +/** + * Additions test - where append table has missing vars + */ +data work.final6; + set work.final; + where tgtvar_nm ne 'COAL' and move_type='A'; +run; +%mp_stackdiffs(work.orig + ,work.final6 + ,CUSTOMER YEAR + ,mdebug=1 + ,errds=work.errds6 + ,outmod=work.mod6 + ,outadd=work.add6 + ,outdel=work.del6 +) +%mp_assertdsobs(work.errds6, + desc=Add6 - 0 errs, + test=EQUALS 0 +) +%mp_assertdsobs(work.add6, + desc=Add6 - 10 records populated (structure change irrelevant), + test=EQUALS 10 +) + +/** + * Modifications test - where base table has missing vars + */ +data work.orig7; + set work.orig; + drop Coal; +run; +data work.final7; + set work.final; + where move_type='M'; +run; +%mp_stackdiffs(work.orig7 + ,work.final7 + 
,CUSTOMER YEAR + ,mdebug=1 + ,errds=work.errds7 + ,outmod=work.mod7 + ,outadd=work.add7 + ,outdel=work.del7 +) +%mp_assertdsobs(work.errds7, + desc=Mod7 - 10 errs, + test=EQUALS 10 +) +%mp_assertdsobs(work.Mod7, + desc=Mod7 - 0 records populated (structure change relevant), + test=EQUALS 0 +) +%mp_assertdsobs(work.add7, + desc=add7 - 0 records populated , + test=EQUALS 0 +) +%mp_assertdsobs(work.del7, + desc=del7 - 0 records populated , + test=EQUALS 0 +) +/** + * Modifications (big) test - where base table has missing rows + * Also used as a full integration test (all move_types) + * And a test if the actual values were applied + */ +data work.orig8; + set sashelp.electric; + if _n_ le 16; +run; +%mp_stackdiffs(work.orig8 + ,work.final + ,CUSTOMER YEAR + ,mdebug=1 + ,errds=work.errds8 + ,outmod=work.mod8 + ,outadd=work.add8 + ,outdel=work.del8 +) +%mp_assertdsobs(work.errds8, + desc=Mod4 - 4 errs, + test=EQUALS 4 +) +%mp_assertdsobs(work.Mod8, + desc=Mod8 - 6 records populated (missing rows relevant), + test=EQUALS 6 +) + +/** + * Modifications test - were diffs actually applied? + */ +data work.checkds; + charchk='modified'; + numchk=-1; + output; +run; +%mp_assertcolvals(work.mod8.coal, + checkvals=work.checkds.numchk, + desc=Modified numeric value matches, + test=ALLVALS +) +%mp_assertcolvals(work.mod8.coaltip, + checkvals=work.checkds.charchk, + desc=Modified char value matches, + test=ALLVALS +) + + +%mp_assertscope(COMPARE,ignorelist=SASJS_FUNCTIONS) \ No newline at end of file diff --git a/tests/testinit.sas b/tests/testinit.sas index e1026ea..d96ccee 100644 --- a/tests/testinit.sas +++ b/tests/testinit.sas @@ -14,8 +14,11 @@ /* set defaults */ %mp_init() +%global _debug; + %macro loglevel(); - %if &_debug=2477 %then %do; + %if "&_debug"="2477" or "&_debug"="fields,log,trace" %then %do; + %put debug mode activated; options mprint; %end; %mend loglevel;