From c3b89c7f7d9c568bf8afc71ac841070cb773fe1f Mon Sep 17 00:00:00 2001 From: munja Date: Mon, 24 Jan 2022 11:17:21 +0100 Subject: [PATCH] feat: mp_ds2squeeze macro --- base/mp_assertscope.sas | 4 +- base/mp_ds2squeeze.sas | 118 +++++++++++++++++++++ base/mp_init.sas | 4 +- tests/crossplatform/mp_ds2squeeze.test.sas | 43 ++++++++ 4 files changed, 166 insertions(+), 3 deletions(-) create mode 100644 base/mp_ds2squeeze.sas create mode 100644 tests/crossplatform/mp_ds2squeeze.test.sas diff --git a/base/mp_assertscope.sas b/base/mp_assertscope.sas index 38d6a54..b1a9bd3 100644 --- a/base/mp_assertscope.sas +++ b/base/mp_assertscope.sas @@ -29,7 +29,7 @@ @li COMPARE - compare the current macro variables against previous values @param [in] scope= (GLOBAL) The scope of the variables to be checked. This corresponds to the values in the SCOPE column in `sashelp.vmacro`. - @param [in] desc= (Testing variable scope) The user provided test description + @param [in] desc= (Testing scope leakage) The user provided test description @param [in,out] scopeds= (work.mp_assertscope) The dataset to contain the scope snapshot @param [out] outds= (work.test_results) The output dataset to contain the @@ -51,7 +51,7 @@ **/ %macro mp_assertscope(action, - desc=0, + desc=Testing Scope Leakage, scope=GLOBAL, scopeds=work.mp_assertscope, outds=work.test_results diff --git a/base/mp_ds2squeeze.sas b/base/mp_ds2squeeze.sas new file mode 100644 index 0000000..a729fdb --- /dev/null +++ b/base/mp_ds2squeeze.sas @@ -0,0 +1,118 @@ +/** + @file + @brief Create a smaller version of a dataset, without data loss + @details This macro will scan the input dataset and create a new one, that + has the minimum variable lengths needed to store the data without data loss. + + Inspiration was taken from [How to Reduce the Disk Space Required by a + SAS® Data Set](https://www.lexjansen.com/nesug/nesug06/io/io18.pdf) by + Selvaratnam Sridharma. 
The end of the referenced paper presents a macro named + "squeeze", hence the nomenclature. + + Usage: + + data big; + length my big $32000; + do i=1 to 1e4; + my=repeat('oh my',100); + big='dawg'; + special=._; + output; + end; + run; + + %mp_ds2squeeze(work.big,outds=work.smaller) + + The following will also be printed to the log (exact values may differ + depending on your OS and COMPRESS settings): + + > MP_DS2SQUEEZE: work.big was 625MB + > MP_DS2SQUEEZE: work.smaller is 5MB + + @param [in] libds The library.dataset to be squeezed + @param [out] outds= (work.mp_ds2squeeze) The squeezed dataset to create + @param [in] mdebug= (0) Set to 1 to enable DEBUG messages + +

<h4> SAS Macros </h4>

+ @li mf_getfilesize.sas + @li mf_getuniquefileref.sas + @li mf_getuniquename.sas + @li mp_getmaxvarlengths.sas + +

<h4> Related Programs </h4>

+ @li mp_ds2squeeze.test.sas + + @version 9.3 + @author Allan Bowe +**/ + +%macro mp_ds2squeeze( + libds, + outds=work.mp_ds2squeeze, + mdebug=0 +)/*/STORE SOURCE*/; +%local dbg source; +%if &mdebug=1 %then %do; + %put &sysmacroname entry vars:; + %put _local_; +%end; +%else %do; + %let dbg=*; + %let source=/source2; +%end; +/* temporary dataset (max lengths) and fileref (generated LENGTH statement) */ +%local optval ds fref; +%let ds=%mf_getuniquename(); +%let fref=%mf_getuniquefileref(); + +%mp_getmaxvarlengths(&libds,outds=&ds) + +data _null_; + set &ds end=last; + file &fref; + /* grab the types */ + retain dsid; + if _n_=1 then dsid=open("&libds",'is'); + if dsid le 0 then do; + msg=sysmsg(); + put msg=; + stop; + end; + type=vartype(dsid,varnum(dsid, name)); + if last then rc=close(dsid); + /* write out the length statement */ + if _n_=1 then put 'length '; + length len $6; + if type='C' then do; + if maxlen=0 then len='$1'; + else len=cats('$',maxlen); + end; + else do; + if maxlen=0 then len='3'; + else len=maxlen; + end; + put ' ' name ' ' len; + if last then put ';'; +run; + +/* configure varlenchk - as we are explicitly shortening the variables */ +%let optval=%sysfunc(getoption(varlenchk)); +options varlenchk=NOWARN; + +data &outds; + %inc &fref &source; + set &libds; +run; + +options varlenchk=&optval; +/* remove the temporary dataset and fileref unless running with mdebug=1 */ +%if &mdebug=0 %then %do; + proc sql; + drop table &ds; + filename &fref clear; +%end; + +%put &sysmacroname: &libds was %mf_getfilesize(libds=&libds,format=yes); +%put &sysmacroname: &outds is %mf_getfilesize(libds=&outds,format=yes); + +%mend mp_ds2squeeze; \ No newline at end of file diff --git a/base/mp_init.sas b/base/mp_init.sas index 28d67d6..734f555 100644 --- a/base/mp_init.sas +++ b/base/mp_init.sas @@ -33,13 +33,15 @@ %macro mp_init(prefix=SASJS )/*/STORE SOURCE*/; +%if %symexist(SASJS_PREFIX) %then %return; /* only run once */ + %global SASJS_PREFIX /* the ONLY hard-coded global macro variable in SASjs */ &prefix._INIT_NUM /* initialisation time as numeric */ &prefix._INIT_DTTM /* initialisation time in 
E8601DT26.6 format */ &prefix.WORK /* avoid typing %sysfunc(pathname(work)) every time */ ; -%if %length(&sasjs_prefix>0) %then %return; /* only run once */ + %let sasjs_prefix=&prefix; data _null_; diff --git a/tests/crossplatform/mp_ds2squeeze.test.sas b/tests/crossplatform/mp_ds2squeeze.test.sas new file mode 100644 index 0000000..db8c246 --- /dev/null +++ b/tests/crossplatform/mp_ds2squeeze.test.sas @@ -0,0 +1,43 @@ +/** + @file + @brief Testing mp_ds2squeeze.sas macro + +

<h4> SAS Macros </h4>

+ @li mp_assert.sas + @li mp_assertscope.sas + @li mp_ds2squeeze.sas + +**/ +/* Fixture: 1e4 rows with two $32000 character columns plus all-missing numeric and character columns. NOTE(review): mf_getvarlen is called below but mf_getvarlen.sas is not in the dependency list above - confirm it is resolved elsewhere */ +data big; + length my big $32000; + do i=1 to 1e4; + my=repeat('oh my',100); + big='dawg'; + special=._; + missn=.; + missc=''; + output; + end; +run; +/* squeeze work.big into work.smaller between a SNAPSHOT and a COMPARE call of mp_assertscope */ +%mp_assertscope(SNAPSHOT) +%mp_ds2squeeze(work.big,outds=work.smaller) +%mp_assertscope(COMPARE) +/* assertions: syscc is 0, all-missing numerics get length 3, all-missing character gets length 1 */ +%mp_assert( + iftrue=(&syscc=0), + desc=Checking syscc +) +%mp_assert( + iftrue=(%mf_getvarlen(work.smaller,missn)=3), + desc=Check missing numeric is 3 +) +%mp_assert( + iftrue=(%mf_getvarlen(work.smaller,special)=3), + desc=Check missing special numeric is 3 +) +%mp_assert( + iftrue=(%mf_getvarlen(work.smaller,missc)=1), + desc=Check missing char is 1 +)