mirror of
https://github.com/sasjs/core.git
synced 2026-01-08 01:50:05 +00:00
feat: mp_ds2squeeze macro
This commit is contained in:
@@ -29,7 +29,7 @@
|
|||||||
@li COMPARE - compare the current macro variables against previous values
|
@li COMPARE - compare the current macro variables against previous values
|
||||||
@param [in] scope= (GLOBAL) The scope of the variables to be checked. This
|
@param [in] scope= (GLOBAL) The scope of the variables to be checked. This
|
||||||
corresponds to the values in the SCOPE column in `sashelp.vmacro`.
|
corresponds to the values in the SCOPE column in `sashelp.vmacro`.
|
||||||
@param [in] desc= (Testing variable scope) The user provided test description
|
@param [in] desc= (Testing Scope Leakage) The user provided test description
|
||||||
@param [in,out] scopeds= (work.mp_assertscope) The dataset to contain the
|
@param [in,out] scopeds= (work.mp_assertscope) The dataset to contain the
|
||||||
scope snapshot
|
scope snapshot
|
||||||
@param [out] outds= (work.test_results) The output dataset to contain the
|
@param [out] outds= (work.test_results) The output dataset to contain the
|
||||||
@@ -51,7 +51,7 @@
|
|||||||
**/
|
**/
|
||||||
|
|
||||||
%macro mp_assertscope(action,
|
%macro mp_assertscope(action,
|
||||||
desc=0,
|
desc=Testing Scope Leakage,
|
||||||
scope=GLOBAL,
|
scope=GLOBAL,
|
||||||
scopeds=work.mp_assertscope,
|
scopeds=work.mp_assertscope,
|
||||||
outds=work.test_results
|
outds=work.test_results
|
||||||
|
|||||||
118
base/mp_ds2squeeze.sas
Normal file
118
base/mp_ds2squeeze.sas
Normal file
@@ -0,0 +1,118 @@
|
|||||||
|
/**
|
||||||
|
@file
|
||||||
|
@brief Create a smaller version of a dataset, without data loss
|
||||||
|
@details This macro will scan the input dataset and create a new one, that
|
||||||
|
has the minimum variable lengths needed to store the data without data loss.
|
||||||
|
|
||||||
|
Inspiration was taken from [How to Reduce the Disk Space Required by a
|
||||||
|
SAS® Data Set](https://www.lexjansen.com/nesug/nesug06/io/io18.pdf) by
|
||||||
|
Selvaratnam Sridharma. The end of the referenced paper presents a macro named
|
||||||
|
"squeeze", hence the nomenclature.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
|
||||||
|
data big;
|
||||||
|
length my big $32000;
|
||||||
|
do i=1 to 1e4;
|
||||||
|
my=repeat('oh my',100);
|
||||||
|
big='dawg';
|
||||||
|
special=._;
|
||||||
|
output;
|
||||||
|
end;
|
||||||
|
run;
|
||||||
|
|
||||||
|
%mp_ds2squeeze(work.big,outds=work.smaller)
|
||||||
|
|
||||||
|
The following will also be printed to the log (exact values may differ
|
||||||
|
depending on your OS and COMPRESS settings):
|
||||||
|
|
||||||
|
> MP_DS2SQUEEZE: work.big was 625MB
|
||||||
|
> MP_DS2SQUEEZE: work.smaller is 5MB
|
||||||
|
|
||||||
|
@param [in] libds The library.dataset to be squeezed
|
||||||
|
@param [out] outds= (work.mp_ds2squeeze) The squeezed dataset to create
|
||||||
|
@param [in] mdebug= (0) Set to 1 to enable DEBUG messages
|
||||||
|
|
||||||
|
<h4> SAS Macros </h4>
|
||||||
|
@li mf_getfilesize.sas
|
||||||
|
@li mf_getuniquefileref.sas
|
||||||
|
@li mf_getuniquename.sas
|
||||||
|
@li mp_getmaxvarlengths.sas
|
||||||
|
|
||||||
|
<h4> Related Programs </h4>
|
||||||
|
@li mp_ds2squeeze.test.sas
|
||||||
|
|
||||||
|
@version 9.3
|
||||||
|
@author Allan Bowe
|
||||||
|
**/
|
||||||
|
|
||||||
|
%macro mp_ds2squeeze(
  libds,
  outds=work.mp_ds2squeeze,
  mdebug=0
)/*/STORE SOURCE*/;
  /*
    Rebuilds &libds as &outds, declaring every variable with the length
    reported by mp_getmaxvarlengths.

    libds  - the library.dataset to be squeezed
    outds  - the squeezed dataset to create
    mdebug - set to 1 to print the local macro variables on entry and to
             keep the temporary dataset and fileref after the run
  */
  %local dbg source;
  %if &mdebug=1 %then %do;
    %put &sysmacroname entry vars:;
    %put _local_;
  %end;
  %else %do;
    /*
      NOTE(review): /source2 (echo the included code to the log) is enabled
      only when mdebug is NOT 1 - this looks inverted, confirm the intent.
      dbg is assigned but not referenced anywhere in this macro.
    */
    %let dbg=*;
    %let source=/source2;
  %end;

  %local optval ds fref;
  %let ds=%mf_getuniquename();
  %let fref=%mf_getuniquefileref();

  /* one row per variable in &libds, read below via NAME and MAXLEN */
  %mp_getmaxvarlengths(&libds,outds=&ds)

  /* generate a LENGTH statement into the temporary fileref */
  data _null_;
    set &ds end=last;
    file &fref;
    /* grab the types */
    retain dsid;
    if _n_=1 then dsid=open("&libds",'is');
    if dsid le 0 then do;
      msg=sysmsg();
      /* putlog, not put: put would write into the generated code file */
      putlog msg=;
      stop;
    end;
    type=vartype(dsid,varnum(dsid, name));
    if last then rc=close(dsid);
    /* write out the length statement */
    if _n_=1 then put 'length ';
    length len $6;
    if type='C' then do;
      /* a zero length is not valid - fall back to one byte */
      if maxlen=0 then len='$1';
      else len=cats('$',maxlen);
    end;
    else do;
      /* a zero maxlen numeric is given a length of 3 */
      if maxlen=0 then len='3';
      /* cats() avoids an implicit numeric-to-character conversion */
      else len=cats(maxlen);
    end;
    put ' ' name ' ' len;
    if last then put ';';
  run;

  /* configure varlenchk - as we are explicitly shortening the variables */
  %let optval=%sysfunc(getoption(varlenchk));
  options varlenchk=NOWARN;

  /* the LENGTH statement must precede SET so that the new lengths apply */
  data &outds;
    %inc &fref &source;
    set &libds;
  run;

  /* restore the option value captured above */
  options varlenchk=&optval;

  %if &mdebug=0 %then %do;
    proc sql;
    drop table &ds;
    quit; /* end the procedure so it is not left running after the macro */
    filename &fref clear;
  %end;

  %put &sysmacroname: &libds was %mf_getfilesize(libds=&libds,format=yes);
  %put &sysmacroname: &outds is %mf_getfilesize(libds=&outds,format=yes);

%mend mp_ds2squeeze;
|
||||||
@@ -33,13 +33,15 @@
|
|||||||
%macro mp_init(prefix=SASJS
|
%macro mp_init(prefix=SASJS
|
||||||
)/*/STORE SOURCE*/;
|
)/*/STORE SOURCE*/;
|
||||||
|
|
||||||
|
%if %symexist(SASJS_PREFIX) %then %return; /* only run once */
|
||||||
|
|
||||||
%global
|
%global
|
||||||
SASJS_PREFIX /* the ONLY hard-coded global macro variable in SASjs */
|
SASJS_PREFIX /* the ONLY hard-coded global macro variable in SASjs */
|
||||||
&prefix._INIT_NUM /* initialisation time as numeric */
|
&prefix._INIT_NUM /* initialisation time as numeric */
|
||||||
&prefix._INIT_DTTM /* initialisation time in E8601DT26.6 format */
|
&prefix._INIT_DTTM /* initialisation time in E8601DT26.6 format */
|
||||||
&prefix.WORK /* avoid typing %sysfunc(pathname(work)) every time */
|
&prefix.WORK /* avoid typing %sysfunc(pathname(work)) every time */
|
||||||
;
|
;
|
||||||
%if %length(&sasjs_prefix>0) %then %return; /* only run once */
|
|
||||||
%let sasjs_prefix=&prefix;
|
%let sasjs_prefix=&prefix;
|
||||||
|
|
||||||
data _null_;
|
data _null_;
|
||||||
|
|||||||
43
tests/crossplatform/mp_ds2squeeze.test.sas
Normal file
43
tests/crossplatform/mp_ds2squeeze.test.sas
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
/**
  @file
  @brief Testing mp_ds2squeeze.sas macro

  <h4> SAS Macros </h4>
  @li mf_getvarlen.sas
  @li mp_assert.sas
  @li mp_assertscope.sas
  @li mp_ds2squeeze.sas

**/

/* wide character variables plus entirely-missing numeric and char columns */
data big;
  length my big $32000;
  do i=1 to 1e4;
    my=repeat('oh my',100);
    big='dawg';
    special=._;
    missn=.;
    missc='';
    output;
  end;
run;

/* snapshot / compare around the call to detect leaked macro variables */
%mp_assertscope(SNAPSHOT)
%mp_ds2squeeze(work.big,outds=work.smaller)
%mp_assertscope(COMPARE)

%mp_assert(
  iftrue=(&syscc=0),
  desc=Checking syscc
)
%mp_assert(
  iftrue=(%mf_getvarlen(work.smaller,missn)=3),
  desc=Check missing numeric is 3
)
%mp_assert(
  iftrue=(%mf_getvarlen(work.smaller,special)=3),
  desc=Check missing special numeric is 3
)
%mp_assert(
  iftrue=(%mf_getvarlen(work.smaller,missc)=1),
  desc=Check missing char is 1
)
|
||||||
Reference in New Issue
Block a user