1
0
mirror of https://github.com/sasjs/core.git synced 2026-01-06 17:10:05 +00:00

chore(merge): merging with v4

Merge branch 'main' into issue144
This commit is contained in:
munja
2022-01-24 15:34:50 +01:00
25 changed files with 604 additions and 325 deletions

View File

@@ -11,7 +11,7 @@
@cond
**/
%macro mf_abort(mac=mf_abort.sas, type=deprecated, msg=, iftrue=%str(1=1)
%macro mf_abort(mac=mf_abort.sas, msg=, iftrue=%str(1=1)
)/*/STORE SOURCE*/;
%if not(%eval(%unquote(&iftrue))) %then %return;

View File

@@ -173,7 +173,8 @@
if symexist('_debug') then debug=quote(trim(symget('_debug')));
else debug='""';
put '>>weboutBEGIN<<';
put '{"START_DTTM" : "' "%sysfunc(datetime(),datetime20.3)" '"';
put '{"SYSDATE" : "' "&SYSDATE" '"';
put ',"SYSTIME" : "' "&SYSTIME" '"';
put ',"sasjsAbort" : [{';
put ' "MSG":' msg ;
put ' ,"MAC": "' "&mac" '"}]';
@@ -202,7 +203,7 @@
put ',"SYSVLONG" : ' sysvlong;
syswarningtext=quote(trim(symget('syswarningtext')));
put ",""SYSWARNINGTEXT"" : " syswarningtext;
put ',"END_DTTM" : "' "%sysfunc(datetime(),datetime20.3)" '" ';
put ',"END_DTTM" : "' "%sysfunc(datetime(),E8601DT26.6)" '" ';
put "}" @;
put '>>weboutEND<<';
run;

View File

@@ -32,7 +32,7 @@
@li COMPARE - compare the current macro variables against previous values
@param [in] scope= (GLOBAL) The scope of the variables to be checked. This
corresponds to the values in the SCOPE column in `sashelp.vmacro`.
@param [in] desc= (Testing variable scope) The user provided test description
@param [in] desc= (Testing scope leakage) The user provided test description
@param [in,out] scopeds= (work.mp_assertscope) The dataset to contain the
scope snapshot
@param [out] outds= (work.test_results) The output dataset to contain the
@@ -54,7 +54,7 @@
**/
%macro mp_assertscope(action,
desc=0,
desc=Testing Scope Leakage,
scope=GLOBAL,
scopeds=work.mp_assertscope,
outds=work.test_results

119
base/mp_ds2squeeze.sas Normal file
View File

@@ -0,0 +1,119 @@
/**
@file
@brief Create a smaller version of a dataset, without data loss
@details This macro will scan the input dataset and create a new one, that
has the minimum variable lengths needed to store the data without data loss.
Inspiration was taken from [How to Reduce the Disk Space Required by a
SAS® Data Set](https://www.lexjansen.com/nesug/nesug06/io/io18.pdf) by
Selvaratnam Sridharma. The end of the referenced paper presents a macro named
"squeeze", hence the nomenclature.
Usage:
data big;
length my big $32000;
do i=1 to 1e4;
my=repeat('oh my',100);
big='dawg';
special=._;
output;
end;
run;
%mp_ds2squeeze(work.big,outds=work.smaller)
The following will also be printed to the log (exact values may differ
depending on your OS and COMPRESS settings):
> MP_DS2SQUEEZE: work.big was 625MB
> MP_DS2SQUEEZE: work.smaller is 5MB
@param [in] libds The library.dataset to be squeezed
@param [out] outds= (work.mp_ds2squeeze) The squeezed dataset to create
@param [in] mdebug= (0) Set to 1 to enable DEBUG messages
<h4> SAS Macros </h4>
@li mf_getfilesize.sas
@li mf_getuniquefileref.sas
@li mf_getuniquename.sas
@li mp_getmaxvarlengths.sas
<h4> Related Programs </h4>
@li mp_ds2squeeze.test.sas
@version 9.3
@author Allan Bowe
**/
%macro mp_ds2squeeze(
libds,
outds=work.work.mp_ds2squeeze,
mdebug=0
)/*/STORE SOURCE*/;
%local dbg source;
%if &mdebug=1 %then %do;
%put &sysmacroname entry vars:;
%put _local_;
%end;
%else %do;
%let dbg=*;
%let source=/source2;
%end;
%local optval ds fref;
%let ds=%mf_getuniquename();
%let fref=%mf_getuniquefileref();
%mp_getmaxvarlengths(&libds,outds=&ds)
data _null_;
set &ds end=last;
file &fref;
/* grab the types */
retain dsid;
if _n_=1 then dsid=open("&libds",'is');
if dsid le 0 then do;
msg=sysmsg();
put msg=;
stop;
end;
type=vartype(dsid,varnum(dsid, name));
if last then rc=close(dsid);
/* write out the length statement */
if _n_=1 then put 'length ';
length len $6;
if type='C' then do;
if maxlen=0 then len='$1';
else len=cats('$',maxlen);
end;
else do;
if maxlen=0 then len='3';
else len=cats(maxlen);
end;
put ' ' name ' ' len;
if last then put ';';
run;
/* configure varlenchk - as we are explicitly shortening the variables */
%let optval=%sysfunc(getoption(varlenchk));
options varlenchk=NOWARN;
data &outds;
%inc &fref &source;
set &libds;
run;
options varlenchk=&optval;
%if &mdebug=0 %then %do;
proc sql;
drop table &ds;
filename &fref clear;
%end;
%put &sysmacroname: &libds was %mf_getfilesize(libds=&libds,format=yes);
%put &sysmacroname: &outds is %mf_getfilesize(libds=&outds,format=yes);
%mend mp_ds2squeeze;

View File

@@ -1,28 +1,46 @@
/**
@file mp_getmaxvarlengths.sas
@file
@brief Scans a dataset to find the max length of the variable values
@details
This macro will scan a base dataset and produce an output dataset with two
columns:
- NAME Name of the base dataset column
- MAXLEN Maximum length of the data contained therein.
- MAXLEN Maximum length of the data contained therein.
Character fields may be allocated very large widths (eg 32000) of which the
maximum value is likely to be much narrower. This macro was designed to
enable a HTML table to be appropriately sized however this could be used as
part of a data audit to ensure we aren't over-sizing our tables in relation to
the data therein.
Character fields are often allocated very large widths (eg 32000) of which the
maximum value is likely to be much narrower. Identifying such cases can be
helpful in the following scenarios:
@li Enabling a HTML table to be appropriately sized (`num2char=YES`)
@li Reducing the size of a dataset to save on storage (mp_ds2squeeze.sas)
@li Identifying columns containing nothing but missing values (`MAXLEN=0` in
the output table)
If the entire column is made up of (non-special) missing values then a value
of 0 is returned.
Numeric fields are converted using the relevant format to determine the width.
Usage:
%mp_getmaxvarlengths(sashelp.class,outds=work.myds)
@param libds Two part dataset (or view) reference.
@param outds= The output dataset to create
@param [in] libds Two part dataset (or view) reference.
@param [in] num2char= (NO) When set to NO, numeric fields are sized according
to the number of bytes used (or set to zero in the case of non-special
missings). When YES, the numeric field is converted to character (using the
format, if available), and that is sized instead, using `lengthn()`.
@param [out] outds= The output dataset to create, eg:
|NAME:$8.|MAXLEN:best.|
|---|---|
|`Name `|`7 `|
|`Sex `|`1 `|
|`Age `|`3 `|
|`Height `|`8 `|
|`Weight `|`3 `|
<h4> SAS Macros </h4>
@li mcf_length.sas
@li mf_getuniquename.sas
@li mf_getvarlist.sas
@li mf_getvartype.sas
@li mf_getvarformat.sas
@@ -30,20 +48,32 @@
@version 9.2
@author Allan Bowe
<h4> Related Macros </h4>
@li mp_ds2squeeze.sas
@li mp_getmaxvarlengths.test.sas
**/
%macro mp_getmaxvarlengths(
libds /* libref.dataset to analyse */
,outds=work.mp_getmaxvarlengths /* name of output dataset to create */
libds
,num2char=NO
,outds=work.mp_getmaxvarlengths
)/*/STORE SOURCE*/;
%local vars x var fmt;
%local vars prefix x var fmt;
%let vars=%mf_getvarlist(libds=&libds);
%let prefix=%substr(%mf_getuniquename(),1,25);
%let num2char=%upcase(&num2char);
%if &num2char=NO %then %do;
/* compile length function for numeric fields */
%mcf_length(wrap=YES, insert_cmplib=YES)
%end;
proc sql;
create table &outds (rename=(
%do x=1 %to %sysfunc(countw(&vars,%str( )));
________&x=%scan(&vars,&x)
&prefix.&x=%scan(&vars,&x)
%end;
))
as select
@@ -51,18 +81,21 @@ create table &outds (rename=(
%let var=%scan(&vars,&x);
%if &x>1 %then ,;
%if %mf_getvartype(&libds,&var)=C %then %do;
max(length(&var)) as ________&x
max(lengthn(&var)) as &prefix.&x
%end;
%else %do;
%else %if &num2char=YES %then %do;
%let fmt=%mf_getvarformat(&libds,&var);
%put fmt=&fmt;
%if %str(&fmt)=%str() %then %do;
max(length(cats(&var))) as ________&x
max(lengthn(cats(&var))) as &prefix.&x
%end;
%else %do;
max(length(put(&var,&fmt))) as ________&x
max(lengthn(put(&var,&fmt))) as &prefix.&x
%end;
%end;
%else %do;
max(mcf_length(&var)) as &prefix.&x
%end;
%end;
from &libds;

View File

@@ -33,37 +33,41 @@
%macro mp_init(prefix=SASJS
)/*/STORE SOURCE*/;
%global
&prefix._INIT_NUM /* initialisation time as numeric */
&prefix._INIT_DTTM /* initialisation time in E8601DT26.6 format */
&prefix.WORK /* avoid typing %sysfunc(pathname(work)) every time */
;
%if %eval(&&&prefix._INIT_NUM>0) %then %return; /* only run once */
%if %symexist(SASJS_PREFIX) %then %return; /* only run once */
data _null_;
dttm=datetime();
call symputx("&prefix._init_num",dttm,'g');
call symputx("&prefix._init_dttm",put(dttm,E8601DT26.6),'g');
call symputx("&prefix.work",pathname('WORK'),'g');
run;
%global
SASJS_PREFIX /* the ONLY hard-coded global macro variable in SASjs */
&prefix._INIT_NUM /* initialisation time as numeric */
&prefix._INIT_DTTM /* initialisation time in E8601DT26.6 format */
&prefix.WORK /* avoid typing %sysfunc(pathname(work)) every time */
;
options
noautocorrect /* disallow misspelled procedure names */
compress=CHAR /* default is none so ensure we have something! */
datastmtchk=ALLKEYWORDS /* protection from overwriting input datasets */
dsoptions=note2err /* undocumented - convert bad NOTEs to ERRs */
%str(err)orcheck=STRICT /* catch errs in libname/filename statements */
fmterr /* ensure err when a format cannot be found */
mergenoby=%str(ERR)OR /* throw err when a merge has no BY variables */
missing=. /* changing this can cause hard to detect errs */
noquotelenmax /* avoid warnings for long strings */
noreplace /* avoid overwriting permanent datasets */
ps=max /* reduce log size slightly */
ls=max /* reduce log even more and avoid word truncation */
validmemname=COMPATIBLE /* avoid special characters etc in table names */
validvarname=V7 /* avoid special characters etc in variable names */
varinitchk=%str(ERR)OR /* avoid data mistakes from variable name typos */
varlenchk=%str(ERR)OR /* fail hard if truncation (data loss) can result */
;
%let sasjs_prefix=&prefix;
data _null_;
dttm=datetime();
call symputx("&sasjs_prefix._init_num",dttm,'g');
call symputx("&sasjs_prefix._init_dttm",put(dttm,E8601DT26.6),'g');
call symputx("&sasjs_prefix.work",pathname('WORK'),'g');
run;
options
noautocorrect /* disallow misspelled procedure names */
compress=CHAR /* default is none so ensure we have something! */
datastmtchk=ALLKEYWORDS /* protection from overwriting input datasets */
dsoptions=note2err /* undocumented - convert bad NOTEs to ERRs */
%str(err)orcheck=STRICT /* catch errs in libname/filename statements */
fmterr /* ensure err when a format cannot be found */
mergenoby=%str(ERR)OR /* throw err when a merge has no BY variables */
missing=. /* changing this can cause hard to detect errs */
noquotelenmax /* avoid warnings for long strings */
noreplace /* avoid overwriting permanent datasets */
ps=max /* reduce log size slightly */
ls=max /* reduce log even more and avoid word truncation */
validmemname=COMPATIBLE /* avoid special characters etc in table names */
validvarname=V7 /* avoid special characters etc in variable names */
varinitchk=%str(ERR)OR /* avoid data mistakes from variable name typos */
varlenchk=%str(ERR)OR /* fail hard if truncation (data loss) can result */
;
%mend mp_init;

View File

@@ -62,7 +62,6 @@
%macro mp_jsonout(action,ds,jref=_webout,dslabel=,fmt=Y
,engine=DATASTEP
,dbg=0 /* DEPRECATED */
,missing=NULL
,showmeta=NO
)/*/STORE SOURCE*/;