mirror of
https://github.com/sasjs/core.git
synced 2025-12-11 06:24:35 +00:00
feat: adding mcf_length to mp_getmaxvarlengths
BREAKING CHANGE: mp_getmaxvarlengths now returns 0 for non-special missings, and will use numeric length (as opposed to cast-to-character length) by default
This commit is contained in:
@@ -167,7 +167,7 @@ SAS code can contain one of two types of dependency - SAS Macros, and SAS Includ
|
||||
@li someprogram.sas FREFTWO
|
||||
```
|
||||
|
||||
The CLI can then extract all the dependencies and insert as precode (SAS Macros) or in a temp engine fileref (SAS Includes) when creating SAS Jobs and Services.
|
||||
The CLI can then extract all the dependencies and insert as precode (SAS Macros) or in a temp engine fileref (SAS Includes) when creating SAS Jobs and Services (and Tests).
|
||||
|
||||
When contributing to this library, it is therefore important to ensure that all dependencies are listed in the header in this format.
|
||||
|
||||
@@ -183,6 +183,7 @@ When contributing to this library, it is therefore important to ensure that all
|
||||
- Mandatory parameters should be positional, all optional parameters should be keyword (var=) style.
|
||||
- All dataset references must be 2 level (eg `work.blah`, not `blah`). This is to avoid contention when options [DATASTMTCHK](https://support.sas.com/documentation/cdl/en/lrdict/64316/HTML/default/viewer.htm#a000279064.htm)=ALLKEYWORDS is in effect.
|
||||
- Avoid naming collisions! All macro variables should be local scope. Use system generated work tables where possible - eg `data ; set sashelp.class; run; data &output; set &syslast; run;`
|
||||
- Where global macro variables are absolutely necessary, they should make use of `&sasjs_prefix` - see mp_init.sas
|
||||
- The use of `quit;` for `proc sql` is optional unless you are looking to benefit from the timing statistics.
|
||||
- Use [sasjs lint](https://github.com/sasjs/lint)!
|
||||
|
||||
@@ -192,9 +193,9 @@ When contributing to this library, it is therefore important to ensure that all
|
||||
|
||||
## Breaking Changes
|
||||
|
||||
We are currently on major release v3. The following changes are planned when the next major (breaking) release becomes necessary:
|
||||
We are currently on major release v4. The following changes are planned when the next major (breaking) release becomes necessary:
|
||||
|
||||
* Remove `dbg` parameter from mp_jsonout.sas (implement mdebug instead)
|
||||
* (None as yet)
|
||||
|
||||
## Star Gazing
|
||||
|
||||
|
||||
@@ -1,28 +1,46 @@
|
||||
/**
|
||||
@file mp_getmaxvarlengths.sas
|
||||
@file
|
||||
@brief Scans a dataset to find the max length of the variable values
|
||||
@details
|
||||
This macro will scan a base dataset and produce an output dataset with two
|
||||
columns:
|
||||
|
||||
- NAME Name of the base dataset column
|
||||
- MAXLEN Maximum length of the data contained therein.
|
||||
- MAXLEN Maximum length of the data contained therein.
|
||||
|
||||
Character fields may be allocated very large widths (eg 32000) of which the
|
||||
maximum value is likely to be much narrower. This macro was designed to
|
||||
enable a HTML table to be appropriately sized however this could be used as
|
||||
part of a data audit to ensure we aren't over-sizing our tables in relation to
|
||||
the data therein.
|
||||
Character fields are often allocated very large widths (eg 32000) of which the
|
||||
maximum value is likely to be much narrower. Identifying such cases can be
|
||||
helpful in the following scenarios:
|
||||
|
||||
@li Enabling a HTML table to be appropriately sized (`num2char=YES`)
|
||||
@li Reducing the size of a dataset to save on storage (mp_ds2squeeze.sas)
|
||||
@li Identifying columns containing nothing but missing values (`MAXLEN=0` in
|
||||
the output table)
|
||||
|
||||
If the entire column is made up of (non-special) missing values then a value
|
||||
of 0 is returned.
|
||||
|
||||
When `num2char=YES`, numeric fields are converted using the relevant format to determine the width. When `num2char=NO` (the default), they are sized by the number of bytes used.
|
||||
Usage:
|
||||
|
||||
%mp_getmaxvarlengths(sashelp.class,outds=work.myds)
|
||||
|
||||
@param libds Two part dataset (or view) reference.
|
||||
@param outds= The output dataset to create
|
||||
@param [in] libds Two part dataset (or view) reference.
|
||||
@param [in] num2char= (NO) When set to NO, numeric fields are sized according
|
||||
to the number of bytes used (or set to zero in the case of non-special
|
||||
missings). When YES, the numeric field is converted to character (using the
|
||||
format, if available), and that is sized instead, using `lengthn()`.
|
||||
@param [out] outds= The output dataset to create, eg:
|
||||
|NAME:$8.|MAXLEN:best.|
|
||||
|---|---|
|
||||
|`Name `|`7 `|
|
||||
|`Sex `|`1 `|
|
||||
|`Age `|`3 `|
|
||||
|`Height `|`8 `|
|
||||
|`Weight `|`3 `|
|
||||
|
||||
<h4> SAS Macros </h4>
|
||||
@li mcf_length.sas
|
||||
@li mf_getuniquename.sas
|
||||
@li mf_getvarlist.sas
|
||||
@li mf_getvartype.sas
|
||||
@li mf_getvarformat.sas
|
||||
@@ -30,20 +48,32 @@
|
||||
@version 9.2
|
||||
@author Allan Bowe
|
||||
|
||||
<h4> Related Macros </h4>
|
||||
@li mp_ds2squeeze.sas
|
||||
@li mp_getmaxvarlengths.test.sas
|
||||
|
||||
**/
|
||||
|
||||
%macro mp_getmaxvarlengths(
|
||||
libds /* libref.dataset to analyse */
|
||||
,outds=work.mp_getmaxvarlengths /* name of output dataset to create */
|
||||
libds
|
||||
,num2char=NO
|
||||
,outds=work.mp_getmaxvarlengths
|
||||
)/*/STORE SOURCE*/;
|
||||
|
||||
%local vars x var fmt;
|
||||
%local vars prefix x var fmt;
|
||||
%let vars=%mf_getvarlist(libds=&libds);
|
||||
%let prefix=%substr(%mf_getuniquename(),1,25);
|
||||
%let num2char=%upcase(&num2char);
|
||||
|
||||
%if &num2char=NO %then %do;
|
||||
/* compile length function for numeric fields */
|
||||
%mcf_length(wrap=YES, insert_cmplib=YES)
|
||||
%end;
|
||||
|
||||
proc sql;
|
||||
create table &outds (rename=(
|
||||
%do x=1 %to %sysfunc(countw(&vars,%str( )));
|
||||
________&x=%scan(&vars,&x)
|
||||
&prefix.&x=%scan(&vars,&x)
|
||||
%end;
|
||||
))
|
||||
as select
|
||||
@@ -51,18 +81,21 @@ create table &outds (rename=(
|
||||
%let var=%scan(&vars,&x);
|
||||
%if &x>1 %then ,;
|
||||
%if %mf_getvartype(&libds,&var)=C %then %do;
|
||||
max(length(&var)) as ________&x
|
||||
max(lengthn(&var)) as &prefix.&x
|
||||
%end;
|
||||
%else %do;
|
||||
%else %if &num2char=YES %then %do;
|
||||
%let fmt=%mf_getvarformat(&libds,&var);
|
||||
%put fmt=&fmt;
|
||||
%if %str(&fmt)=%str() %then %do;
|
||||
max(length(cats(&var))) as ________&x
|
||||
max(lengthn(cats(&var))) as &prefix.&x
|
||||
%end;
|
||||
%else %do;
|
||||
max(length(put(&var,&fmt))) as ________&x
|
||||
max(lengthn(put(&var,&fmt))) as &prefix.&x
|
||||
%end;
|
||||
%end;
|
||||
%else %do;
|
||||
max(mcf_length(&var)) as &prefix.&x
|
||||
%end;
|
||||
%end;
|
||||
from &libds;
|
||||
|
||||
|
||||
@@ -33,37 +33,39 @@
|
||||
%macro mp_init(prefix=SASJS
|
||||
)/*/STORE SOURCE*/;
|
||||
|
||||
%global
|
||||
&prefix._INIT_NUM /* initialisation time as numeric */
|
||||
&prefix._INIT_DTTM /* initialisation time in E8601DT26.6 format */
|
||||
&prefix.WORK /* avoid typing %sysfunc(pathname(work)) every time */
|
||||
;
|
||||
%if %eval(&&&prefix._INIT_NUM>0) %then %return; /* only run once */
|
||||
%global
|
||||
SASJS_PREFIX /* the ONLY hard-coded global macro variable in SASjs */
|
||||
&prefix._INIT_NUM /* initialisation time as numeric */
|
||||
&prefix._INIT_DTTM /* initialisation time in E8601DT26.6 format */
|
||||
&prefix.WORK /* avoid typing %sysfunc(pathname(work)) every time */
|
||||
;
|
||||
/* only run once: exit if sasjs_prefix already has a value (it is assigned below).
   The closing parenthesis must precede >0, otherwise %length() measures the
   text ">0" as well and the condition is always true. */
%if %length(&sasjs_prefix)>0 %then %return;
|
||||
%let sasjs_prefix=&prefix;
|
||||
|
||||
data _null_;
|
||||
dttm=datetime();
|
||||
call symputx("&prefix._init_num",dttm,'g');
|
||||
call symputx("&prefix._init_dttm",put(dttm,E8601DT26.6),'g');
|
||||
call symputx("&prefix.work",pathname('WORK'),'g');
|
||||
run;
|
||||
data _null_;
|
||||
dttm=datetime();
|
||||
call symputx("&sasjs_prefix._init_num",dttm,'g');
|
||||
call symputx("&sasjs_prefix._init_dttm",put(dttm,E8601DT26.6),'g');
|
||||
call symputx("&sasjs_prefix.work",pathname('WORK'),'g');
|
||||
run;
|
||||
|
||||
options
|
||||
noautocorrect /* disallow misspelled procedure names */
|
||||
compress=CHAR /* default is none so ensure we have something! */
|
||||
datastmtchk=ALLKEYWORDS /* protection from overwriting input datasets */
|
||||
dsoptions=note2err /* undocumented - convert bad NOTEs to ERRs */
|
||||
%str(err)orcheck=STRICT /* catch errs in libname/filename statements */
|
||||
fmterr /* ensure err when a format cannot be found */
|
||||
mergenoby=%str(ERR)OR /* throw err when a merge has no BY variables */
|
||||
missing=. /* changing this can cause hard to detect errs */
|
||||
noquotelenmax /* avoid warnings for long strings */
|
||||
noreplace /* avoid overwriting permanent datasets */
|
||||
ps=max /* reduce log size slightly */
|
||||
ls=max /* reduce log even more and avoid word truncation */
|
||||
validmemname=COMPATIBLE /* avoid special characters etc in table names */
|
||||
validvarname=V7 /* avoid special characters etc in variable names */
|
||||
varinitchk=%str(ERR)OR /* avoid data mistakes from variable name typos */
|
||||
varlenchk=%str(ERR)OR /* fail hard if truncation (data loss) can result */
|
||||
;
|
||||
options
|
||||
noautocorrect /* disallow misspelled procedure names */
|
||||
compress=CHAR /* default is none so ensure we have something! */
|
||||
datastmtchk=ALLKEYWORDS /* protection from overwriting input datasets */
|
||||
dsoptions=note2err /* undocumented - convert bad NOTEs to ERRs */
|
||||
%str(err)orcheck=STRICT /* catch errs in libname/filename statements */
|
||||
fmterr /* ensure err when a format cannot be found */
|
||||
mergenoby=%str(ERR)OR /* throw err when a merge has no BY variables */
|
||||
missing=. /* changing this can cause hard to detect errs */
|
||||
noquotelenmax /* avoid warnings for long strings */
|
||||
noreplace /* avoid overwriting permanent datasets */
|
||||
ps=max /* reduce log size slightly */
|
||||
ls=max /* reduce log even more and avoid word truncation */
|
||||
validmemname=COMPATIBLE /* avoid special characters etc in table names */
|
||||
validvarname=V7 /* avoid special characters etc in variable names */
|
||||
varinitchk=%str(ERR)OR /* avoid data mistakes from variable name typos */
|
||||
varlenchk=%str(ERR)OR /* fail hard if truncation (data loss) can result */
|
||||
;
|
||||
|
||||
%mend mp_init;
|
||||
@@ -39,6 +39,9 @@
|
||||
@param [out] pkg= (utils) The output package in which to create the function.
|
||||
Uses a 3 part format: libref.catalog.package
|
||||
|
||||
<h4> SAS Macros </h4>
|
||||
@li mf_existfunction.sas
|
||||
|
||||
<h4> Related Macros </h4>
|
||||
@li mcf_length.test.sas
|
||||
|
||||
@@ -51,13 +54,15 @@
|
||||
,pkg=UTILS
|
||||
)/*/STORE SOURCE*/;
|
||||
|
||||
%if %mf_existfunction(mcf_length)=1 %then %return;
|
||||
|
||||
%if &wrap=YES %then %do;
|
||||
proc fcmp outlib=&lib..&cat..&pkg;
|
||||
%end;
|
||||
|
||||
function mcf_length(var);
|
||||
if missing(var) then len=0;
|
||||
else if trunc(var,3)=var then len=3;
|
||||
if var=. then len=0;
|
||||
else if missing(var) or trunc(var,3)=var then len=3;
|
||||
else if trunc(var,4)=var then len=4;
|
||||
else if trunc(var,5)=var then len=5;
|
||||
else if trunc(var,6)=var then len=6;
|
||||
|
||||
@@ -39,6 +39,9 @@
|
||||
@param [out] pkg= (utils) The output package in which to create the function.
|
||||
Uses a 3 part format: libref.catalog.package
|
||||
|
||||
<h4> SAS Macros </h4>
|
||||
@li mf_existfunction.sas
|
||||
|
||||
**/
|
||||
|
||||
%macro mcf_string2file(wrap=NO
|
||||
@@ -48,6 +51,8 @@
|
||||
,pkg=UTILS
|
||||
)/*/STORE SOURCE*/;
|
||||
|
||||
%if %mf_existfunction(mcf_string2file)=1 %then %return;
|
||||
|
||||
%if &wrap=YES %then %do;
|
||||
proc fcmp outlib=&lib..&cat..&pkg;
|
||||
%end;
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
|
||||
data test;
|
||||
call symputx('null',mcf_length(.));
|
||||
call symputx('special',mcf_length(._)); /* special missing value ._ */
|
||||
call symputx('three',mcf_length(1));
|
||||
call symputx('four',mcf_length(10000000));
|
||||
call symputx('five',mcf_length(12345678));
|
||||
@@ -24,6 +25,10 @@ run;
|
||||
iftrue=(%str(&null)=%str(0)),
|
||||
desc=Check if NULL returns 0
|
||||
)
|
||||
%mp_assert(
|
||||
iftrue=(%str(&special)=%str(3)),
|
||||
desc=Check if special missing ._ returns 3
|
||||
)
|
||||
%mp_assert(
|
||||
iftrue=(%str(&three)=%str(3)),
|
||||
desc=Check for length 3
|
||||
@@ -47,4 +52,16 @@ run;
|
||||
%mp_assert(
|
||||
iftrue=(%str(&eight)=%str(8)),
|
||||
desc=Check for length 8
|
||||
)
|
||||
%mp_assert(
|
||||
iftrue=(&syscc=0),
|
||||
desc=Check syscc=0 before re-initialisation
|
||||
)
|
||||
|
||||
/* test 2 - compile again test for warnings */
|
||||
%mcf_length(wrap=YES, insert_cmplib=YES)
|
||||
|
||||
%mp_assert(
|
||||
iftrue=(&syscc=0),
|
||||
desc=Check syscc=0 after re-initialisation
|
||||
)
|
||||
80
tests/crossplatform/mp_getmaxvarlengths.test.sas
Normal file
80
tests/crossplatform/mp_getmaxvarlengths.test.sas
Normal file
@@ -0,0 +1,80 @@
|
||||
/**
|
||||
@file
|
||||
@brief Testing mp_getmaxvarlengths macro
|
||||
|
||||
<h4> SAS Macros </h4>
|
||||
@li mp_getmaxvarlengths.sas
|
||||
@li mp_assert.sas
|
||||
@li mp_assertdsobs.sas
|
||||
@li mp_assertscope.sas
|
||||
|
||||
**/
|
||||
|
||||
|
||||
/* regular usage */
|
||||
%mp_assertscope(SNAPSHOT)
|
||||
%mp_getmaxvarlengths(sashelp.class,outds=work.myds)
|
||||
%mp_assertscope(COMPARE,desc=checking scope leakage on mp_getmaxvarlengths)
|
||||
%mp_assert(
|
||||
iftrue=(&syscc=0),
|
||||
desc=No errs
|
||||
)
|
||||
%mp_assertdsobs(work.myds,
|
||||
desc=Has 5 records,
|
||||
test=EQUALS 5
|
||||
)
|
||||
data work.errs;
|
||||
set work.myds;
|
||||
if name='Name' and maxlen ne 7 then output;
|
||||
if name='Sex' and maxlen ne 1 then output;
|
||||
if name='Age' and maxlen ne 3 then output;
|
||||
if name='Height' and maxlen ne 8 then output;
|
||||
if name='Weight' and maxlen ne 3 then output;
|
||||
run;
|
||||
data _null_;
|
||||
set work.errs;
|
||||
putlog (_all_)(=);
|
||||
run;
|
||||
|
||||
%mp_assertdsobs(work.errs,
|
||||
desc=Err table has 0 records,
|
||||
test=EQUALS 0
|
||||
)
|
||||
|
||||
/* test2 */
|
||||
data work.test2;
|
||||
length a 3 b 5;
|
||||
a=1/3;
|
||||
b=1/3;
|
||||
c=1/3;
|
||||
d=._;
|
||||
e=.;
|
||||
output;
|
||||
output;
|
||||
run;
|
||||
%mp_getmaxvarlengths(work.test2,outds=work.myds2)
|
||||
%mp_assert(
|
||||
iftrue=(&syscc=0),
|
||||
desc=No errs in second test (with nulls)
|
||||
)
|
||||
%mp_assertdsobs(work.myds2,
|
||||
desc=Has 5 records,
|
||||
test=EQUALS 5
|
||||
)
|
||||
data work.errs2;
|
||||
set work.myds2;
|
||||
if name='a' and maxlen ne 3 then output;
|
||||
if name='b' and maxlen ne 5 then output;
|
||||
if name='c' and maxlen ne 8 then output;
|
||||
if name='d' and maxlen ne 3 then output;
|
||||
if name='e' and maxlen ne 0 then output;
|
||||
run;
|
||||
data _null_;
|
||||
set work.errs2;
|
||||
putlog (_all_)(=);
|
||||
run;
|
||||
|
||||
%mp_assertdsobs(work.errs2,
|
||||
desc=Err table has 0 records,
|
||||
test=EQUALS 0
|
||||
)
|
||||
Reference in New Issue
Block a user