1
0
mirror of https://github.com/sasjs/core.git synced 2026-01-16 04:50:05 +00:00

feat: adding mcf_length to mp_getmaxvarlengths

BREAKING CHANGE: mp_getmaxvarlengths now returns 0 for non-special missings, and will use numeric length (as opposed to cast-to-character length) by default
This commit is contained in:
munja
2022-01-23 23:26:10 +01:00
parent f7fac50108
commit 142b46570d
7 changed files with 196 additions and 53 deletions

View File

@@ -167,7 +167,7 @@ SAS code can contain one of two types of dependency - SAS Macros, and SAS Includ
@li someprogram.sas FREFTWO @li someprogram.sas FREFTWO
``` ```
The CLI can then extract all the dependencies and insert as precode (SAS Macros) or in a temp engine fileref (SAS Includes) when creating SAS Jobs and Services. The CLI can then extract all the dependencies and insert as precode (SAS Macros) or in a temp engine fileref (SAS Includes) when creating SAS Jobs and Services (and Tests).
When contributing to this library, it is therefore important to ensure that all dependencies are listed in the header in this format. When contributing to this library, it is therefore important to ensure that all dependencies are listed in the header in this format.
@@ -183,6 +183,7 @@ When contributing to this library, it is therefore important to ensure that all
- Mandatory parameters should be positional, all optional parameters should be keyword (var=) style. - Mandatory parameters should be positional, all optional parameters should be keyword (var=) style.
- All dataset references must be 2 level (eg `work.blah`, not `blah`). This is to avoid contention when options [DATASTMTCHK](https://support.sas.com/documentation/cdl/en/lrdict/64316/HTML/default/viewer.htm#a000279064.htm)=ALLKEYWORDS is in effect. - All dataset references must be 2 level (eg `work.blah`, not `blah`). This is to avoid contention when options [DATASTMTCHK](https://support.sas.com/documentation/cdl/en/lrdict/64316/HTML/default/viewer.htm#a000279064.htm)=ALLKEYWORDS is in effect.
- Avoid naming collisions! All macro variables should be local scope. Use system generated work tables where possible - eg `data ; set sashelp.class; run; data &output; set &syslast; run;` - Avoid naming collisions! All macro variables should be local scope. Use system generated work tables where possible - eg `data ; set sashelp.class; run; data &output; set &syslast; run;`
- Where global macro variables are absolutely necessary, they should make use of `&sasjs_prefix` - see mp_init.sas
- The use of `quit;` for `proc sql` is optional unless you are looking to benefit from the timing statistics. - The use of `quit;` for `proc sql` is optional unless you are looking to benefit from the timing statistics.
- Use [sasjs lint](https://github.com/sasjs/lint)! - Use [sasjs lint](https://github.com/sasjs/lint)!
@@ -192,9 +193,9 @@ When contributing to this library, it is therefore important to ensure that all
## Breaking Changes ## Breaking Changes
We are currently on major release v3. The following changes are planned when the next major (breaking) release becomes necessary: We are currently on major release v4. The following changes are planned when the next major (breaking) release becomes necessary:
* Remove `dbg` parameter from mp_jsonout.sas (implement mdebug instead) * (None as yet)
## Star Gazing ## Star Gazing

View File

@@ -1,28 +1,46 @@
/** /**
@file mp_getmaxvarlengths.sas @file
@brief Scans a dataset to find the max length of the variable values @brief Scans a dataset to find the max length of the variable values
@details @details
This macro will scan a base dataset and produce an output dataset with two This macro will scan a base dataset and produce an output dataset with two
columns: columns:
- NAME Name of the base dataset column - NAME Name of the base dataset column
- MAXLEN Maximum length of the data contained therein. - MAXLEN Maximum length of the data contained therein.
Character fields may be allocated very large widths (eg 32000) of which the Character fields are often allocated very large widths (eg 32000) of which the
maximum value is likely to be much narrower. This macro was designed to maximum value is likely to be much narrower. Identifying such cases can be
enable a HTML table to be appropriately sized however this could be used as helpful in the following scenarios:
part of a data audit to ensure we aren't over-sizing our tables in relation to
the data therein. @li Enabling a HTML table to be appropriately sized (`num2char=YES`)
@li Reducing the size of a dataset to save on storage (mp_ds2squeeze.sas)
@li Identifying columns containing nothing but missing values (`MAXLEN=0` in
the output table)
If the entire column is made up of (non-special) missing values then a value
of 0 is returned.
Numeric fields are converted using the relevant format to determine the width.
Usage: Usage:
%mp_getmaxvarlengths(sashelp.class,outds=work.myds) %mp_getmaxvarlengths(sashelp.class,outds=work.myds)
@param libds Two part dataset (or view) reference. @param [in] libds Two part dataset (or view) reference.
@param outds= The output dataset to create @param [in] num2char= (NO) When set to NO, numeric fields are sized according
to the number of bytes used (or set to zero in the case of non-special
missings). When YES, the numeric field is converted to character (using the
format, if available), and that is sized instead, using `lengthn()`.
@param [out] outds= The output dataset to create, eg:
|NAME:$8.|MAXLEN:best.|
|---|---|
|`Name `|`7 `|
|`Sex `|`1 `|
|`Age `|`3 `|
|`Height `|`8 `|
|`Weight `|`3 `|
<h4> SAS Macros </h4> <h4> SAS Macros </h4>
@li mcf_length.sas
@li mf_getuniquename.sas
@li mf_getvarlist.sas @li mf_getvarlist.sas
@li mf_getvartype.sas @li mf_getvartype.sas
@li mf_getvarformat.sas @li mf_getvarformat.sas
@@ -30,20 +48,32 @@
@version 9.2 @version 9.2
@author Allan Bowe @author Allan Bowe
<h4> Related Macros </h4>
@li mp_ds2squeeze.sas
@li mp_getmaxvarlengths.test.sas
**/ **/
%macro mp_getmaxvarlengths( %macro mp_getmaxvarlengths(
libds /* libref.dataset to analyse */ libds
,outds=work.mp_getmaxvarlengths /* name of output dataset to create */ ,num2char=NO
,outds=work.mp_getmaxvarlengths
)/*/STORE SOURCE*/; )/*/STORE SOURCE*/;
%local vars x var fmt; %local vars prefix x var fmt;
%let vars=%mf_getvarlist(libds=&libds); %let vars=%mf_getvarlist(libds=&libds);
%let prefix=%substr(%mf_getuniquename(),1,25);
%let num2char=%upcase(&num2char);
%if &num2char=NO %then %do;
/* compile length function for numeric fields */
%mcf_length(wrap=YES, insert_cmplib=YES)
%end;
proc sql; proc sql;
create table &outds (rename=( create table &outds (rename=(
%do x=1 %to %sysfunc(countw(&vars,%str( ))); %do x=1 %to %sysfunc(countw(&vars,%str( )));
________&x=%scan(&vars,&x) &prefix.&x=%scan(&vars,&x)
%end; %end;
)) ))
as select as select
@@ -51,18 +81,21 @@ create table &outds (rename=(
%let var=%scan(&vars,&x); %let var=%scan(&vars,&x);
%if &x>1 %then ,; %if &x>1 %then ,;
%if %mf_getvartype(&libds,&var)=C %then %do; %if %mf_getvartype(&libds,&var)=C %then %do;
max(length(&var)) as ________&x max(lengthn(&var)) as &prefix.&x
%end; %end;
%else %do; %else %if &num2char=YES %then %do;
%let fmt=%mf_getvarformat(&libds,&var); %let fmt=%mf_getvarformat(&libds,&var);
%put fmt=&fmt; %put fmt=&fmt;
%if %str(&fmt)=%str() %then %do; %if %str(&fmt)=%str() %then %do;
max(length(cats(&var))) as ________&x max(lengthn(cats(&var))) as &prefix.&x
%end; %end;
%else %do; %else %do;
max(length(put(&var,&fmt))) as ________&x max(lengthn(put(&var,&fmt))) as &prefix.&x
%end; %end;
%end; %end;
%else %do;
max(mcf_length(&var)) as &prefix.&x
%end;
%end; %end;
from &libds; from &libds;

View File

@@ -33,37 +33,39 @@
%macro mp_init(prefix=SASJS %macro mp_init(prefix=SASJS
)/*/STORE SOURCE*/; )/*/STORE SOURCE*/;
%global %global
&prefix._INIT_NUM /* initialisation time as numeric */ SASJS_PREFIX /* the ONLY hard-coded global macro variable in SASjs */
&prefix._INIT_DTTM /* initialisation time in E8601DT26.6 format */ &prefix._INIT_NUM /* initialisation time as numeric */
&prefix.WORK /* avoid typing %sysfunc(pathname(work)) every time */ &prefix._INIT_DTTM /* initialisation time in E8601DT26.6 format */
; &prefix.WORK /* avoid typing %sysfunc(pathname(work)) every time */
%if %eval(&&&prefix._INIT_NUM>0) %then %return; /* only run once */ ;
/* SASJS_PREFIX is declared in the %global statement above, so it resolves to
   an empty string until the first successful initialisation. The comparison
   must sit OUTSIDE %length(): measuring the text "&sasjs_prefix>0" always
   yields a non-zero length, which would make the macro return every time. */
%if %length(&sasjs_prefix)>0 %then %return; /* only run once */
%let sasjs_prefix=&prefix;
data _null_; data _null_;
dttm=datetime(); dttm=datetime();
call symputx("&prefix._init_num",dttm,'g'); call symputx("&sasjs_prefix._init_num",dttm,'g');
call symputx("&prefix._init_dttm",put(dttm,E8601DT26.6),'g'); call symputx("&sasjs_prefix._init_dttm",put(dttm,E8601DT26.6),'g');
call symputx("&prefix.work",pathname('WORK'),'g'); call symputx("&sasjs_prefix.work",pathname('WORK'),'g');
run; run;
options options
noautocorrect /* disallow misspelled procedure names */ noautocorrect /* disallow misspelled procedure names */
compress=CHAR /* default is none so ensure we have something! */ compress=CHAR /* default is none so ensure we have something! */
datastmtchk=ALLKEYWORDS /* protection from overwriting input datasets */ datastmtchk=ALLKEYWORDS /* protection from overwriting input datasets */
dsoptions=note2err /* undocumented - convert bad NOTEs to ERRs */ dsoptions=note2err /* undocumented - convert bad NOTEs to ERRs */
%str(err)orcheck=STRICT /* catch errs in libname/filename statements */ %str(err)orcheck=STRICT /* catch errs in libname/filename statements */
fmterr /* ensure err when a format cannot be found */ fmterr /* ensure err when a format cannot be found */
mergenoby=%str(ERR)OR /* throw err when a merge has no BY variables */ mergenoby=%str(ERR)OR /* throw err when a merge has no BY variables */
missing=. /* changing this can cause hard to detect errs */ missing=. /* changing this can cause hard to detect errs */
noquotelenmax /* avoid warnings for long strings */ noquotelenmax /* avoid warnings for long strings */
noreplace /* avoid overwriting permanent datasets */ noreplace /* avoid overwriting permanent datasets */
ps=max /* reduce log size slightly */ ps=max /* reduce log size slightly */
ls=max /* reduce log even more and avoid word truncation */ ls=max /* reduce log even more and avoid word truncation */
validmemname=COMPATIBLE /* avoid special characters etc in table names */ validmemname=COMPATIBLE /* avoid special characters etc in table names */
validvarname=V7 /* avoid special characters etc in variable names */ validvarname=V7 /* avoid special characters etc in variable names */
varinitchk=%str(ERR)OR /* avoid data mistakes from variable name typos */ varinitchk=%str(ERR)OR /* avoid data mistakes from variable name typos */
varlenchk=%str(ERR)OR /* fail hard if truncation (data loss) can result */ varlenchk=%str(ERR)OR /* fail hard if truncation (data loss) can result */
; ;
%mend mp_init; %mend mp_init;

View File

@@ -39,6 +39,9 @@
@param [out] pkg= (utils) The output package in which to create the function. @param [out] pkg= (utils) The output package in which to create the function.
Uses a 3 part format: libref.catalog.package Uses a 3 part format: libref.catalog.package
<h4> SAS Macros </h4>
@li mf_existfunction.sas
<h4> Related Macros </h4> <h4> Related Macros </h4>
@li mcf_length.test.sas @li mcf_length.test.sas
@@ -51,13 +54,15 @@
,pkg=UTILS ,pkg=UTILS
)/*/STORE SOURCE*/; )/*/STORE SOURCE*/;
%if %mf_existfunction(mcf_length)=1 %then %return;
%if &wrap=YES %then %do; %if &wrap=YES %then %do;
proc fcmp outlib=&lib..&cat..&pkg; proc fcmp outlib=&lib..&cat..&pkg;
%end; %end;
function mcf_length(var); function mcf_length(var);
if missing(var) then len=0; if var=. then len=0;
else if trunc(var,3)=var then len=3; else if missing(var) or trunc(var,3)=var then len=3;
else if trunc(var,4)=var then len=4; else if trunc(var,4)=var then len=4;
else if trunc(var,5)=var then len=5; else if trunc(var,5)=var then len=5;
else if trunc(var,6)=var then len=6; else if trunc(var,6)=var then len=6;

View File

@@ -39,6 +39,9 @@
@param [out] pkg= (utils) The output package in which to create the function. @param [out] pkg= (utils) The output package in which to create the function.
Uses a 3 part format: libref.catalog.package Uses a 3 part format: libref.catalog.package
<h4> SAS Macros </h4>
@li mf_existfunction.sas
**/ **/
%macro mcf_string2file(wrap=NO %macro mcf_string2file(wrap=NO
@@ -48,6 +51,8 @@
,pkg=UTILS ,pkg=UTILS
)/*/STORE SOURCE*/; )/*/STORE SOURCE*/;
%if %mf_existfunction(mcf_string2file)=1 %then %return;
%if &wrap=YES %then %do; %if &wrap=YES %then %do;
proc fcmp outlib=&lib..&cat..&pkg; proc fcmp outlib=&lib..&cat..&pkg;
%end; %end;

View File

@@ -12,6 +12,7 @@
data test; data test;
call symputx('null',mcf_length(.)); call symputx('null',mcf_length(.));
/* special missing (._) is expected to be sized as 3 bytes, not 0 */
call symputx('special',mcf_length(._));
call symputx('three',mcf_length(1)); call symputx('three',mcf_length(1));
call symputx('four',mcf_length(10000000)); call symputx('four',mcf_length(10000000));
call symputx('five',mcf_length(12345678)); call symputx('five',mcf_length(12345678));
@@ -24,6 +25,10 @@ run;
iftrue=(%str(&null)=%str(0)), iftrue=(%str(&null)=%str(0)),
desc=Check if NULL returns 0 desc=Check if NULL returns 0
) )
%mp_assert(
iftrue=(%str(&special)=%str(3)),
desc=Check if special missing ._ returns 3
)
%mp_assert( %mp_assert(
iftrue=(%str(&three)=%str(3)), iftrue=(%str(&three)=%str(3)),
desc=Check for length 3 desc=Check for length 3
@@ -48,3 +53,15 @@ run;
iftrue=(%str(&eight)=%str(8)), iftrue=(%str(&eight)=%str(8)),
desc=Check for length 8 desc=Check for length 8
) )
%mp_assert(
iftrue=(&syscc=0),
desc=Check syscc=0 before re-initialisation
)
/* test 2 - compile again to test for warnings */
%mcf_length(wrap=YES, insert_cmplib=YES)
%mp_assert(
iftrue=(&syscc=0),
desc=Check syscc=0 after re-initialisation
)

View File

@@ -0,0 +1,80 @@
/**
  @file
  @brief Testing mp_getmaxvarlengths macro
  @details Two scenarios are exercised, neither of which passes num2char:
    a run against sashelp.class, and a run against a small table that holds
    short numerics together with a special and a regular missing value.

  <h4> SAS Macros </h4>
  @li mp_getmaxvarlengths.sas
  @li mp_assert.sas
  @li mp_assertdsobs.sas
  @li mp_assertscope.sas

**/

/* test 1 - regular usage, wrapped in a macro variable scope check */
%mp_assertscope(SNAPSHOT)
%mp_getmaxvarlengths(sashelp.class,outds=work.myds)
%mp_assertscope(COMPARE,desc=checking scope leakage on mp_getmaxvarlengths)

%mp_assert(iftrue=(&syscc=0),desc=No errs)
%mp_assertdsobs(work.myds,desc=Has 5 records,test=EQUALS 5)

/* keep only the rows whose MAXLEN differs from the expected value */
data work.errs;
  set work.myds;
  if (name='Name' and maxlen ne 7)
    or (name='Sex' and maxlen ne 1)
    or (name='Age' and maxlen ne 3)
    or (name='Height' and maxlen ne 8)
    or (name='Weight' and maxlen ne 3)
  ;
run;

/* write any unexpected rows to the log to help with diagnosis */
data _null_;
  set work.errs;
  putlog (_all_)(=);
run;

%mp_assertdsobs(work.errs,desc=Err table has 0 records,test=EQUALS 0)

/* test 2 - numerics stored in 3, 5 and 8 bytes, plus special (._) and
   regular (.) missing values, written out as two identical rows */
data work.test2;
  length a 3 b 5;
  a=1/3;  b=1/3;  c=1/3;
  d=._;
  e=.;
  output;
  output;
run;

%mp_getmaxvarlengths(work.test2,outds=work.myds2)

%mp_assert(iftrue=(&syscc=0),desc=No errs in second test (with nulls))
%mp_assertdsobs(work.myds2,desc=Has 5 records,test=EQUALS 5)

/* keep only the rows whose MAXLEN differs from the expected value */
data work.errs2;
  set work.myds2;
  if (name='a' and maxlen ne 3)
    or (name='b' and maxlen ne 5)
    or (name='c' and maxlen ne 8)
    or (name='d' and maxlen ne 3)
    or (name='e' and maxlen ne 0)
  ;
run;

/* write any unexpected rows to the log to help with diagnosis */
data _null_;
  set work.errs2;
  putlog (_all_)(=);
run;

%mp_assertdsobs(work.errs2,desc=Err table has 0 records,test=EQUALS 0)