1
0
mirror of https://github.com/sasjs/core.git synced 2026-01-11 02:50:06 +00:00

feat: adding mcf_length to mp_getmaxvarlengths

BREAKING CHANGE: mp_getmaxvarlengths now returns 0 for non-special missings, and will use numeric length (as opposed to cast-to-character length) by default
This commit is contained in:
munja
2022-01-23 23:26:10 +01:00
parent f7fac50108
commit 142b46570d
7 changed files with 196 additions and 53 deletions

View File

@@ -1,28 +1,46 @@
/**
@file mp_getmaxvarlengths.sas
@file
@brief Scans a dataset to find the max length of the variable values
@details
This macro will scan a base dataset and produce an output dataset with two
columns:
- NAME Name of the base dataset column
- MAXLEN Maximum length of the data contained therein.
- MAXLEN Maximum length of the data contained therein.
Character fields may be allocated very large widths (eg 32000) of which the
maximum value is likely to be much narrower. This macro was designed to
enable a HTML table to be appropriately sized however this could be used as
part of a data audit to ensure we aren't over-sizing our tables in relation to
the data therein.
Character fields are often allocated very large widths (eg 32000) of which the
maximum value is likely to be much narrower. Identifying such cases can be
helpful in the following scenarios:
@li Enabling a HTML table to be appropriately sized (`num2char=YES`)
@li Reducing the size of a dataset to save on storage (mp_ds2squeeze.sas)
@li Identifying columns containing nothing but missing values (`MAXLEN=0` in
the output table)
If the entire column is made up of (non-special) missing values then a value
of 0 is returned.
Numeric fields are converted using the relevant format to determine the width.
Usage:
%mp_getmaxvarlengths(sashelp.class,outds=work.myds)
@param libds Two part dataset (or view) reference.
@param outds= The output dataset to create
@param [in] libds Two part dataset (or view) reference.
@param [in] num2char= (NO) When set to NO, numeric fields are sized according
to the number of bytes used (or set to zero in the case of non-special
missings). When YES, the numeric field is converted to character (using the
format, if available), and that is sized instead, using `lengthn()`.
@param [out] outds= The output dataset to create, eg:
|NAME:$8.|MAXLEN:best.|
|---|---|
|`Name `|`7 `|
|`Sex `|`1 `|
|`Age `|`3 `|
|`Height `|`8 `|
|`Weight `|`3 `|
<h4> SAS Macros </h4>
@li mcf_length.sas
@li mf_getuniquename.sas
@li mf_getvarlist.sas
@li mf_getvartype.sas
@li mf_getvarformat.sas
@@ -30,20 +48,32 @@
@version 9.2
@author Allan Bowe
<h4> Related Macros </h4>
@li mp_ds2squeeze.sas
@li mp_getmaxvarlengths.test.sas
**/
%macro mp_getmaxvarlengths(
libds /* libref.dataset to analyse */
,outds=work.mp_getmaxvarlengths /* name of output dataset to create */
libds
,num2char=NO
,outds=work.mp_getmaxvarlengths
)/*/STORE SOURCE*/;
%local vars x var fmt;
%local vars prefix x var fmt;
%let vars=%mf_getvarlist(libds=&libds);
%let prefix=%substr(%mf_getuniquename(),1,25);
%let num2char=%upcase(&num2char);
%if &num2char=NO %then %do;
/* compile length function for numeric fields */
%mcf_length(wrap=YES, insert_cmplib=YES)
%end;
proc sql;
create table &outds (rename=(
%do x=1 %to %sysfunc(countw(&vars,%str( )));
________&x=%scan(&vars,&x)
&prefix.&x=%scan(&vars,&x)
%end;
))
as select
@@ -51,18 +81,21 @@ create table &outds (rename=(
%let var=%scan(&vars,&x);
%if &x>1 %then ,;
%if %mf_getvartype(&libds,&var)=C %then %do;
max(length(&var)) as ________&x
max(lengthn(&var)) as &prefix.&x
%end;
%else %do;
%else %if &num2char=YES %then %do;
%let fmt=%mf_getvarformat(&libds,&var);
%put fmt=&fmt;
%if %str(&fmt)=%str() %then %do;
max(length(cats(&var))) as ________&x
max(lengthn(cats(&var))) as &prefix.&x
%end;
%else %do;
max(length(put(&var,&fmt))) as ________&x
max(lengthn(put(&var,&fmt))) as &prefix.&x
%end;
%end;
%else %do;
max(mcf_length(&var)) as &prefix.&x
%end;
%end;
from &libds;

View File

@@ -33,37 +33,39 @@
%macro mp_init(prefix=SASJS
)/*/STORE SOURCE*/;
%global
&prefix._INIT_NUM /* initialisation time as numeric */
&prefix._INIT_DTTM /* initialisation time in E8601DT26.6 format */
&prefix.WORK /* avoid typing %sysfunc(pathname(work)) every time */
;
%if %eval(&&&prefix._INIT_NUM>0) %then %return; /* only run once */
%global
SASJS_PREFIX /* the ONLY hard-coded global macro variable in SASjs */
&prefix._INIT_NUM /* initialisation time as numeric */
&prefix._INIT_DTTM /* initialisation time in E8601DT26.6 format */
&prefix.WORK /* avoid typing %sysfunc(pathname(work)) every time */
;
%if %length(&sasjs_prefix>0) %then %return; /* only run once */
%let sasjs_prefix=&prefix;
data _null_;
dttm=datetime();
call symputx("&prefix._init_num",dttm,'g');
call symputx("&prefix._init_dttm",put(dttm,E8601DT26.6),'g');
call symputx("&prefix.work",pathname('WORK'),'g');
run;
data _null_;
dttm=datetime();
call symputx("&sasjs_prefix._init_num",dttm,'g');
call symputx("&sasjs_prefix._init_dttm",put(dttm,E8601DT26.6),'g');
call symputx("&sasjs_prefix.work",pathname('WORK'),'g');
run;
options
noautocorrect /* disallow misspelled procedure names */
compress=CHAR /* default is none so ensure we have something! */
datastmtchk=ALLKEYWORDS /* protection from overwriting input datasets */
dsoptions=note2err /* undocumented - convert bad NOTEs to ERRs */
%str(err)orcheck=STRICT /* catch errs in libname/filename statements */
fmterr /* ensure err when a format cannot be found */
mergenoby=%str(ERR)OR /* throw err when a merge has no BY variables */
missing=. /* changing this can cause hard to detect errs */
noquotelenmax /* avoid warnings for long strings */
noreplace /* avoid overwriting permanent datasets */
ps=max /* reduce log size slightly */
ls=max /* reduce log even more and avoid word truncation */
validmemname=COMPATIBLE /* avoid special characters etc in table names */
validvarname=V7 /* avoid special characters etc in variable names */
varinitchk=%str(ERR)OR /* avoid data mistakes from variable name typos */
varlenchk=%str(ERR)OR /* fail hard if truncation (data loss) can result */
;
options
noautocorrect /* disallow misspelled procedure names */
compress=CHAR /* default is none so ensure we have something! */
datastmtchk=ALLKEYWORDS /* protection from overwriting input datasets */
dsoptions=note2err /* undocumented - convert bad NOTEs to ERRs */
%str(err)orcheck=STRICT /* catch errs in libname/filename statements */
fmterr /* ensure err when a format cannot be found */
mergenoby=%str(ERR)OR /* throw err when a merge has no BY variables */
missing=. /* changing this can cause hard to detect errs */
noquotelenmax /* avoid warnings for long strings */
noreplace /* avoid overwriting permanent datasets */
ps=max /* reduce log size slightly */
ls=max /* reduce log even more and avoid word truncation */
validmemname=COMPATIBLE /* avoid special characters etc in table names */
validvarname=V7 /* avoid special characters etc in variable names */
varinitchk=%str(ERR)OR /* avoid data mistakes from variable name typos */
varlenchk=%str(ERR)OR /* fail hard if truncation (data loss) can result */
;
%mend mp_init;