From 142b46570d516b8095cbd6fd7ed65935e54d7c3a Mon Sep 17 00:00:00 2001 From: munja Date: Sun, 23 Jan 2022 23:26:10 +0100 Subject: [PATCH] feat: adding mcf_length to mp_getmaxvarlengths BREAKING CHANGE: mp_getmaxvarlengths now returns 0 for non-special missings, and will use numeric length (as opposed to cast-to-character length) by default --- README.md | 7 +- base/mp_getmaxvarlengths.sas | 69 +++++++++++----- base/mp_init.sas | 62 +++++++------- fcmp/mcf_length.sas | 9 ++- fcmp/mcf_string2file.sas | 5 ++ tests/crossplatform/mcf_length.test.sas | 17 ++++ .../mp_getmaxvarlengths.test.sas | 80 +++++++++++++++++++ 7 files changed, 196 insertions(+), 53 deletions(-) create mode 100644 tests/crossplatform/mp_getmaxvarlengths.test.sas diff --git a/README.md b/README.md index 5fb7235..1c142e9 100644 --- a/README.md +++ b/README.md @@ -167,7 +167,7 @@ SAS code can contain one of two types of dependency - SAS Macros, and SAS Includ @li someprogram.sas FREFTWO ``` -The CLI can then extract all the dependencies and insert as precode (SAS Macros) or in a temp engine fileref (SAS Includes) when creating SAS Jobs and Services. +The CLI can then extract all the dependencies and insert as precode (SAS Macros) or in a temp engine fileref (SAS Includes) when creating SAS Jobs and Services (and Tests). When contributing to this library, it is therefore important to ensure that all dependencies are listed in the header in this format. @@ -183,6 +183,7 @@ When contributing to this library, it is therefore important to ensure that all - Mandatory parameters should be positional, all optional parameters should be keyword (var=) style. - All dataset references must be 2 level (eg `work.blah`, not `blah`). This is to avoid contention when options [DATASTMTCHK](https://support.sas.com/documentation/cdl/en/lrdict/64316/HTML/default/viewer.htm#a000279064.htm)=ALLKEYWORDS is in effect. - Avoid naming collisions! All macro variables should be local scope. 
Use system generated work tables where possible - eg `data ; set sashelp.class; run; data &output; set &syslast; run;` +- Where global macro variables are absolutely necessary, they should make use of `&sasjs_prefix` - see mp_init.sas - The use of `quit;` for `proc sql` is optional unless you are looking to benefit from the timing statistics. - Use [sasjs lint](https://github.com/sasjs/lint)! @@ -192,9 +193,9 @@ When contributing to this library, it is therefore important to ensure that all ## Breaking Changes -We are currently on major release v3. The following changes are planned when the next major (breaking) release becomes necessary: +We are currently on major release v4. The following changes are planned when the next major (breaking) release becomes necessary: -* Remove `dbg` parameter from mp_jsonout.sas (implement mdebug instead) +* (None as yet) ## Star Gazing diff --git a/base/mp_getmaxvarlengths.sas b/base/mp_getmaxvarlengths.sas index 644ac10..133f249 100755 --- a/base/mp_getmaxvarlengths.sas +++ b/base/mp_getmaxvarlengths.sas @@ -1,28 +1,46 @@ /** - @file mp_getmaxvarlengths.sas + @file @brief Scans a dataset to find the max length of the variable values @details This macro will scan a base dataset and produce an output dataset with two columns: - NAME Name of the base dataset column - - MAXLEN Maximum length of the data contained therein. + - MAXLEN Maximum length of the data contained therein. - Character fields may be allocated very large widths (eg 32000) of which the - maximum value is likely to be much narrower. This macro was designed to - enable a HTML table to be appropriately sized however this could be used as - part of a data audit to ensure we aren't over-sizing our tables in relation to - the data therein. + Character fields are often allocated very large widths (eg 32000) of which the + maximum value is likely to be much narrower. 
Identifying such cases can be + helpful in the following scenarios: + + @li Enabling a HTML table to be appropriately sized (`num2char=YES`) + @li Reducing the size of a dataset to save on storage (mp_ds2squeeze.sas) + @li Identifying columns containing nothing but missing values (`MAXLEN=0` in + the output table) + + If the entire column is made up of (non-special) missing values then a value + of 0 is returned. - Numeric fields are converted using the relevant format to determine the width. Usage: %mp_getmaxvarlengths(sashelp.class,outds=work.myds) - @param libds Two part dataset (or view) reference. - @param outds= The output dataset to create + @param [in] libds Two part dataset (or view) reference. + @param [in] num2char= (NO) When set to NO, numeric fields are sized according + to the number of bytes used (or set to zero in the case of non-special + missings). When YES, the numeric field is converted to character (using the + format, if available), and that is sized instead, using `lengthn()`. + @param [out] outds= The output dataset to create, eg: + |NAME:$8.|MAXLEN:best.| + |---|---| + |`Name `|`7 `| + |`Sex `|`1 `| + |`Age `|`3 `| + |`Height `|`8 `| + |`Weight `|`3 `|

SAS Macros

+ @li mcf_length.sas + @li mf_getuniquename.sas @li mf_getvarlist.sas @li mf_getvartype.sas @li mf_getvarformat.sas @@ -30,20 +48,32 @@ @version 9.2 @author Allan Bowe +

Related Macros

+ @li mp_ds2squeeze.sas + @li mp_getmaxvarlengths.test.sas + **/ %macro mp_getmaxvarlengths( - libds /* libref.dataset to analyse */ - ,outds=work.mp_getmaxvarlengths /* name of output dataset to create */ + libds + ,num2char=NO + ,outds=work.mp_getmaxvarlengths )/*/STORE SOURCE*/; -%local vars x var fmt; +%local vars prefix x var fmt; %let vars=%mf_getvarlist(libds=&libds); +%let prefix=%substr(%mf_getuniquename(),1,25); +%let num2char=%upcase(&num2char); + +%if &num2char=NO %then %do; + /* compile length function for numeric fields */ + %mcf_length(wrap=YES, insert_cmplib=YES) +%end; proc sql; create table &outds (rename=( %do x=1 %to %sysfunc(countw(&vars,%str( ))); - ________&x=%scan(&vars,&x) + &prefix.&x=%scan(&vars,&x) %end; )) as select @@ -51,18 +81,21 @@ create table &outds (rename=( %let var=%scan(&vars,&x); %if &x>1 %then ,; %if %mf_getvartype(&libds,&var)=C %then %do; - max(length(&var)) as ________&x + max(lengthn(&var)) as &prefix.&x %end; - %else %do; + %else %if &num2char=YES %then %do; %let fmt=%mf_getvarformat(&libds,&var); %put fmt=&fmt; %if %str(&fmt)=%str() %then %do; - max(length(cats(&var))) as ________&x + max(lengthn(cats(&var))) as &prefix.&x %end; %else %do; - max(length(put(&var,&fmt))) as ________&x + max(lengthn(put(&var,&fmt))) as &prefix.&x %end; %end; + %else %do; + max(mcf_length(&var)) as &prefix.&x + %end; %end; from &libds; diff --git a/base/mp_init.sas b/base/mp_init.sas index 47fd2eb..28d67d6 100644 --- a/base/mp_init.sas +++ b/base/mp_init.sas @@ -33,37 +33,39 @@ %macro mp_init(prefix=SASJS )/*/STORE SOURCE*/; - %global - &prefix._INIT_NUM /* initialisation time as numeric */ - &prefix._INIT_DTTM /* initialisation time in E8601DT26.6 format */ - &prefix.WORK /* avoid typing %sysfunc(pathname(work)) every time */ - ; - %if %eval(&&&prefix._INIT_NUM>0) %then %return; /* only run once */ +%global + SASJS_PREFIX /* the ONLY hard-coded global macro variable in SASjs */ + &prefix._INIT_NUM /* initialisation time as numeric */ + 
&prefix._INIT_DTTM /* initialisation time in E8601DT26.6 format */ + &prefix.WORK /* avoid typing %sysfunc(pathname(work)) every time */ +; +%if %length(&sasjs_prefix)>0 %then %return; /* only run once - SASJS_PREFIX is empty until the first call */ +%let sasjs_prefix=&prefix; - data _null_; - dttm=datetime(); - call symputx("&prefix._init_num",dttm,'g'); - call symputx("&prefix._init_dttm",put(dttm,E8601DT26.6),'g'); - call symputx("&prefix.work",pathname('WORK'),'g'); - run; +data _null_; + dttm=datetime(); + call symputx("&sasjs_prefix._init_num",dttm,'g'); + call symputx("&sasjs_prefix._init_dttm",put(dttm,E8601DT26.6),'g'); + call symputx("&sasjs_prefix.work",pathname('WORK'),'g'); +run; - options - noautocorrect /* disallow misspelled procedure names */ - compress=CHAR /* default is none so ensure we have something! */ - datastmtchk=ALLKEYWORDS /* protection from overwriting input datasets */ - dsoptions=note2err /* undocumented - convert bad NOTEs to ERRs */ - %str(err)orcheck=STRICT /* catch errs in libname/filename statements */ - fmterr /* ensure err when a format cannot be found */ - mergenoby=%str(ERR)OR /* throw err when a merge has no BY variables */ - missing=. /* changing this can cause hard to detect errs */ - noquotelenmax /* avoid warnings for long strings */ - noreplace /* avoid overwriting permanent datasets */ - ps=max /* reduce log size slightly */ - ls=max /* reduce log even more and avoid word truncation */ - validmemname=COMPATIBLE /* avoid special characters etc in table names */ - validvarname=V7 /* avoid special characters etc in variable names */ - varinitchk=%str(ERR)OR /* avoid data mistakes from variable name typos */ - varlenchk=%str(ERR)OR /* fail hard if truncation (data loss) can result */ - ; +options + noautocorrect /* disallow misspelled procedure names */ + compress=CHAR /* default is none so ensure we have something! 
*/ + datastmtchk=ALLKEYWORDS /* protection from overwriting input datasets */ + dsoptions=note2err /* undocumented - convert bad NOTEs to ERRs */ + %str(err)orcheck=STRICT /* catch errs in libname/filename statements */ + fmterr /* ensure err when a format cannot be found */ + mergenoby=%str(ERR)OR /* throw err when a merge has no BY variables */ + missing=. /* changing this can cause hard to detect errs */ + noquotelenmax /* avoid warnings for long strings */ + noreplace /* avoid overwriting permanent datasets */ + ps=max /* reduce log size slightly */ + ls=max /* reduce log even more and avoid word truncation */ + validmemname=COMPATIBLE /* avoid special characters etc in table names */ + validvarname=V7 /* avoid special characters etc in variable names */ + varinitchk=%str(ERR)OR /* avoid data mistakes from variable name typos */ + varlenchk=%str(ERR)OR /* fail hard if truncation (data loss) can result */ +; %mend mp_init; \ No newline at end of file diff --git a/fcmp/mcf_length.sas b/fcmp/mcf_length.sas index b6688bd..43b2bdb 100644 --- a/fcmp/mcf_length.sas +++ b/fcmp/mcf_length.sas @@ -39,6 +39,9 @@ @param [out] pkg= (utils) The output package in which to create the function. Uses a 3 part format: libref.catalog.package +

SAS Macros

+ @li mf_existfunction.sas +

Related Macros

@li mcf_length.test.sas @@ -51,13 +54,15 @@ ,pkg=UTILS )/*/STORE SOURCE*/; +%if %mf_existfunction(mcf_length)=1 %then %return; + %if &wrap=YES %then %do; proc fcmp outlib=&lib..&cat..&pkg; %end; function mcf_length(var); - if missing(var) then len=0; - else if trunc(var,3)=var then len=3; + if var=. then len=0; + else if missing(var) or trunc(var,3)=var then len=3; else if trunc(var,4)=var then len=4; else if trunc(var,5)=var then len=5; else if trunc(var,6)=var then len=6; diff --git a/fcmp/mcf_string2file.sas b/fcmp/mcf_string2file.sas index dbfcbb9..f41b30c 100644 --- a/fcmp/mcf_string2file.sas +++ b/fcmp/mcf_string2file.sas @@ -39,6 +39,9 @@ @param [out] pkg= (utils) The output package in which to create the function. Uses a 3 part format: libref.catalog.package +

SAS Macros

+ @li mf_existfunction.sas + **/ %macro mcf_string2file(wrap=NO @@ -48,6 +51,8 @@ ,pkg=UTILS )/*/STORE SOURCE*/; +%if %mf_existfunction(mcf_string2file)=1 %then %return; + %if &wrap=YES %then %do; proc fcmp outlib=&lib..&cat..&pkg; %end; diff --git a/tests/crossplatform/mcf_length.test.sas b/tests/crossplatform/mcf_length.test.sas index be1f3e6..90a7587 100644 --- a/tests/crossplatform/mcf_length.test.sas +++ b/tests/crossplatform/mcf_length.test.sas @@ -12,6 +12,7 @@ data test; call symputx('null',mcf_length(.)); + call symputx('special',mcf_length(._)); call symputx('three',mcf_length(1)); call symputx('four',mcf_length(10000000)); call symputx('five',mcf_length(12345678)); @@ -24,6 +25,10 @@ run; iftrue=(%str(&null)=%str(0)), desc=Check if NULL returns 0 ) +%mp_assert( + iftrue=(%str(&special)=%str(3)), + desc=Check if special missing ._ returns 3 +) %mp_assert( iftrue=(%str(&three)=%str(3)), desc=Check for length 3 @@ -47,4 +52,16 @@ run; %mp_assert( iftrue=(%str(&eight)=%str(8)), desc=Check for length 8 +) +%mp_assert( + iftrue=(&syscc=0), + desc=Check syscc=0 before re-initialisation +) + +/* test 2 - compile again to check that no warnings or errs are raised */ +%mcf_length(wrap=YES, insert_cmplib=YES) + +%mp_assert( + iftrue=(&syscc=0), + desc=Check syscc=0 after re-initialisation ) \ No newline at end of file diff --git a/tests/crossplatform/mp_getmaxvarlengths.test.sas b/tests/crossplatform/mp_getmaxvarlengths.test.sas new file mode 100644 index 0000000..2ec974f --- /dev/null +++ b/tests/crossplatform/mp_getmaxvarlengths.test.sas @@ -0,0 +1,80 @@ +/** + @file + @brief Testing mp_getmaxvarlengths macro + +

SAS Macros

+ @li mp_getmaxvarlengths.sas + @li mp_assert.sas + @li mp_assertdsobs.sas + @li mp_assertscope.sas + +**/ + + +/* regular usage */ +%mp_assertscope(SNAPSHOT) +%mp_getmaxvarlengths(sashelp.class,outds=work.myds) +%mp_assertscope(COMPARE,desc=checking scope leakage on mp_getmaxvarlengths) +%mp_assert( + iftrue=(&syscc=0), + desc=No errs +) +%mp_assertdsobs(work.myds, + desc=Has 5 records, + test=EQUALS 5 +) +data work.errs; + set work.myds; + if name='Name' and maxlen ne 7 then output; + if name='Sex' and maxlen ne 1 then output; + if name='Age' and maxlen ne 3 then output; + if name='Height' and maxlen ne 8 then output; + if name='Weight' and maxlen ne 3 then output; +run; +data _null_; + set work.errs; + putlog (_all_)(=); +run; + +%mp_assertdsobs(work.errs, + desc=Err table has 0 records, + test=EQUALS 0 +) + +/* test2 */ +data work.test2; + length a 3 b 5; + a=1/3; + b=1/3; + c=1/3; + d=._; + e=.; + output; + output; +run; +%mp_getmaxvarlengths(work.test2,outds=work.myds2) +%mp_assert( + iftrue=(&syscc=0), + desc=No errs in second test (with nulls) +) +%mp_assertdsobs(work.myds2, + desc=Has 5 records, + test=EQUALS 5 +) +data work.errs2; + set work.myds2; + if name='a' and maxlen ne 3 then output; + if name='b' and maxlen ne 5 then output; + if name='c' and maxlen ne 8 then output; + if name='d' and maxlen ne 3 then output; + if name='e' and maxlen ne 0 then output; +run; +data _null_; + set work.errs2; + putlog (_all_)(=); +run; + +%mp_assertdsobs(work.errs2, + desc=Err table has 0 records, + test=EQUALS 0 +) \ No newline at end of file