diff --git a/base/mp_md5.sas b/base/mp_md5.sas new file mode 100644 index 0000000..6405419 --- /dev/null +++ b/base/mp_md5.sas @@ -0,0 +1,58 @@ +/** + @file + @brief Generates an md5 expression for hashing a set of variables + @details This is the same algorithm used to hash records in + [Data Controller for SAS](https://datacontroller.io) (free for up + to 5 users). + + It is not designed to be efficient - it is designed to be effective, + given the range of edge cases (large floating points, special missing + numerics, thousands of columns, very wide columns). + + It can be used only in data step, eg as follows: + + data _null_; + set sashelp.class; + hashvar=%mp_md5(cvars=name sex, nvars=age height weight); + put hashvar=; + run; + + Unfortunately it will not run in SQL - it fails with the following message: + + > The width value for HEX is out of bounds. It should be between 1 and 16 + + The macro will also cause errors if the data contains (non-special) missings + and the (undocumented) `options dsoptions=note2err;` is in effect. + + This can be avoided in two ways: + + @li Global option: `options dsoptions=nonote2err;` + @li Data step option: `data YOURLIB.YOURDATASET /nonote2err;` + + @param cvars= Space separated list of character variables + @param nvars= Space separated list of numeric variables + + 

Related Programs

+  @li mp_init.sas
+
+  @version 9.2
+  @author Allan Bowe
+**/
+
+%macro mp_md5(cvars=,nvars=);
+%local i var sep; /* sep is empty for the first term, !! thereafter */
+put(md5( /* outer md5 hashes the concatenated per-variable hex digests */
+  %do i=1 %to %sysfunc(countw(&cvars,%str( ))); /* delim => empty list ok */
+    %let var=%scan(&cvars,&i,%str( ));
+    &sep put(md5(trim(&var)),$hex32.)
+    %let sep=!!;
+  %end;
+  %do i=1 %to %sysfunc(countw(&nvars,%str( ))); /* same delim as %scan */
+    %let var=%scan(&nvars,&i,%str( ));
+    /* multiply by 1 to strip precision errors (eg 0 != 0) */
+    /* but ONLY if not missing, else will lose any special missing values */
+    &sep put(md5(trim(put(ifn(missing(&var),&var,&var*1),binary64.))),$hex32.)
+    %let sep=!!;
+  %end;
+),$hex32.) /* md5() returns character, so use the character $hex format */
+%mend mp_md5;
diff --git a/tests/crossplatform/mp_md5.test.sas b/tests/crossplatform/mp_md5.test.sas
new file mode 100644
index 0000000..084b351
--- /dev/null
+++ b/tests/crossplatform/mp_md5.test.sas
@@ -0,0 +1,41 @@
+/**
+  @file
+  @brief Testing mp_md5.sas macro
+
+

SAS Macros

+  @li mp_md5.sas
+  @li mp_assert.sas
+  @li mp_assertscope.sas
+
+**/
+%global hash1 hash2 hash3;
+
+%mp_assertscope(SNAPSHOT)
+data work.test1 /nonote2err;
+  c1=''; /* empty character value */
+  c2=repeat('x',32767); /* very wide character value */
+  c3=' f'; /* leading blank */
+  n1=.a; /* special missing */
+  n2=.; /* regular missing */
+  n3=1.0000000001;
+  hash=%mp_md5(cvars=c1 c2 c3,nvars=n1 n2 n3);
+  call symputx('hash1',hash);
+  n1=.b; /* a different special missing should change the hash */
+  hash=%mp_md5(cvars=c1 c2 c3,nvars=n1 n2 n3);
+  call symputx('hash2',hash);
+  c3='f'; /* dropping the leading blank should change the hash */
+  hash=%mp_md5(cvars=c1 c2 c3,nvars=n1 n2 n3);
+  call symputx('hash3',hash);
+run;
+%mp_assertscope(COMPARE,ignorelist=HASH1 HASH2 HASH3)
+
+%mp_assert(
+  iftrue=("&hash1" ne "&hash2"),
+  desc=Checking first hash diff,
+  outds=work.test_results
+)
+%mp_assert(
+  iftrue=("&hash2" ne "&hash3"),
+  desc=Checking second hash diff,
+  outds=work.test_results
+)