1
0
mirror of https://github.com/sasjs/core.git synced 2025-12-11 06:24:35 +00:00

feat: new macro (mp_md5) for calculating an md5 hash of a set of columns

This commit is contained in:
munja
2022-02-09 21:56:46 +01:00
parent dfd60200fb
commit 9a1f7d0985
2 changed files with 99 additions and 0 deletions

58
base/mp_md5.sas Normal file
View File

@@ -0,0 +1,58 @@
/**
  @file
  @brief Generates an md5 expression for hashing a set of variables
  @details This is the same algorithm used to hash records in
  [Data Controller for SAS](https://datacontroller.io) (free for up
  to 5 users).

  It is not designed to be efficient - it is designed to be effective,
  given the range of edge cases (large floating points, special missing
  numerics, thousands of columns, very wide columns).

  The macro generates an expression (not a statement), intended for use in
  a data step, eg as follows:

      data _null_;
        set sashelp.class;
        hashvar=%mp_md5(cvars=name sex, nvars=age height weight);
        put hashvar=;
      run;

  Each variable is hashed individually, the hex-encoded hashes are
  concatenated, and the concatenated string is hashed again.  The result is
  a 32 character hex string.

  The outer PUT uses the character format `$hex32.`.  When the numeric
  `hex32.` format was used here instead, the expression failed in SQL with
  the following message:

  > The width value for HEX is out of bounds. It should be between 1 and 16

  NOTE(review): use of the expression inside PROC SQL has not been verified
  beyond removing the cause of the message above - confirm before relying
  on it.

  The macro will cause errors if the data contains (non-special) missings
  and the (undocumented) `options dsoptions=note2err;` is in effect.

  This can be avoided in two ways:

  @li Global option:  `options dsoptions=nonote2err;`
  @li Data step option: `data YOURLIB.YOURDATASET /nonote2err;`

  At least one of `cvars` / `nvars` must be provided - if both are empty the
  generated expression has no argument for the outer md5() call.

  @param cvars= Space separated list of character variables
  @param nvars= Space separated list of numeric variables

  <h4> Related Programs </h4>
  @li mp_init.sas

  @version 9.2
  @author Allan Bowe
**/

%macro mp_md5(cvars=,nvars=);
%local i var sep;
put(md5(
  /* An explicit blank delimiter is passed to countw so that (a) an empty */
  /* list does not make %sysfunc fail for lack of arguments, and (b) the  */
  /* word count uses the same delimiter as the %scan calls below          */
  %do i=1 %to %sysfunc(countw(&cvars,%str( )));
    %let var=%scan(&cvars,&i,%str( ));
    /* &sep is empty for the first term, and !! (concatenate) thereafter */
    &sep put(md5(trim(&var)),$hex32.)
    %let sep=!!;
  %end;
  %do i=1 %to %sysfunc(countw(&nvars,%str( )));
    %let var=%scan(&nvars,&i,%str( ));
    /* multiply by 1 to strip precision errors (eg 0 != 0) */
    /* but ONLY if not missing, else will lose any special missing values */
    /* IFN evaluates every argument, so &var*1 is still computed for      */
    /* missing values - this is the source of the note2err sensitivity    */
    &sep put(md5(trim(put(ifn(missing(&var),&var,&var*1),binary64.))),$hex32.)
    %let sep=!!;
  %end;
  /* md5() returns a character value, hence the character format $hex32. */
),$hex32.)
%mend mp_md5;

View File

@@ -0,0 +1,41 @@
/**
  @file
  @brief Testing mp_md5.sas macro
  @details Builds a single observation containing an empty string, a very
  wide string, a string with a leading blank, a special missing, a regular
  missing and a high precision numeric.  The record is hashed three times,
  changing one input value between each hash, and the resulting hashes are
  compared to confirm that each change produces a different value.

  <h4> SAS Macros </h4>
  @li mp_md5.sas
  @li mp_assert.sas
  @li mp_assertscope.sas

**/

%global hash1 hash2 hash3;

/* snapshot the macro variables so that scope leakage can be checked below */
%mp_assertscope(SNAPSHOT)

/* nonote2err: n2 is a regular missing, see the mp_md5 header for details */
data work.test1 /nonote2err;
  c1='';
  c2=repeat('x',32767);
  c3=' f';
  n1=.a;
  n2=.;
  n3=1.0000000001;
  hash=%mp_md5(cvars=c1 c2 c3,nvars=n1 n2 n3);
  call symputx('hash1',hash);
  /* change only the special missing value */
  n1=.b;
  hash=%mp_md5(cvars=c1 c2 c3,nvars=n1 n2 n3);
  call symputx('hash2',hash);
  /* change only the position of the blank in c3 */
  c3='f';
  hash=%mp_md5(cvars=c1 c2 c3,nvars=n1 n2 n3);
  call symputx('hash3',hash);
run;

/* the three hash variables are deliberately global, so are ignored here */
%mp_assertscope(COMPARE,ignorelist=HASH1 HASH2 HASH3)

%mp_assert(
  iftrue=("&hash1" ne "&hash2"),
  desc=Checking first hash diff,
  outds=work.test_results
)
%mp_assert(
  iftrue=("&hash2" ne "&hash3"),
  desc=Checking second hash diff,
  outds=work.test_results
)