mirror of
https://github.com/sasjs/core.git
synced 2025-12-11 06:24:35 +00:00
feat: new macro (mp_md5) for calculating an md5 hash of a set of columns
This commit is contained in:
58
base/mp_md5.sas
Normal file
58
base/mp_md5.sas
Normal file
@@ -0,0 +1,58 @@
|
||||
/**
|
||||
@file
|
||||
@brief Generates an md5 expression for hashing a set of variables
|
||||
@details This is the same algorithm used to hash records in
|
||||
[Data Controller for SAS](https://datacontroller.io) (free for up
|
||||
to 5 users).
|
||||
|
||||
It is not designed to be efficient - it is designed to be effective,
|
||||
given the range of edge cases (large floating points, special missing
|
||||
numerics, thousands of columns, very wide columns).
|
||||
|
||||
It can be used only in data step, eg as follows:
|
||||
|
||||
data _null_;
|
||||
set sashelp.class;
|
||||
hashvar=%mp_md5(cvars=name sex, nvars=age height weight);
|
||||
put hashvar=;
|
||||
run;
|
||||
|
||||
Unfortunately it will not run in SQL - it fails with the following message:
|
||||
|
||||
> The width value for HEX is out of bounds. It should be between 1 and 16
|
||||
|
||||
The macro will also cause errors if the data contains (non-special) missings
|
||||
and the (undocumented) `options dsoptions=note2err;` is in effect.
|
||||
|
||||
This can be avoided in two ways:
|
||||
|
||||
@li Global option: `options dsoptions=nonote2err;`
|
||||
@li Data step option: `data YOURLIB.YOURDATASET /nonote2err;`
|
||||
|
||||
@param cvars= Space separated list of character variables
|
||||
@param nvars= Space separated list of numeric variables
|
||||
|
||||
<h4> Related Programs </h4>
|
||||
@li mp_init.sas
|
||||
|
||||
@version 9.2
|
||||
@author Allan Bowe
|
||||
**/
|
||||
|
||||
%macro mp_md5(cvars=,nvars=);
  /*
    Function-style macro: emits ONE data step expression (no statements), so
    it is used on the right-hand side of an assignment, eg
      hashvar=%mp_md5(cvars=name sex, nvars=age height weight);

    cvars= space separated list of character variables (may be empty)
    nvars= space separated list of numeric variables (may be empty)

    Each variable is hashed individually and rendered as 32 hex characters,
    the pieces are concatenated, and the concatenation is hashed again.
    At least one variable must be supplied across cvars / nvars, otherwise
    the generated expression is md5() with no argument.
  */
  %local i var sep ccnt ncnt;

  /* COUNTW raises a "too few arguments" error under %sysfunc when the list
     is empty, so only count lists that have content.  The delimiter is
     restricted to a blank so that it agrees with the %scan calls below. */
  %let ccnt=0;
  %let ncnt=0;
  %if %length(&cvars)>0 %then %let ccnt=%sysfunc(countw(&cvars,%str( )));
  %if %length(&nvars)>0 %then %let ncnt=%sysfunc(countw(&nvars,%str( )));

  put(md5(
    /* sep starts empty (so the first term has no leading operator) and is
       switched to the !! concatenation operator after each term */
    %do i=1 %to &ccnt;
      %let var=%scan(&cvars,&i,%str( ));
      &sep put(md5(trim(&var)),$hex32.)
      %let sep=!!;
    %end;
    %do i=1 %to &ncnt;
      %let var=%scan(&nvars,&i,%str( ));
      /* multiply by 1 to strip precision errors (eg 0 != 0) */
      /* but ONLY if not missing, else will lose any special missing values */
      &sep put(md5(trim(put(ifn(missing(&var),&var,&var*1),binary64.))),$hex32.)
      %let sep=!!;
    %end;
  /* md5() returns a character value, so the character format $hex32. is the
     correct one here (numeric HEX only supports widths 1 to 16).
     NOTE(review): this may also lift the PROC SQL width error quoted in the
     file header - confirm before changing that documentation. */
  ),$hex32.)
%mend mp_md5;
|
||||
41
tests/crossplatform/mp_md5.test.sas
Normal file
41
tests/crossplatform/mp_md5.test.sas
Normal file
@@ -0,0 +1,41 @@
|
||||
/**
|
||||
@file
|
||||
@brief Testing mp_md5.sas macro
|
||||
|
||||
<h4> SAS Macros </h4>
|
||||
@li mp_md5.sas
|
||||
@li mp_assert.sas
|
||||
@li mp_assertscope.sas
|
||||
|
||||
**/
|
||||
/* receives the three hashes produced inside the data step below */
%global hash1 hash2 hash3;

/* NOTE(review): presumably snapshots the macro variable scope so the COMPARE
   call further down can detect variables leaked by mp_md5 - confirm against
   mp_assertscope.sas */
%mp_assertscope(SNAPSHOT)
data work.test1 /nonote2err;
  /* edge cases: empty string, very wide column, leading blank, special
     missing, regular missing and a value with many decimal places */
  c1='';
  c2=repeat('x',32767);
  c3=' f';
  n1=.a;
  n2=.;
  n3=1.0000000001;

  /* scenario 1: baseline */
  hash=%mp_md5(cvars=c1 c2 c3,nvars=n1 n2 n3);
  call symputx('hash1',hash);

  /* scenario 2: only the special missing value changes (.a to .b) */
  n1=.b;
  hash=%mp_md5(cvars=c1 c2 c3,nvars=n1 n2 n3);
  call symputx('hash2',hash);

  /* scenario 3: only the leading blank in c3 is removed */
  c3='f';
  hash=%mp_md5(cvars=c1 c2 c3,nvars=n1 n2 n3);
  call symputx('hash3',hash);
run;
%mp_assertscope(COMPARE,ignorelist=HASH1 HASH2 HASH3)

/* changing the special missing value must change the hash */
%mp_assert(
  iftrue=("&hash1" ne "&hash2"),
  desc=Checking first hash diff,
  outds=work.test_results
)
/* removing the leading blank must change the hash */
%mp_assert(
  iftrue=("&hash2" ne "&hash3"),
  desc=Checking second hash diff,
  outds=work.test_results
)
|
||||
Reference in New Issue
Block a user