mirror of
https://github.com/sasjs/core.git
synced 2026-01-09 02:10:06 +00:00
feat: new macro (mp_md5) for calculating an md5 hash of a set of columns
This commit is contained in:
58
base/mp_md5.sas
Normal file
58
base/mp_md5.sas
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
/**
|
||||||
|
@file
|
||||||
|
@brief Generates an md5 expression for hashing a set of variables
|
||||||
|
@details This is the same algorithm used to hash records in
|
||||||
|
[Data Controller for SAS](https://datacontroller.io) (free for up
|
||||||
|
to 5 users).
|
||||||
|
|
||||||
|
It is not designed to be efficient - it is designed to be effective,
|
||||||
|
given the range of edge cases (large floating points, special missing
|
||||||
|
numerics, thousands of columns, very wide columns).
|
||||||
|
|
||||||
|
It can be used only in data step, eg as follows:
|
||||||
|
|
||||||
|
data _null_;
|
||||||
|
set sashelp.class;
|
||||||
|
hashvar=%mp_md5(cvars=name sex, nvars=age height weight);
|
||||||
|
put hashvar=;
|
||||||
|
run;
|
||||||
|
|
||||||
|
Unfortunately it will not run in SQL - it fails with the following message:
|
||||||
|
|
||||||
|
> The width value for HEX is out of bounds. It should be between 1 and 16
|
||||||
|
|
||||||
|
The macro will also cause errors if the data contains (non-special) missings
|
||||||
|
and the (undocumented) `options dsoptions=note2err;` is in effect.
|
||||||
|
|
||||||
|
This can be avoided in two ways:
|
||||||
|
|
||||||
|
@li Global option: `options dsoptions=nonote2err;`
|
||||||
|
@li Data step option: `data YOURLIB.YOURDATASET /nonote2err;`
|
||||||
|
|
||||||
|
@param cvars= Space separated list of character variables
|
||||||
|
@param nvars= Space separated list of numeric variables
|
||||||
|
|
||||||
|
<h4> Related Programs </h4>
|
||||||
|
@li mp_init.sas
|
||||||
|
|
||||||
|
@version 9.2
|
||||||
|
@author Allan Bowe
|
||||||
|
**/
|
||||||
|
|
||||||
|
%macro mp_md5(cvars=,nvars=);
  /*
    Emits a single data step expression (no trailing semicolon):
    the md5 of the concatenated per-variable md5 hex digests, rendered as a
    32 character hex string.

    cvars= blank separated list of character variables
    nvars= blank separated list of numeric variables
  */
  %local i var sep;
  put(md5(
    /* skip the loop for an empty list, so countw() is never called with an
       empty first argument */
    %if %length(&cvars)>0 %then %do;
      /* count words using the same blank-only delimiter that %scan uses */
      %do i=1 %to %sysfunc(countw(&cvars,%str( )));
        %let var=%scan(&cvars,&i,%str( ));
        &sep put(md5(trim(&var)),$hex32.)
        /* every digest after the first is joined with the concat operator */
        %let sep=!!;
      %end;
    %end;
    %if %length(&nvars)>0 %then %do;
      %do i=1 %to %sysfunc(countw(&nvars,%str( )));
        %let var=%scan(&nvars,&i,%str( ));
        /* multiply by 1 to strip precision errors (eg 0 != 0) */
        /* but ONLY if not missing, else will lose any special missing values */
        &sep put(md5(trim(put(ifn(missing(&var),&var,&var*1),binary64.))),$hex32.)
        %let sep=!!;
      %end;
    %end;
  /* md5() returns a character value, so the outer digest uses the character
     format $hex32. - the same format already used for the inner digests */
  ),$hex32.)
%mend mp_md5;
|
||||||
41
tests/crossplatform/mp_md5.test.sas
Normal file
41
tests/crossplatform/mp_md5.test.sas
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
/**
|
||||||
|
@file
|
||||||
|
@brief Testing mp_md5.sas macro
|
||||||
|
|
||||||
|
<h4> SAS Macros </h4>
|
||||||
|
@li mp_md5.sas
|
||||||
|
@li mp_assert.sas
|
||||||
|
@li mp_assertscope.sas
|
||||||
|
|
||||||
|
**/
|
||||||
|
/* macro variables that receive the three hashes computed below */
%global hash1 hash2 hash3;

/* snapshot the macro variable scope so any leakage can be detected */
%mp_assertscope(SNAPSHOT)
data work.test1 /nonote2err;
  c1='';
  c2=repeat('x',32767);
  c3=' f';
  n1=.a;
  n2=.;
  n3=1.0000000001;
  /* baseline hash */
  hash=%mp_md5(cvars=c1 c2 c3,nvars=n1 n2 n3);
  call symputx('hash1',hash);
  /* change only the special missing value (.a -> .b) */
  n1=.b;
  hash=%mp_md5(cvars=c1 c2 c3,nvars=n1 n2 n3);
  call symputx('hash2',hash);
  /* change only the character value (' f' -> 'f') */
  c3='f';
  hash=%mp_md5(cvars=c1 c2 c3,nvars=n1 n2 n3);
  call symputx('hash3',hash);
run;
/* the three global hash variables are expected, so exclude them */
%mp_assertscope(COMPARE,ignorelist=HASH1 HASH2 HASH3)

/* a different special missing value must produce a different hash */
%mp_assert(
  iftrue=("&hash1" ne "&hash2"),
  desc=Checking first hash diff,
  outds=work.test_results
)
/* a different character value must produce a different hash */
%mp_assert(
  iftrue=("&hash2" ne "&hash3"),
  desc=Checking second hash diff,
  outds=work.test_results
)
|
||||||
Reference in New Issue
Block a user