mirror of
https://github.com/sasjs/core.git
synced 2026-01-03 15:40:05 +00:00
feat: new macro for hashing a table (mp_hashdataset)
This commit is contained in:
74
base/mp_hashdataset.sas
Normal file
74
base/mp_hashdataset.sas
Normal file
@@ -0,0 +1,74 @@
|
||||
/**
|
||||
@file
|
||||
@brief Returns a unique hash for a dataset
|
||||
@details Ignores metadata attributes, used only to hash values. Compared
|
||||
datasets must be in the same order.
|
||||
|
||||
%mp_hashdataset(sashelp.class,outds=myhash);
|
||||
|
||||
data _null_;
|
||||
set work.myhash;
|
||||
put hashkey=;
|
||||
run;
|
||||
|
||||
|
||||
<h4> SAS Macros </h4>
|
||||
@li mf_getattrn.sas
|
||||
@li mf_getuniquename.sas
|
||||
@li mf_getvarlist.sas
|
||||
@li mf_getvartype.sas
|
||||
|
||||
@param [in] libds dataset to hash
|
||||
@param [out] outds= (work.mf_hashdataset) The output dataset to create. This
|
||||
will contain one column (hashkey) with one observation (a hex32.
|
||||
representation of the input hash)
|
||||
|hashkey:$32.|
|
||||
|---|
|
||||
|28ABC74ABFC45F50794237BA5566E6CA|
|
||||
|
||||
@version 9.2
|
||||
@author Allan Bowe
|
||||
**/
|
||||
|
||||
%macro mp_hashdataset(
|
||||
libds,
|
||||
outds=
|
||||
)/*/STORE SOURCE*/;
|
||||
%if %mf_getattrn(&libds,NLOBS)=0 %then %do;
|
||||
%put %str(WARN)ING: Dataset &libds is empty;, or is not a dataset;
|
||||
%end;
|
||||
%else %if %mf_getattrn(&libds,NLOBS)<0 %then %do;
|
||||
%put %str(ERR)OR: Dataset &libds is not a dataset;
|
||||
%end;
|
||||
%else %do;
|
||||
%local keyvar /* roll up the md5 */
|
||||
prevkeyvar /* retain prev record md5 */
|
||||
lastvar /* last var in input ds */
|
||||
varlist var i;
|
||||
/* avoid naming conflict for hash key vars */
|
||||
%let keyvar=%mf_getuniquename();
|
||||
%let prevkeyvar=%mf_getuniquename();
|
||||
%let lastvar=%mf_getuniquename();
|
||||
%let varlist=%mf_getvarlist(&libds);
|
||||
data &outds(rename=(&keyvar=hashkey) keep=&keyvar);
|
||||
length &prevkeyvar &keyvar $32;
|
||||
retain &prevkeyvar;
|
||||
set &libds end=&lastvar;
|
||||
/* hash should include previous row */
|
||||
if _n_>1 then &keyvar=put(md5(&prevkeyvar
|
||||
/* loop every column, hashing every individual value */
|
||||
%do i=1 %to %sysfunc(countw(&varlist));
|
||||
%let var=%scan(&varlist,&i,%str( ));
|
||||
%if %mf_getvartype(&libds,&var)=C %then %do;
|
||||
!!put(md5(trim(&var)),$hex32.)
|
||||
%end;
|
||||
%else %do;
|
||||
!!put(md5(trim(put(&var*1,binary64.))),$hex32.)
|
||||
%end;
|
||||
%end;
|
||||
),$hex32.);
|
||||
&prevkeyvar=&keyvar;
|
||||
if &lastvar then output;
|
||||
run;
|
||||
%end;
|
||||
%mend;
|
||||
Reference in New Issue
Block a user