1
0
mirror of https://github.com/sasjs/core.git synced 2026-01-10 02:40:05 +00:00

fix: updating mp_hashdataset to cope with STRICT mode. Adding test and improving sasjs/server compatibility.

This commit is contained in:
munja
2022-03-29 13:22:24 +01:00
parent 40d694eec8
commit 72d6b446c3
5 changed files with 155 additions and 87 deletions

View File

@@ -1,8 +1,8 @@
/**
@file
@brief Returns a unique hash for a dataset
@details Ignores metadata attributes, used only to hash values. Compared
datasets must be in the same order.
@details Ignores metadata attributes; only the data values are hashed. If used
to compare datasets, they must have their columns and rows in the same order.
%mp_hashdataset(sashelp.class,outds=myhash)
@@ -17,7 +17,10 @@
@li mf_getattrn.sas
@li mf_getuniquename.sas
@li mf_getvarlist.sas
@li mf_getvartype.sas
@li mp_md5.sas
<h4> Related Files </h4>
@li mp_hashdataset.test.sas
@param [in] libds dataset to hash
@param [in] salt= Provide a salt (could be, for instance, the dataset name)
@@ -35,48 +38,48 @@
/* NOTE(review): this text looks like a rendered diff hunk with the removed */
/* and added lines interleaved and no +/- markers - for instance the outds  */
/* parameter is listed twice below. The comments describe each statement as */
/* written; confirm against the real source file before relying on them.    */
%macro mp_hashdataset(
libds,
outds=,
outds=work._data_,
salt=,
iftrue=%str(1=1)
)/*/STORE SOURCE*/;
/* leave the macro straight away when the iftrue condition evaluates false */
%if not(%eval(%unquote(&iftrue))) %then %return;
%local keyvar /* roll up the md5 */
prevkeyvar /* retain prev record md5 */
lastvar /* name of the end= flag variable (set on last row of input ds) */
cvars nvars;
/* NLOBS of exactly 0: only a log message is written in this branch */
%if %mf_getattrn(&libds,NLOBS)=0 %then %do;
%put %str(WARN)ING: Dataset &libds is empty, or is not a dataset;
%end;
/* negative NLOBS: only a log message is written in this branch */
%else %if %mf_getattrn(&libds,NLOBS)<0 %then %do;
%put %str(ERR)OR: Dataset &libds is not a dataset;
%end;
/* otherwise: build the rolling hash in a single data step */
%else %do;
%local keyvar /* roll up the md5 */
prevkeyvar /* retain prev record md5 */
lastvar /* name of the end= flag variable (set on last row of input ds) */
varlist var i;
/* avoid naming conflict for hash key vars */
%let keyvar=%mf_getuniquename();
%let prevkeyvar=%mf_getuniquename();
%let lastvar=%mf_getuniquename();
/* list of columns to hash; the loop below scans it as space separated */
%let varlist=%mf_getvarlist(&libds);
/* only the key variable is kept, and it is renamed to hashkey on output */
data &outds(rename=(&keyvar=hashkey) keep=&keyvar);
length &prevkeyvar &keyvar $32;
/* seed the retained previous-row hash with the hex md5 of the salt, */
/* resolved by the macro processor before the data step runs         */
retain &prevkeyvar "%sysfunc(md5(%str(&salt)),$hex32.)";
set &libds end=&lastvar;
/* hash should include previous row */
&keyvar=put(md5(&prevkeyvar
/* loop every column, hashing every individual value */
%do i=1 %to %sysfunc(countw(&varlist));
%let var=%scan(&varlist,&i,%str( ));
/* type C columns: hex md5 of the trimmed value is concatenated */
%if %mf_getvartype(&libds,&var)=C %then %do;
!!put(md5(trim(&var)),$hex32.)
%end;
/* all other columns: value is rendered with binary64. before hashing */
%else %do;
!!put(md5(trim(put(&var*1,binary64.))),$hex32.)
%end;
%end;
),$hex32.);
/* carry this row hash forward so the next row hash depends on it */
&prevkeyvar=&keyvar;
/* a single record is written, on the final input row */
if &lastvar then output;
run;
%end;
%mend mp_hashdataset;
/* NOTE(review): these statements sit after a mend statement and end with  */
/* a second one, so they appear to be the added side of a rendered diff    */
/* hunk rather than standalone code - confirm against the real source.     */
/* leave the macro straight away when the iftrue condition evaluates false */
%if not(%eval(%unquote(&iftrue))) %then %return;
/* avoid naming conflict for hash key vars */
%let keyvar=%mf_getuniquename();
%let prevkeyvar=%mf_getuniquename();
%let lastvar=%mf_getuniquename();
/* NLOBS of exactly 0: write a one-row output whose hashkey is the hex md5 */
/* of the salt alone, then report this in the log                          */
%if %mf_getattrn(&libds,NLOBS)=0 %then %do;
data &outds;
length hashkey $32;
retain hashkey "%sysfunc(md5(%str(&salt)),$hex32.)";
output;
stop;
run;
%put &sysmacroname: Dataset &libds is empty, or is not a dataset;
%put &sysmacroname: hashkey of &outds is based on salt (&salt) only;
%end;
/* negative NLOBS: only a log message is written, no output data step runs */
%else %if %mf_getattrn(&libds,NLOBS)<0 %then %do;
%put %str(ERR)OR: Dataset &libds is not a dataset;
%end;
/* otherwise: build the rolling hash in a single data step */
%else %do;
/* only the key variable is kept, and it is renamed to hashkey on output */
data &outds(rename=(&keyvar=hashkey) keep=&keyvar);
length &prevkeyvar &keyvar $32;
/* seed the retained previous-row hash with the hex md5 of the salt, */
/* resolved by the macro processor before the data step runs         */
retain &prevkeyvar "%sysfunc(md5(%str(&salt)),$hex32.)";
set &libds end=&lastvar;
/* hash should include previous row */
/* NOTE(review): mp_md5 is defined elsewhere; presumably it expands to an  */
/* expression hashing the listed variables, with typefilter C and N        */
/* selecting character and numeric columns - confirm against those macros. */
/* The previous-row hash is passed in with the cvars list.                 */
&keyvar=%mp_md5(
cvars=%mf_getvarlist(&libds,typefilter=C) &prevkeyvar,
nvars=%mf_getvarlist(&libds,typefilter=N)
);
/* carry this row hash forward so the next row hash depends on it */
&prevkeyvar=&keyvar;
/* a single record is written, on the final input row */
if &lastvar then output;
run;
%end;
%mend mp_hashdataset;