1
0
mirror of https://github.com/sasjs/core.git synced 2025-12-10 22:14:35 +00:00

fix: updating mp_hashdataset to cope with STRICT mode. Adding test and improving sasjs/server compatibility.

This commit is contained in:
munja
2022-03-29 13:22:24 +01:00
parent 40d694eec8
commit 72d6b446c3
5 changed files with 155 additions and 87 deletions

91
all.sas
View File

@@ -7954,8 +7954,8 @@ run;
%mend mp_guesspk;/**
@file
@brief Returns a unique hash for a dataset
@details Ignores metadata attributes, used only to hash values. Compared
datasets must be in the same order.
@details Ignores metadata attributes, used only to hash values. If used to
compare datasets, they must have their columns and rows in the same order.
%mp_hashdataset(sashelp.class,outds=myhash)
@@ -7970,7 +7970,10 @@ run;
@li mf_getattrn.sas
@li mf_getuniquename.sas
@li mf_getvarlist.sas
@li mf_getvartype.sas
@li mp_md5.sas
<h4> Related Files </h4>
@li mp_hashdataset.test.sas
@param [in] libds dataset to hash
@param [in] salt= Provide a salt (could be, for instance, the dataset name)
@@ -7988,51 +7991,52 @@ run;
%macro mp_hashdataset(
libds,
outds=,
outds=work._data_,
salt=,
iftrue=%str(1=1)
)/*/STORE SOURCE*/;
%if not(%eval(%unquote(&iftrue))) %then %return;
%local keyvar /* roll up the md5 */
prevkeyvar /* retain prev record md5 */
lastvar /* last var in input ds */
cvars nvars;
%if %mf_getattrn(&libds,NLOBS)=0 %then %do;
%put %str(WARN)ING: Dataset &libds is empty, or is not a dataset;
%end;
%else %if %mf_getattrn(&libds,NLOBS)<0 %then %do;
%put %str(ERR)OR: Dataset &libds is not a dataset;
%end;
%else %do;
%local keyvar /* roll up the md5 */
prevkeyvar /* retain prev record md5 */
lastvar /* last var in input ds */
varlist var i;
/* avoid naming conflict for hash key vars */
%let keyvar=%mf_getuniquename();
%let prevkeyvar=%mf_getuniquename();
%let lastvar=%mf_getuniquename();
%let varlist=%mf_getvarlist(&libds);
data &outds(rename=(&keyvar=hashkey) keep=&keyvar);
length &prevkeyvar &keyvar $32;
retain &prevkeyvar "%sysfunc(md5(%str(&salt)),$hex32.)";
set &libds end=&lastvar;
/* hash should include previous row */
&keyvar=put(md5(&prevkeyvar
/* loop every column, hashing every individual value */
%do i=1 %to %sysfunc(countw(&varlist));
%let var=%scan(&varlist,&i,%str( ));
%if %mf_getvartype(&libds,&var)=C %then %do;
!!put(md5(trim(&var)),$hex32.)
%end;
%else %do;
!!put(md5(trim(put(&var*1,binary64.))),$hex32.)
%end;
%end;
),$hex32.);
&prevkeyvar=&keyvar;
if &lastvar then output;
run;
%end;
%mend mp_hashdataset;/**
%if not(%eval(%unquote(&iftrue))) %then %return;
/* avoid naming conflict for hash key vars */
%let keyvar=%mf_getuniquename();
%let prevkeyvar=%mf_getuniquename();
%let lastvar=%mf_getuniquename();
%if %mf_getattrn(&libds,NLOBS)=0 %then %do;
data &outds;
length hashkey $32;
retain hashkey "%sysfunc(md5(%str(&salt)),$hex32.)";
output;
stop;
run;
%put &sysmacroname: Dataset &libds is empty, or is not a dataset;
%put &sysmacroname: hashkey of &outds is based on salt (&salt) only;
%end;
%else %if %mf_getattrn(&libds,NLOBS)<0 %then %do;
%put %str(ERR)OR: Dataset &libds is not a dataset;
%end;
%else %do;
data &outds(rename=(&keyvar=hashkey) keep=&keyvar);
length &prevkeyvar &keyvar $32;
retain &prevkeyvar "%sysfunc(md5(%str(&salt)),$hex32.)";
set &libds end=&lastvar;
/* hash should include previous row */
&keyvar=%mp_md5(
cvars=%mf_getvarlist(&libds,typefilter=C) &prevkeyvar,
nvars=%mf_getvarlist(&libds,typefilter=N)
);
&prevkeyvar=&keyvar;
if &lastvar then output;
run;
%end;
%mend mp_hashdataset;
/**
@file
@brief Performs a wrapped \%include
@details This macro wrapper is necessary if you need your included code to
@@ -19224,6 +19228,7 @@ run;
data _null_;
set &tempds;
if not (upcase(name) =:"DATA"); /* ignore temp datasets */
if not (upcase(name)=:"_DATA_");
i+1;
call symputx(cats('wt',i),name,'l');
call symputx('wtcnt',i,'l');

View File

@@ -1,8 +1,8 @@
/**
@file
@brief Returns a unique hash for a dataset
@details Ignores metadata attributes, used only to hash values. Compared
datasets must be in the same order.
@details Ignores metadata attributes, used only to hash values. If used to
compare datasets, they must have their columns and rows in the same order.
%mp_hashdataset(sashelp.class,outds=myhash)
@@ -17,7 +17,10 @@
@li mf_getattrn.sas
@li mf_getuniquename.sas
@li mf_getvarlist.sas
@li mf_getvartype.sas
@li mp_md5.sas
<h4> Related Files </h4>
@li mp_hashdataset.test.sas
@param [in] libds dataset to hash
@param [in] salt= Provide a salt (could be, for instance, the dataset name)
@@ -35,48 +38,48 @@
%macro mp_hashdataset(
libds,
outds=,
outds=work._data_,
salt=,
iftrue=%str(1=1)
)/*/STORE SOURCE*/;
%if not(%eval(%unquote(&iftrue))) %then %return;
%local keyvar /* roll up the md5 */
prevkeyvar /* retain prev record md5 */
lastvar /* last var in input ds */
cvars nvars;
%if %mf_getattrn(&libds,NLOBS)=0 %then %do;
%put %str(WARN)ING: Dataset &libds is empty, or is not a dataset;
%end;
%else %if %mf_getattrn(&libds,NLOBS)<0 %then %do;
%put %str(ERR)OR: Dataset &libds is not a dataset;
%end;
%else %do;
%local keyvar /* roll up the md5 */
prevkeyvar /* retain prev record md5 */
lastvar /* last var in input ds */
varlist var i;
/* avoid naming conflict for hash key vars */
%let keyvar=%mf_getuniquename();
%let prevkeyvar=%mf_getuniquename();
%let lastvar=%mf_getuniquename();
%let varlist=%mf_getvarlist(&libds);
data &outds(rename=(&keyvar=hashkey) keep=&keyvar);
length &prevkeyvar &keyvar $32;
retain &prevkeyvar "%sysfunc(md5(%str(&salt)),$hex32.)";
set &libds end=&lastvar;
/* hash should include previous row */
&keyvar=put(md5(&prevkeyvar
/* loop every column, hashing every individual value */
%do i=1 %to %sysfunc(countw(&varlist));
%let var=%scan(&varlist,&i,%str( ));
%if %mf_getvartype(&libds,&var)=C %then %do;
!!put(md5(trim(&var)),$hex32.)
%end;
%else %do;
!!put(md5(trim(put(&var*1,binary64.))),$hex32.)
%end;
%end;
),$hex32.);
&prevkeyvar=&keyvar;
if &lastvar then output;
run;
%end;
%mend mp_hashdataset;
%if not(%eval(%unquote(&iftrue))) %then %return;
/* avoid naming conflict for hash key vars */
%let keyvar=%mf_getuniquename();
%let prevkeyvar=%mf_getuniquename();
%let lastvar=%mf_getuniquename();
%if %mf_getattrn(&libds,NLOBS)=0 %then %do;
data &outds;
length hashkey $32;
retain hashkey "%sysfunc(md5(%str(&salt)),$hex32.)";
output;
stop;
run;
%put &sysmacroname: Dataset &libds is empty, or is not a dataset;
%put &sysmacroname: hashkey of &outds is based on salt (&salt) only;
%end;
%else %if %mf_getattrn(&libds,NLOBS)<0 %then %do;
%put %str(ERR)OR: Dataset &libds is not a dataset;
%end;
%else %do;
data &outds(rename=(&keyvar=hashkey) keep=&keyvar);
length &prevkeyvar &keyvar $32;
retain &prevkeyvar "%sysfunc(md5(%str(&salt)),$hex32.)";
set &libds end=&lastvar;
/* hash should include previous row */
&keyvar=%mp_md5(
cvars=%mf_getvarlist(&libds,typefilter=C) &prevkeyvar,
nvars=%mf_getvarlist(&libds,typefilter=N)
);
&prevkeyvar=&keyvar;
if &lastvar then output;
run;
%end;
%mend mp_hashdataset;

View File

@@ -71,7 +71,7 @@
},
{
"name": "server",
"serverUrl": "https://sas.analytium.co.uk:5000",
"serverUrl": " ",
"serverType": "SASJS",
"httpsAgentOptions": {
"allowInsecureRequests": false

View File

@@ -114,6 +114,7 @@
data _null_;
set &tempds;
if not (upcase(name) =:"DATA"); /* ignore temp datasets */
if not (upcase(name)=:"_DATA_");
i+1;
call symputx(cats('wt',i),name,'l');
call symputx('wtcnt',i,'l');

View File

@@ -0,0 +1,59 @@
/**
@file
@brief Testing mp_hashdataset.sas macro
<h4> SAS Macros </h4>
@li mp_hashdataset.sas
@li mp_assert.sas
**/
/* test 1 - regular DS */
data work.test;
set sashelp.vextfl;
missval=.;
misscval='';
run;
%mp_assertscope(SNAPSHOT)
%mp_hashdataset(test)
%mp_assertscope(COMPARE)
%mp_assert(
iftrue=(&syscc=0),
desc=Regular test works,
outds=work.test_results
)
%mp_hashdataset(test,outds=work.test2)
%mp_assert(
iftrue=(&syscc=0),
desc=hash with output runs without errors,
outds=work.test_results
)
%mp_assert(
iftrue=(%mf_nobs(work.test2)=1),
desc=output has 1 row,
outds=work.test_results
)
data work.test3a;
set work.test;
stop;
run;
%mp_hashdataset(test3a,outds=work.test3b)
%mp_assert(
iftrue=(&syscc=0),
desc=hash with zero-row input runs without errors,
outds=work.test_results
)
%mp_assert(
iftrue=(%mf_nobs(work.test3b)=1),
desc=test 3 output has 1 row,
outds=work.test_results
)