1
0
mirror of https://github.com/sasjs/core.git synced 2026-01-09 10:20:06 +00:00

fix: updating mp_hashdataset to cope with STRICT mode. Adding test and improving sasjs/server compatibility.

This commit is contained in:
munja
2022-03-29 13:22:24 +01:00
parent 40d694eec8
commit 72d6b446c3
5 changed files with 155 additions and 87 deletions

59
all.sas
View File

@@ -7954,8 +7954,8 @@ run;
%mend mp_guesspk;/** %mend mp_guesspk;/**
@file @file
@brief Returns a unique hash for a dataset @brief Returns a unique hash for a dataset
@details Ignores metadata attributes, used only to hash values. Compared @details Ignores metadata attributes, used only to hash values. If used to
datasets must be in the same order. compare datasets, they must have their columns and rows in the same order.
%mp_hashdataset(sashelp.class,outds=myhash) %mp_hashdataset(sashelp.class,outds=myhash)
@@ -7970,7 +7970,10 @@ run;
@li mf_getattrn.sas @li mf_getattrn.sas
@li mf_getuniquename.sas @li mf_getuniquename.sas
@li mf_getvarlist.sas @li mf_getvarlist.sas
@li mf_getvartype.sas @li mp_md5.sas
<h4> Related Files </h4>
@li mp_hashdataset.test.sas
@param [in] libds dataset to hash @param [in] libds dataset to hash
@param [in] salt= Provide a salt (could be, for instance, the dataset name) @param [in] salt= Provide a salt (could be, for instance, the dataset name)
@@ -7988,51 +7991,52 @@ run;
%macro mp_hashdataset( %macro mp_hashdataset(
libds, libds,
outds=, outds=work._data_,
salt=, salt=,
iftrue=%str(1=1) iftrue=%str(1=1)
)/*/STORE SOURCE*/; )/*/STORE SOURCE*/;
%local keyvar /* roll up the md5 */
prevkeyvar /* retain prev record md5 */
lastvar /* last var in input ds */
cvars nvars;
%if not(%eval(%unquote(&iftrue))) %then %return; %if not(%eval(%unquote(&iftrue))) %then %return;
/* avoid naming conflict for hash key vars */
%let keyvar=%mf_getuniquename();
%let prevkeyvar=%mf_getuniquename();
%let lastvar=%mf_getuniquename();
%if %mf_getattrn(&libds,NLOBS)=0 %then %do; %if %mf_getattrn(&libds,NLOBS)=0 %then %do;
%put %str(WARN)ING: Dataset &libds is empty, or is not a dataset; data &outds;
length hashkey $32;
retain hashkey "%sysfunc(md5(%str(&salt)),$hex32.)";
output;
stop;
run;
%put &sysmacroname: Dataset &libds is empty, or is not a dataset;
%put &sysmacroname: hashkey of &outds is based on salt (&salt) only;
%end; %end;
%else %if %mf_getattrn(&libds,NLOBS)<0 %then %do; %else %if %mf_getattrn(&libds,NLOBS)<0 %then %do;
%put %str(ERR)OR: Dataset &libds is not a dataset; %put %str(ERR)OR: Dataset &libds is not a dataset;
%end; %end;
%else %do; %else %do;
%local keyvar /* roll up the md5 */
prevkeyvar /* retain prev record md5 */
lastvar /* last var in input ds */
varlist var i;
/* avoid naming conflict for hash key vars */
%let keyvar=%mf_getuniquename();
%let prevkeyvar=%mf_getuniquename();
%let lastvar=%mf_getuniquename();
%let varlist=%mf_getvarlist(&libds);
data &outds(rename=(&keyvar=hashkey) keep=&keyvar); data &outds(rename=(&keyvar=hashkey) keep=&keyvar);
length &prevkeyvar &keyvar $32; length &prevkeyvar &keyvar $32;
retain &prevkeyvar "%sysfunc(md5(%str(&salt)),$hex32.)"; retain &prevkeyvar "%sysfunc(md5(%str(&salt)),$hex32.)";
set &libds end=&lastvar; set &libds end=&lastvar;
/* hash should include previous row */ /* hash should include previous row */
&keyvar=put(md5(&prevkeyvar &keyvar=%mp_md5(
/* loop every column, hashing every individual value */ cvars=%mf_getvarlist(&libds,typefilter=C) &prevkeyvar,
%do i=1 %to %sysfunc(countw(&varlist)); nvars=%mf_getvarlist(&libds,typefilter=N)
%let var=%scan(&varlist,&i,%str( )); );
%if %mf_getvartype(&libds,&var)=C %then %do;
!!put(md5(trim(&var)),$hex32.)
%end;
%else %do;
!!put(md5(trim(put(&var*1,binary64.))),$hex32.)
%end;
%end;
),$hex32.);
&prevkeyvar=&keyvar; &prevkeyvar=&keyvar;
if &lastvar then output; if &lastvar then output;
run; run;
%end; %end;
%mend mp_hashdataset;/** %mend mp_hashdataset;
/**
@file @file
@brief Performs a wrapped \%include @brief Performs a wrapped \%include
@details This macro wrapper is necessary if you need your included code to @details This macro wrapper is necessary if you need your included code to
@@ -19224,6 +19228,7 @@ run;
data _null_; data _null_;
set &tempds; set &tempds;
if not (upcase(name) =:"DATA"); /* ignore temp datasets */ if not (upcase(name) =:"DATA"); /* ignore temp datasets */
if not (upcase(name)=:"_DATA_");
i+1; i+1;
call symputx(cats('wt',i),name,'l'); call symputx(cats('wt',i),name,'l');
call symputx('wtcnt',i,'l'); call symputx('wtcnt',i,'l');

View File

@@ -1,8 +1,8 @@
/** /**
@file @file
@brief Returns a unique hash for a dataset @brief Returns a unique hash for a dataset
@details Ignores metadata attributes, used only to hash values. Compared @details Ignores metadata attributes, used only to hash values. If used to
datasets must be in the same order. compare datasets, they must have their columns and rows in the same order.
%mp_hashdataset(sashelp.class,outds=myhash) %mp_hashdataset(sashelp.class,outds=myhash)
@@ -17,7 +17,10 @@
@li mf_getattrn.sas @li mf_getattrn.sas
@li mf_getuniquename.sas @li mf_getuniquename.sas
@li mf_getvarlist.sas @li mf_getvarlist.sas
@li mf_getvartype.sas @li mp_md5.sas
<h4> Related Files </h4>
@li mp_hashdataset.test.sas
@param [in] libds dataset to hash @param [in] libds dataset to hash
@param [in] salt= Provide a salt (could be, for instance, the dataset name) @param [in] salt= Provide a salt (could be, for instance, the dataset name)
@@ -35,46 +38,46 @@
%macro mp_hashdataset( %macro mp_hashdataset(
libds, libds,
outds=, outds=work._data_,
salt=, salt=,
iftrue=%str(1=1) iftrue=%str(1=1)
)/*/STORE SOURCE*/; )/*/STORE SOURCE*/;
%local keyvar /* roll up the md5 */
prevkeyvar /* retain prev record md5 */
lastvar /* last var in input ds */
cvars nvars;
%if not(%eval(%unquote(&iftrue))) %then %return; %if not(%eval(%unquote(&iftrue))) %then %return;
/* avoid naming conflict for hash key vars */
%let keyvar=%mf_getuniquename();
%let prevkeyvar=%mf_getuniquename();
%let lastvar=%mf_getuniquename();
%if %mf_getattrn(&libds,NLOBS)=0 %then %do; %if %mf_getattrn(&libds,NLOBS)=0 %then %do;
%put %str(WARN)ING: Dataset &libds is empty, or is not a dataset; data &outds;
length hashkey $32;
retain hashkey "%sysfunc(md5(%str(&salt)),$hex32.)";
output;
stop;
run;
%put &sysmacroname: Dataset &libds is empty, or is not a dataset;
%put &sysmacroname: hashkey of &outds is based on salt (&salt) only;
%end; %end;
%else %if %mf_getattrn(&libds,NLOBS)<0 %then %do; %else %if %mf_getattrn(&libds,NLOBS)<0 %then %do;
%put %str(ERR)OR: Dataset &libds is not a dataset; %put %str(ERR)OR: Dataset &libds is not a dataset;
%end; %end;
%else %do; %else %do;
%local keyvar /* roll up the md5 */
prevkeyvar /* retain prev record md5 */
lastvar /* last var in input ds */
varlist var i;
/* avoid naming conflict for hash key vars */
%let keyvar=%mf_getuniquename();
%let prevkeyvar=%mf_getuniquename();
%let lastvar=%mf_getuniquename();
%let varlist=%mf_getvarlist(&libds);
data &outds(rename=(&keyvar=hashkey) keep=&keyvar); data &outds(rename=(&keyvar=hashkey) keep=&keyvar);
length &prevkeyvar &keyvar $32; length &prevkeyvar &keyvar $32;
retain &prevkeyvar "%sysfunc(md5(%str(&salt)),$hex32.)"; retain &prevkeyvar "%sysfunc(md5(%str(&salt)),$hex32.)";
set &libds end=&lastvar; set &libds end=&lastvar;
/* hash should include previous row */ /* hash should include previous row */
&keyvar=put(md5(&prevkeyvar &keyvar=%mp_md5(
/* loop every column, hashing every individual value */ cvars=%mf_getvarlist(&libds,typefilter=C) &prevkeyvar,
%do i=1 %to %sysfunc(countw(&varlist)); nvars=%mf_getvarlist(&libds,typefilter=N)
%let var=%scan(&varlist,&i,%str( )); );
%if %mf_getvartype(&libds,&var)=C %then %do;
!!put(md5(trim(&var)),$hex32.)
%end;
%else %do;
!!put(md5(trim(put(&var*1,binary64.))),$hex32.)
%end;
%end;
),$hex32.);
&prevkeyvar=&keyvar; &prevkeyvar=&keyvar;
if &lastvar then output; if &lastvar then output;
run; run;

View File

@@ -71,7 +71,7 @@
}, },
{ {
"name": "server", "name": "server",
"serverUrl": "https://sas.analytium.co.uk:5000", "serverUrl": " ",
"serverType": "SASJS", "serverType": "SASJS",
"httpsAgentOptions": { "httpsAgentOptions": {
"allowInsecureRequests": false "allowInsecureRequests": false

View File

@@ -114,6 +114,7 @@
data _null_; data _null_;
set &tempds; set &tempds;
if not (upcase(name) =:"DATA"); /* ignore temp datasets */ if not (upcase(name) =:"DATA"); /* ignore temp datasets */
if not (upcase(name)=:"_DATA_");
i+1; i+1;
call symputx(cats('wt',i),name,'l'); call symputx(cats('wt',i),name,'l');
call symputx('wtcnt',i,'l'); call symputx('wtcnt',i,'l');

View File

@@ -0,0 +1,59 @@
/**
  @file
  @brief Testing mp_hashdataset.sas macro
  @details Three scenarios are exercised:

    @li test 1 - default output dataset (no outds= supplied)
    @li test 2 - explicit output dataset, which must contain one row
    @li test 3 - zero-row input, which must still produce a one-row output

  Every macro invoked directly by this script is listed below so that each
  one is declared as a dependency of the test.

  <h4> SAS Macros </h4>
  @li mf_nobs.sas
  @li mp_assert.sas
  @li mp_assertscope.sas
  @li mp_hashdataset.sas

**/

/* test 1 - regular DS */
/* the input carries both an all-missing numeric and an all-missing */
/* character column, so both variable types are hashed with missing values */
data work.test;
  set sashelp.vextfl;
  missval=.;
  misscval='';
run;

/* the hash call is wrapped in a SNAPSHOT / COMPARE pair */
/* NOTE(review): presumably this flags macro variables leaked by the call - */
/* confirm against mp_assertscope.sas */
%mp_assertscope(SNAPSHOT)
%mp_hashdataset(test)
%mp_assertscope(COMPARE)

%mp_assert(
  iftrue=(&syscc=0),
  desc=Regular test works,
  outds=work.test_results
)

/* test 2 - explicit output dataset */
%mp_hashdataset(test,outds=work.test2)

%mp_assert(
  iftrue=(&syscc=0),
  desc=hash with output runs without errors,
  outds=work.test_results
)
%mp_assert(
  iftrue=(%mf_nobs(work.test2)=1),
  desc=output has 1 row,
  outds=work.test_results
)

/* test 3 - zero-row input */
/* the stop statement ends the step before any row is written, so */
/* work.test3a keeps the columns of work.test but has no observations */
data work.test3a;
  set work.test;
  stop;
run;

%mp_hashdataset(test3a,outds=work.test3b)

%mp_assert(
  iftrue=(&syscc=0),
  desc=hash with zero-row input runs without errors,
  outds=work.test_results
)
%mp_assert(
  iftrue=(%mf_nobs(work.test3b)=1),
  desc=test 3 output has 1 row,
  outds=work.test_results
)