mirror of
https://github.com/sasjs/core.git
synced 2025-12-31 06:30:05 +00:00
feat: new mp_hashdirectory() macro and associated test. Closes #312
This commit is contained in:
180
all.sas
180
all.sas
@@ -4534,6 +4534,9 @@ run;
|
|||||||
@param [in] maxdepth= (0) Set to a positive integer to indicate the level of
|
@param [in] maxdepth= (0) Set to a positive integer to indicate the level of
|
||||||
subdirectory scan recursion - eg 3, to go `./3/levels/deep`. For unlimited
|
subdirectory scan recursion - eg 3, to go `./3/levels/deep`. For unlimited
|
||||||
recursion, set to MAX.
|
recursion, set to MAX.
|
||||||
|
@param [in] showparent= (NO) By default, the initial parent directory is not
|
||||||
|
part of the results. Set to YES to include it. For this record only,
|
||||||
|
directory=filepath.
|
||||||
@param [out] outds= (work.mp_dirlist) The output dataset to create
|
@param [out] outds= (work.mp_dirlist) The output dataset to create
|
||||||
@param [out] getattrs= (NO) If getattrs=YES then the doptname / foptname
|
@param [out] getattrs= (NO) If getattrs=YES then the doptname / foptname
|
||||||
functions are used to scan all properties - any characters that are not
|
functions are used to scan all properties - any characters that are not
|
||||||
@@ -4570,6 +4573,7 @@ run;
|
|||||||
, fref=0
|
, fref=0
|
||||||
, outds=work.mp_dirlist
|
, outds=work.mp_dirlist
|
||||||
, getattrs=NO
|
, getattrs=NO
|
||||||
|
, showparent=NO
|
||||||
, maxdepth=0
|
, maxdepth=0
|
||||||
, level=0 /* The level of recursion to perform. For internal use only. */
|
, level=0 /* The level of recursion to perform. For internal use only. */
|
||||||
)/*/STORE SOURCE*/;
|
)/*/STORE SOURCE*/;
|
||||||
@@ -4652,6 +4656,15 @@ data &out_ds(compress=no
|
|||||||
output;
|
output;
|
||||||
end;
|
end;
|
||||||
rc = dclose(did);
|
rc = dclose(did);
|
||||||
|
%if &showparent=YES and &level=0 %then %do;
|
||||||
|
filepath=directory;
|
||||||
|
file_or_folder='folder';
|
||||||
|
ext='';
|
||||||
|
filename=scan(directory,-1,'/\');
|
||||||
|
msg='';
|
||||||
|
level=&level;
|
||||||
|
output;
|
||||||
|
%end;
|
||||||
stop;
|
stop;
|
||||||
run;
|
run;
|
||||||
|
|
||||||
@@ -4739,6 +4752,9 @@ run;
|
|||||||
data _null_;
|
data _null_;
|
||||||
set &out_ds;
|
set &out_ds;
|
||||||
where file_or_folder='folder';
|
where file_or_folder='folder';
|
||||||
|
%if &showparent=YES and &level=0 %then %do;
|
||||||
|
if filepath ne directory;
|
||||||
|
%end;
|
||||||
length code $10000;
|
length code $10000;
|
||||||
code=cats('%nrstr(%mp_dirlist(path=',filepath,",outds=&outds"
|
code=cats('%nrstr(%mp_dirlist(path=',filepath,",outds=&outds"
|
||||||
,",getattrs=&getattrs,level=%eval(&level+1),maxdepth=&maxdepth))");
|
,",getattrs=&getattrs,level=%eval(&level+1),maxdepth=&maxdepth))");
|
||||||
@@ -5754,7 +5770,7 @@ data _null_;
|
|||||||
run;
|
run;
|
||||||
|
|
||||||
%if %upcase(&showlog)=YES %then %do;
|
%if %upcase(&showlog)=YES %then %do;
|
||||||
options ps=max;
|
options ps=max lrecl=max;
|
||||||
data _null_;
|
data _null_;
|
||||||
infile &outref;
|
infile &outref;
|
||||||
input;
|
input;
|
||||||
@@ -5762,7 +5778,8 @@ run;
|
|||||||
run;
|
run;
|
||||||
%end;
|
%end;
|
||||||
|
|
||||||
%mend mp_ds2md;/**
|
%mend mp_ds2md;
|
||||||
|
/**
|
||||||
@file
|
@file
|
||||||
@brief Create a smaller version of a dataset, without data loss
|
@brief Create a smaller version of a dataset, without data loss
|
||||||
@details This macro will scan the input dataset and create a new one, that
|
@details This macro will scan the input dataset and create a new one, that
|
||||||
@@ -8553,7 +8570,7 @@ run;
|
|||||||
put hashkey=;
|
put hashkey=;
|
||||||
run;
|
run;
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
<h4> SAS Macros </h4>
|
<h4> SAS Macros </h4>
|
||||||
@li mf_getattrn.sas
|
@li mf_getattrn.sas
|
||||||
@@ -8563,11 +8580,12 @@ run;
|
|||||||
|
|
||||||
<h4> Related Files </h4>
|
<h4> Related Files </h4>
|
||||||
@li mp_hashdataset.test.sas
|
@li mp_hashdataset.test.sas
|
||||||
|
@li mp_hashdirectory.sas
|
||||||
|
|
||||||
@param [in] libds dataset to hash
|
@param [in] libds dataset to hash
|
||||||
@param [in] salt= Provide a salt (could be, for instance, the dataset name)
|
@param [in] salt= Provide a salt (could be, for instance, the dataset name)
|
||||||
@param [in] iftrue= A condition under which the macro should be executed.
|
@param [in] iftrue= (1=1) A condition under which the macro should be executed
|
||||||
@param [out] outds= (work.mf_hashdataset) The output dataset to create. This
|
@param [out] outds= (work._data_) The output dataset to create. This
|
||||||
will contain one column (hashkey) with one observation (a $hex32.
|
will contain one column (hashkey) with one observation (a $hex32.
|
||||||
representation of the input hash)
|
representation of the input hash)
|
||||||
|hashkey:$32.|
|
|hashkey:$32.|
|
||||||
@@ -8630,6 +8648,158 @@ run;
|
|||||||
run;
|
run;
|
||||||
%end;
|
%end;
|
||||||
%mend mp_hashdataset;
|
%mend mp_hashdataset;
|
||||||
|
/**
|
||||||
|
@file
|
||||||
|
@brief Returns a unique hash for each file in a directory
|
||||||
|
@details Hashes each file in each directory, and then hashes the hashes to
|
||||||
|
create a hash for each directory also.
|
||||||
|
|
||||||
|
This makes use of the new `hashing_file()` and `hashing` functions, available
|
||||||
|
since 9.4m6. Interestingly, these can even be used in pure macro, eg:
|
||||||
|
|
||||||
|
%put %sysfunc(hashing_file(md5,/path/to/file.blob,0));
|
||||||
|
|
||||||
|
An example of this logic being applied in JavaScript is available in the
|
||||||
|
@sasjs/utils library.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
|
||||||
|
%let fpath=/some/directory;
|
||||||
|
|
||||||
|
%mp_hashdirectory(&fpath,outds=myhash,maxdepth=2)
|
||||||
|
|
||||||
|
data _null_;
|
||||||
|
set work.myhash;
|
||||||
|
put (_all_)(=);
|
||||||
|
run;
|
||||||
|
|
||||||
|
Whilst files are hashed in their entirety, the logic for creating a folder
|
||||||
|
hash is as follows:
|
||||||
|
|
||||||
|
@li Sort the files by filename (case sensitive, uppercase then lower)
|
||||||
|
@li Take the first 100 hashes, concatenate and hash
|
||||||
|
@li Concatenate this hash with another 100 hashes and hash again
|
||||||
|
@li Continue until the end of the folder. This is the folder hash
|
||||||
|
@li If a folder contains other folders, start from the bottom of the tree -
|
||||||
|
the folder hashes cascade upwards so you know immediately if there is a
|
||||||
|
change in a sub/sub directory
|
||||||
|
@li If the folder has no content (empty) then it is ignored. No hash created.
|
||||||
|
|
||||||
|
<h4> SAS Macros </h4>
|
||||||
|
@li mp_dirlist.sas
|
||||||
|
|
||||||
|
<h4> Related Files </h4>
|
||||||
|
@li mp_hashdataset.sas
|
||||||
|
@li mp_hashdirectory.test.sas
|
||||||
|
@li mp_md5.sas
|
||||||
|
|
||||||
|
@param [in] inloc Full path of the directory to be hashed (unquoted)
|
||||||
|
@param [in] iftrue= (1=1) A condition under which the macro should be executed
|
||||||
|
@param [in] maxdepth= (0) Set to a positive integer to indicate the level of
|
||||||
|
subdirectory scan recursion - eg 3, to go `./3/levels/deep`. For unlimited
|
||||||
|
recursion, set to MAX.
|
||||||
|
@param [in] method= (MD5) the hashing method to use. Available options:
|
||||||
|
@li MD5
|
||||||
|
@li SHA1
|
||||||
|
@li SHA256
|
||||||
|
@li SHA384
|
||||||
|
@li SHA512
|
||||||
|
@li CRC32
|
||||||
|
@param [out] outds= (work.mp_hashdirectory) The output dataset. Contains:
|
||||||
|
@li directory - the parent folder
|
||||||
|
@li file_hash - the hash output
|
||||||
|
@li hash_duration - how long the hash took (first hash always takes longer)
|
||||||
|
@li file_path - /full/path/to/each/file.ext
|
||||||
|
@li file_or_folder - contains either "file" or "folder"
|
||||||
|
@li level - the depth of the directory (top level is 0)
|
||||||
|
|
||||||
|
@version 9.4m6
|
||||||
|
@author Allan Bowe
|
||||||
|
**/
|
||||||
|
|
||||||
|
%macro mp_hashdirectory(inloc,
  outds=work.mp_hashdirectory,
  method=MD5,
  maxdepth=0,
  iftrue=%str(1=1)
)/*/STORE SOURCE*/;

/*
  maxlevel is populated by call symputx() further down.  It is declared local
  (and defaulted) here so that the %do loop is still valid when the directory
  listing is empty and no value gets written.
*/
%local curlevel tempds maxlevel hashlen;
%let maxlevel=0;

%if not(%eval(%unquote(&iftrue))) %then %return;

/*
  Width of the hex digest for the chosen method.  The default ($32) is kept
  for MD5 and anything unrecognised, the longer SHA digests get the space
  they need so that they are not truncated.
*/
%if %upcase(&method)=SHA1 %then %let hashlen=40;
%else %if %upcase(&method)=SHA256 %then %let hashlen=64;
%else %if %upcase(&method)=SHA384 %then %let hashlen=96;
%else %if %upcase(&method)=SHA512 %then %let hashlen=128;
%else %let hashlen=32;

/* get the directory listing (including a record for the parent itself) */
%mp_dirlist(path=&inloc, outds=&outds, maxdepth=&maxdepth, showparent=YES)

/* create the file hashes - folder records are left blank at this stage */
data &outds;
  set &outds (rename=(filepath=file_path));
  length FILE_HASH $&hashlen HASH_DURATION 8;
  keep directory file_hash hash_duration file_path file_or_folder level;

  ts=datetime();
  if file_or_folder='file' then do;
    file_hash=hashing_file("&method",cats(file_path),0);
  end;
  hash_duration=datetime()-ts;
run;

/* deepest level first, so folder hashes can cascade upwards */
proc sort data=&outds ;
  by descending level directory file_path;
run;

/* first record after the sort carries the deepest level */
data _null_;
  set &outds;
  call symputx('maxlevel',level,'l');
  stop;
run;

/* now hash the hashes to populate folder hashes, starting from the bottom */
%do curlevel=&maxlevel %to 0 %by -1;
  data work._data_ (keep=directory file_hash);
    set &outds;
    where level=&curlevel;
    by descending level directory file_path;
    length str $32767 tmp_hash $&hashlen;
    retain str tmp_hash ;
    /* reset vars when starting a new directory */
    if first.directory then do;
      str='';
      tmp_hash='';
      i=0;
    end;
    /* hash each chunk of 100 file hashes, chained onto the previous result */
    i+1;
    str=cats(str,file_hash);
    if mod(i,100)=0 or last.directory then do;
      tmp_hash=hashing("&method",cats(tmp_hash,str));
      str='';
    end;
    /* output the hash at directory level */
    if last.directory then do;
      file_hash=tmp_hash;
      output;
    end;
    if last.level then stop;
  run;
  /* work._data_ generates a unique name - capture it for the join */
  %let tempds=&syslast;
  /* join the hash back into the main table */
  proc sql undo_policy=none;
  create table &outds as
    select a.directory
      ,coalesce(b.file_hash,a.file_hash) as file_hash
      ,a.hash_duration
      ,a.file_path
      ,a.file_or_folder
      ,a.level
    from &outds a
    left join &tempds b
    on a.file_path=b.directory
    order by level desc, directory, file_path;
  drop table &tempds;
  quit;
%end;

%mend mp_hashdirectory;
|
||||||
/**
|
/**
|
||||||
@file
|
@file
|
||||||
@brief Performs a wrapped \%include
|
@brief Performs a wrapped \%include
|
||||||
|
|||||||
@@ -27,6 +27,9 @@
|
|||||||
@param [in] maxdepth= (0) Set to a positive integer to indicate the level of
|
@param [in] maxdepth= (0) Set to a positive integer to indicate the level of
|
||||||
subdirectory scan recursion - eg 3, to go `./3/levels/deep`. For unlimited
|
subdirectory scan recursion - eg 3, to go `./3/levels/deep`. For unlimited
|
||||||
recursion, set to MAX.
|
recursion, set to MAX.
|
||||||
|
@param [in] showparent= (NO) By default, the initial parent directory is not
|
||||||
|
part of the results. Set to YES to include it. For this record only,
|
||||||
|
directory=filepath.
|
||||||
@param [out] outds= (work.mp_dirlist) The output dataset to create
|
@param [out] outds= (work.mp_dirlist) The output dataset to create
|
||||||
@param [out] getattrs= (NO) If getattrs=YES then the doptname / foptname
|
@param [out] getattrs= (NO) If getattrs=YES then the doptname / foptname
|
||||||
functions are used to scan all properties - any characters that are not
|
functions are used to scan all properties - any characters that are not
|
||||||
@@ -63,6 +66,7 @@
|
|||||||
, fref=0
|
, fref=0
|
||||||
, outds=work.mp_dirlist
|
, outds=work.mp_dirlist
|
||||||
, getattrs=NO
|
, getattrs=NO
|
||||||
|
, showparent=NO
|
||||||
, maxdepth=0
|
, maxdepth=0
|
||||||
, level=0 /* The level of recursion to perform. For internal use only. */
|
, level=0 /* The level of recursion to perform. For internal use only. */
|
||||||
)/*/STORE SOURCE*/;
|
)/*/STORE SOURCE*/;
|
||||||
@@ -145,6 +149,15 @@ data &out_ds(compress=no
|
|||||||
output;
|
output;
|
||||||
end;
|
end;
|
||||||
rc = dclose(did);
|
rc = dclose(did);
|
||||||
|
%if &showparent=YES and &level=0 %then %do;
|
||||||
|
filepath=directory;
|
||||||
|
file_or_folder='folder';
|
||||||
|
ext='';
|
||||||
|
filename=scan(directory,-1,'/\');
|
||||||
|
msg='';
|
||||||
|
level=&level;
|
||||||
|
output;
|
||||||
|
%end;
|
||||||
stop;
|
stop;
|
||||||
run;
|
run;
|
||||||
|
|
||||||
@@ -232,6 +245,9 @@ run;
|
|||||||
data _null_;
|
data _null_;
|
||||||
set &out_ds;
|
set &out_ds;
|
||||||
where file_or_folder='folder';
|
where file_or_folder='folder';
|
||||||
|
%if &showparent=YES and &level=0 %then %do;
|
||||||
|
if filepath ne directory;
|
||||||
|
%end;
|
||||||
length code $10000;
|
length code $10000;
|
||||||
code=cats('%nrstr(%mp_dirlist(path=',filepath,",outds=&outds"
|
code=cats('%nrstr(%mp_dirlist(path=',filepath,",outds=&outds"
|
||||||
,",getattrs=&getattrs,level=%eval(&level+1),maxdepth=&maxdepth))");
|
,",getattrs=&getattrs,level=%eval(&level+1),maxdepth=&maxdepth))");
|
||||||
|
|||||||
@@ -92,7 +92,7 @@ data _null_;
|
|||||||
run;
|
run;
|
||||||
|
|
||||||
%if %upcase(&showlog)=YES %then %do;
|
%if %upcase(&showlog)=YES %then %do;
|
||||||
options ps=max;
|
options ps=max lrecl=max;
|
||||||
data _null_;
|
data _null_;
|
||||||
infile &outref;
|
infile &outref;
|
||||||
input;
|
input;
|
||||||
@@ -100,4 +100,4 @@ run;
|
|||||||
run;
|
run;
|
||||||
%end;
|
%end;
|
||||||
|
|
||||||
%mend mp_ds2md;
|
%mend mp_ds2md;
|
||||||
|
|||||||
@@ -11,7 +11,7 @@
|
|||||||
put hashkey=;
|
put hashkey=;
|
||||||
run;
|
run;
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
<h4> SAS Macros </h4>
|
<h4> SAS Macros </h4>
|
||||||
@li mf_getattrn.sas
|
@li mf_getattrn.sas
|
||||||
@@ -21,11 +21,12 @@
|
|||||||
|
|
||||||
<h4> Related Files </h4>
|
<h4> Related Files </h4>
|
||||||
@li mp_hashdataset.test.sas
|
@li mp_hashdataset.test.sas
|
||||||
|
@li mp_hashdirectory.sas
|
||||||
|
|
||||||
@param [in] libds dataset to hash
|
@param [in] libds dataset to hash
|
||||||
@param [in] salt= Provide a salt (could be, for instance, the dataset name)
|
@param [in] salt= Provide a salt (could be, for instance, the dataset name)
|
||||||
@param [in] iftrue= A condition under which the macro should be executed.
|
@param [in] iftrue= (1=1) A condition under which the macro should be executed
|
||||||
@param [out] outds= (work.mf_hashdataset) The output dataset to create. This
|
@param [out] outds= (work._data_) The output dataset to create. This
|
||||||
will contain one column (hashkey) with one observation (a $hex32.
|
will contain one column (hashkey) with one observation (a $hex32.
|
||||||
representation of the input hash)
|
representation of the input hash)
|
||||||
|hashkey:$32.|
|
|hashkey:$32.|
|
||||||
|
|||||||
152
base/mp_hashdirectory.sas
Normal file
152
base/mp_hashdirectory.sas
Normal file
@@ -0,0 +1,152 @@
|
|||||||
|
/**
|
||||||
|
@file
|
||||||
|
@brief Returns a unique hash for each file in a directory
|
||||||
|
@details Hashes each file in each directory, and then hashes the hashes to
|
||||||
|
create a hash for each directory also.
|
||||||
|
|
||||||
|
This makes use of the new `hashing_file()` and `hashing` functions, available
|
||||||
|
since 9.4m6. Interestingly, these can even be used in pure macro, eg:
|
||||||
|
|
||||||
|
%put %sysfunc(hashing_file(md5,/path/to/file.blob,0));
|
||||||
|
|
||||||
|
An example of this logic being applied in JavaScript is available in the
|
||||||
|
@sasjs/utils library.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
|
||||||
|
%let fpath=/some/directory;
|
||||||
|
|
||||||
|
%mp_hashdirectory(&fpath,outds=myhash,maxdepth=2)
|
||||||
|
|
||||||
|
data _null_;
|
||||||
|
set work.myhash;
|
||||||
|
put (_all_)(=);
|
||||||
|
run;
|
||||||
|
|
||||||
|
Whilst files are hashed in their entirety, the logic for creating a folder
|
||||||
|
hash is as follows:
|
||||||
|
|
||||||
|
@li Sort the files by filename (case sensitive, uppercase then lower)
|
||||||
|
@li Take the first 100 hashes, concatenate and hash
|
||||||
|
@li Concatenate this hash with another 100 hashes and hash again
|
||||||
|
@li Continue until the end of the folder. This is the folder hash
|
||||||
|
@li If a folder contains other folders, start from the bottom of the tree -
|
||||||
|
the folder hashes cascade upwards so you know immediately if there is a
|
||||||
|
change in a sub/sub directory
|
||||||
|
@li If the folder has no content (empty) then it is ignored. No hash created.
|
||||||
|
|
||||||
|
<h4> SAS Macros </h4>
|
||||||
|
@li mp_dirlist.sas
|
||||||
|
|
||||||
|
<h4> Related Files </h4>
|
||||||
|
@li mp_hashdataset.sas
|
||||||
|
@li mp_hashdirectory.test.sas
|
||||||
|
@li mp_md5.sas
|
||||||
|
|
||||||
|
@param [in] inloc Full path of the directory to be hashed (unquoted)
|
||||||
|
@param [in] iftrue= (1=1) A condition under which the macro should be executed
|
||||||
|
@param [in] maxdepth= (0) Set to a positive integer to indicate the level of
|
||||||
|
subdirectory scan recursion - eg 3, to go `./3/levels/deep`. For unlimited
|
||||||
|
recursion, set to MAX.
|
||||||
|
@param [in] method= (MD5) the hashing method to use. Available options:
|
||||||
|
@li MD5
|
||||||
|
@li SHA1
|
||||||
|
@li SHA256
|
||||||
|
@li SHA384
|
||||||
|
@li SHA512
|
||||||
|
@li CRC32
|
||||||
|
@param [out] outds= (work.mp_hashdirectory) The output dataset. Contains:
|
||||||
|
@li directory - the parent folder
|
||||||
|
@li file_hash - the hash output
|
||||||
|
@li hash_duration - how long the hash took (first hash always takes longer)
|
||||||
|
@li file_path - /full/path/to/each/file.ext
|
||||||
|
@li file_or_folder - contains either "file" or "folder"
|
||||||
|
@li level - the depth of the directory (top level is 0)
|
||||||
|
|
||||||
|
@version 9.4m6
|
||||||
|
@author Allan Bowe
|
||||||
|
**/
|
||||||
|
|
||||||
|
%macro mp_hashdirectory(inloc,
  outds=work.mp_hashdirectory,
  method=MD5,
  maxdepth=0,
  iftrue=%str(1=1)
)/*/STORE SOURCE*/;

/*
  maxlevel is populated by call symputx() further down.  It is declared local
  (and defaulted) here so that the %do loop is still valid when the directory
  listing is empty and no value gets written.
*/
%local curlevel tempds maxlevel hashlen;
%let maxlevel=0;

%if not(%eval(%unquote(&iftrue))) %then %return;

/*
  Width of the hex digest for the chosen method.  The default ($32) is kept
  for MD5 and anything unrecognised, the longer SHA digests get the space
  they need so that they are not truncated.
*/
%if %upcase(&method)=SHA1 %then %let hashlen=40;
%else %if %upcase(&method)=SHA256 %then %let hashlen=64;
%else %if %upcase(&method)=SHA384 %then %let hashlen=96;
%else %if %upcase(&method)=SHA512 %then %let hashlen=128;
%else %let hashlen=32;

/* get the directory listing (including a record for the parent itself) */
%mp_dirlist(path=&inloc, outds=&outds, maxdepth=&maxdepth, showparent=YES)

/* create the file hashes - folder records are left blank at this stage */
data &outds;
  set &outds (rename=(filepath=file_path));
  length FILE_HASH $&hashlen HASH_DURATION 8;
  keep directory file_hash hash_duration file_path file_or_folder level;

  ts=datetime();
  if file_or_folder='file' then do;
    file_hash=hashing_file("&method",cats(file_path),0);
  end;
  hash_duration=datetime()-ts;
run;

/* deepest level first, so folder hashes can cascade upwards */
proc sort data=&outds ;
  by descending level directory file_path;
run;

/* first record after the sort carries the deepest level */
data _null_;
  set &outds;
  call symputx('maxlevel',level,'l');
  stop;
run;

/* now hash the hashes to populate folder hashes, starting from the bottom */
%do curlevel=&maxlevel %to 0 %by -1;
  data work._data_ (keep=directory file_hash);
    set &outds;
    where level=&curlevel;
    by descending level directory file_path;
    length str $32767 tmp_hash $&hashlen;
    retain str tmp_hash ;
    /* reset vars when starting a new directory */
    if first.directory then do;
      str='';
      tmp_hash='';
      i=0;
    end;
    /* hash each chunk of 100 file hashes, chained onto the previous result */
    i+1;
    str=cats(str,file_hash);
    if mod(i,100)=0 or last.directory then do;
      tmp_hash=hashing("&method",cats(tmp_hash,str));
      str='';
    end;
    /* output the hash at directory level */
    if last.directory then do;
      file_hash=tmp_hash;
      output;
    end;
    if last.level then stop;
  run;
  /* work._data_ generates a unique name - capture it for the join */
  %let tempds=&syslast;
  /* join the hash back into the main table */
  proc sql undo_policy=none;
  create table &outds as
    select a.directory
      ,coalesce(b.file_hash,a.file_hash) as file_hash
      ,a.hash_duration
      ,a.file_path
      ,a.file_or_folder
      ,a.level
    from &outds a
    left join &tempds b
    on a.file_path=b.directory
    order by level desc, directory, file_path;
  drop table &tempds;
  quit;
%end;

%mend mp_hashdirectory;
|
||||||
133
tests/base/mp_hashdirectory.test.sas
Normal file
133
tests/base/mp_hashdirectory.test.sas
Normal file
@@ -0,0 +1,133 @@
|
|||||||
|
/**
|
||||||
|
@file
|
||||||
|
@brief Testing mp_hashdirectory.sas macro
|
||||||
|
|
||||||
|
|
||||||
|
<h4> SAS Macros </h4>
|
||||||
|
@li mf_mkdir.sas
|
||||||
|
@li mf_nobs.sas
|
||||||
|
@li mp_assert.sas
|
||||||
|
@li mp_assertscope.sas
|
||||||
|
@li mp_hashdirectory.sas
|
||||||
|
|
||||||
|
**/
|
||||||
|
|
||||||
|
/*
  Build the folder tree that will be hashed, underneath the WORK location:
    testdir / sub1 / subsub
    testdir / sub2
*/
%let fpath=%sysfunc(pathname(work))/testdir;

%mf_mkdir(&fpath)
%mf_mkdir(&fpath/sub1)
%mf_mkdir(&fpath/sub2)
%mf_mkdir(&fpath/sub1/subsub)
|
||||||
|
|
||||||
|
/* note - the path in the file means the hash is different in each run */
|
||||||
|
%macro makefile(path,name);
  /* write a four line text file (LF terminated) to &path/&name */
  data _null_;
    file "&path/&name" termstr=lf;
    /* a single PUT, with / moving the pointer on to the next output line */
    put "This file is located at:"
      / "&path"
      / "and it is called:"
      / "&name";
  run;
%mend makefile;
|
||||||
|
|
||||||
|
%macro spawner(path);
  /*
    Creates file1.txt to file5.txt inside &path.  The loop counter is declared
    local so that an X variable in an outer scope is never overwritten.
  */
  %local x;
  %do x=1 %to 5;
    %makefile(&path,file&x..txt)
  %end;
%mend spawner;
|
||||||
|
|
||||||
|
/* populate three of the four folders - sub2 is deliberately left empty */
%spawner(&fpath)
%spawner(&fpath/sub1)
%spawner(&fpath/sub1/subsub)


/* run the macro, checking that it does not leak any macro variables */
%mp_assertscope(SNAPSHOT)
%mp_hashdirectory(&fpath,outds=work.hashes,maxdepth=MAX)
%mp_assertscope(COMPARE)

%mp_assert(
  iftrue=(&syscc=0),
  desc=No errors,
  outds=work.test_results
)

%mp_assert(
  iftrue=(%mf_nobs(work.hashes)=19),
  desc=record created for each entry,
  outds=work.test_results
)

proc sql;
select count(*) into: misscheck
  from work.hashes
  where file_hash is missing;
quit;

%mp_assert(
  iftrue=(&misscheck=1),
  desc=Only one missing hash - the empty directory,
  outds=work.test_results
)

/* the parent folder is the record where directory and file_path are equal */
data _null_;
  set work.hashes;
  if directory=file_path then call symputx('tophash',file_hash);
run;

%mp_assert(
  iftrue=(%length(&tophash)=32),
  desc=ensure valid top level hash created,
  outds=work.test_results
)

/* now change a file and re-hash */
data _null_;
  file "&fpath/sub1/subsub/file1.txt" termstr=lf;
  put "This file has changed!";
run;

%mp_hashdirectory(&fpath,outds=work.hashes2,maxdepth=MAX)

data _null_;
  set work.hashes2;
  if directory=file_path then call symputx('tophash2',file_hash);
run;

%mp_assert(
  iftrue=(&tophash ne &tophash2),
  desc=ensure the changing of the hash results in a new value,
  outds=work.test_results
)

/* now change it back and see if it matches */
data _null_;
  file "&fpath/sub1/subsub/file1.txt" termstr=lf;
  put "This file is located at:";
  put "&fpath/sub1/subsub";
  put "and it is called:";
  put "file1.txt";
run;

%mp_hashdirectory(&fpath,outds=work.hashes3,maxdepth=MAX)

data _null_;
  set work.hashes3;
  if directory=file_path then call symputx('tophash3',file_hash);
run;

%mp_assert(
  iftrue=(&tophash=&tophash3),
  desc=ensure the same files result in the same hash,
  outds=work.test_results
)

/* dump contents for debugging */
data _null_;
  set work.hashes;
  put file_hash file_path;
run;
data _null_;
  set work.hashes2;
  put file_hash file_path;
run;
|
||||||
Reference in New Issue
Block a user