1
0
mirror of https://github.com/sasjs/core.git synced 2026-01-07 09:30:06 +00:00

fix: enable embedded blanks in mp_cleancsv, closes #289

This commit is contained in:
Allan Bowe
2022-07-21 22:40:43 +00:00
parent 37076eae89
commit a7dc314204
3 changed files with 69 additions and 12 deletions

19
all.sas
View File

@@ -3775,7 +3775,7 @@ run;
%mend mp_chop; %mend mp_chop;
/** /**
@file mp_cleancsv.sas @file
@brief Fixes embedded cr / lf / crlf in CSV @brief Fixes embedded cr / lf / crlf in CSV
@details CSVs will sometimes contain lf or crlf within quotes (eg when @details CSVs will sometimes contain lf or crlf within quotes (eg when
saved by excel). When the termstr is ALSO lf or crlf that can be tricky saved by excel). When the termstr is ALSO lf or crlf that can be tricky
@@ -3783,14 +3783,16 @@ run;
This macro converts any csv to follow the convention of a windows excel file, This macro converts any csv to follow the convention of a windows excel file,
applying CRLF line endings and converting embedded cr and crlf to lf. applying CRLF line endings and converting embedded cr and crlf to lf.
usage: Usage:
fileref mycsv "/path/your/csv"; fileref mycsv "/path/your/csv";
%mp_cleancsv(in=mycsv,out=/path/new.csv) %mp_cleancsv(in=mycsv,out=/path/new.csv)
@param in= provide path or fileref to input csv @param in= (NOTPROVIDED) Provide path or fileref to input csv. If a period is
@param out= output path or fileref to output csv found, it is assumed to be a file.
@param qchar= quote char - hex code 22 is the double quote. @param out= (NOTPROVIDED) Output path or fileref to output csv. If a period
is found, it is assumed to be a file.
@param qchar= ('22'x) Quote char - hex code 22 is the double quote.
@version 9.2 @version 9.2
@author Allan Bowe @author Allan Bowe
@@ -3832,9 +3834,14 @@ run;
else do; else do;
/* outside a quote, change cr and lf to crlf */ /* outside a quote, change cr and lf to crlf */
if inchar='0D'x then do; if inchar='0D'x then do;
crblank:
put '0D0A'x; put '0D0A'x;
input inchar $char1.; input inchar $char1.;
if inchar ne '0A'x then do; if inchar='0D'x then do;
/* multiple CR indicates CR formatted file with blank lines */
goto crblank;
end;
else if inchar ne '0A'x then do;
put inchar $char1.; put inchar $char1.;
if inchar=qchar then isq = mod(isq+1,2); if inchar=qchar then isq = mod(isq+1,2);
end; end;

View File

@@ -1,5 +1,5 @@
/** /**
@file mp_cleancsv.sas @file
@brief Fixes embedded cr / lf / crlf in CSV @brief Fixes embedded cr / lf / crlf in CSV
@details CSVs will sometimes contain lf or crlf within quotes (eg when @details CSVs will sometimes contain lf or crlf within quotes (eg when
saved by excel). When the termstr is ALSO lf or crlf that can be tricky saved by excel). When the termstr is ALSO lf or crlf that can be tricky
@@ -7,14 +7,16 @@
This macro converts any csv to follow the convention of a windows excel file, This macro converts any csv to follow the convention of a windows excel file,
applying CRLF line endings and converting embedded cr and crlf to lf. applying CRLF line endings and converting embedded cr and crlf to lf.
usage: Usage:
fileref mycsv "/path/your/csv"; fileref mycsv "/path/your/csv";
%mp_cleancsv(in=mycsv,out=/path/new.csv) %mp_cleancsv(in=mycsv,out=/path/new.csv)
@param in= provide path or fileref to input csv @param in= (NOTPROVIDED) Provide path or fileref to input csv. If a period is
@param out= output path or fileref to output csv found, it is assumed to be a file.
@param qchar= quote char - hex code 22 is the double quote. @param out= (NOTPROVIDED) Output path or fileref to output csv. If a period
is found, it is assumed to be a file.
@param qchar= ('22'x) Quote char - hex code 22 is the double quote.
@version 9.2 @version 9.2
@author Allan Bowe @author Allan Bowe
@@ -56,9 +58,14 @@
else do; else do;
/* outside a quote, change cr and lf to crlf */ /* outside a quote, change cr and lf to crlf */
if inchar='0D'x then do; if inchar='0D'x then do;
crblank:
put '0D0A'x; put '0D0A'x;
input inchar $char1.; input inchar $char1.;
if inchar ne '0A'x then do; if inchar='0D'x then do;
/* multiple CR indicates CR formatted file with blank lines */
goto crblank;
end;
else if inchar ne '0A'x then do;
put inchar $char1.; put inchar $char1.;
if inchar=qchar then isq = mod(isq+1,2); if inchar=qchar then isq = mod(isq+1,2);
end; end;

View File

@@ -0,0 +1,43 @@
/**
@file
@brief Testing mp_cleancsv.sas macro
@details Credit for test 1 goes to
[Tom](https://communities.sas.com/t5/user/viewprofilepage/user-id/159) from
SAS Communities:
https://communities.sas.com/t5/SAS-Programming/Removing-embedded-carriage-returns/m-p/824790#M325761
<h4> SAS Macros </h4>
@li mf_nobs.sas
@li mp_cleancsv.sas
@li mp_assert.sas
@li mp_assertscope.sas
**/
/* test 1 - cope with empty rows on CR formatted file */
/* Both filerefs point at files inside the WORK library directory: */
/* "cr" is the input fixture, "crlf" receives the cleaned output. */
filename crlf "%sysfunc(pathname(work))/crlf";
filename cr "%sysfunc(pathname(work))/cr";
/* Build the fixture using CR-only record terminators (termstr=cr). */
/* Each "/" in the PUT statement advances to a new record, so the three */
/* slashes after 'line 1' leave records 2 and 3 empty: 5 records in total. */
data _null_;
file cr termstr=cr ;
put 'line 1'///'line 4'/'line 5';
run;
/* The SNAPSHOT / COMPARE pair brackets the macro under test. */
/* NOTE(review): presumably this checks that mp_cleancsv leaves no stray */
/* macro variables behind - confirm against mp_assertscope.sas. */
%mp_assertscope(SNAPSHOT)
%mp_cleancsv(in=cr,out=crlf)
%mp_assertscope(COMPARE)
/* 5 rows expected, as every CR terminator is converted to 0D0A (CRLF) */
/* Read the output back using CRLF as the record terminator. The bare */
/* INPUT reads one record per iteration, giving one observation per */
/* record in work.test1; LIST writes each record to the log. */
data test1;
infile "%sysfunc(pathname(work))/crlf" lrecl=100 termstr=crlf;
input;
list;
run;
/* Write the observation count to the log to aid debugging. */
%put test1=%mf_nobs(test1);
/* Assert that all 5 records (including the 2 blank ones) survived; */
/* the outcome is recorded in work.test_results. */
%mp_assert(
iftrue=(%mf_nobs(work.test1)=5),
desc=Checking blank rows on CR formatted file,
outds=work.test_results
)