1
0
mirror of https://github.com/sasjs/core.git synced 2025-12-11 06:24:35 +00:00

Merge pull request #290 from sasjs/allanbowe/mp-cleancsv-does-not-289

fix: enable embedded blanks in mp_cleancsv, closes #289
This commit is contained in:
Allan Bowe
2022-07-21 23:47:18 +01:00
committed by GitHub
3 changed files with 69 additions and 12 deletions

19
all.sas
View File

@@ -3775,7 +3775,7 @@ run;
%mend mp_chop;
/**
@file mp_cleancsv.sas
@file
@brief Fixes embedded cr / lf / crlf in CSV
@details CSVs will sometimes contain lf or crlf within quotes (eg when
saved by excel). When the termstr is ALSO lf or crlf that can be tricky
@@ -3783,14 +3783,16 @@ run;
This macro converts any csv to follow the convention of a windows excel file,
applying CRLF line endings and converting embedded cr and crlf to lf.
usage:
Usage:
fileref mycsv "/path/your/csv";
%mp_cleancsv(in=mycsv,out=/path/new.csv)
@param in= provide path or fileref to input csv
@param out= output path or fileref to output csv
@param qchar= quote char - hex code 22 is the double quote.
@param in= (NOTPROVIDED) Provide path or fileref to input csv. If a period is
found, it is assumed to be a file.
@param out= (NOTPROVIDED) Output path or fileref to output csv. If a period
is found, it is assumed to be a file.
@param qchar= ('22'x) Quote char - hex code 22 is the double quote.
@version 9.2
@author Allan Bowe
@@ -3832,9 +3834,14 @@ run;
else do;
/* outside a quote, change cr and lf to crlf */
if inchar='0D'x then do;
crblank:
put '0D0A'x;
input inchar $char1.;
if inchar ne '0A'x then do;
if inchar='0D'x then do;
/* multiple CR indicates CR formatted file with blank lines */
goto crblank;
end;
else if inchar ne '0A'x then do;
put inchar $char1.;
if inchar=qchar then isq = mod(isq+1,2);
end;

View File

@@ -1,5 +1,5 @@
/**
@file mp_cleancsv.sas
@file
@brief Fixes embedded cr / lf / crlf in CSV
@details CSVs will sometimes contain lf or crlf within quotes (eg when
saved by excel). When the termstr is ALSO lf or crlf that can be tricky
@@ -7,14 +7,16 @@
This macro converts any csv to follow the convention of a windows excel file,
applying CRLF line endings and converting embedded cr and crlf to lf.
usage:
Usage:
fileref mycsv "/path/your/csv";
%mp_cleancsv(in=mycsv,out=/path/new.csv)
@param in= provide path or fileref to input csv
@param out= output path or fileref to output csv
@param qchar= quote char - hex code 22 is the double quote.
@param in= (NOTPROVIDED) Provide path or fileref to input csv. If a period is
found, it is assumed to be a file.
@param out= (NOTPROVIDED) Output path or fileref to output csv. If a period
is found, it is assumed to be a file.
@param qchar= ('22'x) Quote char - hex code 22 is the double quote.
@version 9.2
@author Allan Bowe
@@ -56,9 +58,14 @@
else do;
/* outside a quote, change cr and lf to crlf */
if inchar='0D'x then do;
crblank:
put '0D0A'x;
input inchar $char1.;
if inchar ne '0A'x then do;
if inchar='0D'x then do;
/* multiple CR indicates CR formatted file with blank lines */
goto crblank;
end;
else if inchar ne '0A'x then do;
put inchar $char1.;
if inchar=qchar then isq = mod(isq+1,2);
end;

View File

@@ -0,0 +1,43 @@
/**
@file
@brief Testing mp_cleancsv.sas macro
@details Credit for test 1 goes to
[Tom](https://communities.sas.com/t5/user/viewprofilepage/user-id/159) from
SAS Communities:
https://communities.sas.com/t5/SAS-Programming/Removing-embedded-carriage-returns/m-p/824790#M325761
<h4> SAS Macros </h4>
@li mf_nobs.sas
@li mp_cleancsv.sas
@li mp_assert.sas
@li mp_assertscope.sas
**/
/* test 1 - cope with empty rows on CR formatted file */
/* Filerefs for the converted output (crlf) and the CR-terminated input (cr);
   both point at files inside the WORK library directory */
filename crlf "%sysfunc(pathname(work))/crlf";
filename cr "%sysfunc(pathname(work))/cr";
/* Build the input fixture: five CR-terminated records, of which the second
   and third are empty (each / in the PUT statement starts a new record) */
data _null_;
file cr termstr=cr ;
put 'line 1'///'line 4'/'line 5';
run;
/* Run the macro under test between the SNAPSHOT and COMPARE calls.
   NOTE(review): mp_assertscope is defined outside this file - presumably it
   checks that mp_cleancsv leaves no stray macro variables behind; confirm */
%mp_assertscope(SNAPSHOT)
%mp_cleancsv(in=cr,out=crlf)
%mp_assertscope(COMPARE)
/* 5 rows as all converted to 0D0A */
/* Read the converted file back using CRLF as the record terminator. The bare
   INPUT statement creates no variables, so test1 receives one observation per
   record read; LIST echoes each record to the log to aid debugging */
data test1;
infile "%sysfunc(pathname(work))/crlf" lrecl=100 termstr=crlf;
input;
list;
run;
/* Write the observed record count to the log */
%put test1=%mf_nobs(test1);
/* Expect exactly 5 records: 3 populated lines plus the 2 blank lines.
   NOTE(review): mp_assert is defined outside this file - presumably it records
   the outcome in the outds= dataset (work.test_results); confirm */
%mp_assert(
iftrue=(%mf_nobs(work.test1)=5),
desc=Checking blank rows on CR formatted file,
outds=work.test_results
)