mirror of
https://github.com/sasjs/core.git
synced 2026-01-07 09:30:06 +00:00
fix: enable embedded blanks in mp_cleancsv, closes #289
This commit is contained in:
19
all.sas
19
all.sas
@@ -3775,7 +3775,7 @@ run;
|
|||||||
|
|
||||||
%mend mp_chop;
|
%mend mp_chop;
|
||||||
/**
|
/**
|
||||||
@file mp_cleancsv.sas
|
@file
|
||||||
@brief Fixes embedded cr / lf / crlf in CSV
|
@brief Fixes embedded cr / lf / crlf in CSV
|
||||||
@details CSVs will sometimes contain lf or crlf within quotes (eg when
|
@details CSVs will sometimes contain lf or crlf within quotes (eg when
|
||||||
saved by excel). When the termstr is ALSO lf or crlf that can be tricky
|
saved by excel). When the termstr is ALSO lf or crlf that can be tricky
|
||||||
@@ -3783,14 +3783,16 @@ run;
|
|||||||
This macro converts any csv to follow the convention of a windows excel file,
|
This macro converts any csv to follow the convention of a windows excel file,
|
||||||
applying CRLF line endings and converting embedded cr and crlf to lf.
|
applying CRLF line endings and converting embedded cr and crlf to lf.
|
||||||
|
|
||||||
usage:
|
Usage:
|
||||||
|
|
||||||
filename mycsv "/path/your/csv";
|
filename mycsv "/path/your/csv";
|
||||||
%mp_cleancsv(in=mycsv,out=/path/new.csv)
|
%mp_cleancsv(in=mycsv,out=/path/new.csv)
|
||||||
|
|
||||||
@param in= provide path or fileref to input csv
|
@param in= (NOTPROVIDED) Provide path or fileref to input csv. If a period is
|
||||||
@param out= output path or fileref to output csv
|
found, it is assumed to be a file.
|
||||||
@param qchar= quote char - hex code 22 is the double quote.
|
@param out= (NOTPROVIDED) Output path or fileref to output csv. If a period
|
||||||
|
is found, it is assumed to be a file.
|
||||||
|
@param qchar= ('22'x) Quote char - hex code 22 is the double quote.
|
||||||
|
|
||||||
@version 9.2
|
@version 9.2
|
||||||
@author Allan Bowe
|
@author Allan Bowe
|
||||||
@@ -3832,9 +3834,14 @@ run;
|
|||||||
else do;
|
else do;
|
||||||
/* outside a quote, change cr and lf to crlf */
|
/* outside a quote, change cr and lf to crlf */
|
||||||
if inchar='0D'x then do;
|
if inchar='0D'x then do;
|
||||||
|
crblank:
|
||||||
put '0D0A'x;
|
put '0D0A'x;
|
||||||
input inchar $char1.;
|
input inchar $char1.;
|
||||||
if inchar ne '0A'x then do;
|
if inchar='0D'x then do;
|
||||||
|
/* multiple CR indicates CR formatted file with blank lines */
|
||||||
|
goto crblank;
|
||||||
|
end;
|
||||||
|
else if inchar ne '0A'x then do;
|
||||||
put inchar $char1.;
|
put inchar $char1.;
|
||||||
if inchar=qchar then isq = mod(isq+1,2);
|
if inchar=qchar then isq = mod(isq+1,2);
|
||||||
end;
|
end;
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
@file mp_cleancsv.sas
|
@file
|
||||||
@brief Fixes embedded cr / lf / crlf in CSV
|
@brief Fixes embedded cr / lf / crlf in CSV
|
||||||
@details CSVs will sometimes contain lf or crlf within quotes (eg when
|
@details CSVs will sometimes contain lf or crlf within quotes (eg when
|
||||||
saved by excel). When the termstr is ALSO lf or crlf that can be tricky
|
saved by excel). When the termstr is ALSO lf or crlf that can be tricky
|
||||||
@@ -7,14 +7,16 @@
|
|||||||
This macro converts any csv to follow the convention of a windows excel file,
|
This macro converts any csv to follow the convention of a windows excel file,
|
||||||
applying CRLF line endings and converting embedded cr and crlf to lf.
|
applying CRLF line endings and converting embedded cr and crlf to lf.
|
||||||
|
|
||||||
usage:
|
Usage:
|
||||||
|
|
||||||
filename mycsv "/path/your/csv";
|
filename mycsv "/path/your/csv";
|
||||||
%mp_cleancsv(in=mycsv,out=/path/new.csv)
|
%mp_cleancsv(in=mycsv,out=/path/new.csv)
|
||||||
|
|
||||||
@param in= provide path or fileref to input csv
|
@param in= (NOTPROVIDED) Provide path or fileref to input csv. If a period is
|
||||||
@param out= output path or fileref to output csv
|
found, it is assumed to be a file.
|
||||||
@param qchar= quote char - hex code 22 is the double quote.
|
@param out= (NOTPROVIDED) Output path or fileref to output csv. If a period
|
||||||
|
is found, it is assumed to be a file.
|
||||||
|
@param qchar= ('22'x) Quote char - hex code 22 is the double quote.
|
||||||
|
|
||||||
@version 9.2
|
@version 9.2
|
||||||
@author Allan Bowe
|
@author Allan Bowe
|
||||||
@@ -56,9 +58,14 @@
|
|||||||
else do;
|
else do;
|
||||||
/* outside a quote, change cr and lf to crlf */
|
/* outside a quote, change cr and lf to crlf */
|
||||||
if inchar='0D'x then do;
|
if inchar='0D'x then do;
|
||||||
|
crblank:
|
||||||
put '0D0A'x;
|
put '0D0A'x;
|
||||||
input inchar $char1.;
|
input inchar $char1.;
|
||||||
if inchar ne '0A'x then do;
|
if inchar='0D'x then do;
|
||||||
|
/* multiple CR indicates CR formatted file with blank lines */
|
||||||
|
goto crblank;
|
||||||
|
end;
|
||||||
|
else if inchar ne '0A'x then do;
|
||||||
put inchar $char1.;
|
put inchar $char1.;
|
||||||
if inchar=qchar then isq = mod(isq+1,2);
|
if inchar=qchar then isq = mod(isq+1,2);
|
||||||
end;
|
end;
|
||||||
|
|||||||
43
tests/base/mp_cleancsv.test.sas
Normal file
43
tests/base/mp_cleancsv.test.sas
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
/**
|
||||||
|
@file
|
||||||
|
@brief Testing mp_cleancsv.sas macro
|
||||||
|
@details Credit for test 1 goes to
|
||||||
|
[Tom](https://communities.sas.com/t5/user/viewprofilepage/user-id/159) from
|
||||||
|
SAS Communities:
|
||||||
|
https://communities.sas.com/t5/SAS-Programming/Removing-embedded-carriage-returns/m-p/824790#M325761
|
||||||
|
|
||||||
|
<h4> SAS Macros </h4>
|
||||||
|
@li mf_nobs.sas
|
||||||
|
@li mp_cleancsv.sas
|
||||||
|
@li mp_assert.sas
|
||||||
|
@li mp_assertscope.sas
|
||||||
|
|
||||||
|
**/
|
||||||
|
|
||||||
|
/* test 1 - cope with empty rows on CR formatted file */
|
||||||
|
|
||||||
|
filename crlf "%sysfunc(pathname(work))/crlf";
|
||||||
|
filename cr "%sysfunc(pathname(work))/cr";
|
||||||
|
data _null_;
|
||||||
|
file cr termstr=cr ;
|
||||||
|
put 'line 1'///'line 4'/'line 5';
|
||||||
|
run;
|
||||||
|
|
||||||
|
%mp_assertscope(SNAPSHOT)
|
||||||
|
%mp_cleancsv(in=cr,out=crlf)
|
||||||
|
%mp_assertscope(COMPARE)
|
||||||
|
|
||||||
|
/* 5 rows, as all line endings are converted to 0D0A (CRLF) */
|
||||||
|
data test1;
|
||||||
|
infile "%sysfunc(pathname(work))/crlf" lrecl=100 termstr=crlf;
|
||||||
|
input;
|
||||||
|
list;
|
||||||
|
run;
|
||||||
|
|
||||||
|
%put test1=%mf_nobs(test1);
|
||||||
|
|
||||||
|
%mp_assert(
|
||||||
|
iftrue=(%mf_nobs(work.test1)=5),
|
||||||
|
desc=Checking blank rows on CR formatted file,
|
||||||
|
outds=work.test_results
|
||||||
|
)
|
||||||
Reference in New Issue
Block a user