diff --git a/all.sas b/all.sas index d21d5e1..c2a2389 100644 --- a/all.sas +++ b/all.sas @@ -3775,7 +3775,7 @@ run; %mend mp_chop; /** - @file mp_cleancsv.sas + @file @brief Fixes embedded cr / lf / crlf in CSV @details CSVs will sometimes contain lf or crlf within quotes (eg when saved by excel). When the termstr is ALSO lf or crlf that can be tricky @@ -3783,14 +3783,16 @@ run; This macro converts any csv to follow the convention of a windows excel file, applying CRLF line endings and converting embedded cr and crlf to lf. - usage: + Usage: fileref mycsv "/path/your/csv"; %mp_cleancsv(in=mycsv,out=/path/new.csv) - @param in= provide path or fileref to input csv - @param out= output path or fileref to output csv - @param qchar= quote char - hex code 22 is the double quote. + @param in= (NOTPROVIDED) Provide path or fileref to input csv. If a period is + found, it is assumed to be a file. + @param out= (NOTPROVIDED) Output path or fileref to output csv. If a period + is found, it is assumed to be a file. + @param qchar= ('22'x) Quote char - hex code 22 is the double quote. @version 9.2 @author Allan Bowe @@ -3832,9 +3834,14 @@ run; else do; /* outside a quote, change cr and lf to crlf */ if inchar='0D'x then do; + crblank: put '0D0A'x; input inchar $char1.; - if inchar ne '0A'x then do; + if inchar='0D'x then do; + /* multiple CR indicates CR formatted file with blank lines */ + goto crblank; + end; + else if inchar ne '0A'x then do; put inchar $char1.; if inchar=qchar then isq = mod(isq+1,2); end; diff --git a/base/mp_cleancsv.sas b/base/mp_cleancsv.sas index 25b44e2..6209851 100644 --- a/base/mp_cleancsv.sas +++ b/base/mp_cleancsv.sas @@ -1,5 +1,5 @@ /** - @file mp_cleancsv.sas + @file @brief Fixes embedded cr / lf / crlf in CSV @details CSVs will sometimes contain lf or crlf within quotes (eg when saved by excel). 
When the termstr is ALSO lf or crlf that can be tricky @@ -7,14 +7,16 @@ This macro converts any csv to follow the convention of a windows excel file, applying CRLF line endings and converting embedded cr and crlf to lf. - usage: + Usage: fileref mycsv "/path/your/csv"; %mp_cleancsv(in=mycsv,out=/path/new.csv) - @param in= provide path or fileref to input csv - @param out= output path or fileref to output csv - @param qchar= quote char - hex code 22 is the double quote. + @param in= (NOTPROVIDED) Provide path or fileref to input csv. If a period is + found, it is assumed to be a file. + @param out= (NOTPROVIDED) Output path or fileref to output csv. If a period + is found, it is assumed to be a file. + @param qchar= ('22'x) Quote char - hex code 22 is the double quote. @version 9.2 @author Allan Bowe @@ -56,9 +58,14 @@ else do; /* outside a quote, change cr and lf to crlf */ if inchar='0D'x then do; + crblank: put '0D0A'x; input inchar $char1.; - if inchar ne '0A'x then do; + if inchar='0D'x then do; + /* multiple CR indicates CR formatted file with blank lines */ + goto crblank; + end; + else if inchar ne '0A'x then do; put inchar $char1.; if inchar=qchar then isq = mod(isq+1,2); end; diff --git a/tests/base/mp_cleancsv.test.sas b/tests/base/mp_cleancsv.test.sas new file mode 100644 index 0000000..dc9c2d0 --- /dev/null +++ b/tests/base/mp_cleancsv.test.sas @@ -0,0 +1,43 @@ +/** + @file + @brief Testing mp_cleancsv.sas macro + @details Credit for test 1 goes to + [Tom](https://communities.sas.com/t5/user/viewprofilepage/user-id/159) from + SAS Communities: +https://communities.sas.com/t5/SAS-Programming/Removing-embedded-carriage-returns/m-p/824790#M325761 + +

<h4> SAS Macros </h4>

+ @li mf_nobs.sas + @li mp_cleancsv.sas + @li mp_assert.sas + @li mp_assertscope.sas + +**/ + +/* test 1 - cope with empty rows on CR formatted file */ + +filename crlf "%sysfunc(pathname(work))/crlf"; +filename cr "%sysfunc(pathname(work))/cr"; +data _null_; + file cr termstr=cr ; + put 'line 1'///'line 4'/'line 5'; +run; + +%mp_assertscope(SNAPSHOT) +%mp_cleancsv(in=cr,out=crlf) +%mp_assertscope(COMPARE) + +/* 5 rows as all converted to 0D0A */ +data test1; + infile "%sysfunc(pathname(work))/crlf" lrecl=100 termstr=crlf; + input; + list; +run; + +%put test1=%mf_nobs(test1); + +%mp_assert( + iftrue=(%mf_nobs(work.test1)=5), + desc=Checking blank rows on CR formatted file, + outds=work.test_results +)