diff --git a/base/mp_assertdsobs.sas b/base/mp_assertdsobs.sas new file mode 100644 index 0000000..7726a5a --- /dev/null +++ b/base/mp_assertdsobs.sas @@ -0,0 +1,67 @@ +/** + @file + @brief Asserts the number of observations in a dataset + @details Useful in the context of writing sasjs tests. The results of the + test are _appended_ to the &outds. table. + + Example usage: + + %mp_assertdsobs(sashelp.class) %* tests if any observations are present; + +

<h4> SAS Macros </h4>

+  @li mf_nobs.sas
+
+
+  @param [in] inds input dataset to test for presence of observations
+  @param [in] desc= (Testing observations) The user provided test description
+  @param [in] test= (HASOBS) The test to apply. Valid values are:
+    @li HASOBS Test is a PASS if the input dataset has any observations
+    @li EMPTY Test is a PASS if input dataset is empty
+  @param [out] outds= (work.test_results) The output dataset to contain the
+  results. If it does not exist, it will be created, with the following format:
+  |TEST_DESCRIPTION:$256|TEST_RESULT:$4|TEST_COMMENTS:$256|
+  |---|---|---|
+  |User Provided description|PASS|Dataset &inds has XX obs|
+
+
+  @version 9.2
+  @author Allan Bowe
+
+**/
+
+%macro mp_assertdsobs(inds,
+  test=HASOBS,
+  desc=Testing observations,
+  outds=work.test_results
+)/*/STORE SOURCE*/;
+
+  %local nobs ds;
+  /* the observation count is resolved once, when the macro executes */
+  %let nobs=%mf_nobs(&inds);
+  %let test=%upcase(&test);
+
+  /* unnamed DATA step - SAS allocates the next free DATAn name, which is
+    picked up from &syslast straight after the step */
+  data;
+    length test_description $256 test_result $4 test_comments $256;
+    /* symget avoids resolving any macro triggers held in the description */
+    test_description=symget('desc');
+    /* default to FAIL - only a recognised, satisfied test flips it to PASS */
+    test_result='FAIL';
+    test_comments="&sysmacroname: Dataset &inds has &nobs observations";
+  %if &test=HASOBS %then %do;
+    if &nobs>0 then test_result='PASS';
+  %end;
+  %else %if &test=EMPTY %then %do;
+    if &nobs=0 then test_result='PASS';
+  %end;
+  %else %do;
+    /* unrecognised test= value: record it, result remains FAIL */
+    test_comments="&sysmacroname: Unsatisfied test condition - &test";
+  %end;
+  run;
+
+  %let ds=&syslast;
+
+  proc append base=&outds data=&ds;
+  run;
+
+  /* drop the temporary single-row table. QUIT terminates PROC SQL, which
+    would otherwise remain active after the macro has finished */
+  proc sql;
+  drop table &ds;
+  quit;
+
+%mend;
\ No newline at end of file
diff --git a/base/mp_filtercheck.sas b/base/mp_filtercheck.sas
new file mode 100644
index 0000000..49ab1dc
--- /dev/null
+++ b/base/mp_filtercheck.sas
@@ -0,0 +1,143 @@
+/**
+  @file
+  @brief Checks an input filter table for validity
+  @details Performs checks on the input table to ensure it arrives in the
+  correct format. This is necessary to prevent code injection. Will update
+  SYSCC to 1008 if bad records are found.
+ + Used for dynamic filtering in [Data Controller for SAS®](https://datacontroller.io). + + Usage: + + %mp_filtercheck(work.filter,targetds=sashelp.class,outds=work.badrecords) + + The input table should have the following format: + + |GROUP_LOGIC:$3|SUBGROUP_LOGIC:$3|SUBGROUP_ID:8.|VARIABLE_NM:$32|OPERATOR_NM:$10|RAW_VALUE:$32767| + |---|---|---|---|---|---| + |AND|AND|1|AGE|=|12| + |AND|AND|1|SEX|<=|'M'| + |AND|OR|2|Name|NOT IN|('Jane','Alfred')| + |AND|OR|2|Weight|>=|7| + + Rules applied: + + @li GROUP_LOGIC - only AND/OR + @li SUBGROUP_LOGIC - only AND/OR + @li SUBGROUP_ID - only integers + @li VARIABLE_NM - must be in the target table + @li OPERATOR_NM - only =/>/</<=/>=/BETWEEN/IN/NOT IN/NOT EQUAL/CONTAINS + @li RAW_VALUE - no unquoted values except integers, commas and spaces. + + @returns The &outds table containing any bad rows, plus a REASON_CD column. + + @param [in] inds The table to be checked, with the format above + @param [in] targetds= The target dataset against which to verify VARIABLE_NM + @param [out] outds= The output table, which is a copy of the &inds. table + plus a REASON_CD column, containing only bad records. If bad records found, + the SYSCC value will be set to 1008 (general data problem). Downstream + processes should check this table (and return code) before continuing. + +

<h4> SAS Macros </h4>

+ @li mp_abort.sas + @li mf_getvarlist.sas + @li mf_nobs.sas + +

<h4> Related Macros </h4>

+  @li mp_filtergenerate.sas
+
+  @version 9.3
+  @author Allan Bowe
+
+  @todo Support date / hex / name literals and exponents in RAW_VALUE field
+**/
+
+%macro mp_filtercheck(inds,targetds=,outds=work.badrecords);
+
+%mp_abort(iftrue= (&syscc ne 0)
+  ,mac=&sysmacroname
+  ,msg=%str(syscc=&syscc - on macro entry)
+)
+
+/**
+  * Sanitise the values based on valid value lists, then strip out
+  * quoted string literals, commas and spaces from RAW_VALUE.
+  * Only digits should remain - anything else is output as a bad record.
+  * A row is output once for every rule that it breaks.
+  */
+
+data &outds;
+  set &inds;
+  /* REASON_CD must hold the longest message below (41 characters) plus the
+    "VARIABLE_NM not in &targetds" message without truncation.
+    The RAW_VALUEn work variables are declared explicitly - if left to
+    TRANWRD they would default to 200 bytes, and anything beyond the 200th
+    character of RAW_VALUE would escape validation. */
+  length reason_cd $64 raw_value1 raw_value2 raw_value3 $32767;
+
+  /* closed list checks */
+  if GROUP_LOGIC not in ('AND','OR') then do;
+    REASON_CD='GROUP_LOGIC should be either AND or OR';
+    putlog REASON_CD= GROUP_LOGIC=;
+    output;
+  end;
+  if SUBGROUP_LOGIC not in ('AND','OR') then do;
+    REASON_CD='SUBGROUP_LOGIC should be either AND or OR';
+    putlog REASON_CD= SUBGROUP_LOGIC=;
+    output;
+  end;
+  /* a non-zero remainder means a fractional SUBGROUP_ID */
+  if mod(SUBGROUP_ID,1) ne 0 then do;
+    REASON_CD='SUBGROUP_ID should be integer';
+    putlog REASON_CD= SUBGROUP_ID=;
+    output;
+  end;
+  /* case insensitive comparison against the columns of the target table */
+  if upcase(VARIABLE_NM) not in
+  (%upcase(%mf_getvarlist(&targetds,dlm=%str(,),quote=SINGLE)))
+  then do;
+    REASON_CD="VARIABLE_NM not in &targetds";
+    putlog REASON_CD= VARIABLE_NM=;
+    output;
+  end;
+  if OPERATOR_NM not in
+  ('=','>','<','<=','>=','BETWEEN','IN','NOT IN','NOT EQUAL','CONTAINS')
+  then do;
+    REASON_CD='Invalid OPERATOR_NM';
+    putlog REASON_CD= OPERATOR_NM=;
+    output;
+  end;
+
+  /* special logic - remove the syntax that is legitimate for the operator */
+  if OPERATOR_NM='BETWEEN' then raw_value1=tranwrd(raw_value,' AND ','');
+  else if OPERATOR_NM in ('IN','NOT IN') then do;
+    /* first and last non-blank characters must be the enclosing brackets */
+    if substr(raw_value,1,1) ne '('
+    or substr(cats(reverse(raw_value)),1,1) ne ')'
+    then do;
+      REASON_CD='Missing brackets in RAW_VALUE';
+      putlog REASON_CD= OPERATOR_NM= raw_value= raw_value1= ;
+      output;
+    end;
+    else raw_value1=substr(raw_value,2,max(length(raw_value)-2,0));
+  end;
+  else raw_value1=raw_value;
+
+  /* remove nested literals eg '' */
+  raw_value1=tranwrd(raw_value1,"''",'');
+
+  /* now match string literals (always single quotes) */
+  raw_value2=raw_value1;
+  /* non-greedy match, so each quoted literal is removed individually */
+  regex = prxparse("s/(\').*?(\')//");
+  call prxchange(regex,-1,raw_value2);
+
+  /* remove commas */
+  raw_value3=compress(raw_value2,',');
+
+
+
+
+  /* output records that contain values other than digits and spaces */
+  if notdigit(compress(raw_value3,' '))>0 then do;
+    putlog raw_value3= $hex32.;
+    REASON_CD='Invalid RAW_VALUE';
+    putlog REASON_CD= raw_value= raw_value1= raw_value2= raw_value3=;
+    output;
+  end;
+
+run;
+
+/* any bad record is flagged to downstream processes via the return code */
+%if %mf_nobs(&outds)>0 %then %let syscc=1008;
+
+%mend;
diff --git a/base/mp_filtergenerate.sas b/base/mp_filtergenerate.sas
new file mode 100644
index 0000000..ac7c202
--- /dev/null
+++ b/base/mp_filtergenerate.sas
@@ -0,0 +1,90 @@
+/**
+  @file
+  @brief Generates a filter clause from an input table, to a fileref
+  @details Uses the input table to generate an output filter clause.
+  This feature is used to create dynamic dropdowns in [Data Controller for SAS®](
+  https://datacontroller.io). The input table should be in the format below:
+
+  |GROUP_LOGIC:$3|SUBGROUP_LOGIC:$3|SUBGROUP_ID:8.|VARIABLE_NM:$32|OPERATOR_NM:$10|RAW_VALUE:$32767|
+  |---|---|---|---|---|---|
+  |AND|AND|1|AGE|=|12|
+  |AND|AND|1|SEX|<=|'M'|
+  |AND|OR|2|Name|NOT IN|('Jane','Alfred')|
+  |AND|OR|2|Weight|>=|7|
+
+  Note - if the above table is received from an external client, the values
+  should first be validated using the mp_filtercheck.sas macro to avoid risk
+  of SQL injection.
+
+  To generate the filter, run the following code:
+
+      data work.filtertable;
+        infile datalines4 dsd;
+        input GROUP_LOGIC:$3. SUBGROUP_LOGIC:$3. SUBGROUP_ID:8. VARIABLE_NM:$32.
+          OPERATOR_NM:$10. 
RAW_VALUE:$32767.; + datalines4; + AND,AND,1,AGE,=,12 + AND,AND,1,SEX,<=,"'M'" + AND,OR,2,Name,NOT IN,"('Jane','Alfred')" + AND,OR,2,Weight,>=,7 + ;;;; + run; + + %mp_filtergenerate(work.filtertable,outref=myfilter) + + data _null_; + infile myfilter; + input; + put _infile_; + run; + + Will write the following query to the log: + + > ( + > AGE = 12 + > AND + > SEX <= 'M' + > ) AND ( + > Name NOT IN ('Jane','Alfred') + > OR + > Weight >= 7 + > ) + + @param [in] inds The input table with query values + @param [out] outref= The output fileref to contain the filter clause. Will + be created (or replaced). + +

<h4> Related Macros </h4>

+ @li mp_filtercheck.sas + +

<h4> SAS Macros </h4>

+  @li mp_abort.sas
+
+  @version 9.3
+  @author Allan Bowe
+
+**/
+
+%macro mp_filtergenerate(inds,outref=filter);
+
+/* do not start if the session is already in an error state */
+%mp_abort(
+  iftrue= (&syscc ne 0),
+  mac=&sysmacroname,
+  msg=%str(syscc=&syscc - on macro entry)
+)
+
+/* (re)assign the output fileref as a temporary file */
+filename &outref temp;
+
+data _null_;
+  file &outref lrecl=32800;
+  set &inds end=last;
+  by SUBGROUP_ID;
+
+  /* write the connector that precedes this condition */
+  select;
+    /* very first row opens the first bracket */
+    when (_n_=1) put '(';
+    /* new subgroup - appended to the held closing bracket line */
+    when (first.SUBGROUP_ID) put +1 GROUP_LOGIC '(';
+    /* same subgroup - join to the previous condition */
+    otherwise put +2 SUBGROUP_LOGIC;
+  end;
+
+  /* the condition itself */
+  put +4 VARIABLE_NM OPERATOR_NM RAW_VALUE;
+
+  /* close the subgroup, holding the line for the next PUT statement */
+  if last.SUBGROUP_ID then do;
+    put ')'@;
+  end;
+run;
+
+%mend;
diff --git a/sasjs/sasjsconfig.json b/sasjs/sasjsconfig.json
index 192034e..2e10dd2 100644
--- a/sasjs/sasjsconfig.json
+++ b/sasjs/sasjsconfig.json
@@ -1,6 +1,12 @@
 {
   "$schema": "https://cli.sasjs.io/sasjsconfig-schema.json",
-  "macroFolders": ["base", "meta", "metax", "viya", "lua"],
+  "macroFolders": [
+    "base",
+    "meta",
+    "metax",
+    "viya",
+    "lua"
+  ],
   "docConfig": {
     "displayMacroCore": false,
     "enableLineage": false,
@@ -21,14 +27,18 @@
       "serverType": "SASVIYA",
       "appLoc": "/Public/temp/macrocore",
       "serviceConfig": {
-        "serviceFolders": ["tests/viya"],
+        "serviceFolders": [
+          "tests/base",
+          "tests/viya"
+        ],
         "macroVars": {
           "mcTestAppLoc": "/Public/temp/macrocore"
         }
       },
       "deployConfig": {
         "deployServicePack": true
-      }
+      },
+      "contextName": "SAS Job Execution compute context"
     }
   ]
-}
+}
\ No newline at end of file
diff --git a/tests/base/mp_filtercheck.test.sas b/tests/base/mp_filtercheck.test.sas
new file mode 100644
index 0000000..6c957ff
--- /dev/null
+++ b/tests/base/mp_filtercheck.test.sas
@@ -0,0 +1,128 @@
+/**
+  @file
+  @brief Testing mp_filtercheck macro
+
+  

<h4> SAS Macros </h4>

+  @li mp_filtercheck.sas
+  @li mp_assertdsobs.sas
+
+**/
+
+
+/* Test 1: a well formed filter - no bad records expected */
+data work.inds;
+  infile datalines4 dsd;
+  input
+    GROUP_LOGIC:$3. SUBGROUP_LOGIC:$3. SUBGROUP_ID:8.
+    VARIABLE_NM:$32. OPERATOR_NM:$10. RAW_VALUE:$32767.
+  ;
+datalines4;
+AND,AND,1,AGE,=,12
+AND,AND,1,SEX,<=,"'M'"
+AND,OR,2,Name,NOT IN,"('Jane','Alfred')"
+AND,OR,2,Weight,>=,7
+;;;;
+run;
+
+%mp_filtercheck(work.inds
+  ,targetds=sashelp.class
+  ,outds=work.badrecords
+)
+/* reset the return code so that the next check is able to run */
+%let syscc=0;
+%mp_assertdsobs(work.badrecords
+  ,desc=Valid filter query
+  ,test=EMPTY
+  ,outds=work.test_results
+)
+
+/* Test 2: VARIABLE_NM that does not exist in the target table */
+data work.inds;
+  infile datalines4 dsd;
+  input
+    GROUP_LOGIC:$3. SUBGROUP_LOGIC:$3. SUBGROUP_ID:8.
+    VARIABLE_NM:$32. OPERATOR_NM:$10. RAW_VALUE:$32767.
+  ;
+datalines4;
+AND,AND,1,invalid,=,12
+AND,AND,1,SEX,<=,"'M'"
+AND,OR,2,Name,NOT IN,"('Jane','Alfred')"
+AND,OR,2,Weight,>=,7
+;;;;
+run;
+
+%mp_filtercheck(work.inds
+  ,targetds=sashelp.class
+  ,outds=work.badrecords
+)
+%let syscc=0;
+%mp_assertdsobs(work.badrecords
+  ,desc=Invalid column name
+  ,test=HASOBS
+  ,outds=work.test_results
+)
+
+/* Test 3: RAW_VALUE with unbalanced quoting */
+data work.inds;
+  infile datalines4 dsd;
+  input
+    GROUP_LOGIC:$3. SUBGROUP_LOGIC:$3. SUBGROUP_ID:8.
+    VARIABLE_NM:$32. OPERATOR_NM:$10. RAW_VALUE:$32767.
+  ;
+datalines4;
+AND,OR,2,Name,NOT IN,"(''''Jane','Alfred')"
+;;;;
+run;
+
+%mp_filtercheck(work.inds
+  ,targetds=sashelp.class
+  ,outds=work.badrecords
+)
+%let syscc=0;
+%mp_assertdsobs(work.badrecords
+  ,desc=Invalid raw value
+  ,test=HASOBS
+  ,outds=work.test_results
+)
+
+/* Test 4: code injection attempt through the column name */
+data work.inds;
+  infile datalines4 dsd;
+  input
+    GROUP_LOGIC:$3. SUBGROUP_LOGIC:$3. SUBGROUP_ID:8.
+    VARIABLE_NM:$32. OPERATOR_NM:$10. RAW_VALUE:$32767.
+  ;
+datalines4;
+AND,AND,1,%abort,=,12
+AND,OR,2,Weight,>=,7
+;;;;
+run;
+
+%mp_filtercheck(work.inds
+  ,targetds=sashelp.class
+  ,outds=work.badrecords
+)
+%let syscc=0;
+%mp_assertdsobs(work.badrecords
+  ,desc=Code injection - column name
+  ,test=HASOBS
+  ,outds=work.test_results
+)
+
+/* Test 5: code injection attempt through the raw value */
+data work.inds;
+  infile datalines4 dsd;
+  input
+    GROUP_LOGIC:$3. SUBGROUP_LOGIC:$3. SUBGROUP_ID:8.
+    VARIABLE_NM:$32. OPERATOR_NM:$10. RAW_VALUE:$32767.
+  ;
+datalines4;
+AND,AND,1,age,=,;;%abort
+;;;;
+run;
+
+%mp_filtercheck(work.inds
+  ,targetds=sashelp.class
+  ,outds=work.badrecords
+)
+%let syscc=0;
+%mp_assertdsobs(work.badrecords
+  ,desc=Code injection - raw value abort
+  ,test=HASOBS
+  ,outds=work.test_results
+)
+
+
+
+/* send the accumulated results table back to the caller */
+%webout(OPEN)
+%webout(OBJ, TEST_RESULTS)
+%webout(CLOSE)
\ No newline at end of file