From 514ae5ad3f79f12770bb580fac65c3d289bb4357 Mon Sep 17 00:00:00 2001 From: Akond Rahman Date: Fri, 25 Jun 2021 19:08:38 -0500 Subject: [PATCH] bug fix and test case --- ANSWERS-RQ2.py | 31 +++++++- ANSWERS-RQ3.py | 38 ++++++++-- .../TestArtifacts/http.scheme.afd.nagios.pp | 69 ++++++++++++++++++ .../TestArtifacts/http.scheme.gse.nagios.pp | 71 +++++++++++++++++++ TaintPupCode/_TestTaintTracking.py | 14 ++++ TaintPupCode/_test_constants.py | 4 ++ TaintPupCode/constants.py | 2 +- TaintPupCode/orchestra.py | 14 ++-- 8 files changed, 231 insertions(+), 12 deletions(-) create mode 100644 TaintPupCode/TestArtifacts/http.scheme.afd.nagios.pp create mode 100644 TaintPupCode/TestArtifacts/http.scheme.gse.nagios.pp diff --git a/ANSWERS-RQ2.py b/ANSWERS-RQ2.py index 302210c..231ff6d 100644 --- a/ANSWERS-RQ2.py +++ b/ANSWERS-RQ2.py @@ -66,6 +66,31 @@ def getValidTaints( file_ ): print('*'*50) + +def getDirectAttribs( file_ ): + print('*'*50) + print('D-I-R-E-C-T--A-T-T-R-I-B-S') + print('*'*50) + print( file_ ) + print('*'*50) + overall_direct_attrs = 0 + df_ = pd.read_csv( file_ ) + df_['DIRECT_ATTRIB_CNT'] = df_['TOTAL_AFFECTED_ATTRI'] - df_['AFFECTED_ATTRIB_BY_VARS'] + # print(df_.head()) + smells = np.unique( df_['SMELL_TYPE'].tolist() ) + for smell_ in smells: + smell_df_ = df_[df_['SMELL_TYPE'] == smell_] + per_smell_direct_attribs = sum( smell_df_['DIRECT_ATTRIB_CNT'].tolist() ) + overall_direct_attrs = overall_direct_attrs + per_smell_direct_attribs + print('*'*50) + print('SMELL:{}, TOTAL:{}'.format( smell_, per_smell_direct_attribs ) ) + print('*'*50) + print('ALL, TOTAL:{}'.format( overall_direct_attrs ) ) + print('*'*50) + + + + if __name__ == '__main__': # ORG_ = 'WIKI' # ORG_ = 'OSTK' @@ -76,5 +101,7 @@ def getValidTaints( file_ ): notused_file_name = '/Users/arahman/Documents/OneDriveWingUp/OneDrive-TennesseeTechUniversity/Research/IaC/FixFalsePositive/output/NOTUSED_' + ORG_ + '.csv' hopcnt_file_name = '/Users/arahman/Documents/OneDriveWingUp/OneDrive-TennesseeTechUniversity/Research/IaC/FixFalsePositive/output/HOPCOUNT_' + ORG_ + '.csv' - getHopCount( hopcnt_file_name ) - getValidTaints( notused_file_name ) \ No newline at end of file + # getHopCount( hopcnt_file_name ) + # getValidTaints( notused_file_name ) + + getDirectAttribs( notused_file_name ) \ No newline at end of file diff --git a/ANSWERS-RQ3.py b/ANSWERS-RQ3.py index 03b9f45..34b42c1 100644 --- a/ANSWERS-RQ3.py +++ b/ANSWERS-RQ3.py @@ -132,11 +132,33 @@ def getSemanticFreq( org_, file_ ): perc_pkg = round( float(tot_pkg) / float(tot_title_cnt) *100 , 3) print('='*100) + print('Total affected resources:', tot_title_cnt ) + print('='*100) print('USER:{}, DATA_STORAGE:{}, FILE:{}, WEB:{}, NET:{}, PKG:{}'.format( perc_usr, perc_dat, perc_fil, perc_web, perc_net, perc_pkg ) ) print('='*100) +def getLOCOnly( out_fil ): + print('-'*100) + print(out_fil) + print('-'*100) + out_df = pd.read_csv( out_fil ) + all_pp_scripts = np.unique( out_df['SCRIPT'].tolist() ) + print('Count:', len(all_pp_scripts) ) + print('-'*100) + tot_loc = 0 + for pp in all_pp_scripts: + try: + pp_loc = sum(1 for line in open( pp , 'r', encoding= 'latin-1' )) + except UnicodeDecodeError: + pp_loc = 10 + tot_loc = tot_loc + pp_loc + print('Size:', tot_loc ) + print('-'*100) + + + if __name__=='__main__': # org_name = 'GITHUB' @@ -145,11 +167,17 @@ def getSemanticFreq( org_, file_ ): # org_name = 'OSTK' # org_name = 'WIKI' - reso_file_name = '/Users/arahman/Documents/OneDriveWingUp/OneDrive-TennesseeTechUniversity/Research/IaC/FixFalsePositive/output/RESOURCE_' + org_name + '.csv' - attr_file_name = '/Users/arahman/Documents/OneDriveWingUp/OneDrive-TennesseeTechUniversity/Research/IaC/FixFalsePositive/output/NOTUSED_' + org_name + '.csv' + # reso_file_name = '/Users/arahman/Documents/OneDriveWingUp/OneDrive-TennesseeTechUniversity/Research/IaC/FixFalsePositive/output/RESOURCE_' + org_name + '.csv' + # attr_file_name = '/Users/arahman/Documents/OneDriveWingUp/OneDrive-TennesseeTechUniversity/Research/IaC/FixFalsePositive/output/NOTUSED_' + org_name + '.csv' + # output_file = '/Users/arahman/Documents/OneDriveWingUp/OneDrive-TennesseeTechUniversity/Research/IaC/FixFalsePositive/output/V9_WIKI.EVALUATION.csv' + # getLOCOnly( output_file ) ## for summary stats + # attribCountPerScript( attr_file_name ) - # resoSemantics( reso_file_name , attr_file_name ) - # resoCountPerScript( reso_file_name , attr_file_name ) + # resoSemantics( reso_file_name , attr_file_name ) ## for RQ3, part 2.a + # resoCountPerScript( reso_file_name , attr_file_name ) ## for RQ3, part 1 + # getSemanticFreq(org_name, reso_file_name ) ## for RQ3, part 2.b + + + - getSemanticFreq(org_name, reso_file_name ) diff --git a/TaintPupCode/TestArtifacts/http.scheme.afd.nagios.pp b/TaintPupCode/TestArtifacts/http.scheme.afd.nagios.pp new file mode 100644 index 0000000..7153222 --- /dev/null +++ b/TaintPupCode/TestArtifacts/http.scheme.afd.nagios.pp @@ -0,0 +1,69 @@ +# Copyright 2015 Mirantis, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# +define lma_collector::afd_nagios( + $server, + $http_port, + $http_path, + $user, + $password, + $http_scheme = 'http', + $ensure = present, + $hostname = $::hostname, + $service_template = '%{node_role}.%{source}', + $message_type = 'afd_node_metric', +){ + include lma_collector::params + include lma_collector::service::metric + + validate_integer($http_port) + + $lua_modules_dir = $lma_collector::params::lua_modules_dir + $url = "${http_scheme}://${server}:${http_port}/${http_path}" + + $config = {'nagios_host' => $hostname, 'service_template' => $service_template} + heka::encoder::sandbox { "nagios_afd_${title}": + ensure => $ensure, + config_dir => $lma_collector::params::metric_config_dir, + filename => "${lma_collector::params::plugins_dir}/encoders/status_nagios.lua", + config => $config, + module_directory => $lua_modules_dir, + notify => Class['lma_collector::service::metric'], + } + + $matcher = join(["Fields[${lma_collector::params::aggregator_flag}] == NIL", + "Type == 'heka.sandbox.${message_type}'", + 'Fields[no_alerting] == NIL', + 'Fields[hostname] != NIL'], ' && ') + + heka::output::http { "nagios_afd_${title}": + ensure => $ensure, + config_dir => $lma_collector::params::metric_config_dir, + url => $url, + message_matcher => $matcher, + username => $user, + password => $password, + encoder => "nagios_afd_${title}", + timeout => $lma_collector::params::nagios_timeout, + headers => { + 'Content-Type' => 'application/x-www-form-urlencoded' + }, + use_buffering => $lma_collector::params::buffering_enabled, + max_buffer_size => $lma_collector::params::buffering_max_buffer_size_for_nagios, + max_file_size => $lma_collector::params::buffering_max_file_size_for_nagios, + queue_full_action => $lma_collector::params::queue_full_action_for_nagios, + require => Heka::Encoder::Sandbox["nagios_afd_${title}"], + notify => Class['lma_collector::service::metric'], + } +} diff --git a/TaintPupCode/TestArtifacts/http.scheme.gse.nagios.pp b/TaintPupCode/TestArtifacts/http.scheme.gse.nagios.pp new file mode 100644 index 0000000..ab5782f --- /dev/null +++ b/TaintPupCode/TestArtifacts/http.scheme.gse.nagios.pp @@ -0,0 +1,71 @@ +# Copyright 2015 Mirantis, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# +define lma_collector::gse_nagios ( + $server, + $http_port, + $http_path, + $user, + $password, + $message_type, + $virtual_hostname, + $ensure = present, + $http_scheme = 'http', + $openstack_deployment_name = '', + $service_template = '%{cluster_name}', +) { + include lma_collector::params + include lma_collector::service::metric + + validate_integer($http_port) + + $lua_modules_dir = $lma_collector::params::lua_modules_dir + $url = "${http_scheme}://${server}:${http_port}/${http_path}" + + # This must be identical logic than in lma-infra-alerting-plugin + $_nagios_host = "${virtual_hostname}-env${openstack_deployment_name}" + + $config = { + 'nagios_host' => $_nagios_host, + 'service_template' => "${title}-${service_template}", + } + heka::encoder::sandbox { "nagios_gse_${title}": + ensure => $ensure, + config_dir => $lma_collector::params::metric_config_dir, + filename => "${lma_collector::params::plugins_dir}/encoders/status_nagios.lua", + config => $config, + module_directory => $lua_modules_dir, + notify => Class['lma_collector::service::metric'], + } + + heka::output::http { "nagios_gse_${title}": + ensure => $ensure, + config_dir => $lma_collector::params::metric_config_dir, + url => $url, + message_matcher => "Type == 'heka.sandbox.${message_type}' && Fields[no_alerting] == NIL", + username => $user, + password => $password, + encoder => "nagios_gse_${title}", + timeout => $lma_collector::params::nagios_timeout, + headers => { + 'Content-Type' => 'application/x-www-form-urlencoded' + }, + use_buffering => $lma_collector::params::buffering_enabled, + max_buffer_size => $lma_collector::params::buffering_max_buffer_size_for_nagios, + max_file_size => $lma_collector::params::buffering_max_file_size_for_nagios, + queue_full_action => $lma_collector::params::queue_full_action_for_nagios, + require => Heka::Encoder::Sandbox["nagios_gse_${title}"], + notify => Class['lma_collector::service::metric'], + } +} diff --git a/TaintPupCode/_TestTaintTracking.py b/TaintPupCode/_TestTaintTracking.py index 318f7c3..09ee06a 100644 --- a/TaintPupCode/_TestTaintTracking.py +++ b/TaintPupCode/_TestTaintTracking.py @@ -66,6 +66,20 @@ def testTaintedHTTP_V3(self): self.assertEqual( 9 , len(http_taint_dict['$cinder_protocol']) , _test_constants._tainted_http_msg_v3) graph.var_tracker_list.clear() + def testTaintedHTTP_V4(self): + _, _, dict_all_attr, dict_all_vari, _, _, _ = parser.executeParser( _test_constants._taintedHttp_script_v4 ) + _, http_dict_vars = orchestra.finalizeHTTP( dict_all_attr, dict_all_vari ) + http_taint_dict = graph.trackTaint( _test_constants.OUTPUT_HTTP_KW, http_dict_vars, dict_all_attr, dict_all_vari ) + self.assertEqual( 1 , len(http_taint_dict['http_scheme']) , _test_constants._tainted_http_msg_v4 ) + graph.var_tracker_list.clear() + + def testTaintedHTTP_V5(self): + _, _, dict_all_attr, dict_all_vari, _, _, _ = parser.executeParser( _test_constants._taintedHttp_script_v5 ) + _, http_dict_vars = orchestra.finalizeHTTP( dict_all_attr, dict_all_vari ) + http_taint_dict = graph.trackTaint( _test_constants.OUTPUT_HTTP_KW, http_dict_vars, dict_all_attr, dict_all_vari ) + self.assertEqual( 1 , len(http_taint_dict['http_scheme']) , _test_constants._tainted_http_msg_v5 ) + graph.var_tracker_list.clear() + def testTaintedHopCountV1(self): _, _, dict_all_attr, dict_all_vari, _, _, _ = parser.executeParser( _test_constants._multi_taint_script_name ) _, http_dict_vars = orchestra.finalizeHTTP( dict_all_attr, dict_all_vari ) diff --git a/TaintPupCode/_test_constants.py b/TaintPupCode/_test_constants.py index 11b8e57..7908305 100644 --- a/TaintPupCode/_test_constants.py +++ b/TaintPupCode/_test_constants.py @@ -88,6 +88,10 @@ _tainted_http_msg_v2 = common_error_string + '5' _taintedHttp_script_v3 = 'TestArtifacts/packstack.keystone.cinder.pp' _tainted_http_msg_v3 = common_error_string + '9' +_taintedHttp_script_v4 = 'TestArtifacts/http.scheme.gse.nagios.pp' +_tainted_http_msg_v4 = common_error_string + '1' +_taintedHttp_script_v5 = 'TestArtifacts/http.scheme.afd.nagios.pp' +_tainted_http_msg_v5 = common_error_string + '1' OUTPUT_HTTP_KW = 'INSECURE_HTTP' diff --git a/TaintPupCode/constants.py b/TaintPupCode/constants.py index 9ec7ca7..0543b1b 100644 --- a/TaintPupCode/constants.py +++ b/TaintPupCode/constants.py @@ -58,7 +58,7 @@ LOCALHOST_KEYWORD = 'localhost' LOCAL_IP_KEYWORD = '//1' XTRA_HTTP_PATTERN = 'http' -XTRA_HTTP_PROTO_KW = '_protocol' +XTRA_HTTP_VAR_KWS = ['_protocol', 'http_scheme'] EXAMPLE_DOMAIN_KEYWORD = '.example.com' YUM_KW = 'yum' INVALID_ATTRIBUTE_KEYWORDS = ['block', 'resource' , '(', ')'] diff --git a/TaintPupCode/orchestra.py b/TaintPupCode/orchestra.py index 7f040f0..d24a250 100644 --- a/TaintPupCode/orchestra.py +++ b/TaintPupCode/orchestra.py @@ -124,11 +124,14 @@ def finalizeHTTP(attr_dict, dict_vars): var_count += 1 var_value = var_data[-1] var_ascii = sanitizeConfigVals( var_value ) + # print( var_name, var_data, var_ascii) if (var_ascii >= 600) and ( constants.HTTP_PATTERN in var_value) and (extraHTTPCheck( var_value) ): # 600 is the total of 'http://' output_variable_dict[var_count] = (var_name, var_value, var_ascii) - elif constants.XTRA_HTTP_PROTO_KW in var_name and (var_ascii == 448 or var_ascii == 526) : ### need to handle $magnum_protocol = 'http', ascii for 'http' is 448 - output_variable_dict[var_count] = ( var_name, var_value, var_ascii) + elif ( any( z in var_name for z in constants.XTRA_HTTP_VAR_KWS) ) and (var_ascii == 448 or var_ascii == 526) : ### need to handle $magnum_protocol = 'http', ascii for 'http' is 448 + output_variable_dict[var_count] = ( var_name, var_value, var_ascii) + + # print( output_variable_dict ) return output_attrib_dict, output_variable_dict # dict will help in taint tracking def finalizeWeakEncrypt(func_dict): @@ -515,7 +518,7 @@ def doFullTaintForSingleScript( pupp_file ): default_admin_tuple = ( default_taint_dict, default_admin_dict ) weak_cryp_tuple = ( weak_cry_dic_taint, weak_crypt_dic ) - # print( secret_dict_attr, secret_taint_dict ) + # print( http_dict_vars, http_taint_dict ) return ( susp_cnt, switch_cnt, ip_tuple, http_tuple, secret_tuple, empty_pass_tuple, default_admin_tuple, weak_cryp_tuple, dict_reso ) @@ -551,7 +554,10 @@ def orchestrateWithTaint(dir_): if __name__=='__main__': - doFullTaintForSingleScript( '/Users/arahman/TAINTPUP_REPOS/GITLAB/simp@puppetlabs-mysql/manifests/backup/mysqldump.pp' ) + # doFullTaintForSingleScript( '/Users/arahman/TAINTPUP_REPOS/GITLAB/simp@puppetlabs-mysql/manifests/backup/mysqldump.pp' ) + # res_tup = doFullTaintForSingleScript( '/Users/arahman/TAINTPUP_REPOS/OPENSTACK/fuel-plugin-lma-collector-2018-06/deployment_scripts/puppet/modules/lma_collector/manifests/gse_nagios.pp' ) + # res_tup = doFullTaintForSingleScript( '/Users/arahman/TAINTPUP_REPOS/OPENSTACK/fuel-plugin-lma-collector-2018-06/deployment_scripts/puppet/modules/lma_collector/manifests/afd_nagios.pp' ) + print('='*50)