From 2b265f168868631c1cd34654e1924d660703b90e Mon Sep 17 00:00:00 2001 From: Alex Pop Date: Thu, 14 May 2020 07:39:05 +0100 Subject: [PATCH 1/2] Added RunTimeLimit option and runner code to strip profiles metadata Signed-off-by: Alex Pop Add unit tests for metadata strip Signed-off-by: Alex Pop --- .../cmd/compliance-service/cmd/run.go | 2 + components/compliance-service/compliance.go | 2 + components/compliance-service/config/types.go | 1 + .../inspec-agent/runner/runner.go | 75 +++++- .../inspec-agent/runner/runner_test.go | 214 ++++++++++++++++++ 5 files changed, 292 insertions(+), 2 deletions(-) diff --git a/components/compliance-service/cmd/compliance-service/cmd/run.go b/components/compliance-service/cmd/compliance-service/cmd/run.go index 381218ea18e..3814cf23372 100644 --- a/components/compliance-service/cmd/compliance-service/cmd/run.go +++ b/components/compliance-service/cmd/compliance-service/cmd/run.go @@ -34,6 +34,7 @@ var conf = config.Compliance{ TmpDir: os.Getenv("TMPDIR"), ResultMessageLimit: 10000, ControlResultsLimit: 50, + RunTimeLimit: 1.0, }, ElasticSearch: config.ElasticSearch{}, ElasticSearchSidecar: config.ElasticSearchSidecar{ @@ -124,6 +125,7 @@ func init() { runCmd.Flags().StringVar(&conf.InspecAgent.RemoteInspecVersion, "remote-inspec-version", conf.InspecAgent.RemoteInspecVersion, "Option to specify the version of inspec to use for remote(e.g. AWS SSM) scan jobs") runCmd.Flags().IntVar(&conf.InspecAgent.ResultMessageLimit, "result-message-limit", conf.InspecAgent.ResultMessageLimit, "A control result message that exceeds this character limit will be truncated") runCmd.Flags().IntVar(&conf.InspecAgent.ControlResultsLimit, "control-results-limit", conf.InspecAgent.ControlResultsLimit, "The array of results per control will be truncated at this limit to avoid large reports that cannot be processed") + runCmd.Flags().Float32Var(&conf.InspecAgent.RunTimeLimit, "run-time-limit", conf.InspecAgent.RunTimeLimit, "Control results that have a `run_time` below this limit will be stripped of the `start_time` and `run_time` fields") // Legacy Automate Headers/User Info runCmd.Flags().StringVar(&conf.Delivery.Enterprise, "delivery-ent", conf.Delivery.Enterprise, "Automate Enterprise") diff --git a/components/compliance-service/compliance.go b/components/compliance-service/compliance.go index 1c1e11519a9..9157c3c518f 100644 --- a/components/compliance-service/compliance.go +++ b/components/compliance-service/compliance.go @@ -122,6 +122,7 @@ func initBits(ctx context.Context, conf *config.Compliance) (db *pgdb.DB, connFa inspec.ResultMessageLimit = conf.InspecAgent.ResultMessageLimit runner.ControlResultsLimit = conf.InspecAgent.ControlResultsLimit + runner.RunTimeLimit = conf.InspecAgent.RunTimeLimit inspec.TmpDir = conf.InspecAgent.TmpDir // Let's have something sensible if the temp dir is not specified @@ -172,6 +173,7 @@ func serveGrpc(ctx context.Context, db *pgdb.DB, connFactory *secureconn.Factory nodeManagerServiceClient := getManagerConnection(connFactory, conf.Manager.Endpoint) ingesticESClient := ingestic.NewESClient(esClient) ingesticESClient.InitializeStore(context.Background()) + runner.ESClient = ingesticESClient s := connFactory.NewServer(tracing.GlobalServerInterceptor()) diff --git a/components/compliance-service/config/types.go b/components/compliance-service/config/types.go index 8f6cf753fd3..27847f4e723 100644 --- a/components/compliance-service/config/types.go +++ b/components/compliance-service/config/types.go @@ -83,6 +83,7 @@ type InspecAgent struct { RemoteInspecVersion string ResultMessageLimit int ControlResultsLimit int + RunTimeLimit float32 } // Postgres specific options diff --git a/components/compliance-service/inspec-agent/runner/runner.go b/components/compliance-service/inspec-agent/runner/runner.go index 8105b9b7763..f022e973a69 100644 --- a/components/compliance-service/inspec-agent/runner/runner.go +++ b/components/compliance-service/inspec-agent/runner/runner.go @@ -23,6 +23,7 @@ import ( ingest_inspec "github.com/chef/automate/api/interservice/compliance/ingest/events/inspec" "github.com/chef/automate/api/interservice/compliance/ingest/ingest" "github.com/chef/automate/api/interservice/compliance/jobs" + "github.com/chef/automate/components/compliance-service/ingest/ingestic" "github.com/chef/automate/components/compliance-service/ingest/ingestic/mappings" "github.com/chef/automate/components/compliance-service/inspec" "github.com/chef/automate/components/compliance-service/inspec-agent/remote" @@ -37,6 +38,12 @@ var ListenPort int = 2133 // ControlResultsLimit used for configuring inspec exec command, passed in via config flag var ControlResultsLimit int = 50 +// RunTimeLimit used for report post processing to reduce the size of the report. +var RunTimeLimit float32 = 1.0 + +// Set from compliance.go to call the ElasticSearch backend +var ESClient *ingestic.ESClient + var ( ScanJobWorkflowName = cereal.NewWorkflowName("scan-job-workflow") @@ -531,9 +538,22 @@ func (t *InspecJobTask) reportIt(ctx context.Context, job *types.InspecJob, cont logrus.Debugf("hand-over report to ingest service") removeResults(reportID, report.Profiles, ControlResultsLimit) - // strip profile meta if missing from the backend - _, err := t.ingestClient.ProcessComplianceReport(ctx, &report) + allReportProfileIds := make([]string, len(report.Profiles)) + for i, profile := range report.Profiles { + allReportProfileIds[i] = profile.Sha256 + } + esProfilesMissingMeta, err := ESClient.ProfilesMissing(allReportProfileIds) + if err != nil { + return errors.Wrap(err, "Report profiles processing error") + } + profilesMissingMeta := make(map[string]struct{}, 0) + for _, profileId := range esProfilesMissingMeta { + profilesMissingMeta[profileId] = struct{}{} + } + stripProfilesMetadata(&report, profilesMissingMeta, RunTimeLimit) + + _, err = t.ingestClient.ProcessComplianceReport(ctx, &report) if err != nil { return errors.Wrap(err, "Report processing error") } @@ -756,6 +776,8 @@ func doExec(job *types.InspecJob) (jsonBytes []byte, err *inspec.Error) { return jsonBytes, nil } +// The purpose of this function is to remove the results from a control once they +// exceed `limit`. This avoids large reports that cannot be ingested. func removeResults(reportId string, profiles []*ingest_inspec.Profile, limit int) { for _, profile := range profiles { if profile.Controls != nil { @@ -788,6 +810,55 @@ func removeResults(reportId string, profiles []*ingest_inspec.Profile, limit int } } +// The purpose of this function is to remove the profiles metadata (control title, desc, code, etc) +// for profiles that already exist in the Automate profiles index. +func stripProfilesMetadata(report *ingest_events_compliance_api.Report, missingProfiles map[string]struct{}, runTimeLimit float32) { + if report.Profiles == nil { + return + } + for _, profile := range report.Profiles { + // If the profile is not in the ones missing in the backend, we can proceed with the metadata removal + if _, ok := missingProfiles[profile.Sha256]; ok { + continue + } + // Profile 'Name' is a required property. By not sending it in the report, we make it clear to the ingestion backend that the profile metadata has been stripped from this profile in the report. + // Profile 'title' and 'version' are still kept for troubleshooting purposes in the backend. + profile.Name = "" + profile.Groups = nil + profile.CopyrightEmail = "" + profile.Copyright = "" + profile.Summary = "" + profile.Supports = nil + profile.License = "" + profile.Maintainer = "" + profile.Depends = nil + if profile.Controls == nil { + continue + } + for _, control := range profile.Controls { + control.Code = "" + control.Desc = "" + // TODO: Add here control.Descriptions when the field makes it into the structs + control.Impact = 0 + control.Refs = nil + control.Tags = nil + control.Title = "" + control.SourceLocation = nil + control.WaiverData = nil + if control.Results == nil { + continue + } + for _, result := range control.Results { + if result.RunTime < runTimeLimit { + result.RunTime = 0 + result.StartTime = "" + } + } + } + report.RunTimeLimit = runTimeLimit + } +} + func potentialTargetConfigs(job *types.InspecJob) []inspec.TargetConfig { // GCP profile requires the project_id to be passed in as an attribute. Used the SubscriptionId to get this value from the database if job.TargetConfig.Backend == "gcp" { diff --git a/components/compliance-service/inspec-agent/runner/runner_test.go b/components/compliance-service/inspec-agent/runner/runner_test.go index 5e7c9b7375e..4cde0ab1adb 100644 --- a/components/compliance-service/inspec-agent/runner/runner_test.go +++ b/components/compliance-service/inspec-agent/runner/runner_test.go @@ -3,9 +3,11 @@ package runner import ( "testing" + ingest_events_compliance_api "github.com/chef/automate/api/interservice/compliance/ingest/events/compliance" ingest_inspec "github.com/chef/automate/api/interservice/compliance/ingest/events/inspec" "github.com/chef/automate/components/compliance-service/inspec" "github.com/chef/automate/components/compliance-service/inspec-agent/types" + "github.com/golang/protobuf/ptypes/struct" "github.com/stretchr/testify/assert" ) @@ -197,3 +199,215 @@ func TestRemoveResults(t *testing.T) { }, profiles) } + +func TestStripProfilesMetadata(t *testing.T) { + report := ingest_events_compliance_api.Report{ + ReportUuid: "test1", + Profiles: []*ingest_inspec.Profile{ + { + Name: "test-profile-1", + Sha256: "test-profile-1-sha256-id", + Version: "4.5.6", + Maintainer: "Testus", + Copyright: "Testus Biz", + CopyrightEmail: "Testus Biz", + Title: "Test Profile 1", + Depends: []*ingest_inspec.Dependency{ + { + Name: "test", + }, + }, + Supports: []*ingest_inspec.Support{ + { + Inspec: "2.3.4", + }, + }, + Controls: []*ingest_inspec.Control{ + { + Id: "p1c1", + Title: "p1c1 title", + Impact: 1, + Code: "some code", + Desc: "some desc", + SourceLocation: &ingest_inspec.SourceLocation{ + Ref: "123", + Line: 456, + }, + Refs: []*structpb.Struct{}, + Tags: &structpb.Struct{}, + Results: []*ingest_inspec.Result{ + { + Message: "Resource is as expected", + Status: "passed", + RunTime: 0.12345, + StartTime: "2020-05-15T18:37:19+01:00", + }, + { + Message: "Resource is as expected", + Status: "passed", + RunTime: 1.12345, + StartTime: "2020-05-15T18:37:20+01:00", + }, + }, + }, + { + Id: "p1c2", + Title: "p1c2 title", + Impact: 1, + Code: "some code", + Desc: "some desc", + Results: []*ingest_inspec.Result{ + { + Message: "Resource is as expected", + Status: "skipped", + RunTime: 0.42345, + StartTime: "2020-05-15T18:37:21+01:00", + }, + }, + }, + }, + }, + { + Name: "test-profile-2", + Sha256: "test-profile-2-sha256-id", + Version: "4.5.6", + Maintainer: "Testus2", + Copyright: "Testus Biz2", + CopyrightEmail: "Testus Biz2", + Title: "Test Profile 2", + Depends: []*ingest_inspec.Dependency{ + { + Name: "test2", + }, + }, + Supports: []*ingest_inspec.Support{ + { + Inspec: "2.3.5", + }, + }, + Controls: []*ingest_inspec.Control{ + { + Id: "p2c1", + Title: "p2c1 title", + Impact: 0.6, + Code: "some code", + Desc: "some desc", + SourceLocation: &ingest_inspec.SourceLocation{ + Ref: "12", + Line: 45, + }, + Refs: []*structpb.Struct{}, + Tags: &structpb.Struct{}, + Results: []*ingest_inspec.Result{ + { + Message: "Resource is as expected", + Status: "passed", + RunTime: 0.12345, + StartTime: "2020-05-15T18:37:19+01:00", + }, + { + Message: "Resource is as expected", + Status: "passed", + RunTime: 1.12345, + StartTime: "2020-05-15T18:37:20+01:00", + }, + }, + }, + }, + }, + }, + } + var missingProfilesMap = map[string]struct{}{ + "test-profile-2-sha256-id": {}, + } + + stripProfilesMetadata(&report, missingProfilesMap, 1.1) + + expectedReport := ingest_events_compliance_api.Report{ + ReportUuid: "test1", + Profiles: []*ingest_inspec.Profile{ + { + Sha256: "test-profile-1-sha256-id", + Version: "4.5.6", + Title: "Test Profile 1", + Controls: []*ingest_inspec.Control{ + { + Id: "p1c1", + Results: []*ingest_inspec.Result{ + { + Message: "Resource is as expected", + Status: "passed", + }, + { + Message: "Resource is as expected", + Status: "passed", + RunTime: 1.12345, + StartTime: "2020-05-15T18:37:20+01:00", + }, + }, + }, + { + Id: "p1c2", + Results: []*ingest_inspec.Result{ + { + Message: "Resource is as expected", + Status: "skipped", + }, + }, + }, + }, + }, + { + Name: "test-profile-2", + Sha256: "test-profile-2-sha256-id", + Version: "4.5.6", + Maintainer: "Testus2", + Copyright: "Testus Biz2", + CopyrightEmail: "Testus Biz2", + Title: "Test Profile 2", + Depends: []*ingest_inspec.Dependency{ + { + Name: "test2", + }, + }, + Supports: []*ingest_inspec.Support{ + { + Inspec: "2.3.5", + }, + }, + Controls: []*ingest_inspec.Control{ + { + Id: "p2c1", + Title: "p2c1 title", + Impact: 0.6, + Code: "some code", + Desc: "some desc", + SourceLocation: &ingest_inspec.SourceLocation{ + Ref: "12", + Line: 45, + }, + Refs: []*structpb.Struct{}, + Tags: &structpb.Struct{}, + Results: []*ingest_inspec.Result{ + { + Message: "Resource is as expected", + Status: "passed", + RunTime: 0.12345, + StartTime: "2020-05-15T18:37:19+01:00", + }, + { + Message: "Resource is as expected", + Status: "passed", + RunTime: 1.12345, + StartTime: "2020-05-15T18:37:20+01:00", + }, + }, + }, + }, + }, + }, + RunTimeLimit: 1.1, + } + + assert.Equal(t, expectedReport, report) +} From 71d102f050739582b3c2ca1e11416edda35fd640 Mon Sep 17 00:00:00 2001 From: Alex Pop Date: Mon, 18 May 2020 18:00:06 +0100 Subject: [PATCH 2/2] Document RunTimeLimit as seconds Signed-off-by: Alex Pop --- .../compliance-service/cmd/compliance-service/cmd/run.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/components/compliance-service/cmd/compliance-service/cmd/run.go b/components/compliance-service/cmd/compliance-service/cmd/run.go index 3814cf23372..5a49b096c8d 100644 --- a/components/compliance-service/cmd/compliance-service/cmd/run.go +++ b/components/compliance-service/cmd/compliance-service/cmd/run.go @@ -34,7 +34,7 @@ var conf = config.Compliance{ TmpDir: os.Getenv("TMPDIR"), ResultMessageLimit: 10000, ControlResultsLimit: 50, - RunTimeLimit: 1.0, + RunTimeLimit: 1.0, // Value in seconds }, ElasticSearch: config.ElasticSearch{}, ElasticSearchSidecar: config.ElasticSearchSidecar{ @@ -125,7 +125,7 @@ func init() { runCmd.Flags().StringVar(&conf.InspecAgent.RemoteInspecVersion, "remote-inspec-version", conf.InspecAgent.RemoteInspecVersion, "Option to specify the version of inspec to use for remote(e.g. AWS SSM) scan jobs") runCmd.Flags().IntVar(&conf.InspecAgent.ResultMessageLimit, "result-message-limit", conf.InspecAgent.ResultMessageLimit, "A control result message that exceeds this character limit will be truncated") runCmd.Flags().IntVar(&conf.InspecAgent.ControlResultsLimit, "control-results-limit", conf.InspecAgent.ControlResultsLimit, "The array of results per control will be truncated at this limit to avoid large reports that cannot be processed") - runCmd.Flags().Float32Var(&conf.InspecAgent.RunTimeLimit, "run-time-limit", conf.InspecAgent.RunTimeLimit, "Control results that have a `run_time` below this limit will be stripped of the `start_time` and `run_time` fields") + runCmd.Flags().Float32Var(&conf.InspecAgent.RunTimeLimit, "run-time-limit", conf.InspecAgent.RunTimeLimit, "Control results that have a `run_time` (in seconds) below this limit will be stripped of the `start_time` and `run_time` fields") // Legacy Automate Headers/User Info runCmd.Flags().StringVar(&conf.Delivery.Enterprise, "delivery-ent", conf.Delivery.Enterprise, "Automate Enterprise")