Skip to content

Commit

Permalink
update certifier with specific package queries to keep state (#2163)
Browse files Browse the repository at this point in the history
* add new QueryVulnPackagesList and implement ENT backend

Signed-off-by: pxp928 <parth.psu@gmail.com>

* change to single query with enum

Signed-off-by: pxp928 <parth.psu@gmail.com>

* combine queries to consolidate on backend

Signed-off-by: pxp928 <parth.psu@gmail.com>

* add last-scan flag to certifier cli and update unit tests

Signed-off-by: pxp928 <parth.psu@gmail.com>

* implement keyvalue backend implementation for QueryPackagesListForType

Signed-off-by: pxp928 <parth.psu@gmail.com>

* update backend unit test for TestQueryPackagesListForType

Signed-off-by: pxp928 <parth.psu@gmail.com>

* add check if links are not set in keyvalue backend

Signed-off-by: pxp928 <parth.psu@gmail.com>

* update ent search to get latest vuln time to compare

Signed-off-by: pxp928 <parth.psu@gmail.com>

* add license and empty tests for backend

Signed-off-by: pxp928 <parth.psu@gmail.com>

* rename query and update comments

Signed-off-by: pxp928 <parth.psu@gmail.com>

* update graphql schema comments for QueryType

Signed-off-by: pxp928 <parth.psu@gmail.com>

* default last-scan to 4 hours in guac.yaml

Signed-off-by: pxp928 <parth.psu@gmail.com>

* change cli default to 4 hours for last-scan

Signed-off-by: pxp928 <parth.psu@gmail.com>

---------

Signed-off-by: pxp928 <parth.psu@gmail.com>
  • Loading branch information
pxp928 authored Oct 3, 2024
1 parent 7f3e889 commit f13bed1
Show file tree
Hide file tree
Showing 31 changed files with 1,377 additions and 53 deletions.
19 changes: 13 additions & 6 deletions cmd/guaccollect/cmd/license.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"time"

"github.com/Khan/genqlient/graphql"
"github.com/guacsec/guac/pkg/assembler/clients/generated"
"github.com/guacsec/guac/pkg/certifier"
"github.com/guacsec/guac/pkg/certifier/certify"
"github.com/guacsec/guac/pkg/certifier/clearlydefined"
Expand Down Expand Up @@ -54,6 +55,9 @@ type cdOptions struct {
addedLatency *time.Duration
// sets the batch size for pagination query for the certifier
batchSize int
// number of hours since the last scan was run; if not set, all
// packages are returned to be checked
lastScan *int
}

var cdCmd = &cobra.Command{
Expand Down Expand Up @@ -85,6 +89,7 @@ you have access to read and write to the respective blob store.`,
viper.GetBool("publish-to-queue"),
viper.GetString("certifier-latency"),
viper.GetInt("certifier-batch-size"),
viper.GetInt("last-scan"),
)
if err != nil {
fmt.Printf("unable to validate flags: %v\n", err)
Expand All @@ -103,7 +108,7 @@ you have access to read and write to the respective blob store.`,
httpClient := http.Client{Transport: transport}
gqlclient := graphql.NewClient(opts.graphqlEndpoint, &httpClient)

packageQueryFunc, err := getCDPackageQuery(gqlclient, opts.batchSize, opts.addedLatency)
packageQueryFunc, err := getCDPackageQuery(gqlclient, opts.batchSize, opts.addedLatency, opts.lastScan)
if err != nil {
logger.Errorf("error: %v", err)
os.Exit(1)
Expand All @@ -113,9 +118,9 @@ you have access to read and write to the respective blob store.`,
},
}

func getCDPackageQuery(client graphql.Client, batchSize int, addedLatency *time.Duration) (func() certifier.QueryComponents, error) {
func getCDPackageQuery(client graphql.Client, batchSize int, addedLatency *time.Duration, lastScan *int) (func() certifier.QueryComponents, error) {
return func() certifier.QueryComponents {
packageQuery := root_package.NewPackageQuery(client, batchSize, cdQuerySize, addedLatency)
packageQuery := root_package.NewPackageQuery(client, generated.QueryTypeLicense, batchSize, cdQuerySize, addedLatency, lastScan)
return packageQuery
}, nil
}
Expand All @@ -129,7 +134,7 @@ func validateCDFlags(
poll bool,
pubToQueue bool,
certifierLatencyStr string,
batchSize int) (cdOptions, error) {
batchSize int, lastScan int) (cdOptions, error) {

var opts cdOptions

Expand Down Expand Up @@ -157,14 +162,16 @@ func validateCDFlags(
}

opts.batchSize = batchSize

if lastScan != 0 {
opts.lastScan = &lastScan
}
return opts, nil
}

func init() {
set, err := cli.BuildFlags([]string{"interval",
"header-file", "certifier-latency",
"certifier-batch-size"})
"certifier-batch-size", "last-scan"})
if err != nil {
fmt.Fprintf(os.Stderr, "failed to setup flag: %v", err)
os.Exit(1)
Expand Down
19 changes: 13 additions & 6 deletions cmd/guaccollect/cmd/osv.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"time"

"github.com/Khan/genqlient/graphql"
"github.com/guacsec/guac/pkg/assembler/clients/generated"
"github.com/guacsec/guac/pkg/blob"
"github.com/guacsec/guac/pkg/certifier"
"github.com/guacsec/guac/pkg/certifier/certify"
Expand Down Expand Up @@ -62,6 +63,9 @@ type osvOptions struct {
addedLatency *time.Duration
// sets the batch size for pagination query for the certifier
batchSize int
// number of hours since the last scan was run; if not set, all
// packages are returned to be checked
lastScan *int
}

var osvCmd = &cobra.Command{
Expand Down Expand Up @@ -93,6 +97,7 @@ you have access to read and write to the respective blob store.`,
viper.GetBool("publish-to-queue"),
viper.GetString("certifier-latency"),
viper.GetInt("certifier-batch-size"),
viper.GetInt("last-scan"),
)
if err != nil {
fmt.Printf("unable to validate flags: %v\n", err)
Expand All @@ -111,7 +116,7 @@ you have access to read and write to the respective blob store.`,
httpClient := http.Client{Transport: transport}
gqlclient := graphql.NewClient(opts.graphqlEndpoint, &httpClient)

packageQueryFunc, err := getOSVPackageQuery(gqlclient, opts.batchSize, opts.addedLatency)
packageQueryFunc, err := getOSVPackageQuery(gqlclient, opts.batchSize, opts.addedLatency, opts.lastScan)
if err != nil {
logger.Errorf("error: %v", err)
os.Exit(1)
Expand All @@ -130,7 +135,7 @@ func validateOSVFlags(
poll bool,
pubToQueue bool,
certifierLatencyStr string,
batchSize int) (osvOptions, error) {
batchSize int, lastScan int) (osvOptions, error) {

var opts osvOptions

Expand Down Expand Up @@ -158,7 +163,9 @@ func validateOSVFlags(
}

opts.batchSize = batchSize

if lastScan != 0 {
opts.lastScan = &lastScan
}
return opts, nil
}

Expand All @@ -168,9 +175,9 @@ func getCertifierPublish(ctx context.Context, blobStore *blob.BlobStore, pubsub
}, nil
}

func getOSVPackageQuery(client graphql.Client, batchSize int, addedLatency *time.Duration) (func() certifier.QueryComponents, error) {
func getOSVPackageQuery(client graphql.Client, batchSize int, addedLatency *time.Duration, lastScan *int) (func() certifier.QueryComponents, error) {
return func() certifier.QueryComponents {
packageQuery := root_package.NewPackageQuery(client, batchSize, osvQuerySize, addedLatency)
packageQuery := root_package.NewPackageQuery(client, generated.QueryTypeVulnerability, batchSize, osvQuerySize, addedLatency, lastScan)
return packageQuery
}, nil
}
Expand Down Expand Up @@ -253,7 +260,7 @@ func initializeNATsandCertifier(ctx context.Context, blobAddr, pubsubAddr string
func init() {
set, err := cli.BuildFlags([]string{"interval",
"header-file", "certifier-latency",
"certifier-batch-size"})
"certifier-batch-size", "last-scan"})
if err != nil {
fmt.Fprintf(os.Stderr, "failed to setup flag: %v", err)
os.Exit(1)
Expand Down
15 changes: 12 additions & 3 deletions cmd/guacone/cmd/license.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"time"

"github.com/Khan/genqlient/graphql"
"github.com/guacsec/guac/pkg/assembler/clients/generated"
"github.com/guacsec/guac/pkg/certifier"
"github.com/guacsec/guac/pkg/certifier/certify"
"github.com/guacsec/guac/pkg/certifier/clearlydefined"
Expand Down Expand Up @@ -56,6 +57,9 @@ type cdOptions struct {
addedLatency *time.Duration
// sets the batch size for pagination query for the certifier
batchSize int
// number of hours since the last scan was run; if not set, all
// packages are returned to be checked
lastScan *int
}

var cdCmd = &cobra.Command{
Expand All @@ -74,6 +78,7 @@ var cdCmd = &cobra.Command{
viper.GetBool("add-license-on-ingest"),
viper.GetString("certifier-latency"),
viper.GetInt("certifier-batch-size"),
viper.GetInt("last-scan"),
)
if err != nil {
fmt.Printf("unable to validate flags: %v\n", err)
Expand All @@ -100,7 +105,7 @@ var cdCmd = &cobra.Command{

httpClient := http.Client{Transport: transport}
gqlclient := graphql.NewClient(opts.graphqlEndpoint, &httpClient)
packageQuery := root_package.NewPackageQuery(gqlclient, opts.batchSize, cdQuerySize, opts.addedLatency)
packageQuery := root_package.NewPackageQuery(gqlclient, generated.QueryTypeLicense, opts.batchSize, cdQuerySize, opts.addedLatency, opts.lastScan)

totalNum := 0
docChan := make(chan *processor.Document)
Expand Down Expand Up @@ -230,7 +235,7 @@ func validateCDFlags(
queryVulnIngestion bool,
queryLicenseIngestion bool,
certifierLatencyStr string,
batchSize int,
batchSize int, lastScan int,
) (cdOptions, error) {
var opts cdOptions
opts.graphqlEndpoint = graphqlEndpoint
Expand All @@ -254,6 +259,10 @@ func validateCDFlags(

opts.batchSize = batchSize

if lastScan != 0 {
opts.lastScan = &lastScan
}

csubOpts, err := csub_client.ValidateCsubClientFlags(csubAddr, csubTls, csubTlsSkipVerify)
if err != nil {
return opts, fmt.Errorf("unable to validate csub client flags: %w", err)
Expand All @@ -267,7 +276,7 @@ func validateCDFlags(

func init() {
set, err := cli.BuildFlags([]string{"certifier-latency",
"certifier-batch-size"})
"certifier-batch-size", "last-scan"})
if err != nil {
fmt.Fprintf(os.Stderr, "failed to setup flag: %v", err)
os.Exit(1)
Expand Down
15 changes: 12 additions & 3 deletions cmd/guacone/cmd/osv.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"time"

"github.com/Khan/genqlient/graphql"
"github.com/guacsec/guac/pkg/assembler/clients/generated"
"github.com/guacsec/guac/pkg/certifier"
"github.com/guacsec/guac/pkg/certifier/certify"
"github.com/guacsec/guac/pkg/certifier/components/root_package"
Expand Down Expand Up @@ -56,6 +57,9 @@ type osvOptions struct {
addedLatency *time.Duration
// sets the batch size for pagination query for the certifier
batchSize int
// number of hours since the last scan was run; if not set, all
// packages are returned to be checked
lastScan *int
}

var osvCmd = &cobra.Command{
Expand All @@ -74,6 +78,7 @@ var osvCmd = &cobra.Command{
viper.GetBool("add-license-on-ingest"),
viper.GetString("certifier-latency"),
viper.GetInt("certifier-batch-size"),
viper.GetInt("last-scan"),
)
if err != nil {
fmt.Printf("unable to validate flags: %v\n", err)
Expand All @@ -100,7 +105,7 @@ var osvCmd = &cobra.Command{

httpClient := http.Client{Transport: transport}
gqlclient := graphql.NewClient(opts.graphqlEndpoint, &httpClient)
packageQuery := root_package.NewPackageQuery(gqlclient, opts.batchSize, osvQuerySize, opts.addedLatency)
packageQuery := root_package.NewPackageQuery(gqlclient, generated.QueryTypeVulnerability, opts.batchSize, osvQuerySize, opts.addedLatency, opts.lastScan)

totalNum := 0
docChan := make(chan *processor.Document)
Expand Down Expand Up @@ -231,7 +236,7 @@ func validateOSVFlags(
queryVulnIngestion bool,
queryLicenseIngestion bool,
certifierLatencyStr string,
batchSize int,
batchSize int, lastScan int,
) (osvOptions, error) {
var opts osvOptions
opts.graphqlEndpoint = graphqlEndpoint
Expand All @@ -255,6 +260,10 @@ func validateOSVFlags(

opts.batchSize = batchSize

if lastScan != 0 {
opts.lastScan = &lastScan
}

csubOpts, err := csub_client.ValidateCsubClientFlags(csubAddr, csubTls, csubTlsSkipVerify)
if err != nil {
return opts, fmt.Errorf("unable to validate csub client flags: %w", err)
Expand All @@ -268,7 +277,7 @@ func validateOSVFlags(

func init() {
set, err := cli.BuildFlags([]string{"certifier-latency",
"certifier-batch-size"})
"certifier-batch-size", "last-scan"})
if err != nil {
fmt.Fprintf(os.Stderr, "failed to setup flag: %v", err)
os.Exit(1)
Expand Down
3 changes: 3 additions & 0 deletions container_files/arango/guac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ use-csub: true
poll: true
interval: 5m

# number of hours since the last scan was run. 0 means run on all packages/sources
last-scan: 4

# arangodb
arango-user: root
arango-pass: test123
Expand Down
3 changes: 3 additions & 0 deletions container_files/ent/guac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ use-csub: true
poll: true
interval: 5m

# number of hours since the last scan was run. 0 means run on all packages/sources
last-scan: 4

# Ent config
db-driver: postgres
db-address: postgres://guac:guac@postgres:5432/guac?sslmode=disable
Expand Down
3 changes: 3 additions & 0 deletions container_files/guac/guac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ poll: true
# certifier interval
interval: 20m

# number of hours since the last scan was run. 0 means run on all packages/sources
last-scan: 4

# set the batch size for the package pagination query
certifier-batch-size: 60000
# add artificial latency to throttle the certifier
Expand Down
3 changes: 3 additions & 0 deletions container_files/neo4j/guac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ use-csub: true
poll: true
interval: 5m

# number of hours since the last scan was run. 0 means run on all packages/sources
last-scan: 4

# Neo4j details
neo4j-user: neo4j
neo4j-pass: s3cr3t
Expand Down
3 changes: 3 additions & 0 deletions container_files/redis/guac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,6 @@ use-csub: true
# certifier polling
poll: true
interval: 5m

# number of hours since the last scan was run. 0 means run on all packages/sources
last-scan: 4
3 changes: 3 additions & 0 deletions container_files/tikv/guac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,6 @@ use-csub: true
# certifier polling
poll: true
interval: 5m

# number of hours since the last scan was run. 0 means run on all packages/sources
last-scan: 4
3 changes: 3 additions & 0 deletions guac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ blob-addr: file:///tmp/blobstore?no_tmp_dir=true
# certifier interval
interval: 20m

# number of hours since the last scan was run. 0 means run on all packages/sources
last-scan: 4

# set the batch size for the package pagination query
certifier-batch-size: 60000
# add artificial latency to throttle the certifier
Expand Down
7 changes: 4 additions & 3 deletions internal/testing/backend/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,10 @@ var skipMatrix = map[string]map[string]bool{
"TestVEXBulkIngest": {arango: true, redis: true},
"TestFindSoftware": {redis: true, arango: true},
// remove these once it is implemented for the other backends
"TestDeleteCertifyVuln": {arango: true, memmap: true, redis: true, tikv: true},
"TestDeleteHasSBOM": {arango: true, memmap: true, redis: true, tikv: true},
"TestDeleteHasSLSAs": {arango: true, memmap: true, redis: true, tikv: true},
"TestDeleteCertifyVuln": {arango: true, memmap: true, redis: true, tikv: true},
"TestDeleteHasSBOM": {arango: true, memmap: true, redis: true, tikv: true},
"TestDeleteHasSLSAs": {arango: true, memmap: true, redis: true, tikv: true},
"TestQueryPackagesListForScan": {arango: true, redis: true, tikv: true},
}

type backend interface {
Expand Down
Loading

0 comments on commit f13bed1

Please sign in to comment.